"""Column-summary and plotting helpers for the conversion data set.

Every dtype-based helper first casts the ``converted`` and ``new_user``
columns to the pandas ``category`` dtype, then splits the frame's columns
into "categorical" (``category``/``object`` dtypes) and "numerical"
(everything else).  The casting is done on a copy, so the DataFrame passed
in by the caller is never modified.
"""
import pandas as pd

# Columns that are cast to ``category`` before any dtype-based selection.
_FLAG_COLUMNS = ('converted', 'new_user')

# Dtypes treated as "categorical" by select_dtypes in the helpers below.
_CATEGORICAL_DTYPES = ['category', 'object']

_DATA_PATH = 'data/conversion_data.csv'

# Module-level sample frame, kept so ``from build import df`` keeps working.
# Importing the helpers must not fail just because the CSV is absent (for
# example when they are imported from another working directory), so a
# missing file leaves ``df`` as None instead of aborting the import.
try:
    df = pd.read_csv(_DATA_PATH)
except IOError:  # FileNotFoundError is a subclass on Python 3
    df = None


def _with_flag_categories(df):
    """Return a copy of *df* with the flag columns cast to ``category``.

    ``DataFrame.astype`` with a column -> dtype mapping returns a new frame,
    so the caller's DataFrame keeps its original dtypes.  Raises ``KeyError``
    if either flag column is missing, as direct column access would.
    """
    return df.astype(dict((column, 'category') for column in _FLAG_COLUMNS))


def get_categorical_variables(df):
    """Return the labels of the ``category``/``object`` columns of *df*.

    ``converted`` and ``new_user`` are counted as categorical.  The result
    is a pandas ``Index`` in the frame's column order.
    """
    typed = _with_flag_categories(df)
    return typed.select_dtypes(include=_CATEGORICAL_DTYPES).columns


def get_numerical_variables(df):
    """Return the labels of the columns that are not ``category``/``object``.

    ``converted`` and ``new_user`` are cast to ``category`` first and are
    therefore excluded.  The result is a pandas ``Index``.
    """
    typed = _with_flag_categories(df)
    return typed.select_dtypes(exclude=_CATEGORICAL_DTYPES).columns


def get_numerical_variables_percentile(df):
    """Return ``describe()`` statistics for the numerical columns of *df*.

    The numerical columns are the ones reported by
    ``get_numerical_variables``; the result is the DataFrame produced by
    ``DataFrame.describe`` on that subset.
    """
    typed = _with_flag_categories(df)
    numerical = typed.select_dtypes(exclude=_CATEGORICAL_DTYPES)
    return numerical.describe()


def get_categorical_variables_modes(df):
    """Return the mode(s) of every categorical column of *df*.

    The categorical columns are the ones reported by
    ``get_categorical_variables``; the result is the DataFrame produced by
    ``DataFrame.mode`` on that subset.
    """
    typed = _with_flag_categories(df)
    categorical = typed.select_dtypes(include=_CATEGORICAL_DTYPES)
    return categorical.mode()


def get_missing_values_count(df):
    """Return the number of missing values in each column of *df*.

    The result is a Series indexed by column label.  ``isnull()`` alone
    only yields a per-cell boolean mask, so it is summed column-wise to
    produce the actual counts.
    """
    return df.isnull().sum()


def plot_histogram_with_numerical_values(df):
    """Draw a histogram of *df* and return what ``df.plot.hist()`` returns."""
    return df.plot.hist()


def plot_facet_box(df):
    """Draw a box plot of *df* and return what ``df.plot.box()`` returns."""
    return df.plot.box()