"""Summary and plotting helpers for a DataFrame with flag columns.

The ``new_user`` and ``converted`` columns are always treated as categorical,
whatever dtype they were loaded with.  Every other column is classified by
its pandas dtype: ``object``/``category`` columns are categorical and all
remaining columns are numerical.

None of the helpers modify the DataFrame they are given.
"""

import pandas as pd
from scipy.stats import norm

# NOTE(review): the patch that introduced this module also committed compiled
# ``*.pyc`` artifacts (build.pyc, tests/*.pyc); they should be removed from
# version control and ignored.

# Columns that hold categorical flags even when they are stored as numbers.
_FLAG_COLUMNS = ('new_user', 'converted')

# dtypes that mark a column as categorical for ``select_dtypes``.
_CATEGORICAL_DTYPES = ['object', 'category']


def _with_categorical_flags(df):
    """Return *df* with its flag columns cast to ``category`` dtype.

    The cast is done with ``DataFrame.astype``, which returns a new frame, so
    the caller's DataFrame keeps its original dtypes.  Flag columns that are
    not present in *df* are skipped instead of raising ``KeyError``.
    """
    flags = {col: 'category' for col in _FLAG_COLUMNS if col in df.columns}
    if not flags:
        return df
    return df.astype(flags)


def _categorical_frame(df):
    """Return only the categorical columns of *df* (flags included)."""
    return _with_categorical_flags(df).select_dtypes(
        include=_CATEGORICAL_DTYPES)


def _numerical_frame(df):
    """Return only the numerical columns of *df* (flags excluded)."""
    return _with_categorical_flags(df).select_dtypes(
        exclude=_CATEGORICAL_DTYPES)


def get_categorical_variables(df):
    """Return the names of the categorical columns of *df*.

    Returns:
        A pandas ``Index`` of column names, in the order they appear in *df*.
    """
    return _categorical_frame(df).columns


def get_numerical_variables(df):
    """Return the names of the numerical columns of *df*.

    Returns:
        A pandas ``Index`` of column names, in the order they appear in *df*.
    """
    return _numerical_frame(df).columns


def get_numerical_variables_percentile(df):
    """Return ``describe()`` statistics for the numerical columns of *df*.

    Returns:
        The DataFrame produced by ``DataFrame.describe`` (count, mean, std,
        min, quartiles, max) with one column per numerical column.
    """
    return _numerical_frame(df).describe()


def get_categorical_variables_modes(df):
    """Return the mode(s) of every categorical column of *df*.

    Returns:
        The DataFrame produced by ``DataFrame.mode``: one column per
        categorical column and one row per tied mode.
    """
    return _categorical_frame(df).mode()


def get_missing_values_count(df):
    """Count the missing values in each column of *df*.

    Returns:
        A two-column DataFrame: ``index`` holds the column name and ``0``
        holds the number of null entries in that column.
    """
    return pd.isnull(df).sum().reset_index()


def plot_histogram_with_numerical_values(df):
    """Draw one histogram per numerical column, side by side.

    Each histogram is overlaid with a fitted normal curve and titled with the
    column name.  The plots are drawn on the current matplotlib figure in a
    single row; nothing is drawn when *df* has no numerical columns.
    """
    # Imported lazily so the summary helpers above can be used (and tested)
    # without pulling in a plotting backend.
    import matplotlib.pyplot as plt
    import seaborn as sns

    numerical = _numerical_frame(df)
    total = len(numerical.columns)
    for position, column in enumerate(numerical.columns, 1):
        plt.subplot(1, total, position)
        plt.title(column)
        # NOTE: distplot is deprecated in seaborn >= 0.11; it is kept because
        # its replacement (histplot) is unavailable on older seaborn releases.
        sns.distplot(numerical[column], color='yellow', fit=norm, kde=False)


def plot_facet_box(df):
    """Draw box plots of ``age`` and ``total_pages_visited`` by ``converted``.

    The two plots are drawn side by side on the current matplotlib figure.
    """
    # Imported lazily; see plot_histogram_with_numerical_values.
    import matplotlib.pyplot as plt
    import seaborn as sns

    for position, column in enumerate(('age', 'total_pages_visited'), 1):
        plt.subplot(1, 2, position)
        # x/y are passed by keyword: recent seaborn rejects them positionally.
        sns.boxplot(x='converted', y=column, data=df)