"""Summary and plotting helpers for a pandas DataFrame.

The helpers expect ``df`` to carry the columns ``country``, ``new_user``,
``source``, ``converted``, ``age`` and ``total_pages_visited``.
"""

import numpy as np
import pandas as pd
from scipy.stats import norm

# matplotlib and seaborn are imported inside the plotting functions so the
# pure-data helpers stay importable where no plotting stack is installed.

_CATEGORICAL_COLUMNS = ['country', 'new_user', 'source', 'converted']
_NUMERICAL_COLUMNS = ['age', 'total_pages_visited']


def get_categorical_variables(df):
    """Return the names of the categorical columns of ``df`` as a list.

    Raises:
        KeyError: if any of the expected columns is missing from ``df``.
    """
    # Selecting the columns first makes a missing column fail loudly.
    return list(df[_CATEGORICAL_COLUMNS].columns)


def get_numerical_variables(df):
    """Return the names of the numerical columns of ``df`` as a list.

    Raises:
        KeyError: if any of the expected columns is missing from ``df``.
    """
    return list(df[_NUMERICAL_COLUMNS].columns)


def get_numerical_variables_percentile(df):
    """Return ``df.describe()`` (count, mean, std, min, quartiles, max).

    NOTE(review): this uses pandas' default column selection, so it is not
    restricted to the columns named by ``get_numerical_variables`` --
    confirm whether that is intended.
    """
    return df.describe()


def get_categorical_variables_modes(df):
    """Return a DataFrame holding the mode(s) of each categorical column."""
    return df[get_categorical_variables(df)].mode()


def get_missing_values_count(df):
    """Return a one-column DataFrame with the number of nulls per column.

    The result is indexed by the column names of ``df``.
    """
    return pd.DataFrame(df.isnull().sum())


def _plot_density_with_normal_fit(axis, values, label, n_points):
    """Draw a density histogram of ``values`` and overlay a fitted normal PDF."""
    # `density=True` replaces the `normed` keyword, which matplotlib removed.
    axis.hist(values, color='yellow', bins=10, align='mid', density=True)
    axis.set_xlabel(label)

    mu, std = norm.fit(values)
    # Use this axes' own limits: pyplot's xlim() reads the *current* axes,
    # which is the wrong panel for every subplot but the last one created.
    xmin, xmax = axis.get_xlim()
    x = np.linspace(xmin, xmax, n_points)
    axis.plot(x, norm.pdf(x, mu, std), 'k', linewidth=2)


def plot_histogram_with_numerical_values(df):
    """Show side-by-side density histograms of the numerical columns.

    Each panel (``age`` on the left, ``total_pages_visited`` on the right)
    is overlaid with the normal curve fitted to that column. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    import matplotlib.pyplot as plt

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
    _plot_density_with_normal_fit(axes[0], df['age'], 'age', 20)
    _plot_density_with_normal_fit(
        axes[1], df['total_pages_visited'], 'total_pages_visited', 70)
    plt.tight_layout()
    plt.show()


def plot_facet_box(df):
    """Draw box plots of each numerical column grouped by ``converted``.

    The figure is built but not shown; the caller decides when to call
    ``plt.show()`` or save it.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(10, 5))
    plt.subplot(121)
    # x/y are passed by keyword: recent seaborn rejects them positionally.
    sns.boxplot(x='converted', y='age', data=df)
    plt.subplot(122)
    sns.boxplot(x='converted', y='total_pages_visited', data=df)
    plt.tight_layout()


# NOTE(review): the patch this module came from also added the compiled
# files build.pyc, tests/__init__.pyc and
# tests/test_get_categorical_variables.pyc. Bytecode is a build artefact;
# consider ignoring *.pyc in version control instead of committing it.