From 4a2813c579cf127e98174f9a60f9656db8198b06 Mon Sep 17 00:00:00 2001 From: gowthamdongari Date: Mon, 10 Jul 2017 11:38:32 +0000 Subject: [PATCH] Done --- build.py | 65 +++++++++++++++++++---- build.pyc | Bin 0 -> 3080 bytes tests/__init__.pyc | Bin 0 -> 172 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2765 bytes 4 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..e684cbf 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,71 @@ -def get_categorical_variables(df): - return [] +import numpy as np +import pandas as pd +from scipy.stats import norm +import seaborn as sns +import matplotlib.pyplot as plt + + + +df=pd.read_csv('data/conversion_data.csv') +def get_categorical_variables(df): + list1=[] + for col in df.columns: + if df.loc[:,col].nunique()<5: + list1.append(col) + return list1 def get_numerical_variables(df): - return [] + numeric = pd.DataFrame._get_numeric_data(df) + return list(numeric) def get_numerical_variables_percentile(df): - pass - + per = df.describe().T + return per def get_categorical_variables_modes(df): - pass - + return df[get_categorical_variables(df)].mode() def get_missing_values_count(df): - pass + return pd.DataFrame(df.isnull().sum()) def plot_histogram_with_numerical_values(df): - pass + num_cols = get_numerical_variables(df) + plt.figure(figsize=(15,6)) + + plt.subplot(221) + plt.title(num_cols[0]) + sns.distplot(df[num_cols[0]], color='Blue', fit=norm, kde=False) + + plt.subplot(222) + plt.title(num_cols[1]) + sns.distplot(df[num_cols[1]], color='Blue', fit=norm, kde=False) + + plt.subplot(223) + plt.title(num_cols[2]) + sns.distplot(df[num_cols[2]], color='Blue', fit=norm, kde=False) + + plt.subplot(224) + plt.title(num_cols[3]) + sns.distplot(df[num_cols[3]], color='Blue', fit=norm, kde=False) + + plt.tight_layout() + plt.show() def plot_facet_box(df): - pass + plt.figure(figsize=(10,10)) + + plt.subplot(221) + plt.title('Age') + sns.boxplot('converted','age',data=df) + + plt.subplot(222) + plt.title('Total Pages Visited') + + sns.boxplot('converted','total_pages_visited',data=df) + + plt.tight_layout() + plt.show() diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4416e8162f8d8c727d40d752b762830e33e57ae5 GIT binary patch literal 3080 zcmcgu+fE!u5Ut(|co#6(MvhT##EQkf0Lp6;MY&ig4-tw6D^`%A(d3H) zvY$w#{6Ic0mw(6yBr@{UZpvfmYRwr)=L=g-&Ol5tP=`(oQNUXZLUr$+V{CF{s9P`4ym zSDD+A%_*}i*}O65I<{qut8!W&w_b-0C0|Pd6K#H=O_y;sO3CVU^ z7Du-7t}J4Xw^DzUa%me6SAT=Y?}3a&UKx3@BQHh9B9DLiOkTBg&^RpeD~2sOXvx~R zC8rvXXCl>#yfiXya}JJ~=PQE$Q^@uPjNq>8f)2P>meE38^f`S1AqQrj(Zu_vD*zHky91fF7Ru-eAa@$YK>Y1M= zsYM7fyKS>%^PsFYXYtsql3+I(6mP@!uy%R2H9ZMyP&l$7PBA+wE0-pDd<2EbFt?uZ z_I(gBb7sXnGOI?TNkjQ)G~WXg5JW^p1Bj>~valU70l%lI+HE4jkEU7Z;q@4n>{Q9b zhC3L=w4tac78YpcG`Ea`Mm1tg&SIx)ZZahAV6O}2TnxxvOnK&3%-vZi2rv!BJunTP zPBkEig93vKcGZz^3v6b6TDhSOls~!PAW<;2)lEA05L>*#xfolgwg@h_48fV7%Z;Ds zMWlYNVj6xT>!8T-8j=r>&r{7TM|4z^H~}6N1DeE}^zB1z`9>_qw8OuPC7$-l%qcn~PiV;dSudR>MvM$00 zn7imB{bM%N$8V9y*Lh=|YT2T@W-IX27>?2AJKEeh=n%r_*=q=bL2nw*G3jTrHS}xCz>|keQMh0O8S?SEL@=kvg?&BUQ)7jg%ZfFS91H3lI1IuKz5Ci9&wD8LmCiL@Z9u$SJpsuVM>S1o2fK` zl&26+bH$G5$@8QPNxuF)CjI&k&O5HAJWw7_DUXjDc>^ikie$jNM(>lbc|>fa(K^XT zAd0DB`9eX%%tVhs-cFw*%oPX+xY~pmR(uMH=Q%8z@FjB}314d6Z}rV!6;ee#Fp9ek z%cz)Ybxe~YOT0GjptqXTO;5IbNP=(bfOj=-lVMpEjnk~{NurgKyCJShG^5d_tcFVq zs-IXmHl7Fn0aUh?tN;K2 literal 0 HcmV?d00001 diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6031c7b37afa3e036b19088238e92f7aa80b5b23 GIT binary patch literal 172 zcmZ9Fu?hk)5JV$dh~R%*6ZZ#1`~n*}M4PZA>lyU2M|M^GXe)ofJ+N|M-oO+y)6a5o zdbh{uewtWr%`!72=l0Fm>>7jngS6Jcg?oaIpL)SQap|yTIB>nnfRP|_BA|r+%+*IO n52}*6_=-wCp`wt2lBhrkxnvDtHN13g%j=%g{YIPdU(D_cWN#}n literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b5be8012671007f69eda040e197b30bb6e2ffb5 GIT binary patch literal 2765 zcmc&$%Wl&^6uovHZFq?Ceu#%aNMzGy#SS4v5r_>77lg>h8th5x*0CecxCp7N;CET^ z0dUTBNkiKp7N|)(*K^0$<2mQtJLA@m^Jj+N-+f5r;McCvC232BZLuxc>47JIgGSy{yNHxjq!?;HfbvOZ3Xx8t(P5|hV!rpnC6 z+`1VW&ix$CJ%+JD5Ws+?3_~Jg2S`HgvEv2t1ms0dkWXZdzs6Aixe>-qVqHikBCfFiKtX_<1EL} zmL$h*ym=dXZX0G&*W~~KCQ8;f3R1k|~nrb1vK*qkT%%mxhMhbm+$x=MR68|Q} z9Cl`>W6CzFoPs9z@UDe*9Z#@BxTAtx!y|`+M3?BYTB0j3b7i1$OXmI|mNeu3#1pH* z$Z|Pd;&0%MBz}@PWb+tDXkKHR(G%k+745>-6a=pt{IB zsfaKjdhAb4ks7Dff=yFog#AuKD{Io2UsxRF(W}MOXwLU68LO`GNu$V9u17Ji1?R#p IPVx)kFRo^x-T(jq literal 0 HcmV?d00001