From 1b7946bd4dba7c26907ddec18b9cacd1b698f020 Mon Sep 17 00:00:00 2001 From: varunpanicker Date: Thu, 6 Jul 2017 10:26:23 +0000 Subject: [PATCH 1/4] Done --- build.py | 50 +++++++++++++++++++---- build.pyc | Bin 0 -> 2805 bytes tests/__init__.pyc | Bin 0 -> 171 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2758 bytes 4 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..df70ec2 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,60 @@ +import pandas as pd + def get_categorical_variables(df): - return [] + #returning for now based on visual analysis + return df[['country','new_user','source','converted']] def get_numerical_variables(df): - return [] + return pd.DataFrame._get_numeric_data(df) def get_numerical_variables_percentile(df): - pass + df = pd.DataFrame._get_numeric_data(df) + df = df.groupby('converted').describe() + return df def get_categorical_variables_modes(df): - pass - + dfn = df[['country','new_user','source','converted']] + df_mode = dfn.mode() + return df_mode def get_missing_values_count(df): - pass + ndf = df.isnull().sum() + ndf = pd.DataFrame(ndf) + ndf2 = ndf.reset_index() + ndf2 = ndf2.rename(columns={'index':'var_name',0:'missing_value_count'}) + return ndf2 def plot_histogram_with_numerical_values(df): - pass + age = df['age'].tolist() + new_user = df['new_user'].tolist() + total_pages_visited = df['total_pages_visited'].tolist() + converted = df['converted'].tolist() + + fig, axes = plt.subplots(2, 2) + + axes[0,0].hist(age) + axes[0,0].set_title('age') + + axes[0,1].hist(new_user) + axes[0,1].set_title('new_user') + + axes[1,0].hist(total_pages_visited) + axes[1,0].set_title('total_pages_visited') + + axes[1,1].hist(converted) + axes[1,1].set_title('converted') + + plt.tight_layout() + plt.show() def plot_facet_box(df): - pass + list_of_columns = df.columns.values + for col in list_of_columns: + plt.boxplot(df[col]) + plt.title(col) + plt.show() diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63887fe3aaf24a87b7d2a72f1d23ac3eb6be97ed GIT binary patch literal 2805 zcmcguO>ZMb5bc@Rj-5?5$u1v>_!L5{rB&7vhae<`kamTHkc^ldRv_5Xc)IO$c06OI zd-5SAr|^%taD(@%?Km5f+Xma+uC6I}bydB3?Ulba*1q0)^-HeiuNdFoVYr_#X?!A8 zDD`Wkbgce_5G%bR!iv%@5n4*OMQAJC5uu~>stBt}cSY!OpM!o6d)j~S8s-s1jyA?~ z4EGMEbMXgH@S$63(o(wzeOe6tD(1XuN?)J(F67djq>VGR?_lAorp}G$_IXtvLu+&& z3)QC!&zST$D5!NmVj3J)Q!_Zu>ZY7!rOjWPdhoKUU%OeBn?YV_GcY>a8&~yS*wfjn z+^aKh_H^bmXZ&E)*g`*>o%vntH!(iV@pMwvHqVL_JKJnj7`H-?Jc5d^qUKVf-h$wj zI*Qa$%Ao}~0wHd6y{DGwySN?LeykAc3A?XNk$ z9wv(t!i@eK=2JYp5$1)I`v`@O81+$0T`Con2Yu#em@orU{)9zN2!Vud1Pqh9YG$JV z25nqk+mZQSa_|{Wa7PZ(nE?(yE5~E2=uz};ND1S)!uuCk+=BOuNSz}aKmxJ> zNW{Q{bU?zr5<(+kc0zIl-mLi)cooD50z5tGaVd$?<5ZqP6`=9f;r|JCz7zi9+&zNH zJRR>v>$jxiXBZ$IA#k(bK^MXp`=AU^(f*~17MWuK z7Ti%Z)6xldpf%Hyjl{&-Qkzr%7}C@_XUhpXw`fc%WMiRLeeNPWV_n%4g=E(?(0=m$Bm+tdhA&*e1Cb5eYyh|tjHT7xeyb3*sWnAGB;6-PeVD_Uc}9F z4EHytfX>}H7J*A(DprfN&=q)ku|}Ej9E?4;Zz(UNhKk^G8LQu7bJXQe2>jw5fnThPKG>C>WHKj z`i-PKlqzQvBP}QR@RbMQ&7gDXv2_-gjC3IY2pC}~%YGE{3QOwlUfwWXE-3}%$X|*JFHIn z!n%ZP@*yXBNh&YC)OV>EHf50}U`(&OMzds`p&zT4oXaT~BiWXOkMS#*D zkJU>75fe2O>PUEBOJRgrT*4CyHIJ%OMkG8Wbx0WGHOkn;lC?~$F`73p81oMK9y9$A z9wgIa{1x&kPWcrybIw?gHll3|`7owL{!2~Nz7d%KEA<)*A(Vk*1$2E^|;sWwR#=Q-QLc>6^~f^ literal 0 HcmV?d00001 diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..025949e295d475068b170d10edc3865819f3aab4 GIT binary patch literal 171 zcmZ9FK?(vf5CkK75W#=!CC&##e1R7g@f3z+#ISB=$;>YJAU~)da27n-(ACg~?)hiE zdcDur=zf}6?#;3=B nj8r9a@fDSPLPa43B~gJ8a>*LPrhn?vhPNH3;X%9UUd-+bXqGD< literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eff9d3aed9148e74e1567272be12dbc377b2c1d9 GIT binary patch literal 2758 zcmc&$T~E|d5S?~EU{NEA-|-_F6ZZx7$v0w*7>zGH6pV3Sn%dsQLbu)Y-eO4L3IB?J zP=A1)GlgXZ*6?81g*&}7o!;I#XXZ{DzpgF(`1$d3EJwcvuCFm{A0olOCz;65lfEY& zbzd@H`VC1M(r=1s$aYIiQx4(LR&`EHTh)0<;NZ!?lcXizMGi$i_d8r-qua*v>2LhH z9%gPHV$)c+7Fn}yA3$5_C6SAIagpzu(xycoQeTPfu6uOk{NA`Qj-1&l$~2C$a5pN` z=u2koI2x|~2E)FDFv1`Z1Gdul85t*t#MeoaA<@#}zBy@2u+eSd$(ZX98ICm!TR>m| zqM_&<0G)m44RJo=^bUO47um+*P1*NlM-lOkOs%t~_F2>1geeg~MvT@cFuMjQUHJW=X(x0>>+sh@0T_)w$xSm65Se4<_h*w5??lE3R-|1eA)n0`VZ_&H%E$QD`em}tBzwoQf z5$j3j?m}lAc{MbrQn!YhKt}Lb!qpWHxjlkv!`;-T#~cOBhu|6o-2+CZqy!|YzHx;% zjM=d95HB9HaZao^aNjbDV=l%#Fs;^jcNOA7JUFd0WRm zM+Vi9yEHS3XqUB9qs*P2psbYuCUWbd9QDHz86{dqZ;`O?%4k@ZN6myjJX(T0!yn?zPi1qY_c0B4=LmL^IYR$(hZbw62^sf4C I^ziHcA16hhc>n+a literal 0 HcmV?d00001 From e3dc9390428c9ff3478b7591cbaaefe401edf8e8 Mon Sep 17 00:00:00 2001 From: varunpanicker Date: Thu, 6 Jul 2017 10:27:26 +0000 Subject: [PATCH 2/4] Done --- build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build.py b/build.py index df70ec2..4cad4c9 100644 --- a/build.py +++ b/build.py @@ -58,3 +58,4 @@ def plot_facet_box(df): plt.boxplot(df[col]) plt.title(col) plt.show() +print plot_facet_box(get_categorical_variables(df)) From 1f2438a511695ccc8b17d9f4212b582f9d9b0d0b Mon Sep 17 00:00:00 2001 From: varunpanicker Date: Thu, 6 Jul 2017 10:27:52 +0000 Subject: [PATCH 3/4] Done --- build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/build.py b/build.py index 4cad4c9..df70ec2 100644 --- a/build.py +++ b/build.py @@ -58,4 +58,3 @@ def plot_facet_box(df): plt.boxplot(df[col]) plt.title(col) plt.show() -print plot_facet_box(get_categorical_variables(df)) From f17cc6a8ccd6a5965b6b4b7385d0adb8deffbf07 Mon Sep 17 00:00:00 2001 From: varunpanicker Date: Thu, 6 Jul 2017 10:28:07 +0000 Subject: [PATCH 4/4] Done --- build.pyc | Bin 2805 -> 2805 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/build.pyc b/build.pyc index 63887fe3aaf24a87b7d2a72f1d23ac3eb6be97ed..f73bc22a7bb862bab64ae442fb74d2f29a8f8d6f 100644 GIT binary patch delta 14 Vcmew=`c;&b`7