From dc74b10b11e8bef846cd9545dd23a361186a5d59 Mon Sep 17 00:00:00 2001 From: nixen420 Date: Sun, 9 Jul 2017 12:05:16 +0000 Subject: [PATCH] Done --- build.py | 43 ++++++++++++++++++----- build.pyc | Bin 0 -> 2864 bytes tests/__init__.pyc | Bin 0 -> 166 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2723 bytes 4 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..98f0404 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,53 @@ +import pandas as pd + def get_categorical_variables(df): - return [] + categorical_data = list(df[['country', 'new_user', 'source', 'converted']]) + return categorical_data def get_numerical_variables(df): - return [] + numerical_data = list(df[['age', 'total_pages_visited']]) + return numerical_data def get_numerical_variables_percentile(df): - pass + return df.describe() def get_categorical_variables_modes(df): - pass - + return df[get_categorical_variables(df)].mode() def get_missing_values_count(df): - pass + return pd.DataFrame(df.isnull().sum()) def plot_histogram_with_numerical_values(df): - pass + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,5)) + # Histogram + ax[0].hist(df['age'],color='yellow',bins=10,align='mid',normed=True) + ax[1].hist(df['total_pages_visited'],color='yellow',bins=10,align='mid',normed=True) + ax[0].set_xlabel('age') + ax[1].set_xlabel('total_pages_visited') + plt.tight_layout() + # Normal Curve for age + mu, std = norm.fit(df['age']) + xmin, xmax = plt.xlim() + x = np.linspace(xmin, xmax, 20) + p = norm.pdf(x, mu, std) + ax[0].plot(x, p, 'k', linewidth=2) + # Normal Curve for total_pages_visited + mu, std = norm.fit(df['total_pages_visited']) + xmin, xmax = plt.xlim() + x = np.linspace(xmin, xmax, 70) + p = norm.pdf(x, mu, std) + ax[1].plot(x, p, 'k', linewidth=2) + plt.show() def plot_facet_box(df): - pass + plt.figure(figsize=(10,5)) + plt.subplot(121) + sns.boxplot('converted', 'age', data=df) + plt.subplot(122) + sns.boxplot('converted', 'total_pages_visited', data=df) + plt.tight_layout() diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a76516fb41d70523dfe53da26b9bb9dfd8a591f9 GIT binary patch literal 2864 zcmcImZEG7x5Z=>Cwj|q$ofoI>3k@L!e^FIR>8DT%E%~UzwNP3L$LV%0t#dEpcF&fB z{7L;8{gv$xXrE_JmXy#C3YFzsjrL}D_L+HRX2ZXZ`hR{j{v}nbzZSl~#p8a!pz$B5 zOsQW3rCaKE2rZ>U5kjTgBD9t6h|p1bPlP?C_eI!Ox+_AL`}}y`!=CnU{EgB8B5m#A z`3{eJioq+j5R)(lGrFxNZFRN`)NP;^+@J%ck>AIdmUZE)8{dUonCrN9rt;j(m35UG zPg7}GTw&Jeb6WFl44HNQoOV42ec_Msl_uUy%F3om7VE?(=a^G6xR;QI(=s>1!p=?c z)t6rke=e(6ZkD8Gn3mcMjZOv^Wi@DwIa?Nk3i}PXku!ccu5G42pWXN~Y&9`H-nMp? zR5lrB#)Ww4ct90=9ITzg331Mc5VoB&X}#eVt{!=g2`oIMa_IqPhMz9Jvd-Q)A>x+e zfQY{I_b?Q7Zgvg+3G5L3#?~4-Be;To+CXQt_z3hq1d!rVsigt~I6rFQ$%Wduw6bF( zGXiNh!FvWK)vGay{yZDx+31~zzS0LuAF?p&Bc<;teP03YGH3@>Mi#G>kKeD%ol$FhbnkjK zCXQu-=j+h<0X%!Te|G}t@_3=TZ3~1hRXEd8w|fd9x$LV&M=fx^6SaiezT)}=wZImk zTC`=dyM}fsFHp`ODE*KHdKm^=76IcwSbX$8i}cUGTLju8L91C5P7$e*XF)5f^4eJz ziRNcoX3mna(ljpY#M#%zGP0KH7LSTpNJJglWz$7&OqP|`l8R$nIC-07c2WpgbE}2F zMOo!WN7B~FAWy@ugy@+?SAyEqvV};+-IzUR*_EAUnU@yk>hUZqy%UR5G$|2&s%qnT zSTvG&o+V?G`9m(Xlc|rh6akiW4`+RlSn4T#os2}mQ_0u=H33Hhl z?w(;#!SmK27z8Ky^jl}agWxefpKPQ}eI^Yg%fY0rEboI|V73%C(i}Ml4=@4cnu7}t za(vlZt01x7Q0egww58JIrm*z@hDH%P;)W6S20tS@zF{N7!yANs%_$P>Z3^+O6FtQs zOhmZq$_U9awSd%Q=G`eFk~gWtQBgvG@XaBp<`{5HpUvRl#v^6r9Qfgc5FI-oZ3>9;{UJUCLL!$m2*IsFh$0Xd4sH=@E|#%3#jRsU-gN<~J(a)9 zi6aMiZ(P#QHi!di)9%=_9`EjZZ)Vqx@8=eNe0#SZ%l@x{>vJ?afJyN0NhUJ#WZ;QM z-Tls!bW)twX5R(DTF?)1z;v#A+IG6s%Vr2Sx`q|`99zW{8FUl=Dj$#9C6Vo?I)EgFM z??C8Y2~B%c+Zxw*$O@}AoC$Vm{KehDOpKi!MVR(1jChOQdGC_`9hLSC0{)A%DjI2? z7VR2*;>fF!Ih3_E%j7UZ#3fu^;gHs2xHjBQZTefDfT<9iqhWYJWU5C%jp_$y*oSuF zLfpcf6CsX)c>&KY0sNL>A_0?Wg-@4Z{z(F-)rD-`M1i63>OE>zb9kEd`xtNr>*3g- z26C5XM#=25Vrqi9(gT7u{~->Y}Jc(3ek5gojw%-$a-~ z&5X26(^`)cP~$G