From b9c75eae3b55a1cb55947e363ca488b52477503e Mon Sep 17 00:00:00 2001 From: arunabh15091989 Date: Fri, 30 Jun 2017 17:33:26 +0000 Subject: [PATCH] Done --- build.py | 41 +++++++++++++++++++---- build.pyc | Bin 0 -> 3147 bytes tests/__init__.pyc | Bin 0 -> 173 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2772 bytes 4 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index 35cdd2a..310c43b 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,53 @@ +import pandas as pd +import matplotlib.pyplot as plt def get_categorical_variables(df): - return [] + result = [] + for i in df.columns: + if df.loc[:,i].nunique() < 20: + result.append(i) + return result + def get_numerical_variables(df): - return [] + cols = df._get_numeric_data().columns + res=list(cols) + return list((x for x in res if x not in get_categorical_variables(df))) def get_numerical_variables_percentile(df): - pass + final = pd.DataFrame(columns = ['variable name','mean','median','25th percentile','50th percentile','75th percentile']) + final = df[get_numerical_variables(df)] + return final.describe() def get_categorical_variables_modes(df): - pass + x = df[get_categorical_variables(df)].mode() + # return x.to_dict(orient='records') + return x def get_missing_values_count(df): - pass + return pd.DataFrame(len(df.index) - df.count()) def plot_histogram_with_numerical_values(df): - pass + df_sub = df.loc[:,cols] + sub_cols = list(df_sub.columns) + col_length = len(sub_cols) + for i in range(0,col_length): + plt.figure(figsize=(30,10)) + plt.subplot(col_length,1,i+1) + plt.hist(df_sub.loc[:,sub_cols[i]], bins=10) + plt.title(sub_cols[i]) + plt.xlabel("Value") + plt.ylabel(sub_cols[i]) + plt.show() def plot_facet_box(df): - pass + df_sub = df.loc[:,get_numerical_variables(df)] + sub_cols = list(df_sub.columns) + df_sub.groupby(sub_cols).agg('sum').unstack(1).plot(kind='bar',subplots=True) + plt.show() + diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2157473b901bc8071c9085ada09e893fad3a281a GIT binary patch literal 3147 zcmcImU2hvj6uo1+Nt`A$MWt;(&7$*U$(@`vC|AM4VHI^i8sj)2S1vOSAT~ebh>9QKDlCG$6k<%V@ z+E~KFq+ZsqXer zq}-5W!+m1x)QCCS5Im32chI4KU3Ud_hiippR7sIlliUYxAS)8>%d6ZSRW>Z4FP=_q zo_07K_KDxUBNHKf1lQd*}9@ukP$TD#}AYjT5_* z6sg^@X}mos%I&1cpW4#9BHu3KV7Jpa#NLLTe&w?C*7SMU#Ee55dI?5{Md^|_>phK2 z7xy#kxhublO3aekFdvyK<_t}a>G&Ru-xS1C)P}Ia6958u04eNWMFv3C$S*?J_A!9x z1ztKKJtt&ep!knbft4rkb1KXebbfqsxGW`rH}cRNH7S~d4gu?d`ta_vL@ztAmmrOw&Nt_Cdvi5-UY-p%u87)cT}!eXByMIk|xDfy?8p zCIc;501qEx+BfjfJ9qVME?!e|69IV^I}Ajnv+X8AjDj4sFU*?u1;=Ypw=A|4Fm`=smjJJI(r?BxvL~E zVhl!vuFk?x4ZB^Q+L>BOQRU&4M4=EYfM>c!?2!jkQ-t z*GZN++LuT0s6Mf~4qny==fDlU`_Vdr;&=u*YCsj+cQ{Z9&O?Hy+C6`*!&r_qWio+9 z7g?c5MCkRiGR{Fk6U`~kSR#zYs?zG=?5qAXD}pXGLUz_Kf(u!FUeB_)Z!`Vqd2Lds zepEc`sMgrh(R9$mRGL)q9|R+-(+wEC%;v)|in#JZL+vIOpff!>QVfymde2-K&CeoD z_LLu!kH3ma%%)JIcGtKm~Co10{J_ z7S*)>T)&KmL+z^|syI1R;?ETDU0K;^3u{Nbm7Jf{5qXO~VIRt(?iPVtR{{sP*@a%e znC+s+uW(Is1618M9jaUvH;wZ&_L}ViQ{E&F{5EraT=di@iiPrNee*%!S-i+6$O pJyxHw;>GUVJGB;&5b4WZWc@0$_ehuDK(z+f+U@p|{glB!mz_AcVxi0U>&0RqX1-!ELu(<&2QR ziuv5x%?He!a~(QOl1400x|6#0EnipFIp^LgxBfkU=IuWpJ|%MSYvKAF%?)8v{0EYY zi~|`463`DN4`tYrv?ar~*p}>e#J1%dBD(4>iS4SpEGYs483mGd->gs_MN*4Z*7noZw5(O?Agj?Wf9X~OWdBneGvTG`ZzJ(ZkJV- zm^|JyRc5ww>!xTp_6s!k1jY(M00WjX42g_4KoaUrn~>Pk;ktR#m1wiy!J7%kAv0_n zXs(380Axei`3iRS;kU$xz!`iEWnW|$gSTZrl+VgYaM0B`-qk(cwKUU}3MeB-?`Al= z0e^cQ1y}}FIPy!lBxPOr>Wh+G*uUc1*~(J@SJqWxeNSV^v$tu#4bKzxR-NT(#Bn0x z_Ge&>+!A?2015?$7+1h#hZu+lpuLFajyS%nF;#-8wZW%PVSY&oueFBU-9@>f44WNm zQ!{#&{QDSi4*BuKq9O{PR}K}4F41MZL|0&D3PIx* zP5uL{X-fWyr&a@!1#`O0U&oAOew;bv^EQsryv9zWr^Zpr8T%`gYuutK=Xb)C@{`cr z`;lucc0DEIxIq2Ju~vCZVd@-i#A72`q4|S`RTA+SKHOpT?$nRHh#x52MICqkZqQrq zb$gv&r%zWA)kWsXMT8E~!+&Cm)Hp2|Y@8w^?0FhmS(C>6&SEdmU#+J`btXr2qf` literal 0 HcmV?d00001