In [320]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Import from the public namespace; `scipy.stats.stats` is a deprecated
# private module path.
from scipy.stats import pearsonr
% matplotlib inline
# Render floating-point values with five decimal places everywhere pandas
# formats them for display.
pd.options.display.precision = 5
pd.options.display.float_format = lambda value: '%.5f' % value

In [321]:
# Anthropometry inputs: the data table and its companion "codebook" file
# (presumably a description of the columns -- confirm against the file).
df_ant = pd.read_csv('malawi/mwi_anthropometry.csv')
df_ant_cd = pd.read_csv('malawi/mwi_anthropometry_codebook.csv')

# Nutrition inputs, same layout: data table plus companion codebook file.
df_nut = pd.read_csv('malawi/mwi_nutrition.csv')
df_nut_cd = pd.read_csv('malawi/mwi_nutrition_codebook.csv')

In [322]:
# Give the four coded anthropometry columns readable names; every other
# column keeps its original label.
df_ant = df_ant.rename(
    columns={
        'haz06': 'stunted',
        'waz06': 'underweight',
        'whz06': 'wasting',
        'bmiz06': 'bmi',
    }
)

In [323]:
# Combine nutrition and anthropometry rows that share an 'hhid' value
# (pandas' default inner join: ids missing from either table are dropped).
df = pd.merge(df_nut, df_ant, on='hhid')

In [324]:
# Keep only the four renamed anthropometry columns; these are the metrics
# the correlation cells below iterate over.
df_y = df.loc[:, ['stunted', 'underweight', 'wasting', 'bmi']]

In [325]:
# Display the column labels of df_y (bare expression -> cell output).
df_y.columns

Index([u'stunted', u'underweight', u'wasting', u'bmi'], dtype='object')

In [326]:
# Rows of the merged frame that actually have a 'bmi' value.
df_bmi = df.dropna(subset=['bmi'])

In [327]:
def get_corr(df, col):
    """Rank the numeric columns of ``df`` by correlation strength with ``col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. Rows where ``col`` is null are ignored.
    col : str
        Label of the numeric column to correlate every other column against.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (column label ``col``) indexed by the numeric
        column labels of ``df``. Values are absolute Pearson correlation
        coefficients, sorted from strongest to weakest (NaN last).

    Raises
    ------
    KeyError
        If ``col`` is missing from ``df`` or is not a numeric/bool column.
    """
    rows_with_target = df[df[col].notnull()]
    # Select numeric/bool columns explicitly: recent pandas no longer drops
    # string columns silently inside corr() and raises instead.
    numeric_only = rows_with_target.select_dtypes(include=['number', 'bool'])
    strength = numeric_only.corr()[[col]].abs()
    # DataFrame.sort(columns=...) was removed from pandas; sort_values is
    # its replacement.
    return strength.sort_values(by=col, ascending=False)

In [338]:
def corr_and_pvals(df, metric):
    """Pearson correlation and p-value of ``metric`` against each column.

    Every column of ``df`` whose dtype is not ``object`` is paired with
    ``metric`` (including ``metric`` itself); rows where either value is
    null are dropped before ``pearsonr`` is applied to the pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table containing ``metric`` and the candidate columns.
    metric : str
        Label of the column to correlate the others against.

    Returns
    -------
    pandas.DataFrame
        Indexed by the non-object column labels (in ``df`` order) with two
        columns, ``<metric>_correlation`` and ``<metric>_p_vals``. A column
        sharing fewer than two non-null rows with ``metric`` gets NaN in
        both cells. If no column qualifies, the frame is empty but still
        carries the two column labels.
    """
    result_columns = [metric + '_correlation', metric + '_p_vals']
    # The metric's null mask does not change per column; compute it once.
    metric_present = df[metric].notnull()

    labels = []
    stats = []
    for col in df.columns:
        # Object (string) columns cannot be correlated; skip them before
        # doing any row filtering.
        if df[col].dtype == 'O':
            continue
        paired = df[metric_present & df[col].notnull()]
        if len(paired) < 2:
            # pearsonr rejects inputs shorter than two observations; record
            # the pair as undefined rather than aborting the whole table.
            stats.append((float('nan'), float('nan')))
        else:
            r_value, p_value = pearsonr(paired[metric], paired[col])
            stats.append((float(r_value), float(p_value)))
        labels.append(col)

    # Passing index/columns to the constructor also covers the case where
    # no column qualified (empty ``stats``).
    return pd.DataFrame(stats, index=labels, columns=result_columns)

In [375]:
# Build one wide table: for each metric column in df_y, the correlation and
# p-value of that metric against the columns of df, placed side by side.
# Each per-metric frame is indexed by column label, so concatenating along
# axis=1 aligns them on that index. This replaces the append-then-join loop,
# which relied on DataFrame.append (removed in pandas 2.0).
df_cp = pd.concat(
    [corr_and_pvals(df, col) for col in df_y.columns],
    axis=1
)

In [377]:
# Write the combined correlation/p-value table to a CSV file; the path is
# relative, so it is resolved against the current working directory.
df_cp.to_csv('correlations_and_Pvals.csv')