In [1]:
%matplotlib inline
import os 
import numpy as np
from numpy import mean, std
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel, ttest_ind, mannwhitneyu, wilcoxon
import statsmodels.api as sm
pd.options.display.max_columns = 99
import statsmodels.formula.api as smf
import math 
import scipy.stats as ss
plt.rcParams['font.family'] = 'Myriad Pro'
sns.set_style('white')

clinical_dir  = '/Users/kanaan/SCR/Dataframes_20150820/DF_clinical/'
datadir = '/Users/kanaan/Google Drive/TS-EUROTRAIN/Presentations/2016_06_ESSTS_QSM'

measures = [ #'Age','SNR' , 'CNR' , 'FBER',  'QI1', 
            # 'R_Caud', 'L_Caud', 'R_Puta', 'L_Puta', 'R_Pall', 'L_Pall',
            # 'R_Amyg', 'L_Amyg', 'R_Hipp', 'L_Hipp', 'R_Accu', 'L_Accu',
            # 'R_Thal', 'L_Thal', 'L_Insula', 'R_Insula', 'L_Cingulum', 'R_Cingulum', 
            # 'L_ant_Insula', 'R_ant_Insula', 'L_inf_Insula', 'R_inf_Insula',  'L_sup_Insula', 'R_sup_Insula', 
            # 'L_ant_Cingulum', 'R_ant_Cingulum', 'L_ant_mid_Cingulum', 'R_ant_mid_Cingulum',
            # 'L_post_mid_Cingulum', 'R_post_mid_Cingulum', 'L_post_dors_Cingulum',
            # 'R_post_dors_Cingulum','L_post_vent_Cingulum', 'R_post_vent_Cingulum', 
            # 'L_RN', 'R_RN', 'L_STN', 'R_STN', 'L_SN', 'R_SN',
             'Caud', 'Puta', 'Pall', 'Thal', 'Accu', 'Amyg', 'Hipp', 'RN', 'SN','STN',
             'Insula', #'ant_Insula', 'inf_Insula', 'sup_Insula',
             'Cingulum',# 'post_mid_Cingulum', 'post_dors_Cingulum','post_vent_Cingulum', 'ant_Cingulum', 'ant_mid_Cingulum',
            'BrainStem', 'BasalGanglia', 'Brain', 'GM', 'WM', 'CSF'
            ]

drop_c =  []
drop_p = ['NL2P', 'STDP', 'HSPP', 'CB4P', 'SA5U']  # Based on quality control of MP2RAGE_UNI 
drop_p = ['HSPP', 'THCP', 'CB4P', 'LA9P', 'RA9P', 'SA5U']
#drop_p = []

In [2]:
def plot(df1,df2,measure):
    palette = sns.xkcd_palette([ "red", 'blue'])
    data = [df1[measure], df2[measure]]
    sns.violinplot(data=data, palette= palette)
    sns.stripplot(data=data,palette= palette, jitter=1)

    
def detect_outliers(df, measure):
    mu = np.mean(df[measure])
    sd = np.std(df[measure])
    thresh = 3
    upper = mu +thresh*sd
    lower = mu -thresh*sd
    outliers = []
    for subject in df.index:
        i = df.loc[subject][measure]
        if i > upper or i < lower:
            outliers.append(subject)
            df.ix[subject, measure] = np.nan
            print outliers
            print 'OUTLIER: %s %s= %s, Limits= (%s,%s)'%(subject, measure, i, lower, upper)
    
    return outliers

def plot_partial_correlation(data, x, y,  regressors = None, color = None, annotate = None, fig_name = None, 
                             dpi = 100, labels = True, jitter = None):
    
    regressors_columns = [data[regressor] for regressor in regressors]
    df_reg = pd.concat(regressors_columns, axis = 1).dropna(axis=0) 
    df = pd.concat([df_reg, data[x], data[y]], axis=1).dropna()
    
    # inititlize grid and plot regression and margins 
    grid = sns.JointGrid(x, y, data=df)#, size=size, ratio=ratio, space=space, #xlim=xlim, ylim=ylim), xlim=xlim, ylim=ylim
    grid.plot_marginals(sns.distplot, color = color, hist=False, kde = True, rug = 0, kde_kws={'shade':True})  
    
    grid.plot_joint(sns.regplot, color = color, x_partial = df[regressors], y_jitter = jitter)
    if labels:
        grid.set_axis_labels(x, y, fontsize= 15, weight='bold', color='blue', labelpad=10)
    else:
        grid.set_axis_labels(None,None)
        
    ## calculate regression parameters 
    
    formula = [ '%s ~ %s'%(y,x) + ' + %s'%regressor for regressor in regressors][0]
    model   = smf.ols(formula=formula, data= df)
    result = model.fit() 
    p_val  = np.round(result.pvalues[1], 3)
    pcor   = math.copysign(np.round(np.sqrt(result.rsquared), 3) , result.params[1])
    
    if annotate:
        plt.annotate('R=%s, P=%s' %(pcor, p_val), xy = annotate, fontsize = 13,  color='r')
                      
    if fig_name:
        plt.savefig(os.path.join(save_fig_dir, fig_name), dpi = dpi, bbox_inches='tight', transparent = True)

df1 = pd.read_csv(os.path.join(datadir, 'controls_a.csv'), index_col = 0)
df2 = pd.read_csv(os.path.join(datadir, 'patients_a.csv'), index_col = 0)

df1['Brain'] = df1.GM + df1.WM + df1.CSF
df2['Brain'] = df2.GM + df2.WM + df2.CSF

In [9]:
df1 = pd.read_csv(os.path.join(datadir, 'controls_a.csv'), index_col = 0)
len(df1)

22

In [3]:
def make_ttest():
    df1 = pd.read_csv(os.path.join(datadir, 'controls_a.csv'), index_col = 0)
    df2 = pd.read_csv(os.path.join(datadir, 'patients_a.csv'), index_col = 0)

    df1['Brain'] = df1.GM + df1.WM + df1.CSF
    df2['Brain'] = df2.GM + df2.WM + df2.CSF
    df = pd.DataFrame(index= ['Controls', 'Patients', 'Z', 'p_val',], columns=measures)
    for measure in measures:
        #drop_c = detect_outliers(df1, measure)
        #drop_p = detect_outliers(df2, measure)
        t_val, p_val = mannwhitneyu(df1[measure], df2[measure].drop(drop_p, axis = 0))
        #t_val, p_val = ttest_ind(df1[measure], df2[measure].drop(drop_p, axis = 0))
        x = 100
        df.ix['Controls'][measure] = '%s±%s'%(x*np.round(np.mean(df1[measure]),4), 
                                              x*np.round(np.std(df1[measure]),4))     
        df.ix['Patients'][measure] = '%s±%s'%(x*np.round(np.mean(df2[measure].drop(drop_p, axis = 0)),4), 
                                              x*np.round(np.std(df2[measure].drop(drop_p, axis = 0)),4))   
        df.ix['Z'][measure] = np.round(t_val,2)        
        df.ix['p_val'][measure] = np.round(p_val,5)
        
        if p_val < 0.05:
            print measure, 'p =', np.round(p_val,3)
        
    return df.T
x = make_ttest()
x.sort(columns='p_val')

Caud p = 0.045
Puta p = 0.031
Hipp p = 0.026
SN p = 0.024
BrainStem p = 0.028
WM p = 0.047


Unnamed: 0,Controls,Patients,Z,p_val
SN,11.23±2.43,10.29±4.29,125,0.02436
Hipp,-1.44±0.9,-2.1±1.25,126,0.02596
BrainStem,6.05±2.87,4.35±3.02,127,0.02764
Puta,0.75±1.54,-0.14±1.63,129,0.03128
Caud,1.8±1.23,1.18±0.82,135,0.04465
WM,-2.98±0.78,-3.52±0.99,136,0.04727
STN,1.43±3.33,-0.63±2.72,137,0.05001
Accu,-2.2±1.93,-2.98±1.66,139,0.05587
GM,-2.18±0.7,-2.66±0.9,139,0.05587
RN,5.5±4.06,3.41±3.71,140,0.059


In [4]:
pd.DataFrame(x.p_val).sort(columns='p_val')

Unnamed: 0,p_val
SN,0.02436
Hipp,0.02596
BrainStem,0.02764
Puta,0.03128
Caud,0.04465
WM,0.04727
STN,0.05001
Accu,0.05587
GM,0.05587
RN,0.059


In [5]:
def make_fdr(df):
    fdr = pd.DataFrame()
    for i in df.index:
        for c in df.columns:
            fdr.ix['%s_%s'%(i,c),'p'] = df.loc[i][c]
    fdr = fdr.dropna()
    fdr = fdr.sort('p')
    
    n_comparisons = len(fdr.index)
    for i, corr in enumerate(fdr.index):
        fdr.ix[corr, 'FDR_threshold'] = np.round((0.05 * (i + 1.) / n_comparisons), 3)
        
    return fdr.reindex()

make_fdr(pd.DataFrame(x.p_val))

Unnamed: 0,p,FDR_threshold
SN_p_val,0.02436,0.003
Hipp_p_val,0.02596,0.006
BrainStem_p_val,0.02764,0.008
Puta_p_val,0.03128,0.011
Caud_p_val,0.04465,0.014
WM_p_val,0.04727,0.017
STN_p_val,0.05001,0.019
GM_p_val,0.05587,0.022
Accu_p_val,0.05587,0.025
RN_p_val,0.059,0.028


In [6]:
controls_a = pd.read_csv(os.path.join(datadir, 'controls_a.csv'), index_col = 0)
patients_a = pd.read_csv(os.path.join(datadir, 'patients_a.csv'), index_col = 0)

In [7]:
def get_clinical(df, pop_name):
    clinical_df = pd.read_csv(os.path.join(clinical_dir, 'clinical_%s.csv'%pop_name), index_col=0)
    
    for subject in df.index:
        if subject in clinical_df.index:
            df.ix[subject, 'YGTSS']             = clinical_df.loc[subject]['YGTSS_Total_Tic_Score']
            df.ix[subject, 'RVTRS']             = clinical_df.loc[subject]['RVTRS']
            df.ix[subject, 'PUTS']              = clinical_df.loc[subject]['puts']
            df.ix[subject, 'YBOCS']             = clinical_df.loc[subject]['YBOCS_Totalscore_Items_1to10']
            df.ix[subject, 'OCI']               = clinical_df.loc[subject]['OCIR_total score']
            df.ix[subject, 'CAARS' ]            = clinical_df.loc[subject]['CAARS_Score_ADHS_Symptoms_Total_Cat_G_T_Score']
            df.ix[subject, 'DSM4']              = clinical_df.loc[subject]['DSM4_clinically_significant_ADHD']
            df.ix[subject, 'QOL']               = clinical_df.loc[subject]['qol_scale']
            df.ix[subject, 'QOLs']              = clinical_df.loc[subject]['qol_score']
            df.ix[subject, 'BDI']               = clinical_df.loc[subject]['BDI12']
            df.ix[subject, 'Ferritin']          = clinical_df.loc[subject]['Ferritin']
    
    return df, clinical_df

In [None]:
Xcontrols_a, controls_a_clinical = get_clinical(controls_a, 'controls_a')
Xpatients_a, patients_a_clinical = get_clinical(patients_a, 'patients_a')

In [None]:
Xpatients_a

In [None]:
#drop = ['CB4P', 'NL2P', 'YU1P', 'LA9P','SA5U']
drop = drop_p # +  ['SA5U'] 
measure_1 = 'Puta'
measure_2 = 'YGTSS'
sns.jointplot(Xpatients_a[measure_1].drop(drop, axis = 0), Xpatients_a[measure_2].drop(drop, axis = 0), kind = 'reg')
#sns.jointplot(Xcontrols_a.Ferritin, Xcontrols_a.R_STN, kind = 'reg')

#plt.savefig('/Users/kanaan/Desktop/YGTSS_R_Ins.png')

In [None]:
plot_partial_correlation(Xpatients_a.drop(drop, axis = 0), 'Pall', 'Ferritin',  regressors =['Age', 'Gender'] , 
                         color = None, annotate = (0.025, 200), fig_name = None, 
                         dpi = 100, labels = True, jitter = None)

In [None]:
drop = ['SA5U']
sns.jointplot(data = Xpatients_a.drop(drop, axis =0), x = 'L_RN', y = 'Ferritin', kind = 'reg')

In [None]:
Xpatients_a.Ferritin

In [None]:
%matplotlib inline
import os 
import numpy as np
from numpy import mean, std
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_style('white')

plt.rcParams['font.family'] = 'Myriad Pro'
p = sns.palplot(sns.xkcd_palette(['green', "red"]), size = 3)

In [None]:
sns.palplot(sns.color_palette("hls",10 ))

In [None]:
sns.set_style('darkgrid', {"xtick.direction": "in","ytick.direction": "in"})
f, (ax1,ax2,ax3)= plt.subplots(nrows = 1, ncols = 3)
f.set_size_inches([20,10])

jitter = 0.05
drop_p = drop_p + ['RA9P']
fontsize = 22

C = controls_a.drop(drop_c, axis = 0) * 100
P = patients_a.drop(drop_p, axis = 0) * 100

measure_1 = 'SN' 
measure_2 = 'STN'
measure_3 = 'RN'

data_1 = [C[measure_1],     P[measure_1]]
data_2 = [C[measure_2],     P[measure_2]]
data_3 = [C[measure_3],     P[measure_3]]

palette = ['#3498db', '#2ecc71', '#e74c3c']
#palette = sns.xkcd_palette(["windows blue", "amber", "greyish", "faded green", "dusty purple"])
palette = sns.color_palette("hls", 10)

sns.violinplot(data = data_1, color = palette[0], ax = ax1)
sns.stripplot(data=data_1, color = palette[0], jitter= jitter, ax = ax1)
sns.pointplot(data=data_1, color = palette[0], linestyles=['--'], markers=['o'], ax =ax1)
ax1.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_2, color = palette[1],ax = ax2)
sns.stripplot(data=data_2, color = palette[1], jitter= jitter, ax = ax2)
sns.pointplot(data=data_2, color = palette[1], linestyles=['--'], markers=['o'], ax =ax2)
ax2.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_3, color = palette[2], ax = ax3)
sns.stripplot(data=data_3, color = palette[2], jitter= jitter, ax = ax3)
sns.pointplot(data=data_3, color = palette[2], linestyles=['--'], markers=['o'], ax =ax3)
ax3.tick_params(axis='y', labelsize=fontsize)

#ax1.set_ylim(0, 23)
#ax2.set_ylim(-10, 18) 
#ax3.set_ylim(-10, 30)

ax1.set_xticks([])
ax2.set_xticks([])
ax3.set_xticks([])

f.savefig(os.path.join(datadir,'qsm_py_stats','QSM_STATS_1.png'), dpi = 600, bbox_inches='tight', transparent = False)

In [None]:
sns.set_style('darkgrid', {"xtick.direction": "in","ytick.direction": "in"})
f, (ax1,ax2,ax3)= plt.subplots(nrows = 1, ncols = 3)
f.set_size_inches([20,10])

jitter = 0.05
drop_p = drop_p + ['RA9P']
fontsize = 22

C = controls_a.drop(drop_c, axis = 0) * 100
P = patients_a.drop(drop_p, axis = 0) * 100

measure_1 = 'BrainStem' 
measure_2 = 'Caud'
measure_3 = 'Puta'

data_1 = [C[measure_1],     P[measure_1]]
data_2 = [C[measure_2],     P[measure_2]]
data_3 = [C[measure_3],     P[measure_3]]

palette = ['#3498db', '#2ecc71', '#e74c3c']
#palette = sns.xkcd_palette(["windows blue", "amber", "greyish", "faded green", "dusty purple"])
palette = sns.color_palette("hls", 10)[3:6]

sns.violinplot(data = data_1, color = palette[0], ax = ax1)
sns.stripplot(data=data_1, color = palette[0], jitter= jitter, ax = ax1)
sns.pointplot(data=data_1, color = palette[0], linestyles=['--'], markers=['o'], ax =ax1)
ax1.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_2, color = palette[1],ax = ax2)
sns.stripplot(data=data_2, color = palette[1], jitter= jitter, ax = ax2)
sns.pointplot(data=data_2, color = palette[1], linestyles=['--'], markers=['o'], ax =ax2)
ax2.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_3, color = palette[2], ax = ax3)
sns.stripplot(data=data_3, color = palette[2], jitter= jitter, ax = ax3)
sns.pointplot(data=data_3, color = palette[2], linestyles=['--'], markers=['o'], ax =ax3)
ax3.tick_params(axis='y', labelsize=fontsize)

#ax1.set_ylim(-5, 22)
#ax2.set_ylim(-5, 10)
#ax3.set_ylim(-5, 8)


ax1.set_xticks([])
ax2.set_xticks([])
ax3.set_xticks([])

f.savefig(os.path.join(datadir,'qsm_py_stats','QSM_STATS_2.png'), dpi = 600, bbox_inches='tight', transparent = False)

In [None]:
sns.set_style('darkgrid', {"xtick.direction": "in","ytick.direction": "in"})
f, (ax1,ax2,ax3)= plt.subplots(nrows = 1, ncols = 3)
f.set_size_inches([20,10])

jitter = 0.05
drop_p = drop_p + ['RA9P']
fontsize = 22

C = controls_a.drop(drop_c, axis = 0) * 100
P = patients_a.drop(drop_p, axis = 0) * 100

measure_1 = 'Pall' 
measure_2 = 'Thal'
measure_3 = 'BasalGanglia'

data_1 = [C[measure_1],     P[measure_1]]
data_2 = [C[measure_2],     P[measure_2]]
data_3 = [C[measure_3],     P[measure_3]]

palette = ['#3498db', '#2ecc71', '#e74c3c']
#palette = sns.xkcd_palette(["windows blue", "amber", "greyish", "faded green", "dusty purple"])
palette = sns.color_palette("hls", 10)[6:9]

sns.violinplot(data = data_1, color = palette[0], ax = ax1)
sns.stripplot(data=data_1, color = palette[0], jitter= jitter, ax = ax1)
sns.pointplot(data=data_1, color = palette[0], linestyles=['--'], markers=['o'], ax =ax1)
ax1.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_2, color = palette[1],ax = ax2)
sns.stripplot(data=data_2, color = palette[1], jitter= jitter, ax = ax2)
sns.pointplot(data=data_2, color = palette[1], linestyles=['--'], markers=['o'], ax =ax2)
ax2.tick_params(axis='y', labelsize=fontsize)

sns.violinplot(data = data_3, color = palette[2], ax = ax3)
sns.stripplot(data=data_3, color = palette[2], jitter= jitter, ax = ax3)
sns.pointplot(data=data_3, color = palette[2], linestyles=['--'], markers=['o'], ax =ax3)
ax3.tick_params(axis='y', labelsize=fontsize)

#ax1.set_ylim(1, 16)
#ax2.set_ylim(-9, 9)
#ax3.set_ylim(-2, 8)

ax1.set_xticks([])
ax2.set_xticks([])
ax3.set_xticks([])

f.savefig(os.path.join(datadir,'qsm_py_stats','QSM_STATS_3.png'), dpi = 600, bbox_inches='tight', transparent = False)