# Analyze old and young subjects separately (Figure 3)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pingouin
import seaborn as sns
from sklearn.linear_model import LinearRegression
import sklearn.preprocessing

# Load the pickled dataframe that every analysis below works from.
df = pd.read_pickle('df.pkl')

# Tract columns to analyze: left/right pairs first, then the two entries
# that have no left/right suffix.
tracts = [
    'AF_left', 'AF_right',
    'SLF_I_left', 'SLF_I_right',
    'SLF_II_left', 'SLF_II_right',
    'SLF_III_left', 'SLF_III_right',
    'ILF_left', 'ILF_right',
    'IFO_left', 'IFO_right',
    'UF_left', 'UF_right',
    'SCP_left', 'SCP_right',
    'ICP_left', 'ICP_right',
    'MCP', 'CC_7',
]

# Split the cohort at the age cutoff: strictly below -> young, at or above
# -> old. A missing (NaN) AGE fails both comparisons, so such a row ends up
# in neither subgroup.
age_cutoff = 9  # in years
is_young = (df['AGE'] < age_cutoff).to_numpy()
is_old = (df['AGE'] >= age_cutoff).to_numpy()

# Positional row indices of each subgroup
young_inds = np.flatnonzero(is_young)
old_inds = np.flatnonzero(is_old)

df_y = df.iloc[young_inds]  # young dataframe
df_o = df.iloc[old_inds]  # old dataframe

### Run stats and create plots (Figure 5)

In [None]:
# Choose the age subgroup analyzed by the rest of this cell:
# df_o.copy() -> old subjects, df_y.copy() -> young subjects.
df = df_o.copy()

# Global seaborn appearance: enlarged fonts on the plain "white" style
sns.set(font_scale=1.5)
sns.set_style("white")

# Column plotted on the x-axis of every figure
x_label = 'PDE'
# Column holding the group labels; switch to HT_LT to look at groups based
# off of the TOWRE score cutoff
hue = 'TR_RD'
# Same name with its two halves swapped (TR_RD -> RD_TR); hack to make the
# violinplot order correct
hue_parts = hue.split('_')
hue_rev = hue_parts[1] + '_' + hue_parts[0]

# Covariates regressed out of tract FA before each analysis: the group
# comparison additionally includes age, the correlation does not
covars_group = ['HAND','SEX','gFA','AGE','TRRDxSEX']
covars_corr = ['HAND','SEX','ICV']

# Accumulators for the per-tract p-values (filled in by the tract loop)
p_vals_corr, p_vals_group = [], []

def _confound_residuals(frame, covariates, target):
    """Return ``frame[target]`` with the linear effect of ``covariates`` removed.

    The covariates are de-meaned and rescaled to unit variance, then an
    ordinary least-squares model (with intercept) is fit to the target.
    Only the covariate term ``coef_ @ X`` is subtracted; the fitted
    intercept is not, so the result keeps the target's mean level instead
    of being centered on zero.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing both the covariate columns and the target column.
    covariates : list of str
        Names of the confound columns to regress out.
    target : str
        Name of the column to correct.

    Returns
    -------
    pandas.Series
        Corrected target values, indexed like ``frame``.
    """
    design = sklearn.preprocessing.scale(frame[covariates], with_mean=True,
                                         with_std=True, copy=True)
    model = LinearRegression(fit_intercept=True).fit(design, frame[target])
    return frame[target] - model.coef_ @ design.T


for tract in tracts:
    #tract=tract+'2' #use tractometry average instead

    # Keep rows whose tract mean FA is present and at least 0.2 (a NaN fails
    # the comparison and is dropped). Take an explicit copy so the residual
    # columns added below are written to an independent frame rather than to
    # a slice of df (avoids pandas' SettingWithCopyWarning).
    df_valid = df.loc[df[tract] >= .2].copy()

    # Boolean row masks for the two groups
    in_group_0 = df_valid[hue] == 0 # HT or TR
    in_group_1 = df_valid[hue] == 1 # LT or RD

    # Column names for the two sets of tract FA residuals
    y_label_corr = tract.replace('_',' ')+' residuals corr'
    y_label_group = tract.replace('_',' ')+', residuals group'

    # Residuals for the correlation analysis (covariates without age)
    df_valid[y_label_corr] = _confound_residuals(df_valid, covars_corr, tract)
    # Residuals for the group comparison (covariates including age)
    df_valid[y_label_group] = _confound_residuals(df_valid, covars_group, tract)

    # Set up plot: joint axes for the scatter, marginal y axes for the violins
    g = sns.JointGrid(data=df_valid, x=x_label, y=y_label_corr, hue=hue)

    # Correlation between the score and the residuals over the whole cohort
    # (pingouin's 'skipped' method)
    corr = pingouin.corr(x=df_valid[x_label], y=df_valid[y_label_corr], method='skipped')
    # Whole-cohort regression line only (marker size 0 hides the points)
    sns.regplot(x=x_label, y=y_label_corr, data=df_valid, robust=False, ax=g.ax_joint,
                scatter_kws={'s':0}, line_kws={"color":"black"})
    g.ax_joint.set(ylabel='Fractional Anisotropy Residuals')

    # Group difference on the residuals that additionally control for age
    group_diff = pingouin.ttest(df_valid.loc[in_group_0, y_label_group],
                                df_valid.loc[in_group_1, y_label_group])
    # Violin plot of the group difference. Uses the reversed group column
    # (hue_rev) so the violins show the same grouping as `hue` in the desired
    # order, instead of a hard-coded column that may not match `hue`.
    sns.violinplot(data=df_valid, x='all', y=y_label_group, hue=hue_rev, ax=g.ax_marg_y, palette=['teal','red'])

    # Plot all data points (colored for group)
    sns.scatterplot(x=x_label,y=y_label_corr, data=df_valid, hue=hue,ax=g.ax_joint, edgecolor="black", palette=['red','teal'])
    plt.ylim([0,.6])

    # Remove legends
    g.ax_joint.legend().remove()
    g.ax_marg_y.legend().remove()

    # Pull the statistics out positionally: the pingouin result frames are
    # indexed by a label, so integer keys like [0] rely on a deprecated
    # pandas fallback.
    r_val = corr['r'].iloc[0]
    r_ci = corr['CI95%'].iloc[0]
    p_corr = corr['p-val'].iloc[0]
    t_val = group_diff['T'].iloc[0]
    t_ci = group_diff['CI95%'].iloc[0]
    p_group = group_diff['p-val'].iloc[0]

    # Add descriptive stats and a title text to the figure
    # (plt.* calls act on pyplot's current axes)
    plt.text(-8,-.13, f"Spearman's ρ = {round(r_val,2)} {r_ci}, p = {round(p_corr,4)}")
    plt.text(-8,-0.175, f"t = {round(t_val,2)} {t_ci}, p = {round(p_group,3)}")
    plt.text(-4.5,.65, tract.replace('_',' '), fontsize=30)
    # The file name carries no extension; format='pdf' fixes the output type.
    # NOTE(review): the 'figures_PDE' directory is assumed to exist already.
    plt.savefig(('figures_PDE/'+tract+'_'+x_label),format='pdf', bbox_inches='tight',pad_inches=.5)
    plt.show()

    # Save out p-values as plain floats (not one-element Series) so the
    # accumulated lists are flat sequences of numbers
    p_vals_corr.append(float(p_corr))
    p_vals_group.append(float(p_group))

In [None]:
# Correct all collected p-values together (correlation p-values first, then
# group-difference p-values) in a single multiple-comparison pass.
pingouin.multicomp(
    [*p_vals_corr, *p_vals_group],
    method='FDR',
    alpha=0.05,
)