# Plot statistical analyses (correlations and group differences)
### Start by importing packages and loading the dataframe from the previous step

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pingouin
import seaborn as sns
import os
import os.path
from sklearn.linear_model import LinearRegression
import sklearn.preprocessing

# Load the dataframe saved by the previous step of the analysis.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# 'df.pkl' that you generated yourself or obtained from a trusted source.
df = pd.read_pickle('df.pkl')
# Tract names used throughout this notebook. Most come as a '_left'/'_right'
# pair; the final two entries carry no hemisphere suffix.
_paired_tracts = ['AF', 'SLF_I', 'SLF_II', 'SLF_III', 'ILF', 'IFO', 'UF', 'SCP', 'ICP']
tracts = [f'{name}_{side}' for name in _paired_tracts for side in ('left', 'right')]
tracts += ['MCP', 'CC_7']


## If you want to visualize any correlations, do it here. This is just exploratory.

In [None]:
# Figure appearance: size in inches, enlarged fonts, plain white background
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set(font_scale=2)
sns.set_style("white")

# Variables for this exploratory plot
x_label = 'AGE'    # independent variable (x-axis)
y = 'ICV'          # dependent variable
covars = ['SEX']   # covariates regressed out of the dependent variable
y_label = f'{y} residuals'  # df column that receives the covariate-adjusted values

# Fit y ~ covariates, then subtract only the covariate contribution
# (coefficients times covariates). The fitted intercept is NOT subtracted,
# so the adjusted values are not zero-centered residuals.
confounds = df[covars]
lr = LinearRegression(fit_intercept=True).fit(confounds, df[y])
y_predicted_by_confound = lr.coef_ @ confounds.T
y_corrected = df[y] - y_predicted_by_confound
df[y_label] = y_corrected

# Spearman correlation between the independent variable and the adjusted values
all_stat = pingouin.corr(x=df[x_label], y=df[y_label], method='spearman')

# Scatter plot with a fitted regression line. robust=False draws an ordinary
# least-squares line; robust=True makes regplot fit a robust regression
# instead, which takes noticeably longer to run.
sns.regplot(
    x=x_label,
    y=y_label,
    data=df,
    robust=False,
    scatter_kws={'s': 3},
    line_kws={"color": "black"},
)

# Show the correlation stats, then the figure
print(all_stat)
plt.show()

## Run cohort-wide correlations and group differences across all participants

In [None]:
# For every tract: regress covariates out of the tract values, correlate the
# adjusted values with a behavioral score, compare the two groups, draw a
# joint figure (scatter + regression line + marginal violin plot), and
# collect the p-values for later multiple-comparison correction.

# Plot parameters
sns.set(rc={'figure.figsize': (10, 8)})
sns.set(font_scale=1.6)
sns.set_style("white")

# Score plotted on the x-axis: 'TOWRE' (composite), 'PDE' (nonword) or
# 'SWE' (single word). Currently set to the single-word score.
x_label = 'SWE'
# Group column; switch to 'HT_LT' to look at groups based off of TOWRE score cutoff
hue = 'TR_RD'
# The violin plot is colored by the column named with the two halves swapped
# (e.g. 'RD_TR') - a hack to make the violinplot order correct.
_hue_first, _hue_second = hue.split('_')[:2]
hue_rev = _hue_second + '_' + _hue_first

# Define covariates for correlation and group difference analyses
covars_corr = ['HAND', 'SEX', 'ICV']
covars_group = ['HAND', 'SEX', 'gFA', 'AGE', 'TRRDxSEX']

# Per-tract p-values, stored as plain floats (one entry per tract)
p_vals_corr = []
p_vals_group = []

# Create folder for saving plots (no error if it already exists)
os.makedirs('figures_' + x_label, exist_ok=True)

for tract in tracts:
    #tract=tract+'2' #use tractometry average instead

    # Keep rows whose tract value is present and at least 0.2.
    # .copy() gives an independent frame, so adding the residual columns
    # below neither raises SettingWithCopyWarning nor touches `df`.
    valid_inds = np.asarray(~((df[tract] < .2) | np.isnan(df[tract])))
    df_valid = df.iloc[valid_inds, :].copy()

    # De-mean and rescale covariates to unit variance
    covars_corr_normed = sklearn.preprocessing.scale(
        df_valid[covars_corr], with_mean=True, with_std=True, copy=True)
    covars_group_normed = sklearn.preprocessing.scale(
        df_valid[covars_group], with_mean=True, with_std=True, copy=True)

    # Boolean masks of participants by group
    inds_0 = np.asarray(df_valid[hue] == 0)  # HT or TR
    inds_1 = np.asarray(df_valid[hue] == 1)  # LT or RD

    # Column names for the covariate-adjusted tract values
    y_label_corr = tract.replace('_', ' ') + ' residuals corr'
    y_label_group = tract.replace('_', ' ') + ', residuals group'

    # COMPUTE RESIDUALS FOR CORRELATION ANALYSIS (covariates without age)
    # Only the covariate contribution is subtracted; the fitted intercept is
    # left in, so the adjusted values are not zero-centered.
    lr_corr = LinearRegression(fit_intercept=True).fit(covars_corr_normed, df_valid[tract])
    y_predicted_by_confound = lr_corr.coef_ @ covars_corr_normed.T
    df_valid[y_label_corr] = df_valid[tract] - y_predicted_by_confound

    # COMPUTE RESIDUALS FOR GROUP ANALYSIS (covariates including age)
    lr_group = LinearRegression(fit_intercept=True).fit(covars_group_normed, df_valid[tract])
    y_predicted_by_confound = lr_group.coef_ @ covars_group_normed.T
    df_valid[y_label_group] = df_valid[tract] - y_predicted_by_confound

    # Set up plot
    g = sns.JointGrid(data=df_valid, x=x_label, y=y_label_corr, hue=hue)

    # Compute the whole-cohort correlation. The result is a one-row DataFrame
    # indexed by a label, so scalars are read with .iloc[0] rather than [0].
    corr = pingouin.corr(x=df_valid[x_label], y=df_valid[y_label_corr], method='skipped')
    corr_r = corr['r'].iloc[0]
    corr_ci = corr['CI95%'].iloc[0]
    corr_p = corr['p-val'].iloc[0]

    # Plot whole cohort regression line only (marker size 0 hides the points;
    # they are drawn, colored by group, by the scatterplot below)
    sns.regplot(x=x_label, y=y_label_corr, data=df_valid, robust=False, ax=g.ax_joint,
                scatter_kws={'s': 0}, line_kws={"color": "black"})
    g.ax_joint.set(ylabel='Fractional Anisotropy Residuals')

    # Compute group difference on the group-analysis residuals
    group_diff = pingouin.ttest(df_valid.loc[inds_0, y_label_group],
                                df_valid.loc[inds_1, y_label_group])
    group_t = group_diff['T'].iloc[0]
    group_ci = group_diff['CI95%'].iloc[0]
    group_p = group_diff['p-val'].iloc[0]

    # Plot group difference violin plot (FOR FIGURE 3)
    sns.violinplot(data=df_valid, x='all', y=y_label_group, hue=hue_rev,
                   ax=g.ax_marg_y, palette=['teal', 'red'])

    # Plot all data points (colored for group)
    sns.scatterplot(x=x_label, y=y_label_corr, data=df_valid, hue=hue, ax=g.ax_joint,
                    edgecolor="black", palette=['red', 'teal'])
    plt.ylim([0, .6])

    # Remove legends
    for ax in (g.ax_joint, g.ax_marg_y):
        ax.legend().remove()

    # Add descriptive stats and a title text to the figure.
    # NOTE(review): plt.text draws on pyplot's current axes and the positions
    # are hand-tuned data coordinates - confirm placement if the layout changes.
    plt.text(-8, -.14, "Spearman's ρ = " + str(round(corr_r, 3)) + " " +
             str(corr_ci) + ', p = ' + str(round(corr_p, 4)))
    # Comment out the next two lines to hide the group-difference text
    plt.text(-8, -0.18, 't = ' + str(round(group_t, 2)) + ' ' +
             str(group_ci) + ', p = ' + str(round(group_p, 3)))
    plt.text(-4.5, .65, tract.replace('_', ' '), fontsize=30)
    #plt.savefig(('figures_'+x_label+'/'+tract+'_'+x_label),format='pdf', bbox_inches='tight',pad_inches=.5)
    plt.show()

    # Save out p-values as scalars
    p_vals_corr.append(float(corr_p))
    p_vals_group.append(float(group_p))

## Check for significance after multiple comparison correction

In [None]:
# Benjamini-Hochberg FDR correction across the per-tract correlation p-values.
# The p-values are flattened to a 1-D float array first, so the returned
# arrays are 1-D whether the list holds scalars or one-element Series.
# NOTE(review): p_vals_group is collected by the loop above but is not
# corrected here - confirm whether the group tests need the same treatment.
pingouin.multicomp(np.asarray(p_vals_corr, dtype=float).ravel(), alpha=0.05, method='fdr_bh')