# PLS regression analysis for EP DTI and clinical data

- Project:        HCP-EP
- File Name:      EP_PLS
- Author:         Haley Wang
- Date Created:   2023-07-10
- Last Modified:  2023-07-10
- Code Status:    Finished

In [None]:
import numpy as np
import pandas as pd
import pyls
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
from netneurotools import datasets, stats, plotting
from scipy.stats import zscore, pearsonr, ttest_ind
from scipy.spatial.distance import squareform, pdist

In [None]:
from IPython.display import display, Javascript

def beep():
    """Play a short notification sound in the notebook front end.

    Sends a JavaScript snippet through IPython's ``display``; the snippet
    builds an ``Audio`` object for a remote MP3 file and starts playback.
    """
    snippet = 'new Audio("https://www.soundjay.com/buttons/sounds/button-09a.mp3").play()'
    display(Javascript(snippet))

In [None]:
# Show DataFrames in full when they are displayed: setting the limits to
# None lifts pandas' cap on the number of printed rows and columns.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
#pd.reset_option('all')

In [None]:
"""
load data (age and sex were regressed out)
"""

df = pd.read_csv('/u/project/CCN/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_MEND_Replication/EP_PLS_all.csv')


In [None]:
# Print every column name of the loaded table, one per line.
column_names = df.columns
for column in column_names:
    print(column)

In [None]:
"""
Create X and Y for PLS
"""
predictor_cleaned =  df.filter(regex='^(FA|MD|AD|RD)')
response_cleaned = df.filter(regex='^(pos|neg|gps|ymrs)')

In [None]:
# Readable item labels for the response block. They are assigned by position,
# so their order has to match the column order of response_cleaned
# (NOTE(review): the column order of the CSV is not visible here - confirm).
panss_labels = [
    'PANSS_Delusions', 'PANSS_Conceptual Disorganization', 'PANSS_Hallucinations',
    'PANSS_Excitement', 'PANSS_Grandiosity', 'PANSS_Suspiciousness/Persecution',
    'PANSS_Hostility', 'PANSS_Blunted Affect', 'PANSS_Emotional Withdrawal',
    'PANSS_Poor Rapport', 'PANSS_Social Withdrawal',
    'PANSS_Difficulty in abstract thinking', 'PANSS_Lack of Spontaneity',
    'PANSS_Stereotyped Thinking', 'PANSS_Somatic Concern', 'PANSS_Anxiety',
    'PANSS_Guilt feeling', 'PANSS_Tension', 'PANSS_Mannerisms and Posturing',
    'PANSS_Depression', 'PANSS_Motor Retardation', 'PANSS_Uncooperativeness',
    'PANSS_Unusual Thought Content', 'PANSS_Disorientation', 'PANSS_Poor Attention',
    'PANSS_Lack of Judgement and Insight', 'PANSS_Disturbance of Volition',
    'PANSS_Poor Impulse Control', 'PANSS_Preoccupation',
    'PANSS_Active Social Avoidance',
]
ymrs_labels = [
    'YMRS_Elevated Mood', 'YMRS_Increased Motor Activity-Energy',
    'YMRS_Sexual Interest', 'YMRS_Sleep', 'YMRS_Irritability', 'YMRS_Speech',
    'YMRS_Language-Thought Disorder', 'YMRS_Content',
    'YMRS_Disruptive-Aggressive Behavior', 'YMRS_Appearance', 'YMRS_Insight',
]
response_cleaned.columns = panss_labels + ymrs_labels

In [None]:
"""
behavioral PLS
"""
X = zscore(predictor_cleaned)
Y = zscore(response_cleaned)

In [None]:
# Fit the behavioral PLS between the z-scored blocks X and Y with
# 5000 bootstrap and 5000 permutation resamples and 95% confidence intervals.
# A fixed seed is passed so the resampling - and therefore the p-values and
# bootstrap intervals read in later cells - is the same on every run; without
# it each execution of this cell gives slightly different statistics.
pls_result = pyls.behavioral_pls(X, Y, n_boot=5000, n_perm=5000, n_split=2,
                                 test_split=2, covariance=False, rotate=True,
                                 ci=95, seed=1234, verbose=True, n_proc='max')

In [None]:
# Store the fitted PLS result as an HDF5 file (path is relative to the
# current working directory).
results_path = 'EP_pls_result.hdf5'
pyls.save_results(results_path, pls_result)

In [None]:
# Show the p-values stored in the permutation results of the fit.
pls_result["permres"]["pvals"]

In [None]:
# Show the `varexp` field of the fit (presumably variance explained per
# latent variable - confirm against the pyls documentation).
pls_result["varexp"]

In [None]:
"""
loadings - neural
"""
xload = pyls.behavioral_pls(Y, X, n_boot=3000, n_perm=0, test_split=2)

In [None]:
# Store the swapped-block fit (neural loadings) as an HDF5 file (path is
# relative to the current working directory).
loadings_path = 'EP_pls_loadings.hdf5'
pyls.save_results(loadings_path, xload)

In [None]:
# Audible cue from the notebook that execution has reached this cell.
beep()

# Index of the latent variable inspected by the cells below.
lv = 0

In [None]:
# Assess the contribution of each variable within the chosen dimension:
# square the loadings on latent variable `lv`, z-score the squared values
# across variables, and flag the variables lying more than 2 SD from the mean.
effect_size = np.square(xload["y_loadings"][:, lv])
std_effect_size = zscore(effect_size)
relidx = np.abs(std_effect_size) > 2

In [None]:
# Half the width of the interval stored for every loading on this latent
# variable; used as the error-bar length.
ci_bounds = xload["bootres"]["y_loadings_ci"][:, lv, :]
err = (ci_bounds[:, 1] - ci_bounds[:, 0]) / 2
# Alternative selection rule, kept for reference:
#relidx = (abs(xload["y_loadings"][:, lv]) - err) > 0

# Order the flagged variables by loading, smallest first.
selected_loadings = xload["y_loadings"][relidx, lv]
sorted_idx = np.argsort(selected_loadings)

# Extract column names as a list
diffusion_idx = predictor_cleaned.columns.tolist()

# Horizontal bar chart: one bar per flagged variable, labelled with its
# predictor column name and carrying the half-interval as error bar.
bar_positions = np.arange(np.count_nonzero(relidx))
bar_labels = predictor_cleaned.columns[relidx][sorted_idx]

plt.figure(figsize=(8,6))
plt.ion()
plt.barh(bar_positions, selected_loadings[sorted_idx],
         xerr=err[relidx][sorted_idx], align='center')
plt.yticks(bar_positions, labels=bar_labels,
           rotation='horizontal', fontsize=14)
plt.xticks(fontsize=14)
plt.xlabel("Microstructural Loading Coefficients", fontsize=16, fontweight="bold")
plt.tight_layout()
#plt.savefig("/u/project/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_ROI_Analysis/figures/bar_pls_lv0_nload.tiff")

In [None]:
"""
loadings - psychopathology
"""

err = (pls_result["bootres"]["y_loadings_ci"][:, lv, 1]
      - pls_result["bootres"]["y_loadings_ci"][:, lv, 0]) / 2
relidx = (abs(pls_result["y_loadings"][:, lv]) - err) > 0  # CI doesnt cross 0
sorted_idx = np.argsort(pls_result["y_loadings"][relidx, lv])
plt.figure(figsize=(7,6))
plt.ion()
plt.barh(np.arange(sum(relidx)), np.sort(pls_result["y_loadings"][relidx, lv]), 
         xerr=err[relidx][sorted_idx], align='center')
plt.yticks(np.arange(sum(relidx)), labels=response_cleaned.columns[relidx][sorted_idx],
           rotation='horizontal', fontsize=14)
plt.xticks(fontsize=14)
plt.xlabel("Psychopathology Loading Coefficients", fontsize=16, fontweight="bold")
plt.tight_layout()
#plt.savefig("/u/project/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_ROI_Analysis/figures/bar_pls_lv1_pload.tiff")

In [None]:
## for visualization

## export the list of ROI loadings with significant contributions
# The psychopathology plotting cell rebinds `relidx` and `sorted_idx` to a
# selection over the response columns, so using them here would index the
# predictor columns with the wrong mask. Rebuild the neural selection with
# the same rule as the effect-size cell (squared loading, z-scored, |z| > 2)
# and order it by loading, smallest first.
neural_loadings = xload["y_loadings"][:, lv]
relidx = abs(zscore(pow(neural_loadings, 2))) > 2
sorted_idx = np.argsort(neural_loadings[relidx])

ROIs = predictor_cleaned.columns[relidx][sorted_idx].tolist()
ROIs

In [None]:
# Loadings of the selected variables, put in the same order as `ROIs`
# (which is built as columns[relidx][sorted_idx]). Without the sorted_idx
# step the coefficients stay in column order and get paired with the wrong
# ROI names when the two lists are combined.
coefs = xload["y_loadings"][relidx, lv][sorted_idx].tolist()
coefs

In [None]:
# put them together as a df
chart = pd.DataFrame({'ROIs': ROIs, 'Coefs': coefs})

# Make the coefficients absolute values and rank them, largest first
chart['Coefs'] = chart['Coefs'].abs()
chart = chart.sort_values(by='Coefs', ascending=False)

# Remove the prefix: drop everything up to and including the FIRST underscore.
# n=1 splits only once, so a name that contains further underscores keeps its
# full remainder instead of being cut down to its second token (which could
# make different ROIs look identical to drop_duplicates below).
chart['ROIs'] = chart['ROIs'].str.split('_', n=1).str[1]
# NOTE(review): reset_index() keeps the previous row labels as an extra
# 'index' column, which is then written to the CSV - confirm this is wanted.
chart = chart.reset_index()

# Remove the repeated ROIs and keep the largest coefs (rows are already
# sorted by descending coefficient, so the first occurrence is the largest)
chart = chart.drop_duplicates(subset='ROIs', keep='first')
chart['ROIs'] = chart['ROIs'].str.replace(' ', '_')
chart


In [None]:
# Write the ROI/coefficient table for the visualization step; the DataFrame
# row index is left out of the file.
chart_path = 'EP_loading_chart_for_vis.csv'
chart.to_csv(chart_path, index=False)