# PLS regression analysis for MEND DTI and clinical data

- Project:        HCP-EP
- File Name:      PLS_regression_MEND
- Author:         Haley Wang
- Date Created:   2023-07-06
- Last Modified:  2023-07-06
- Code Status:    Finished

In [None]:
import numpy as np
import pandas as pd
import pyls
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
from netneurotools import datasets, stats, plotting
from scipy.stats import zscore, pearsonr, ttest_ind
from scipy.spatial.distance import squareform, pdist

In [None]:
from IPython.display import display, Javascript

def beep():
    """Play a short notification sound in the notebook front end.

    Displays a Javascript object that creates an ``Audio`` element for a
    remote mp3 and calls ``play()`` on it, so it needs a browser-backed
    notebook and network access to have any audible effect.
    """
    chime_js = 'new Audio("https://www.soundjay.com/buttons/sounds/button-09a.mp3").play()'
    display(Javascript(chime_js))

In [None]:
# Lift pandas' row/column display limits so whole frames are printed.
for _display_opt in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_opt, None)
# To go back to the pandas defaults: pd.reset_option('all')

In [None]:
"""
load data (age and sex were regressed out)
"""

# Absolute path on the analysis cluster; edit here when running elsewhere.
mend_csv = '/u/project/CCN/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_MEND_Replication/MEND_PLS_all.csv'
df = pd.read_csv(mend_csv)


In [None]:
"""
Create X and Y for PLS
"""
# Columns are picked purely by name prefix.
# Predictors: names starting with FA, MD, AD or RD.
dti_prefixes = '^(FA|MD|AD|RD)'
# Responses: names starting with BPRS, SANS or YMRS.
symptom_prefixes = '^(BPRS|SANS|YMRS)'

predictor_cleaned = df.filter(regex=dti_prefixes)
response_cleaned = df.filter(regex=symptom_prefixes)

In [None]:
# Count the columns whose names start with "SANS".
# NOTE(review): the variable is named BPRS but the pattern selects SANS
# columns — confirm which scale was meant to be counted here.
BPRS = df.filter(regex='^(SANS)')
BPRS.shape[1]

In [None]:
"""
behavioral PLS
"""
# Standardise both blocks with scipy's zscore (default axis=0, i.e. per column).
# NOTE(review): zscore's default nan_policy propagates NaNs — presumably the
# input table has no missing values; verify.
X, Y = zscore(predictor_cleaned), zscore(response_cleaned)

In [None]:
# Number of response (Y) variables entering the PLS.
# Read it from .shape rather than .columns: whether zscore() hands back a
# DataFrame or a plain ndarray for DataFrame input appears to depend on the
# SciPy version, and an ndarray has no .columns attribute.
Y.shape[1]

In [None]:
# Main behavioural PLS between the standardised X and Y blocks.
# All settings are collected in one place so they are easy to audit.
pls_options = dict(
    n_boot=5000,       # bootstrap resamples
    n_perm=10000,      # permutations
    n_split=0,
    test_split=0,
    covariance=False,
    rotate=True,
    flip_sign=False,
    ci=95,             # confidence-interval level passed to pyls
    verbose=True,
    n_proc='max',
)
pls_result = pyls.behavioral_pls(X, Y, **pls_options)

In [None]:
# Persist the fitted model to an HDF5 file in the current working directory
# so the bootstrap/permutation run above does not have to be repeated.
pyls.save_results('MEND_pls_result.hdf5', pls_result)

In [None]:
# Permutation-test p-values stored on the fitted result.
pls_result["permres"]["pvals"]

In [None]:
# `varexp` entry of the fitted result (presumably variance explained per
# latent variable — see the pyls documentation).
pls_result["varexp"]

In [None]:
"""
loadings - neural
"""
# Second fit with the two blocks swapped (Y first, X second) and no
# permutations. Later cells read `y_loadings` from this result and label them
# with predictor_cleaned's columns, i.e. it is used for the neural loadings.
xload = pyls.behavioral_pls(
    Y,
    X,
    n_perm=0,
    n_boot=5000,
    test_split=0,
)

In [None]:
# Persist the swapped-block fit (used for the neural loadings) to an HDF5
# file in the current working directory.
pyls.save_results('MEND_pls_loadings.hdf5', xload)

In [None]:
# Index of the latent variable inspected by every cell below (0-based);
# change it here and re-run the following cells to look at another one.
lv = 0 # latent variable
# Audible cue that execution has reached this point.
beep()

In [None]:
# Contribution of each variable within the selected latent variable:
# square the loading, z-score the squared values across variables, and flag
# the variables lying more than 2 SD from the mean.
lv_loadings = xload["y_loadings"][:, lv]
effect_size = lv_loadings ** 2
std_effect_size = zscore(effect_size)
relidx = np.abs(std_effect_size) > 2

In [None]:
# Horizontal bar chart of the selected neural loadings for latent variable
# `lv`. Relies on `relidx` from the effect-size cell directly above.

# Error bars: half the width of the bootstrap interval of each loading.
ci_bounds = xload["bootres"]["y_loadings_ci"][:, lv, :]
err = (ci_bounds[:, 1] - ci_bounds[:, 0]) / 2
# Alternative selection rule kept for reference (loading minus error above a
# threshold; thresholds tried: LV0 0.265, LV1 0.1395, LV2 0.1365):
#relidx = (abs(xload["y_loadings"][:, lv]) - err) > 0.10

# The loadings are sign-flipped before sorting and plotting.
flipped = -1 * xload["y_loadings"][relidx, lv]
sorted_idx = np.argsort(flipped)

# Column names of the predictor block as a plain list.
diffusion_idx = predictor_cleaned.columns.tolist()

bar_positions = np.arange(sum(relidx))
tick_names = predictor_cleaned.columns[relidx][sorted_idx]

plt.figure(figsize=(7.5,6))
plt.ion()
plt.barh(bar_positions, flipped[sorted_idx], xerr=err[relidx][sorted_idx], align='center')
plt.yticks(bar_positions, labels=tick_names, rotation='horizontal', fontsize=14)
plt.xticks(fontsize=14)
plt.xlabel("Microstructural Loading Coefficients", fontsize=16, fontweight="bold")
plt.tight_layout()
# To write the figure to disk:
#plt.savefig("/u/project/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_ROI_Analysis/figures/bar_pls_lv0_nload.tiff")

In [None]:
"""
loadings - psychopathology
"""

# Horizontal bar chart of the behavioural loadings (original sign) for latent
# variable `lv`, restricted to variables that pass the interval check below.
# This cell overwrites the notebook-level `err`, `relidx` and `sorted_idx`.

boot_ci = pls_result["bootres"]["y_loadings_ci"][:, lv]
err = (boot_ci[:, 1] - boot_ci[:, 0]) / 2  # half-width of the bootstrap interval
behav_loadings = pls_result["y_loadings"][:, lv]
# Keep a variable when |loading| exceeds the half-width.
# NOTE(review): this equals "CI does not cross 0" only if the interval is
# centred on the loading — confirm that is an acceptable approximation.
relidx = (abs(behav_loadings) - err) > 0
kept = behav_loadings[relidx]
sorted_idx = np.argsort(kept)  # ascending

bar_positions = np.arange(sum(relidx))
plt.figure(figsize=(7,6))
plt.ion()
plt.barh(bar_positions, kept[sorted_idx], xerr=err[relidx][sorted_idx], align='center')
plt.yticks(bar_positions, labels=response_cleaned.columns[relidx][sorted_idx],
           rotation='horizontal', fontsize=14)
plt.xticks(fontsize=14)
plt.xlabel("Psychopathology Loading Coefficients", fontsize=16, fontweight="bold")
plt.tight_layout()

In [None]:
"""
loadings - psychopathology
"""

# Same chart as the previous cell but with the loadings multiplied by -1
# before sorting and plotting.
# This cell overwrites the notebook-level `err`, `relidx` and `sorted_idx`.

boot_ci = pls_result["bootres"]["y_loadings_ci"][:, lv]
err = (boot_ci[:, 1] - boot_ci[:, 0]) / 2  # half-width of the bootstrap interval
behav_loadings = pls_result["y_loadings"][:, lv]
# Keep a variable when |loading| exceeds the half-width.
# NOTE(review): this equals "CI does not cross 0" only if the interval is
# centred on the loading — confirm that is an acceptable approximation.
relidx = (abs(behav_loadings) - err) > 0
flipped = -1 * behav_loadings[relidx]
sorted_idx = np.argsort(flipped)  # ascending in the flipped values

bar_positions = np.arange(sum(relidx))
plt.figure(figsize=(7,6))
plt.ion()
plt.barh(bar_positions, flipped[sorted_idx], xerr=err[relidx][sorted_idx], align='center')
plt.yticks(bar_positions, labels=response_cleaned.columns[relidx][sorted_idx],
           rotation='horizontal', fontsize=14)
plt.xticks(fontsize=14)
plt.xlabel("Psychopathology Loading Coefficients", fontsize=16, fontweight="bold")
plt.tight_layout()
# To write the figure to disk:
#plt.savefig("/u/project/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_ROI_Analysis/figures/bar_pls_lv1_pload.tiff")

In [None]:
## for visualization

## export the list of ROI loadings with significant contributions
# Rebuild the neural selection mask and ordering here instead of trusting
# notebook state: the psychopathology plotting cells above reassign `relidx`
# and `sorted_idx` for the behavioural variables, so reusing them would index
# predictor_cleaned's columns with a mask built for response_cleaned.
# Same rule as the effect-size cell: squared loading more than 2 SD from the
# mean, ordered by the sign-flipped loading.
neural_loadings = xload["y_loadings"][:, lv]
relidx = abs(zscore(pow(neural_loadings, 2))) > 2
sorted_idx = np.argsort(-1 * neural_loadings[relidx])

ROIs = predictor_cleaned.columns[relidx][sorted_idx].tolist()
ROIs

In [None]:
# Loadings of the selected neural variables, in the SAME order as `ROIs`.
# `ROIs` is built as columns[relidx][sorted_idx]; taking the loadings with the
# mask alone would leave them in column order and pair every name with the
# wrong coefficient once both go into one table. The mask and ordering are
# recomputed from `xload` (squared loading more than 2 SD from the mean,
# ordered by the sign-flipped loading) so the result does not depend on which
# plotting cell last assigned `relidx` / `sorted_idx`.
neural_loadings = xload["y_loadings"][:, lv]
relidx = abs(zscore(pow(neural_loadings, 2))) > 2
sorted_idx = np.argsort(-1 * neural_loadings[relidx])

coefs = neural_loadings[relidx][sorted_idx].tolist()
coefs

In [None]:
# Build the table used for visualisation, one row per ROI:
#   1. pair ROI names with their coefficients
#   2. take absolute values and sort from largest to smallest
#   3. keep only the second '_'-separated piece of each name
#      (drops the prefix before the first underscore)
#   4. reset_index() keeps the pre-sort row position as an 'index' column
#   5. for repeated ROI names keep the first row, i.e. the largest coefficient
#   6. replace spaces in the ROI names with underscores
# NOTE(review): step 3 splits on every underscore, so a name with more than
# one underscore is cut after its second piece — confirm the column names
# contain exactly one.
# NOTE(review): step 4 has no drop=True, so the 'index' column stays in the
# table — confirm that is wanted.
chart = (
    pd.DataFrame({'ROIs': ROIs, 'Coefs': coefs})
    .assign(Coefs=lambda t: t['Coefs'].abs())
    .sort_values(by='Coefs', ascending=False)
    .assign(ROIs=lambda t: t['ROIs'].str.split('_', expand=True).iloc[:, 1])
    .reset_index()
    .drop_duplicates(subset='ROIs', keep='first')
    .assign(ROIs=lambda t: t['ROIs'].str.replace(' ', '_'))
)
chart

In [None]:
# Write the visualisation table to the current working directory.
# index=False omits the DataFrame's row index only.
# NOTE(review): `chart` went through reset_index() without drop=True, so it
# still has an 'index' column that is written to the CSV — confirm the
# downstream visualisation expects it.
chart.to_csv('MEND_loading_chart_for_vis.csv', index=False)