In [None]:
import numpy as np
import pandas as pd
from scipy import stats

import pylab as plt
import seaborn as sns
import matplotlib.patches as mpatches

from importlib import reload
import os
import re

import sys
sys.path.insert(1,'../scripts')
import utils
import plots
import predict_regress

In [None]:
# Clinical target variables, grouped by domain.
motor = ['updrs_ii', 'updrs_iii_OFF', 'updrs_iii_ON', 'updrs_iv']
cognition = [
    'semantic_fluency', 'moca', 'benton', 'lns',
    'hvlt_recall', 'hvlt_recognition', 'hvlt_retention', 'symbol_digit',
]
neuropsychiatric = ['stai_trait', 'stai_state', 'gds', 'quip']
autonome = ['epworth', 'rbd', 'systolic_bp_drop', 'scopa_aut']
daily = ['se_adl', 'updrs_i']
medication = ['LEDD']

# Flat array of every target. Tables indexed by `targets` further down
# inherit this ordering (motor first, medication last).
targets = np.hstack([motor, cognition, neuropsychiatric, autonome, daily, medication])

# Covariate names; stacked with the digital predictors and 'intercept'
# when the coefficient table is indexed.
covs = ['diagnosis_age', 'time_since_diagnosis', 'male']

# Table read from weekly_mean.csv; 'date_y' is parsed as a datetime column.
grouped_mean = pd.read_csv(
    '/scratch/c.c21013066/data/ppmi/accelerometer/weekly_mean.csv',
    parse_dates=['date_y'],
)

# Digital predictors: every column whose name contains one of these keywords.
predictors = grouped_mean.filter(
    regex='(walking|step|efficiency|total_sleep|pulse|deep|light|rem|nrem|rmssd|wake)'
).columns

# Performance plot

In [None]:
# `scores`: one row per (clinical target, CV fold) holding the test score of the
# full model ('r2') and of the baseline model ('baseline').
# `sign`: one row per clinical target holding the t-test comparing the two.
fold_index = pd.MultiIndex.from_product([targets, np.arange(5)], names=['clinical', 'cv'])
scores = pd.DataFrame(index=fold_index, columns=['r2', 'baseline'])
sign = pd.DataFrame(index=targets, columns=['t', 'p-value', 'sign'])

for target in targets:
    fold_rows = (target, slice(None))  # all CV folds of this target

    # presumably one score per fold (5 rows) in each file - verify against the writer script
    model_scores = pd.read_csv(f'/scratch/c.c21013066/data/ppmi/analyses/predictclinical/{target}/test_scores.csv', index_col=0)
    scores.loc[fold_rows, 'r2'] = model_scores.values

    baseline_scores = pd.read_csv(f'/scratch/c.c21013066/data/ppmi/analyses/predictclinical/{target}/baseline/test_scores.csv', index_col=0)
    scores.loc[fold_rows, 'baseline'] = baseline_scores.values

    # NOTE(review): ttest_ind treats the two sets of fold scores as independent samples;
    # if both models were scored on the same folds a paired test may be more appropriate - confirm.
    r2_folds = scores.loc[fold_rows, 'r2'].astype(float)
    baseline_folds = scores.loc[fold_rows, 'baseline'].astype(float)
    sign.loc[target, ['t', 'p-value']] = stats.ttest_ind(r2_folds, baseline_folds)

# '*' marks targets whose p-value is below 0.05, '' otherwise.
sign['sign'] = (sign['p-value'] < 0.05).replace([True, False], ['*', ''])
scores = scores.astype(float)

In [None]:
# Layout of the clinical targets in the plots. Key, tick label and colour group are
# declared together, row by row, so that `order`, `cl_names` and `labels` cannot drift
# out of alignment (bars are drawn in `order` and labelled positionally by `cl_names`).
clinical_layout = [
    # (target key,        tick label,                 colour group)
    ('epworth',           'ESS',                      'autonomic'),
    ('rbd',               'RBDSQ',                    'autonomic'),
    ('systolic_bp_drop',  'Systolic BP Drop',         'autonomic'),
    ('scopa_aut',         'SCOPA autonome',           'autonomic'),
    ('semantic_fluency',  'Semantic Fluency',         'cognition'),
    ('moca',              'MOCA',                     'cognition'),
    ('benton',            'Benton',                   'cognition'),
    ('lns',               'Letter Number Sequencing', 'cognition'),
    ('hvlt_recall',       'HVLT Recall',              'cognition'),
    ('hvlt_recognition',  'HVLT Recognition',         'cognition'),
    ('hvlt_retention',    'HVLT Retention',           'cognition'),
    ('symbol_digit',      'Symbol Digit',             'cognition'),
    ('stai_trait',        'STAI trait',               'psychiatric'),
    ('stai_state',        'STAI state',               'psychiatric'),
    ('gds',               'GDS',                      'psychiatric'),
    ('quip',              'QUIP',                     'psychiatric'),
    ('se_adl',            'Schwab England ADL',       'daily'),
    ('updrs_i',           'UPDRS I',                  'daily'),
    ('updrs_ii',          'UPDRS II',                 'motor'),
    ('updrs_iii_OFF',     'UPDRS III OFF',            'motor'),
    # The last three rows share the 'medication' colour.
    ('LEDD',              'LEDD',                     'medication'),
    ('updrs_iii_ON',      'UPDRS III ON',             'medication'),
    ('updrs_iv',          'UPDRS IV',                 'medication'),
]

order = np.array([key for key, _, _ in clinical_layout])
cl_names = [tick_label for _, tick_label, _ in clinical_layout]
labels = pd.Series([group for _, _, group in clinical_layout], index=order)

# Every analysed target must appear exactly once in the layout.
assert len(set(order)) == len(order), 'duplicate target in clinical_layout'
assert set(order) == set(targets), 'clinical_layout and targets list different variables'

# One colour per group name, taken in order from seaborn's 'deep' palette.
color_map = dict(zip(['motor','cognition','psychiatric','autonomic','daily','medication','physical activity','sleep','vital signs'],
                     sns.color_palette('deep')))

In [None]:
# Horizontal bar chart of the test R2 per clinical target, with the baseline model's
# R2 overlaid as translucent gray bars and '*' next to targets flagged in `sign`.
fig = plt.figure(figsize=(5,8))
plots.plot_context()

plot_data = scores.reset_index()
n_targets = len(order)
# Tick labels are applied positionally, so there must be exactly one per bar.
assert len(cl_names) == n_targets, 'cl_names must contain one label per entry of order'

ax = sns.barplot(y='clinical', x='r2', data=plot_data, order=order, palette=labels.map(color_map))
# Second layer on the same axes: baseline scores in translucent gray.
sns.barplot(y='clinical', x='baseline', data=plot_data, order=order, color='gray', alpha=0.3, ax=ax)
ax.set_yticklabels(cl_names)

# The first n_targets patches are the coloured model bars; give each tick label its bar's colour.
for tick_label, bar in zip(ax.get_yticklabels(), ax.patches[:n_targets]):
    tick_label.set_color(bar.get_facecolor())
ax.set_ylabel('')
ax.set_xlabel('R2')

# Lines past the first n_targets are restyled to match the gray baseline bars
# (presumably these are the baseline layer's error bars - depends on seaborn's draw order).
for line in ax.get_lines()[n_targets:]:
    line.set_color('gray')
    line.set_alpha(0.3)

# Place a '*' just right of the mean model R2 for every target marked in `sign`.
mean = scores.groupby('clinical',sort=False)['r2'].mean()[order]
for i, (key, row) in enumerate(sign.loc[order].iterrows()):
    if row['sign'] == '*':
        ax.text(mean.iloc[i]+0.06, i+0.4, "*", ha='center', va='bottom', fontsize=12)

# Legend: only the first six colour_map entries are listed.
legend_handles = [mpatches.Patch(color=color, label=label) for label, color in list(color_map.items())[:6]]
ax.legend(handles=legend_handles, bbox_to_anchor=(1,1))

plt.savefig(f'/scratch/c.c21013066/images/paper/digitalPPMI/performance_regression_clinical.png',dpi=300,bbox_inches='tight')
plt.savefig(f'/scratch/c.c21013066/images/paper/digitalPPMI/performance_regression_clinical.pdf',dpi=300,bbox_inches='tight')

# Coefficient Plot

In [None]:
#coefs plot
# Row labels of the coefficient table: digital predictors, then covariates, then the intercept.
# `predictors` is the column selection made in the configuration cell.
coef_names = np.hstack([predictors, covs, 'intercept'])

# One row per (coefficient, CV fold), one column per clinical target.
coefs = pd.DataFrame(columns=targets,
                     index=pd.MultiIndex.from_product([coef_names, np.arange(5)], names=['digital','cv']))
for t in targets:
    cfs = pd.read_csv(f'/scratch/c.c21013066/data/ppmi/analyses/predictclinical/{t}/coefs.csv',index_col=0)
    for cv in np.arange(5):
        # Values are copied positionally: assumes each fold's rows in coefs.csv are stored
        # in the same order as coef_names - TODO confirm against the script that writes the file.
        coefs.loc[(coef_names, cv), t] = cfs.loc[cfs['cv']==cv,'coef'].values

# Mean and standard deviation of each coefficient across the CV folds.
coefs_by_name = coefs.astype(float).groupby('digital',sort=False)
mean_coefs = coefs_by_name.mean().loc[coef_names]
std_coefs = coefs_by_name.std().loc[coef_names]

# '*' where the absolute mean coefficient exceeds three standard deviations across folds.
sign_coefs = (np.abs(mean_coefs) - 3*std_coefs)>0
sign_coefs = sign_coefs.replace([True,False],['*',''])

In [None]:
# Heatmap of the mean coefficients (rows: predictors/covariates/intercept,
# columns: clinical targets), annotated with the '*' markers from sign_coefs.
fig = plt.figure(figsize=(10,5))
plots.plot_context()

# The columns of mean_coefs follow `targets`, which is ordered differently from the
# bar-plot label list, so tick labels are looked up by target key rather than by position.
target_display_names = {
    'epworth': 'ESS',
    'rbd': 'RBDSQ',
    'systolic_bp_drop': 'Systolic BP Drop',
    'scopa_aut': 'SCOPA autonome',
    'semantic_fluency': 'Semantic Fluency',
    'moca': 'MOCA',
    'benton': 'Benton',
    'lns': 'Letter Number Sequencing',
    'hvlt_recall': 'HVLT Recall',
    'hvlt_recognition': 'HVLT Recognition',
    'hvlt_retention': 'HVLT Retention',
    'symbol_digit': 'Symbol Digit',
    'stai_trait': 'STAI trait',
    'stai_state': 'STAI state',
    'gds': 'GDS',
    'quip': 'QUIP',
    'se_adl': 'Schwab England ADL',
    'updrs_i': 'UPDRS I',
    'updrs_ii': 'UPDRS II',
    'updrs_iii_OFF': 'UPDRS III OFF',
    'updrs_iii_ON': 'UPDRS III ON',
    'updrs_iv': 'UPDRS IV',
    'LEDD': 'LEDD',
}
coef_xticklabels = [target_display_names[col] for col in mean_coefs.columns]

# Labels are passed to heatmap directly so that every column gets a tick and the
# number of labels always matches the number of ticks.
ax = sns.heatmap(mean_coefs,annot=sign_coefs,fmt='',cmap='coolwarm',center=0,
                 cbar_kws={'label': 'coefficient'},xticklabels=coef_xticklabels)
ax.set_ylabel('');

plt.savefig(f'/scratch/c.c21013066/images/paper/digitalPPMI/coefs_regression_clinical.png',dpi=300,bbox_inches='tight')
plt.savefig(f'/scratch/c.c21013066/images/paper/digitalPPMI/coefs_regression_clinical.pdf',dpi=300,bbox_inches='tight')