In [6]:
import pandas as pd
import numpy as np
import nibabel as nib
import os
import scipy.stats as scp
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date
import itertools
from scipy.signal import hilbert
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import scipy.signal as scs
import json
import pickle
import plotly.graph_objects as go
from tqdm.auto import tqdm
from itertools import combinations
import statsmodels.formula.api as smf
import statsmodels.api as sm
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

sns.set(context='talk', style='white', font='Arial')

# Date stamp in YYYYMMDD form.
today = date.today().strftime('%Y%m%d')

# ---- project locations ----
project_dir = '/Users/catcamacho/Library/CloudStorage/Box-Box/CCP/HBN_study/'
data_dir = project_dir + 'proc/group/parcel_timeseries/sub_ts/'
out_dir = project_dir + 'proc/clin/'
os.makedirs(out_dir,exist_ok=True)

big_data_dir = '/Users/catcamacho/Documents/bigdata/hbn_clin/'

sample_file = project_dir + 'proc/group/datasets_info/sample_gord.32k_fs_LR.pscalar.nii'
atlas_file = project_dir + 'proc/null_lL_WG33/Gordon333_SeitzmanSubcortical.32k_fs_LR.dlabel.nii'

# Both axes of the sample CIFTI, read from a single load of its header.
sample_header = nib.load(sample_file).header
ax0 = sample_header.get_axis(0)
ax1 = sample_header.get_axis(1)

TR = 0.8

# get network labels
# A parcel name is split on '_': the second piece is taken as the network label,
# or the whole name when there is no underscore.
parcel_labels = ax1.name
network_labels = np.array([
    pieces[0] if len(pieces)<2 else pieces[1]
    for pieces in (name.split('_') for name in parcel_labels)
])
network_names, network_sizes = np.unique(network_labels, return_counts=True)


def _with_sub_index(frame):
    """Relabel the index of `frame` as 'sub-<id>', name it 'sub', and return the frame."""
    frame.index = ['sub-{0}'.format(i) for i in frame.index]
    frame.index.name = 'sub'
    return frame


def _read_phenotype(csv_name, columns):
    """Read selected columns of a phenotypic CSV, indexed by 'sub-<EID>'.

    File line index 1 (the line right after the header) is skipped on read.
    """
    table = pd.read_csv(os.path.join(project_dir, 'phenotypic_data', csv_name),
                        index_col='EID', skiprows=[1])
    return _with_sub_index(table.loc[:, columns])


# load timeseries data info
subinfo = pd.read_csv(project_dir + 'proc/group/datasets/firstleveldatalabels_withpub_thresh0.8_20220412.csv', index_col=0)
mfqsr = _with_sub_index(pd.read_csv(os.path.join(out_dir, 'MFQsr_factorscores_20220629.csv'), index_col=0))
mfqpr = _with_sub_index(pd.read_csv(os.path.join(out_dir, 'MFQpr_factorscores_20220629.csv'), index_col=0))

scaredsr = _read_phenotype('9994_SCARED_SR_20210322.csv', ['SCARED_SR_SC','SCARED_SR_GD','SCARED_SR_SP'])
scaredpr = _read_phenotype('9994_SCARED_P_20210322.csv', ['SCARED_P_SC','SCARED_P_GD','SCARED_P_SP'])
raceethn = _read_phenotype('9994_PreInt_Demos_Fam_20210322.csv', ['Child_Ethnicity','Child_Race'])
income = _read_phenotype('9994_FSQ_20210322.csv', ['FSQ_04'])

clininfo = pd.read_csv(os.path.join(out_dir, 'depanx_scores_preproc_20220519.csv'),
                       low_memory=False, index_col=0).drop(['age'],axis=1)

# First round of index-aligned left merges, then drop bookkeeping columns and the
# two SCARED_*_SC columns (they are merged back in from the SCARED files below).
for extra in (clininfo, mfqpr, mfqsr):
    subinfo = subinfo.merge(extra, how='left', left_index=True, right_index=True)
subinfo = subinfo.drop(['set','sub','cond','SCARED_P_SC','SCARED_SR_SC'], axis=1)
subinfo = subinfo.drop_duplicates()
subinfo.index.name='sub'

# Second round: SCARED subscales, race/ethnicity and FSQ_04.
for extra in (scaredsr, scaredpr, raceethn, income):
    subinfo = subinfo.merge(extra, how='left', left_index=True, right_index=True)
subinfo = subinfo.drop_duplicates()

# Rows with age > 10 are 'older'; everything else (including missing age) stays 'younger'.
subinfo['age_group'] = 'younger'
subinfo.loc[(subinfo['age']>10), 'age_group'] = 'older'

# Keep rows that have a finite self- or parent-report SCARED_SC score.
has_sc_score = np.isfinite(subinfo['SCARED_SR_SC']) | np.isfinite(subinfo['SCARED_P_SC'])
subinfo = subinfo.loc[has_sc_score, :]

In [11]:
# Print the mean of the 'both' column of the global long-peaks table for each
# age group / movie combination (self-report SCARED_SC, 'lower' reference).
clin = 'SCARED_SR_SC'
ref = 'lower'

for age, mov in itertools.product(['all','older'], ['DM','TP']):
    folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age,
                          'dynamic_movie{0}_{1}'.format(mov, clin),
                          'peak_analysis_20',
                          'global_comparison_ref-{0}'.format(ref))
    results = pd.read_pickle(os.path.join(folder, 'final_peaks_data_{0}.pkl'.format(ref)))
    long_peaks = results['global']['long_peaks']
    de = long_peaks.describe()
    print(mov, clin, age, de.loc['mean','both'])

DM SCARED_SR_SC all 0.45066666666666666
TP SCARED_SR_SC all 0.0
DM SCARED_SR_SC older 0.4493333333333333
TP SCARED_SR_SC older 0.0


In [10]:
# Print the mean of the 'both' column of the global long-peaks table for each
# age group / movie combination (parent-report SCARED_SC, 'lower' reference).
clin = 'SCARED_P_SC'
ref = 'lower'

for age, mov in itertools.product(['all','older','younger'], ['DM','TP']):
    folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age,
                          'dynamic_movie{0}_{1}'.format(mov, clin),
                          'peak_analysis_20',
                          'global_comparison_ref-{0}'.format(ref))
    results = pd.read_pickle(os.path.join(folder, 'final_peaks_data_{0}.pkl'.format(ref)))
    long_peaks = results['global']['long_peaks']
    de = long_peaks.describe()
    print(mov, clin, age, de.loc['mean','both'])

DM SCARED_P_SC all 0.5253333333333333
TP SCARED_P_SC all 0.164
DM SCARED_P_SC older 0.304
TP SCARED_P_SC older 0.136
DM SCARED_P_SC younger 0.408
TP SCARED_P_SC younger 0.0


In [18]:
mov='DM'
clin = 'SCARED_P_SC'
ref = 'lower'


def _long_peak_flags(age_group):
    """Return a boolean Series: where the global long-peaks 'both' column equals 1."""
    folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age_group,
                          'dynamic_movie{0}_{1}'.format(mov, clin),
                          'peak_analysis_20', 'global_comparison_ref-{0}'.format(ref))
    peaks = pd.read_pickle(os.path.join(folder, 'final_peaks_data_{0}.pkl'.format(ref)))
    return peaks['global']['long_peaks']['both']==1


olderde = _long_peak_flags('older')
youngerde = _long_peak_flags('younger')

# Share of entries flagged in either age group that are flagged in both.
identified = olderde | youngerde
overlap = olderde & youngerde
print(overlap.mean()/identified.mean())

0.35877862595419846


## make conjunction plots

In [4]:
# parent-report
# Writes one dlabel CIFTI per movie x measure that colours each parcel by which
# samples (full / older / younger) have a finite value in their masked-rho map.
# Relies on `parcel_labels`, `out_dir` and `atlas_file` from the setup cell.
colors = {'full-only':(175/255, 175/255, 0/255, 1), # dark yellow
          'older-only':(255/255, 255/255, 0/255, 1), # medium yellow 
          'younger-only':(255/255, 255/255, 204/255, 1), # light yellow
          'full-older':(255/255, 178/255, 102/255, 1), # light orange
          'full-younger':(204/255, 104/255, 0/255, 1), # dark orange
          'all':(204/255, 0/255, 0/255, 1)} # red

# (full, older, younger) significance flags -> key into `colors`.
# NOTE(review): (False, True, True) -- older AND younger but not the full sample --
# has no entry and is therefore drawn as transparent 'none'; confirm that is intended.
conjunction_names = {(True, True, True): 'all',
                     (True, True, False): 'full-older',
                     (True, False, True): 'full-younger',
                     (False, False, True): 'younger-only',
                     (False, True, False): 'older-only',
                     (True, False, False): 'full-only'}

# Load the atlas once. The axes get their own names here: the setup cell defines
# `ax0`/`ax1` from the pscalar sample file and another cell saves pscalar images
# with them, so rebinding those globals to the atlas axes would corrupt its output.
atlas_img = nib.load(atlas_file)
atlas_data = atlas_img.get_fdata()
brain_axis = atlas_img.header.get_axis(1)


def _significant_parcels(sample, movie, clin):
    """Boolean array: parcels with a finite value in the sample's masked-rho pscalar."""
    rho_file = os.path.join(out_dir, 'agegroup_similarity_regagesxs', sample,
                            'ts_isc_movie{0}_{1}'.format(movie, clin),
                            'movie{0}_isc_{1}_AnnaKmaxminmax_maskedrho_fdr0.01127.pscalar.nii'.format(movie, clin))
    return np.squeeze(np.isfinite(nib.load(rho_file).get_fdata()))


for movie in ['TP','DM']:
    for clin in ['SCARED_P_SC','SCARED_P_GD']:

        out_folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs','conjunction_plots', 'ts_isc_movie{0}_{1}'.format(movie, clin))
        os.makedirs(out_folder, exist_ok=True)

        # load data
        full_sample = _significant_parcels('all', movie, clin)
        older_sample = _significant_parcels('older', movie, clin)
        younger_sample = _significant_parcels('younger', movie, clin)

        # make cifti label file with best model fit
        # A fresh label axis per file, so one iteration's table never leaks into the next.
        label_axis = atlas_img.header.get_axis(0)
        newmap = dict()
        newmap[0] = label_axis[0][1][0]  # keep the atlas's own entry for key 0
        for a in range(0,len(parcel_labels)):
            flags = (bool(full_sample[a]), bool(older_sample[a]), bool(younger_sample[a]))
            category = conjunction_names.get(flags)
            if category is None:
                newmap[a+1] = ('none_{0}'.format(a), (1,1,1,0))
            else:
                newmap[a+1] = ('{0}_{1}'.format(category, a), colors[category])

        label_axis.label[0] = newmap
        img = nib.cifti2.cifti2.Cifti2Image(atlas_data, (label_axis, brain_axis))
        nib.save(img, os.path.join(out_folder, 'top_model_fits_conjunction.dlabel.nii'))

## make scene plot

In [None]:
# x-axis limit used for each movie's plot (in the units of the feature table's index).
movie_durations = {'DM': 600, 'TP': 200}
feats = ['positive','negative','brightness','loudness','motion']
coding_dir = os.path.join(project_dir, 'HBN_video_coding')

for mov in ['DM','TP']:
    dur = movie_durations[mov]
    scene_timing = pd.read_csv(os.path.join(coding_dir, 'Videos','{0}_scenes.csv'.format(mov)), index_col=0)
    dm_features = pd.read_csv(os.path.join(coding_dir, 'processing','v1','summary',
                                           '{0}_summary_codes10Hz_intuitivenames.csv'.format(mov)), index_col=0)

    # Rescale each plotted feature to the [0, 1] range.
    dm_features.loc[:, feats] = MinMaxScaler().fit_transform(dm_features.loc[:,feats])

    # One stacked panel per feature; dashed vertical lines at each scene_timing['start'].
    fig, ax = plt.subplots(5,1,sharex=True,figsize=(16,8))
    for panel, f in zip(ax, feats):
        panel.plot(dm_features.index, dm_features[f], color='k')
        panel.vlines(scene_timing['start'], -0.1, 1.1, color='#7203E0', linestyles='--')
        panel.set_xlim(0,dur)
        panel.set_ylim(0,1)
        panel.set_xlabel('Time (s)')
        panel.set_ylabel('Intensity')
    sns.despine()
    plt.tight_layout()
    plt.savefig(os.path.join(coding_dir, 'processing','v1','summary',
                             '{0}_summary_codes_scenes.svg'.format(mov)))
    plt.show()

## Compute and plot parent- and self-report correlation

In [None]:
# NOTE(review): rows are de-duplicated WITHOUT first dropping 'meanFD'/'movie' (other
# cells drop them first). If subinfo holds one row per participant per movie, the same
# participant can appear more than once here -- confirm this is intended.
data = subinfo.drop_duplicates()

# Pairwise (pandas default) correlations between age and the symptom measures.
corr_measures = ['age',
                 'SCARED_SR_SC','SCARED_SR_GD','SCARED_SR_SP',
                 'SCARED_P_SC','SCARED_P_GD','SCARED_P_SP',
                 'SWAN_Avg','MFQ_SR_Total','MFQ_P_Total']
corr = data.loc[:, corr_measures].corr()
sns.heatmap(corr, center=0, vmin=-0.5, vmax=0.5)
corr

In [None]:
# Spearman correlation of each measure with age (NaNs omitted), plus a scatter plot
# with a fitted line. Uses the `data` frame left by a previously run cell.
age_corr_measures = ['SCARED_SR_SC','SCARED_SR_GD','SCARED_SR_SP',
                     'SCARED_P_SC','SCARED_P_GD','SCARED_P_SP',
                     'SWAN_Avg','MFQ_SR_Total','MFQ_P_Total']
line_style = {'lw':4, 'color':'k'}
point_style = {'alpha':0.5, 'color':'lightgray'}

for c in age_corr_measures:
    rho, pval = scp.spearmanr(data['age'], data[c], nan_policy='omit')
    print('{0} and age:'.format(c), round(rho,2), round(pval,3))
    sns.lmplot(x='age',y=c, data=data, ci=None, line_kws=line_style,
               scatter_kws=point_style)
    plt.show()
    plt.close()

In [None]:
# Drop the 'meanFD' and 'movie' columns before removing duplicate rows.
data = subinfo.drop(['meanFD','movie'], axis=1).drop_duplicates()

# Parent- vs self-report SCARED_SC: Spearman rho and p (NaNs omitted), then scatter + fit.
rho, pval = scp.spearmanr(data['SCARED_P_SC'], data['SCARED_SR_SC'], nan_policy='omit')
print(round(rho,2), round(pval,3))
sns.lmplot(x='SCARED_P_SC', y='SCARED_SR_SC', data=data, ci=None,
           line_kws={'lw':4, 'color':'k'},
           scatter_kws={'alpha':0.5, 'color':'lightgray'})

In [None]:
# NOTE(review): unlike the SCARED_SC cell, 'meanFD' and 'movie' are NOT dropped before
# de-duplicating here, so rows differing only in those columns are all kept -- confirm.
data = subinfo.drop_duplicates()

# Parent- vs self-report SCARED_GD: Spearman rho and p (NaNs omitted), then scatter + fit.
rho, pval = scp.spearmanr(data['SCARED_P_GD'], data['SCARED_SR_GD'], nan_policy='omit')
print(round(rho,2), round(pval,3))
sns.lmplot(x='SCARED_P_GD', y='SCARED_SR_GD', data=data, ci=None,
           line_kws={'lw':4, 'color':'k'},
           scatter_kws={'alpha':0.5, 'color':'lightgray'})

In [None]:
# Parent-report SCARED_GD vs SCARED_SC, using `data` from a previously run cell.
rho, pval = scp.spearmanr(data['SCARED_P_GD'], data['SCARED_P_SC'], nan_policy='omit')
print(round(rho,2), round(pval,3))
sns.lmplot(x='SCARED_P_GD', y='SCARED_P_SC', data=data, ci=None,
           line_kws={'lw':4, 'color':'k'},
           scatter_kws={'alpha':0.5, 'color':'lightgray'})

In [None]:
# Self-report SCARED_SC vs SCARED_GD, using `data` from a previously run cell.
rho, pval = scp.spearmanr(data['SCARED_SR_SC'], data['SCARED_SR_GD'], nan_policy='omit')
print(round(rho,2), round(pval,3))
sns.lmplot(x='SCARED_SR_SC', y='SCARED_SR_GD', data=data, ci=None,
           line_kws={'lw':4, 'color':'k'},
           scatter_kws={'alpha':0.5, 'color':'lightgray'})

In [None]:
# Parent- vs self-report MFQ totals, using `data` from a previously run cell.
rho, pval = scp.spearmanr(data['MFQ_P_Total'], data['MFQ_SR_Total'], nan_policy='omit')
print(round(rho,2), round(pval,3))
sns.lmplot(x='MFQ_P_Total', y='MFQ_SR_Total', data=data, ci=None,
           line_kws={'lw':4, 'color':'k'},
           scatter_kws={'alpha':0.5, 'color':'lightgray'})

## Make age by symptom levels figures

In [None]:
# Drop the 'meanFD' and 'movie' columns, remove duplicate rows, sort oldest first.
data = (subinfo.drop(['meanFD','movie'], axis=1)
               .drop_duplicates()
               .sort_values('age', ascending=False))

paper_dir = os.path.join(project_dir,'__papers','complex_emoproc_socanx')

# (score column, figure title, output file) for each stacked histogram.
histogram_specs = [('SCARED_P_SC', 'Parent-reported Social Anxiety', 'prsocanx_age.png'),
                   ('SCARED_SR_SC', 'Self-reported Social Anxiety', 'srsocanx_age.png')]

for score, title, png_name in histogram_specs:
    sns.displot(x=score, data=data, bins=20, hue='age_group', multiple='stack')
    plt.xlabel('Raw Score')
    plt.title(title, {'fontsize': 22})
    plt.tight_layout()
    plt.savefig(os.path.join(paper_dir, png_name), dpi=300)
    plt.show()
    plt.close()

In [None]:
# Scatter plots (with a fitted line) of social anxiety against age and puberty score.
# Uses the `data` frame built in the preceding distribution-figure cell.
#
# Fix: the two age figures were previously written to each other's file names
# (parent-report -> 'srsocanx_age_corr.png', self-report -> 'prsocanx_age_corr.png').
# They now follow the pr*/sr* naming used for the histogram figures.
paper_dir = os.path.join(project_dir,'__papers','complex_emoproc_socanx')

# (x column, y column, title, title font size, x label, output file or None)
scatter_specs = [
    ('age', 'SCARED_P_SC', 'Parent-Reported Social Anxiety', 22, 'Age', 'prsocanx_age_corr.png'),
    ('age', 'SCARED_SR_SC', 'Self-Reported Social Anxiety', 22, 'Age', 'srsocanx_age_corr.png'),
    ('PPS_score', 'SCARED_P_SC', 'Parent-Reported Social Anxiety', 24, 'Puberty Score', None),
    ('PPS_score', 'SCARED_SR_SC', 'Self-Reported Social Anxiety', 24, 'Puberty Score', None),
]

for x_col, y_col, title, title_size, x_label, png_name in scatter_specs:
    sns.lmplot(x=x_col, y=y_col, data=data, ci=None, line_kws={'lw':4, 'color':'k'},
               scatter_kws={'alpha':0.5, 'color':'lightgray'})
    plt.title(title, {'fontsize': title_size})
    plt.ylabel('Raw Score')
    plt.xlabel(x_label)
    plt.tight_layout()
    # The puberty-score figures are only displayed, not written to disk.
    if png_name is not None:
        plt.savefig(os.path.join(paper_dir, png_name), dpi=300)
    plt.show()
    plt.close()

In [None]:
def _residualized_info(site, movie, clin, other, age):
    """Return the `subinfo` rows for one site/movie with `clin` replaced by OLS residuals.

    Rows must have a finite `clin` score; for any `age` other than 'all' they must
    also have that `age_group`. Missing values in age, female, meanFD, `clin` and
    `other` are filled with IterativeImputer(random_state=42) before the fit.
    The regression is `clin ~ female + meanFD + other`, with `age` added as a
    covariate only for the 'all' sample.
    """
    keep = (subinfo['site']==site) & (subinfo['movie']==movie) & np.isfinite(subinfo[clin])
    covariates = 'female + meanFD + {0}'.format(other)
    if age=='all':
        covariates = 'age + ' + covariates
    else:
        keep = (subinfo['age_group']==age) & keep
    # Explicit copy: the assignments below must act on an independent frame rather
    # than on an (ambiguous) slice of `subinfo`.
    info = subinfo.loc[keep, :].copy()
    cols = ['age', 'female', 'meanFD', clin, other]
    info.loc[:, cols] = IterativeImputer(random_state=42).fit_transform(info.loc[:, cols])
    res = smf.ols('{0} ~ {1}'.format(clin, covariates), data=info).fit()
    info[clin] = res.resid
    return info


def _mean_sd_maps(ts):
    """Collapse a 3-D array to two (1, n) maps over its middle axis.

    mean map: mean over axis 2, then mean over axis 0.
    sd map:   mean over axis 0, then standard deviation over the last axis.
    (assumes axes are time x parcel x participant -- TODO confirm)
    """
    mean_map = np.expand_dims(np.mean(np.mean(ts, axis=2), axis=0), axis=0)
    sd_map = np.expand_dims(np.std(np.mean(ts, axis=0), axis=1), axis=0)
    return mean_map, sd_map


# Axes for the output pscalar files are read straight from the sample file, so this
# cell does not depend on whatever the globals `ax0`/`ax1` currently point to.
pscalar_header = nib.load(sample_file).header
scalar_axis = pscalar_header.get_axis(0)
parcel_axis = pscalar_header.get_axis(1)

for age in ['all','older','younger']:
    if age=='younger':
        clins = ['SCARED_P_SC']
    else:
        clins = ['SCARED_P_SC', 'SCARED_SR_SC']
    for clin in clins:
        for movie in ['DM', 'TP']:
            print(age, clin, movie)
            # covariate: the MFQ total from the same reporter as `clin`
            if '_SR_' in clin:
                other = 'MFQ_SR_Total'
            elif '_P_' in clin:
                other = 'MFQ_P_Total'

            out_folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age, 
                                      'dynamic_movie{0}_{1}'.format(movie, clin), 'peak_analysis_20')
            # reference direction and ISC model name differ between the SC and GD subscales
            if '_SC' in clin:
                peak_ref, isc_model = 'lower', 'AnnaKmaxminmax'
            elif '_GD' in clin:
                peak_ref, isc_model = 'upper', 'AnnaKmin'
            both_data = pd.read_pickle(os.path.join(out_folder, 'global_comparison_ref-{0}'.format(peak_ref),
                                                    'final_peaks_data_{0}.pkl'.format(peak_ref)))
            sig_parcs_file = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age, 
                                          'ts_isc_movie{0}_{1}'.format(movie, clin), 
                                          'movie{0}_isc_{1}_{2}_maskedrho_fdr0.01127.pscalar.nii'.format(movie, clin, isc_model))
            sigscenes = both_data['global']['long_peaks']['both']
            scene_mask = (sigscenes==1).to_numpy()
            disc_ts = np.load(os.path.join(out_folder, 'compiled_timeseries_data_rubic_movie{0}.npy'.format(movie)))
            print(disc_ts.shape)
            rep_ts = np.load(os.path.join(out_folder, 'compiled_timeseries_data_cbic_movie{0}.npy'.format(movie)))
            print(rep_ts.shape)

            # pull sig parcels
            sig_parcs = np.squeeze(np.isfinite(nib.load(sig_parcs_file).get_fdata()))

            # pull sample info (discovery = 'rubic' site, replication = 'cbic' site)
            disc_info = _residualized_info('rubic', movie, clin, other, age)
            rep_info = _residualized_info('cbic', movie, clin, other, age)

            # separate upper and lower 20% scoring children
            disc_top = (disc_info[clin]>=np.percentile(disc_info[clin], 80)).to_numpy()
            disc_bottom = (disc_info[clin]<=np.percentile(disc_info[clin], 20)).to_numpy()
            rep_top = (rep_info[clin]>=np.percentile(rep_info[clin], 80)).to_numpy()
            rep_bottom = (rep_info[clin]<=np.percentile(rep_info[clin], 20)).to_numpy()

            # select the group along the last axis, then the flagged entries along the first
            disc_top_ts = disc_ts[:,:,disc_top][scene_mask,:,:]
            disc_bottom_ts = disc_ts[:,:,disc_bottom][scene_mask,:,:]
            rep_top_ts = rep_ts[:,:,rep_top][scene_mask,:,:]
            rep_bottom_ts = rep_ts[:,:,rep_bottom][scene_mask,:,:]

            # make cifti with mean and SD activation (and differences) for highest and lowest scoring children
            mean_disc_top_ts, sd_disc_top_ts = _mean_sd_maps(disc_top_ts)
            mean_disc_bottom_ts, sd_disc_bottom_ts = _mean_sd_maps(disc_bottom_ts)
            mean_rep_top_ts, sd_rep_top_ts = _mean_sd_maps(rep_top_ts)
            mean_rep_bottom_ts, sd_rep_bottom_ts = _mean_sd_maps(rep_bottom_ts)

            # unweighted average of the discovery and replication maps
            both_mean_top_ts = (mean_disc_top_ts + mean_rep_top_ts)/2
            both_sd_top_ts = (sd_disc_top_ts + sd_rep_top_ts)/2
            both_mean_bottom_ts = (mean_disc_bottom_ts + mean_rep_bottom_ts)/2
            both_sd_bottom_ts = (sd_disc_bottom_ts + sd_rep_bottom_ts)/2

            diff_mean = both_mean_top_ts-both_mean_bottom_ts
            diff_sd = both_sd_top_ts-both_sd_bottom_ts

            # (output file stem, map). Named `activation_maps` rather than `data`,
            # because other cells use `data` for a DataFrame.
            activation_maps = [
                ('activation_mean_disc_top', mean_disc_top_ts),
                ('activation_sd_disc_top', sd_disc_top_ts),
                ('activation_mean_disc_bottom', mean_disc_bottom_ts),
                ('activation_sd_disc_bottom', sd_disc_bottom_ts),
                ('activation_mean_rep_top', mean_rep_top_ts),
                ('activation_sd_rep_top', sd_rep_top_ts),
                ('activation_mean_rep_bottom', mean_rep_bottom_ts),
                ('activation_sd_rep_bottom', sd_rep_bottom_ts),
                ('activation_both_mean_top', both_mean_top_ts),
                ('activation_both_sd_top', both_sd_top_ts),
                ('activation_both_mean_bottom', both_mean_bottom_ts),
                ('activation_both_sd_bottom', both_sd_bottom_ts),
                ('activation_top_min_bottom_mean', diff_mean),
                ('activation_top_min_bottom_sd', diff_sd),
            ]
            for label, d in activation_maps:
                # blank out parcels without a finite value in the masked-rho map
                d[:,sig_parcs==0] = np.nan
                img = nib.cifti2.cifti2.Cifti2Image(d, (scalar_axis, parcel_axis))
                nib.save(img, os.path.join(out_folder, label + '.pscalar.nii'))

In [None]:
def _sig_parcels(age, movie, clin):
    """Boolean array: parcels with a finite value in the masked-rho pscalar for one movie."""
    rho_file = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age, 
                            'ts_isc_movie{0}_{1}'.format(movie, clin), 
                            'movie{0}_isc_{1}_AnnaKmaxminmax_maskedrho_fdr0.01127.pscalar.nii'.format(movie, clin))
    return np.squeeze(np.isfinite(nib.load(rho_file).get_fdata()))


# Overlap of significant parcels between the two movies (DM vs TP) per age group and
# measure. The printed labels used to read "Disc"/"Rep", but the masks come from the
# movieTP and movieDM files respectively, so they are now labelled by movie.
for age in ['all','older','younger']:
    if age=='younger':
        clins = ['SCARED_P_SC']
    else:
        clins = ['SCARED_P_SC', 'SCARED_SR_SC']
    for clin in clins:
        print(age, clin)
        sig_dm_parcs = _sig_parcels(age, 'DM', clin)
        sig_tp_parcs = _sig_parcels(age, 'TP', clin)
        
        overlap = (sig_dm_parcs & sig_tp_parcs)
        overall = (sig_dm_parcs | sig_tp_parcs)
        
        print('TP N sig parcels: {0}'.format(sig_tp_parcs.sum()))
        print('DM N sig parcels: {0}'.format(sig_dm_parcs.sum()))
        print('Shared parcels: {0}'.format(overlap.sum()))
        # Jaccard-style ratio: shared / significant in either movie; undefined (nan)
        # when neither movie has a significant parcel.
        n_either = overall.sum()
        pct_overlap = round(overlap.sum()/n_either, 3) if n_either > 0 else np.nan
        print('percent overlap (out of sig): {0}'.format(pct_overlap))

## Make effect size plots

In [None]:
from statsmodels.stats.multitest import multipletests

# For each age group x movie, and for the self-report (SR) then parent-report (P)
# version of the subscale: load the peak-analysis pickle, FDR-correct (Benjamini-
# Hochberg) the per-feature p-values, and save a bar plot of the t-statistics of the
# features with q < 0.05 next to the pickle.
for age in ['all','older','younger']:
    for clin in ['SC']:
        for mov in ['DM','TP']:
            print(age, clin, mov)
            # find ref
            if clin=='SC':
                ref='lower'
            elif clin=='GD' or clin=='SP':
                ref='upper'

            for reporter in ['SR','P']:
                # load data and perform FDR correction
                folder = os.path.join(out_dir, 'agegroup_similarity_regagesxs', age,
                                      'dynamic_movie{0}_SCARED_{1}_{2}'.format(mov, reporter, clin),
                                      'peak_analysis_20','global_comparison_ref-{0}'.format(ref))
                peak_file = os.path.join(folder, 'final_peaks_data_{0}.pkl'.format(ref))
                if not os.path.exists(peak_file):
                    continue
                peak = pd.read_pickle(peak_file)
                if 'peak_quant_analysis' not in peak['global']:
                    continue

                peak_stats = pd.DataFrame.from_dict(peak['global']['peak_quant_analysis']['Stats']).T.sort_values('pval')
                _, q, _, _ = multipletests(peak_stats['pval'], 0.05, method='fdr_bh')
                # positional mask: q is returned in the same order as the input p-values
                peak_stats = peak_stats.loc[q<0.05, :]

                # Nothing survived correction: skip instead of asking pandas to draw
                # a bar chart from an empty table (expected to raise).
                if peak_stats.empty:
                    print('no features with q<0.05 for SCARED_{0}_{1}; no plot written'.format(reporter, clin))
                    continue

                edge_colors = ['k']*len(peak_stats.index)

                # figure width grows with the number of bars
                plt.figure(figsize=(1.5 + 0.5*len(edge_colors),5))
                peak_stats['tstat'].plot(kind='bar', color='gray', 
                                         edgecolor=edge_colors, ylim=(-9,9))
                plt.axhline(0, color='k')
                plt.ylabel('T-statistic')
                plt.xticks(rotation=45, ha='right')
                sns.despine(bottom=True)
                plt.tight_layout()
                plt.savefig(os.path.join(folder, 'feature_analysis.svg'))
                plt.show()
                plt.close()
                del peak, peak_stats

## make feature correlation plots

In [None]:
from emocodes.analysis import vif_collinear
sns.set(context='talk',style='white')

# Columns removed before each VIF plot: first the specific emotions (leaving the
# general ones), then the general emotions (leaving the specific ones).
vif_drop_sets = [['Anger','Happy','Fear','Sad','Excited'],
                 ['Negative','Positive']]
# Horizontal reference lines drawn on every VIF plot: (VIF value, colour).
vif_guides = [(2, 'green'), (5, 'orange'), (10, 'red')]

for movie in ['DM','TP']:    
    # NOTE(review): this path has no 'v1' component, unlike the other cells that read
    # from HBN_video_coding/processing/v1/summary -- confirm it is the intended file.
    ratings_file = os.path.join(project_dir, 'HBN_video_coding','processing','summary',
                                '{0}_summary_codes_intuitivenames.csv'.format(movie))
    # the last two columns of the file are left out
    ratings = pd.read_csv(ratings_file, index_col=0).iloc[:,:-2]

    # Spearman correlation heatmap of all remaining features
    corr = ratings.corr(method='spearman')
    plt.figure(figsize=(12,10))
    sns.heatmap(corr, center=0, vmin=-1, vmax=1)
    plt.show()
    plt.close()
    
    for dropped in vif_drop_sets:
        vif = vif_collinear(ratings.drop(dropped, axis=1))
        bars = vif.plot(kind='bar', figsize=(12,6))
        for level, shade in vif_guides:
            bars.axhline(level, color=shade)
        bars.set_xticklabels(bars.get_xticklabels(), rotation=30, ha='right')
        sns.despine()
        plt.tight_layout()
        plt.show()
        plt.close()

In [None]:
sns.set(context='talk',style='white')

summary_dir = os.path.join(project_dir, 'HBN_video_coding','processing','v1','summary')

# One stacked time-course figure per movie, each feature rescaled to [0, 1].
for movie in ['DM','TP']:    
    ratings_file = os.path.join(summary_dir, '{0}_summary_codes_intuitivenames.csv'.format(movie))
    # leave out the last two columns of the file, then the 'Closeup' column
    ratings = pd.read_csv(ratings_file, index_col=0).iloc[:,:-2]
    ratings = ratings.drop('Closeup', axis=1)
    ratings.loc[:,:] = MinMaxScaler().fit_transform(ratings.to_numpy())

    ratings.plot(subplots=True, figsize=(14,16),
                 xlim=(0, ratings.index[-1]), color='darkgray')
    sns.despine()
    plt.savefig(os.path.join(summary_dir, '{0}_summary_codes_intuitivenames_plot.png'.format(movie)),
                dpi=300)
    plt.show()
    plt.close()