## 9) Summarize results 

- In this script we summarize the results of the analyses and generate some .csv files with the main outcomes
- Here we show the data that are presented in the main text of the manuscript

Gordillo, da Cruz, Moreno, Garobbio, Herzog

In [1]:
import os
import numpy as np
import pandas as pd
import itertools
import pickle
import matplotlib.pyplot as plt

In [2]:
main_dir = os.getcwd()
np.random.seed(234)

In [3]:
# Demographical data
demog_data = pd.read_csv(os.path.join(main_dir, 'data', 'age_bins_values.csv'), index_col=0)
print('Number of younger participants = ', len(np.where(demog_data['Group'] == 1)[0]))
print('Number of females = ', len(np.where(demog_data.iloc[np.where(demog_data['Group'] == 1)[0]]['Gender'] == 1)[0]))
print('Mean age = ', np.round(np.mean(demog_data.iloc[np.where(demog_data['Group'] == 1)[0]]['Age']), 2))
print('Std age = ', np.round(np.std(demog_data.iloc[np.where(demog_data['Group'] == 1)[0]]['Age']), 2))
print('Older participants')
print('Number of older participants = ', len(np.where(demog_data['Group'] == 2)[0]))
print('Number of females = ', len(np.where(demog_data.iloc[np.where(demog_data['Group'] == 2)[0]]['Gender'] == 1)[0]))
print('Mean age = ', np.round(np.mean(demog_data.iloc[np.where(demog_data['Group'] == 2)[0]]['Age']), 2))
print('Std age = ', np.round(np.std(demog_data.iloc[np.where(demog_data['Group'] == 2)[0]]['Age']), 2))

Number of younger participants =  138
Number of females =  42
Mean age =  25.43
Std age =  3.39
Older participants
Number of older participants =  63
Number of females =  31
Mean age =  67.66
Std age =  4.79


In [4]:
# Directories of results
data_dir = os.path.join(main_dir, 'data')
results_dir = os.path.join(main_dir, 'results')
results_1_dir = os.path.join(results_dir, '1_correlations_eeg_beh_results')
results_2_dir = os.path.join(results_dir, '2_regression_results')
results_3_dir = os.path.join(results_dir, '3_group_comparison_results')
results_4_dir = os.path.join(results_dir, '4_correlations_groups_results')
results_5_dir = os.path.join(results_dir, '5_distancecorrelation_results')
results_6_dir = os.path.join(results_dir, '6_correlations_references_results')
results_7_dir = os.path.join(results_dir, '7_icc_references_results')
results_8_dir = os.path.join(results_dir, '8_pca_results')

summary_results_dir = os.path.join(results_dir, 'summary_results')

os.chdir(main_dir)

# behavior variables

beh_vars = ["Cvlt_attention_span", "Cvlt_delayed_memory", "Pts-2_subtest_3",
            "Rwt_animal_categories", "Rwt_s_words", "Tap_alertness",
            "Tap_simon_congruent", "Tap_simon_incongruent", "Tap_working_memory",
            "Tmt-A", "Tmt-B", "Vocabulary_test"]

### Results section: Correlations between EEG features and cognitive variables

In [5]:
# Read files in the correct folder
files_1 = os.listdir(results_1_dir)

# Names of the columns for the summary file
results_1_cols = ['N significant', 'min corr', 'max corr', 
                  'within eeg 25', 'within eeg 50', 'within eeg 75',
                  'multivar dc eeg 25', 'multivar dc eeg 50', 'multivar dc eeg 75','% significant multivar dc']

# Summarize results for:
idgroup = 'y'

sp_data = 'spearman_' + idgroup
dc_data = 'distcorr_' + idgroup

data_mask_sp = pd.read_csv(os.path.join(results_1_dir,'1_mask_' + sp_data + '.csv'), index_col=0)
data_mask_dc = pd.read_csv(os.path.join(results_1_dir,'1_mask_' + dc_data + '.csv'), index_col=0)
spearman_max = pd.read_csv(os.path.join(results_1_dir,'1_maxcorrvals_' + sp_data +'.csv'), index_col=0)
distcorr_max = pd.read_csv(os.path.join(results_1_dir,'1_maxcorrvals_' + dc_data +'.csv'), index_col=0)
multivardc = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_'+ idgroup +'.csv'), index_col=0)
multivardc_pvals = pd.read_csv(os.path.join(results_5_dir,'5_dc_pval_'+ idgroup +'.csv'), index_col=0)

results_sp = np.zeros((len(beh_vars), len(results_1_cols)))
results_dc = np.zeros((len(beh_vars), len(results_1_cols)))

for k in range(len(beh_vars)):
    
    task = beh_vars[k]
    
    vec_mask_sp = data_mask_sp[task].loc[data_mask_sp[task]!='NS']
    n_feats_sp = len(vec_mask_sp)
    
    vec_mask_dc = data_mask_dc[task].loc[data_mask_dc[task]!='NS']
    n_feats_dc = len(vec_mask_dc)
    
    if n_feats_sp > 1:
        
        str_corr_sp = list(filter(lambda x: '1_correlations_eeg_' + task + '_' + sp_data in x, files_1))
        data_results = pd.read_csv(os.path.join(results_1_dir, str_corr_sp[0]), index_col=0)      
        min_sp = np.min(np.abs(np.diag(data_results.values)))
        max_sp = np.max(np.abs(np.diag(data_results.values)))
        sp_within = np.percentile(np.abs(data_results.values[np.triu_indices(n_feats_sp,1)]), (25, 50, 75))
        
        # find multivariate distance correlation values between pairs of EEG features 
        ix = vec_mask_sp.index
        feats = list(itertools.combinations(ix, 2))
        feats = list(set(feats))
        multivardc_vals = [multivardc[feats[i][1]][feats[i][0]] for i in range(len(feats))]
        # get the sqrt of multivariate dc since it approximates the population squared distance correlation      
        multivardc_within = np.percentile(np.sqrt(np.abs(multivardc_vals)), (25, 50, 75))
        
        multivardc_p = [multivardc_pvals[feats[i][1]][feats[i][0]] for i in range(len(feats))]
        prop_multi_sig = len(np.where(np.array(multivardc_p) < 0.05)[0])/len(multivardc_p)
        task_results = np.hstack([n_feats_sp, min_sp, max_sp, sp_within, multivardc_within, prop_multi_sig])
        
    elif n_feats_sp == 1:
        
        corr_val = np.abs(spearman_max[task][vec_mask_sp.index[0]])
        task_results = np.hstack([n_feats_sp, 0, corr_val, np.zeros(7)])
    
    elif n_feats_sp == 0:
        
        task_results = np.zeros(10)
        
    results_sp[k, :] = task_results

    # for distance correlations
    task_results = []
    
    if n_feats_dc > 1:
        
        str_corr_dc = list(filter(lambda x: '1_correlations_eeg_' + task + '_' + dc_data in x, files_1))
        data_results = pd.read_csv(os.path.join(results_1_dir, str_corr_dc[0]), index_col=0)      
        min_dc = np.min(np.abs(np.diag(data_results.values)))
        max_dc = np.max(np.abs(np.diag(data_results.values)))
        dc_within = np.percentile(np.abs(data_results.values[np.triu_indices(n_feats_dc,1)]), (25, 50, 75))
        
        # find multivariate distance correlation values between pairs of EEG features 
        ix = vec_mask_dc.index
        feats = list(itertools.combinations(ix, 2))
        feats = list(set(feats))
        multivardc_vals = [multivardc[feats[i][1]][feats[i][0]] for i in range(len(feats))]
        # get the sqrt of multivariate dc since it approximates the population squared distance correlation     
        multivardc_within = np.percentile(np.sqrt(np.abs(multivardc_vals)), (25, 50, 75))
        multivardc_p = [multivardc_pvals[feats[i][1]][feats[i][0]] for i in range(len(feats))]
        prop_multi_sig = len(np.where(np.array(multivardc_p) < 0.05)[0])/len(multivardc_p)
        task_results = np.hstack([n_feats_dc, min_dc, max_dc, dc_within, multivardc_within, prop_multi_sig])
        
    elif n_feats_dc == 1:
        
        corr_val = np.abs(distcorr_max[task][vec_mask_dc.index[0]])
        task_results = np.hstack([n_feats_dc, 0, corr_val, np.zeros(7)])
    
    elif n_feats_dc == 0:
        
        task_results = np.zeros(10)
    
    results_dc[k, :] = task_results

# save data
# spearman 
summary_spearman = pd.DataFrame(data=results_sp, index=beh_vars,
                                columns=results_1_cols)
summary_spearman.to_csv(os.path.join(summary_results_dir, '1_summary_spearman_' + idgroup + '.csv'))     

# distcorr
summary_distcorr = pd.DataFrame(data=results_dc, index=beh_vars,
                                columns=results_1_cols)
summary_distcorr.to_csv(os.path.join(summary_results_dir, '1_summary_distcorr_' + idgroup + '.csv'))  

print('Spearman correlation analysis')
print('N significant analyses using spearman correlation',sum(summary_spearman['N significant']))
summary_spearman

Spearman correlation analysis
N significant analyses using spearman correlation 109.0


Unnamed: 0,N significant,min corr,max corr,within eeg 25,within eeg 50,within eeg 75,multivar dc eeg 25,multivar dc eeg 50,multivar dc eeg 75,% significant multivar dc
Cvlt_attention_span,19.0,0.244167,0.317438,0.379561,0.503581,0.699757,0.445621,0.592408,0.728856,0.994152
Cvlt_delayed_memory,7.0,0.271669,0.296181,0.088752,0.142166,0.28174,0.118281,0.228204,0.490347,0.714286
Pts-2_subtest_3,2.0,0.293511,0.326514,0.052966,0.052966,0.052966,0.139398,0.139398,0.139398,1.0
Rwt_animal_categories,7.0,0.198816,0.310946,0.060505,0.133215,0.213677,0.081108,0.111019,0.206998,0.47619
Rwt_s_words,8.0,0.260827,0.321291,0.078317,0.669298,0.671439,0.099458,0.644703,0.663306,0.535714
Tap_alertness,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Tap_simon_congruent,7.0,0.278036,0.325477,0.132823,0.238867,0.33907,0.105166,0.201492,0.393799,0.619048
Tap_simon_incongruent,6.0,0.263934,0.367323,0.311264,0.381934,0.591086,0.182359,0.516523,0.620004,0.933333
Tap_working_memory,18.0,0.249328,0.329861,0.118056,0.237552,0.349915,0.148305,0.225993,0.339585,0.830065
Tmt-A,19.0,0.257787,0.326931,0.086024,0.17552,0.336486,0.111224,0.185139,0.369926,0.649123


In [6]:
print('Distance correlation analysis')
#print(summary_distcorr)
print('N significant analyses using distance correlation', sum(summary_distcorr['N significant']))

Distance correlation analysis
N significant analyses using distance correlation 121.0


In [7]:
# Examples 
task = 'Rwt_animal_categories'
eeg_feature_1 = 'microstate B'
eeg_feature_2 = 'clust coef e-wpli alpha'
corr_type = 'distcorr_'
idgroup = 'y'

# Find Spearman correlations
str_corr_ = list(filter(lambda x: '1_correlations_eeg_' + task + '_' + corr_type + idgroup in x, files_1))
data_results = pd.read_csv(os.path.join(results_1_dir, str_corr_[0]), index_col=0)   

index_1 = list(data_results).index(eeg_feature_1)
index_2 = list(data_results).index(eeg_feature_2)

# Find multivariate distance correlations between two EEG features
multivardc = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_'+ idgroup +'.csv'), index_col=0)
dc_ = np.sqrt(np.abs(multivardc[eeg_feature_2][eeg_feature_1]))
if dc_ == 0:
    dc_ = np.sqrt(np.abs(multivardc[eeg_feature_1][eeg_feature_2]))
    
print(eeg_feature_1, 'has a correlation of: ', np.round(data_results.iloc[index_1, index_1], 2), 'to ', task,)
print(eeg_feature_2, 'has a correlation of: ', np.round(data_results.iloc[index_2, index_2], 2), 'to ', task,)
print(eeg_feature_1, ' and ', eeg_feature_2, 'have a correlation of: ', np.round(data_results.iloc[index_1, index_2], 2))
print(eeg_feature_1, ' and ', eeg_feature_2, 'have a multivar correlation of: ', np.round(dc_,2))

microstate B has a correlation of:  0.27 to  Rwt_animal_categories
clust coef e-wpli alpha has a correlation of:  0.28 to  Rwt_animal_categories
microstate B  and  clust coef e-wpli alpha have a correlation of:  0.41
microstate B  and  clust coef e-wpli alpha have a multivar correlation of:  0.35


In [8]:
# Dimensionality reduction

idgroup = 'y'
files_summ = os.listdir(results_8_dir)

# PCA results
var_exp_sp = []
var_exp_sp_comp = []

var_exp_dc = []
var_exp_dc_comp = []

beh_vars = ["Cvlt_attention_span", "Cvlt_delayed_memory", "Pts-2_subtest_3",
            "Rwt_animal_categories", "Rwt_s_words", "Tap_alertness",
            "Tap_simon_congruent", "Tap_simon_incongruent", "Tap_working_memory",
            "Tmt-A", "Tmt-B", "Vocabulary_test"]

# Display results for
print_for = 'Rwt_s_words'
corr_type = 'distcorr'

for k in range(len(beh_vars)):
    
    task = beh_vars[k]
    
    # For spearman rho
    str_corr_sp = list(filter(lambda x: '8_' + task + '_' + idgroup + '_pca_results_sp' in x, files_summ))
    
    # For distance correlation
    str_corr_dc = list(filter(lambda x: '8_' + task + '_' + idgroup + '_pca_results_dc' in x, files_summ))
    
    if len(str_corr_sp) > 0:
        
        data_results = pd.read_csv(os.path.join(results_8_dir, str_corr_sp[0]), index_col=0)    
        pc1_ev = np.round(data_results['explained variance'].iloc[0]*100, 3)
        pc1_3_ev = np.round(data_results['explained variance'].iloc[0:3].sum()*100, 3)
        
        if corr_type == 'spearman' and print_for == task:
        
            print('Considering variables showing a significant Spearman correlation')
            print(task, 'correlated with', data_results.shape[0], 'EEG features')
            print('1st PC in ', task, 'explains: ', pc1_ev, '%')
            print('First three PCs in ', task, 'explain', pc1_3_ev,'%')
            print()
        
        var_exp_sp.append(pc1_ev)
        var_exp_sp_comp.append(pc1_3_ev)
    
    if len(str_corr_dc) > 0:
        
        data_results = pd.read_csv(os.path.join(results_8_dir, str_corr_dc[0]), index_col=0)   
        pc1_ev = np.round(data_results['explained variance'].iloc[0]*100, 3)
        pc1_3_ev = np.round(data_results['explained variance'].iloc[0:3].sum()*100, 3)
        
        if corr_type == 'distcorr' and print_for == task:
        
            print('Considering variables showing a significant distance correlation')
            print(task, 'correlated with', data_results.shape[0], 'EEG features')
            print('1st PC in ', task, 'explains: ', pc1_ev, '%')
            print('First three PCs in ', task, 'explain', pc1_3_ev,'%')
            print()
        
        var_exp_dc.append(pc1_ev)
        var_exp_dc_comp.append(pc1_3_ev)
        
        
print('General summary')    
print('For results using Spearman correlation')
print('min-max-median variance explained by 1st PC across tasks', [min(var_exp_sp), max(var_exp_sp), np.median(var_exp_sp)])
print('min-max-median variance explained by 1-3 PCs across tasks', [min(var_exp_sp_comp), max(var_exp_sp_comp), np.median(var_exp_sp_comp)])
print()
print('For results using distance correlation')
print('min-max-median variance explained by 1st PC across tasks', [min(var_exp_dc), max(var_exp_dc), np.median(var_exp_dc)])
print('min-max-median variance explained by 1-3 PCs across tasks', [min(var_exp_dc_comp), max(var_exp_dc_comp), np.median(var_exp_dc_comp)])

# For features showing group differences

idgroup_c = 'o'
group_comp = pd.read_csv(os.path.join(results_8_dir, '8_group_difference_'+ idgroup_c +'_pca_results_sp.csv'), index_col=0)
print()   
print('For features showing group differences')   
print('First component explains', group_comp['explained variance'].values[0]*100 , '% of', group_comp.shape[1]-1, 'EEG features')
print('Second component explains', group_comp['explained variance'].values[1]*100 , '% of', group_comp.shape[1]-1, 'EEG features')
print('Third component explains', group_comp['explained variance'].values[2]*100 , '% of', group_comp.shape[1]-1, 'EEG features')
print('First 3 components explains', np.sum(group_comp['explained variance'].values[0:3]*100).round(3) , '% of', group_comp.shape[0], 'EEG features')

Considering variables showing a significant distance correlation
Rwt_s_words correlated with 9 EEG features
1st PC in  Rwt_s_words explains:  58.64 %
First three PCs in  Rwt_s_words explain 85.956 %

General summary
For results using Spearman correlation
min-max-median variance explained by 1st PC across tasks [29.324, 62.834, 44.15]
min-max-median variance explained by 1-3 PCs across tasks [48.658, 100.0, 79.247]

For results using distance correlation
min-max-median variance explained by 1st PC across tasks [27.807, 58.64, 33.897999999999996]
min-max-median variance explained by 1-3 PCs across tasks [52.133, 100.0, 72.2105]

For features showing group differences
First component explains 24.01177581173413 % of 108 EEG features
Second component explains 13.47155760568583 % of 108 EEG features
Third component explains 7.5431528351615 % of 108 EEG features
First 3 components explains 45.026 % of 63 EEG features


In [9]:
# Results from multiple regression

idgroup = 'y'

# Save adj R2 values
pc1_adj_sp = []
pc1_3_adj_sp = []

pc1_adj_dc = []
pc1_3_adj_dc = []

# concatenate dataframes
df_sp = []
df_sp_task = []
df_dc = []
df_dc_task = []

# Display results for
print_for = 'Rwt_animal_categories'
corr_type = 'distcorr'


print('Adjusted R2 values using principal components to predict cognitive tasks')
print()

for k in range(len(beh_vars)):
    
    task = beh_vars[k]
    # Results using Spearman correlations
    str_pcr_sp = list(filter(lambda x: '8_PCR_' + task + '_' + idgroup + '_sp' in x, files_summ))
    # Results using distance correlations
    str_pcr_dc = list(filter(lambda x: '8_PCR_' + task + '_' + idgroup + '_dc' in x, files_summ))

    if len(str_pcr_sp) > 0:
        
        # Spearman
        data_results = pd.read_csv(os.path.join(results_8_dir, str_pcr_sp[0]), index_col = 0)    
        
        pc1_pred = data_results['adjusted R-squared'].loc['PC 1']
        pc1_adj_sp.append(pc1_pred)
        
        if data_results.shape[0] > 2:
            pc1_3_pred = data_results['adjusted R-squared'].loc['PC 1-3']
        else: 
            pc1_3_pred = data_results['adjusted R-squared'].loc['PC 1-2']
            
        pc1_3_adj_sp.append(pc1_3_pred)
        df_sp.append(data_results)
        df_sp_task.append(task)
        
        if corr_type=='spearman' and print_for == task:
            
            print('Using variables showing significant Spearman correlation')
            print(len(data_results), '  EEG features showed significant Spearman correlations to ', task)
            print(task + ':  PC 1  :  ', pc1_pred.round(3))
        
            if data_results.shape[0] > 2:
                print(task + ':  PC 1-3:  ', pc1_3_pred.round(3))
            else:
                print(task + ':  PC 1-2:  ', pc1_3_pred.round(3))  
        
    if len(str_pcr_dc) > 0:
        
        # Distance correlation
        data_results_dc = pd.read_csv(os.path.join(results_8_dir, str_pcr_dc[0]), index_col = 0)    
        pc1_pred = data_results_dc['adjusted R-squared'].loc['PC 1']
        pc1_adj_dc.append(pc1_pred)
        
        if data_results_dc.shape[0] > 2:
            pc1_3_pred = data_results_dc['adjusted R-squared'].loc['PC 1-3']
        else: 
            pc1_3_pred = data_results_dc['adjusted R-squared'].loc['PC 1-2']
        
        pc1_3_adj_dc.append(pc1_3_pred)
        df_dc.append(data_results_dc)
        df_dc_task.append(task)
        
        if corr_type=='distcorr' and print_for == task:
            
            print('Using variables showing significant distance correlation')
            print(len(data_results_dc), '  EEG features showed significant distance correlations to ', task)
            print(task + ':  PC 1  :  ', pc1_pred.round(3))
        
            if data_results_dc.shape[0] > 2:
                print(task + ':  PC 1-3:  ', pc1_3_pred.round(3))
            else:
                print(task + ':  PC 1-2:  ', pc1_3_pred.round(3))  

print()
print('General summary')
print('Min adj-R2 for EEG features showing Spearman correlation, 1PC: ', np.min(pc1_adj_sp))
print('Max adj-R2 for EEG features showing Spearman correlation, 1PC: ', np.max(pc1_adj_sp))
print('Median adj-R2 for EEG features showing Spearman correlation, 1PC: ', np.median(pc1_adj_sp))
print()
print('Min adj-R2 for EEG features showing Spearman correlation, 1-3PC: ', np.min(pc1_3_adj_sp))
print('Max adj-R2 for EEG features showing Spearman correlation, 1-3PC: ', np.max(pc1_3_adj_sp))
print('Median adj-R2 for EEG features showing Spearman correlation, 1-3PC: ', np.median(pc1_3_adj_sp))
print()
print('Min adj-R2 for EEG features showing distance correlation, 1PC: ', np.min(pc1_adj_dc))
print('Max adj-R2 for EEG features showing distance correlation, 1PC: ', np.max(pc1_adj_dc))
print('Median adj-R2 for EEG features showing distance correlation, 1PC: ', np.median(pc1_adj_dc))
print()
print('Min adj-R2 for EEG features showing distance correlation, 1-3PC: ', np.min(pc1_3_adj_dc))
print('Max adj-R2 for EEG features showing distance correlation, 1-3PC: ', np.max(pc1_3_adj_dc))
print('Median adj-R2 for EEG features showing distance correlation, 1-3PC: ', np.median(pc1_3_adj_dc))

Adjusted R2 values using principal components to predict cognitive tasks

Using variables showing significant distance correlation
11   EEG features showed significant distance correlations to  Rwt_animal_categories
Rwt_animal_categories:  PC 1  :   0.114
Rwt_animal_categories:  PC 1-3:   0.223

General summary
Min adj-R2 for EEG features showing Spearman correlation, 1PC:  -0.0073125153717175
Max adj-R2 for EEG features showing Spearman correlation, 1PC:  0.2163688892280325
Median adj-R2 for EEG features showing Spearman correlation, 1PC:  0.145938852567184

Min adj-R2 for EEG features showing Spearman correlation, 1-3PC:  0.1068880670494067
Max adj-R2 for EEG features showing Spearman correlation, 1-3PC:  0.322047433330785
Median adj-R2 for EEG features showing Spearman correlation, 1-3PC:  0.2056124421986235

Min adj-R2 for EEG features showing distance correlation, 1PC:  0.0662891438366922
Max adj-R2 for EEG features showing distance correlation, 1PC:  0.2024072814780065
Median adj

In [8]:
# Write csv files with PCR summary

# For Spearman correlation results
sp_results = pd.concat(df_sp, axis=1)
df_sp_index = [feature.replace('_', ' ') for feature in df_sp_task]
sp_results.columns = df_sp_index
sp_results['median'] = sp_results.median(numeric_only=True, axis=1)
sp_results = sp_results.round(2)
sp_results.to_csv(os.path.join(summary_results_dir, '8_summary_PCR_results_' + idgroup + '_sp.csv'))

# For Distance correlation results
dc_results = pd.concat(df_dc, axis=1)
df_dc_index = [feature.replace('_', ' ') for feature in df_dc_task]
dc_results.columns = df_dc_index
dc_results['median'] = dc_results.median(numeric_only=True, axis=1)
dc_results = dc_results.round(2)
dc_results.to_csv(os.path.join(summary_results_dir, '8_summary_PCR_results_' + idgroup + '_dc.csv'))

### Results section: Prediction of cognitive variables using EEG features

In [10]:
# Prediction performance 
# young adults data
with open(os.path.join(results_2_dir,'2_regression_y.pkl'), 'rb') as f:
    predictive_y = pickle.load(f)
# older data
with open(os.path.join(results_2_dir,'2_regression_o.pkl'), 'rb') as f:
    predictive_o = pickle.load(f)
    
# ridge results young

# train data
ridge_train_y = np.percentile(np.concatenate(np.median(predictive_y['ridge_r2_train'], 0)), (25, 50, 75))
# create df with median performance
ridge_train_df_median = pd.DataFrame(data=np.round(np.median(predictive_y['ridge_r2_train'], 0), 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                     columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_y['ridge_r2_train'], 75, 0) - np.percentile(predictive_y['ridge_r2_train'], 25, 0)

ridge_train_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                  columns=[k.replace("_", " ") for k in beh_vars])
# merge df
ridge_train_df = ridge_train_df_median.astype(str).add(' (').add(ridge_train_df_iqr.astype(str)).add(')')
# save df
ridge_train_df.to_csv(os.path.join(summary_results_dir, '1_ridge_median_train_y.csv'))    


# test data
ridge_test_y = np.percentile(np.concatenate(np.median(predictive_y['ridge_r2_test'], 0)), (25, 50, 75))

# create df with median performance
ridge_test_df_median = pd.DataFrame(data=np.round(np.median(predictive_y['ridge_r2_test'], 0), 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                    columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_y['ridge_r2_test'], 75, 0) - np.percentile(predictive_y['ridge_r2_test'], 25, 0)

ridge_test_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                 columns=[k.replace("_", " ") for k in beh_vars])
# merge df
ridge_test_df = ridge_test_df_median.astype(str).add(' (').add(ridge_test_df_iqr.astype(str)).add(')')
# save df
ridge_test_df.to_csv(os.path.join(summary_results_dir, '1_ridge_median_test_y.csv'))    


# ridge results older

# train data
ridge_train_o = np.percentile(np.concatenate(np.median(predictive_o['ridge_r2_train'], 0)), (25, 50, 75))

# create df with median performance
ridge_train_df_median = pd.DataFrame(data=np.round(np.median(predictive_o['ridge_r2_train'], 0), 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                     columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_o['ridge_r2_train'], 75, 0) - np.percentile(predictive_o['ridge_r2_train'], 25, 0)

ridge_train_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                  columns=[k.replace("_", " ") for k in beh_vars])
# merge df
ridge_train_df = ridge_train_df_median.astype(str).add(' (').add(ridge_train_df_iqr.astype(str)).add(')')
# save df
ridge_train_df.to_csv(os.path.join(summary_results_dir, '1_ridge_median_train_o.csv'))    


# test data
ridge_test_o = np.percentile(np.concatenate(np.median(predictive_o['ridge_r2_test'], 0)), (25, 50, 75))

# create df with median performance
ridge_test_df_median = pd.DataFrame(data = np.round(np.median(predictive_o['ridge_r2_test'], 0), 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                    columns = [k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_o['ridge_r2_test'], 75, 0) - np.percentile(predictive_o['ridge_r2_test'], 25, 0)

ridge_test_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                 columns=[k.replace("_", " ") for k in beh_vars])
# merge df
ridge_test_df = ridge_test_df_median.astype(str).add(' (').add(ridge_test_df_iqr.astype(str)).add(')')
# save df
ridge_test_df.to_csv(os.path.join(summary_results_dir, '1_ridge_median_test_o.csv'))    


# Random forest results young

# train data
rf_train_y = np.percentile(np.concatenate(np.median(predictive_y['rf_r2_train'], 0)), (25, 50, 75))

# create df with median performance
rf_train_df_median = pd.DataFrame(data = np.round(np.median(predictive_y['rf_r2_train'], 0), 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                  columns = [k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_y['rf_r2_train'], 75, 0) - np.percentile(predictive_y['rf_r2_train'], 25, 0)

rf_train_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                               columns=[k.replace("_", " ") for k in beh_vars])
# merge df
rf_train_df = rf_train_df_median.astype(str).add(' (').add(rf_train_df_iqr.astype(str)).add(')')
# save df
rf_train_df.to_csv(os.path.join(summary_results_dir, '1_rf_median_train_y.csv'))    


# test data
rf_test_y = np.percentile(np.concatenate(np.median(predictive_y['rf_r2_test'], 0)), (25, 50, 75))

# create df with median performance
rf_test_df_median = pd.DataFrame(data=np.round(np.median(predictive_y['rf_r2_test'], 0), 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                                 columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_y['rf_r2_test'], 75, 0) - np.percentile(predictive_y['rf_r2_test'], 25, 0)

rf_test_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_y['eeg features']],
                              columns=[k.replace("_", " ") for k in beh_vars])
# merge df
rf_test_df = rf_test_df_median.astype(str).add(' (').add(rf_test_df_iqr.astype(str)).add(')')
# save df
rf_test_df.to_csv(os.path.join(summary_results_dir, '1_rf_median_test_y.csv'))    

# random forest results older

# train data
rf_train_o = np.percentile(np.concatenate(np.median(predictive_o['rf_r2_train'], 0)), (25, 50, 75))
# create df with median performance
rf_train_df_median = pd.DataFrame(data=np.round(np.median(predictive_o['rf_r2_train'], 0), 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                  columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_o['rf_r2_train'], 75, 0) - np.percentile(predictive_o['rf_r2_train'], 25, 0)

rf_train_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                               columns=[k.replace("_", " ") for k in beh_vars])
# merge df
rf_train_df = rf_train_df_median.astype(str).add(' (').add(rf_train_df_iqr.astype(str)).add(')')
# save df
rf_train_df.to_csv(os.path.join(summary_results_dir, '1_rf_median_train_o.csv'))    


# test data
rf_test_o = np.percentile(np.concatenate(np.median(predictive_o['rf_r2_test'], 0)), (25, 50, 75))
# create df with median performance
rf_test_df_median = pd.DataFrame(data=np.round(np.median(predictive_o['rf_r2_test'], 0), 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                                 columns=[k.replace("_", " ") for k in beh_vars])
# create df with iqr values
iqr = np.percentile(predictive_o['rf_r2_test'], 75, 0) - np.percentile(predictive_o['rf_r2_test'], 25, 0)

rf_test_df_iqr = pd.DataFrame(data=np.round(iqr, 2), index=[k.replace(".csv", "") for k in predictive_o['eeg features']],
                              columns=[k.replace("_", " ") for k in beh_vars])
# merge df
rf_test_df = rf_test_df_median.astype(str).add(' (').add(rf_test_df_iqr.astype(str)).add(')')
# save df
rf_test_df.to_csv(os.path.join(summary_results_dir, '1_rf_median_test_o.csv'))    


print('Ridge models')
print('Train set predictive performance (25, 50, 75 percentiles) ridge model for young adults = ', ridge_train_y)
print('Test set predictive performance (25, 50, 75 percentiles) ridge model for young adults = ', ridge_test_y)
print('Train set predictive performance (25, 50, 75 percentiles) ridge model for older adults = ', ridge_train_o)
print('Test set predictive performance (25, 50, 75 percentiles) ridge model for older adults = ', ridge_test_o)
print()
print('Random forest models')
print('Train set predictive performance (25, 50, 75 percentiles) ridge model for young adults = ', rf_train_y)
print('Test set predictive performance (25, 50, 75 percentiles) ridge model for young adults = ', rf_test_y)
print('Train set predictive performance (25, 50, 75 percentiles) ridge model for older adults = ', rf_train_o)
print('Test set predictive performance (25, 50, 75 percentiles) ridge model for older adults = ', rf_test_o)

Ridge models
Train set predictive performance (25, 50, 75 percentiles) ridge model for young adults =  [0.00074957 0.00140044 0.02777906]
Test set predictive performance (25, 50, 75 percentiles) ridge model for young adults =  [-0.03852588 -0.03015285 -0.02231509]
Train set predictive performance (25, 50, 75 percentiles) ridge model for older adults =  [0.00065057 0.00122202 0.06084672]
Test set predictive performance (25, 50, 75 percentiles) ridge model for older adults =  [-0.0908049  -0.06295667 -0.03936711]

Random forest models
Train set predictive performance (25, 50, 75 percentiles) ridge model for young adults =  [0.65395771 0.74086218 0.78276096]
Test set predictive performance (25, 50, 75 percentiles) ridge model for young adults =  [-0.13956541 -0.09739039 -0.06499686]
Train set predictive performance (25, 50, 75 percentiles) ridge model for older adults =  [0.80246656 0.81578906 0.82763705]
Test set predictive performance (25, 50, 75 percentiles) ridge model for older adult

### Results section: Group comparisons of the EEG features between younger and older adults

In [11]:
# Group comparison correlations and inertias
# group comparison data
data_mwu = pd.read_csv(os.path.join(results_3_dir, '3_mwu_r.csv'), index_col=0)

# positive effect size indicates that older adults show higher values
data_mwu['z_stat'] = data_mwu['z_stat'] * -1 
data_mwu = data_mwu.loc[data_mwu['pvalues'] < 0.05]

significant_analysis = len(data_mwu)

# effect size directions
positive_fx = len(np.where(data_mwu['z_stat'].values > 0)[0])
negative_fx = len(np.where(data_mwu['z_stat'].values < 0)[0])
# min and max significant effect
min_fx = data_mwu.loc[data_mwu['r_stat'] == data_mwu['r_stat'].min()]
max_fx = data_mwu.loc[data_mwu['r_stat'] == data_mwu['r_stat'].max()]

print(significant_analysis,' EEG features showed significant group differences between young and older adults', significant_analysis/175)
print(positive_fx,' EEG features showed a positive significant effect')
print(negative_fx,' EEG features showed a negative significant effect')
print('Mininum significant effect -> Feature : ',min_fx['r_stat'].index[0][:-4] , ', r value : ', min_fx['r_stat'][0])
print('Maximum significant effect -> Feature : ',max_fx['r_stat'].index[0][:-4] , ', r value : ', max_fx['r_stat'][0])
print('25, 50 and 75 percentles of significant r values: ',np.percentile(data_mwu['r_stat'],(25,50,75)))
data_mwu

108  EEG features showed significant group differences between young and older adults 0.6171428571428571
56  EEG features showed a positive significant effect
52  EEG features showed a negative significant effect
Mininum significant effect -> Feature :  microstate E , r value :  0.17939923
Maximum significant effect -> Feature :  spectral entropy beta , r value :  0.580234883
25, 50 and 75 percentles of significant r values:  [0.25904989 0.31288802 0.41526343]


Unnamed: 0_level_0,selected_ch,pvalues,z_stat,r_stat,rcil_stat,rcih_stat
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ampl total power alpha.csv,59,4.721086e-02,-3.311934,0.233606,0.09,0.37
ampl total power beta.csv,49,4.077954e-03,3.839962,0.270850,0.11,0.40
ampl total power delta.csv,5,8.580000e-10,-6.757183,0.476615,0.36,0.58
ampl total power gamma.csv,48,4.570000e-07,5.779548,0.407658,0.27,0.52
ampl total power theta.csv,59,2.043252e-02,-3.406038,0.240243,0.10,0.37
...,...,...,...,...,...,...
std ampl gamma.csv,48,6.130000e-07,5.729882,0.404155,0.28,0.52
std ampl theta.csv,59,3.545612e-02,-3.251813,0.229365,0.09,0.36
waiting time beta.csv,49,1.259730e-04,4.660757,0.328744,0.20,0.45
waiting time delta.csv,40,3.580000e-06,5.422767,0.382493,0.25,0.50


In [12]:
# Find group effect of EEG features
data_mwu[['r_stat', 'z_stat','pvalues']].loc['node str e-plv gamma.csv']

r_stat     0.25739
z_stat     3.64914
pvalues    0.01605
Name: node str e-plv gamma.csv, dtype: float64

### Results section: Correlations between EEG features showing age-related differences

In [13]:
# correlation between EEG features showing significant effects

# for young adults
corrs_y = pd.read_csv(os.path.join(results_4_dir,'4_correlation_eeg_y.csv'), index_col = 0)
pvals_y = pd.read_csv(os.path.join(results_4_dir,'4_pvalues_eeg_y.csv'), index_col = 0)
prc_corr_y = np.percentile(np.abs(corrs_y.values[np.triu_indices(len(corrs_y),1)]),(25,50,75))
triu_pvals_y = pvals_y.values[np.triu_indices(len(pvals_y),1)]
significant_corrs_y = len(np.where(triu_pvals_y < 0.05)[0])/len(triu_pvals_y)

print('Correlations (25, 50, 75 percentiles) between EEG features showing significant effects for young adults : ', prc_corr_y)
print('% of significant correlations between EEG features showing significant effects for young adults : ', significant_corrs_y*100)

# for older adults
corrs_o = pd.read_csv(os.path.join(results_4_dir,'4_correlation_eeg_o.csv'), index_col = 0)
pvals_o = pd.read_csv(os.path.join(results_4_dir,'4_pvalues_eeg_o.csv'), index_col = 0)
prc_corr_o = np.percentile(np.abs(corrs_o.values[np.triu_indices(len(corrs_o),1)]),(25,50,75))
triu_pvals_o = pvals_o.values[np.triu_indices(len(pvals_o),1)]
significant_corrs_o = len(np.where(triu_pvals_o < 0.05)[0])/len(triu_pvals_o)
print()                                                            
print('Correlations (25, 50, 75 percentiles) between EEG features showing significant effects for older adults : ', prc_corr_o)
print('% of significant correlations between EEG features showing significant effects for older adults : ', significant_corrs_o*100)

Correlations (25, 50, 75 percentiles) between EEG features showing significant effects for young adults :  [0.05913733 0.13286635 0.2864387 ]
% of significant correlations between EEG features showing significant effects for young adults :  41.74454828660436

Correlations (25, 50, 75 percentiles) between EEG features showing significant effects for older adults :  [0.07868904 0.16983487 0.3140841 ]
% of significant correlations between EEG features showing significant effects for older adults :  33.766008999653856


In [14]:
# multivariate distance correlation between EEG features showing significant effects
# for young adults
multivardc_y = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_y.csv'), index_col = 0)
multivardcpvals_y = pd.read_csv(os.path.join(results_5_dir,'5_dc_pval_y.csv'), index_col = 0)
multivardc_y = multivardc_y.loc[corrs_y.index][corrs_y.index]
# get the sqrt of multivariate dc since it approximates the population squared distance correlation     
prc_multivardc_y = np.percentile(np.sqrt(np.abs(multivardc_y.values[np.triu_indices(len(multivardc_y),1)])),(25,50,75))

multivardc_y = multivardc_y.loc[corrs_y.index][corrs_y.index]
triu_pvals_multivardc_y = multivardcpvals_y.values[np.triu_indices(len(multivardcpvals_y),1)]
significant_multivardc_y = len(np.where(triu_pvals_multivardc_y < 0.05)[0])/len(triu_pvals_multivardc_y)
                                                                
print('Multivariate sqrt distance correlation (25, 50, 75 percentiles) between EEG features showing significant effects for young adults : ', prc_multivardc_y)
print('% of significant distance correlations between EEG features showing significant effects for young adults : ', significant_multivardc_y*100)

# for older adults
multivardc_o = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_o.csv'), index_col = 0)
multivardcpvals_o = pd.read_csv(os.path.join(results_5_dir,'5_dc_pval_o.csv'), index_col = 0)
multivardc_o = multivardc_o.loc[corrs_o.index][corrs_o.index]
# get the sqrt of multivariate dc since it approximates the population squared distance correlation     
prc_multivardc_o = np.percentile(np.sqrt(np.abs(multivardc_o.values[np.triu_indices(len(multivardc_o),1)])),(25,50,75))
multivardcpvals_o = multivardcpvals_o.loc[corrs_o.index][corrs_o.index]
triu_pvals_multivardc_o = multivardcpvals_o.values[np.triu_indices(len(multivardcpvals_o),1)]
significant_multivardc_o = len(np.where(triu_pvals_multivardc_o < 0.05)[0])/len(triu_pvals_multivardc_o)
print('Multivariate sqrt distance correlation (25, 50, 75 percentiles) between EEG features showing significant effects for older adults : ', prc_multivardc_o)
print('% of significant distance correlations between EEG features showing significant effects for older adults : ', significant_multivardc_o*100)


Multivariate sqrt distance correlation (25, 50, 75 percentiles) between EEG features showing significant effects for young adults :  [0.12360455 0.23248131 0.41361325]
% of significant distance correlations between EEG features showing significant effects for young adults :  58.53530377668309
Multivariate sqrt distance correlation (25, 50, 75 percentiles) between EEG features showing significant effects for older adults :  [0.11925205 0.2111067  0.38294468]
% of significant distance correlations between EEG features showing significant effects for older adults :  53.01142263759087


In [15]:
# Find correlations between EEG features
# ['node str e-icoh delta']['clust coef e-icoh delta']
# ['rqa laminarity']['rqa determinism']
# ['waiting time gamma']['life time gamma']


print('spearman: ', np.round(corrs_o['waiting time gamma']['life time gamma'], 2))
print('distcorr: ', np.round(np.sqrt(np.abs(multivardc_o['waiting time gamma']['life time gamma'])), 2))

spearman:  0.83
distcorr:  0.98


#### Comparison of reference choices

In [16]:
# Compare results with difference references

# For young adults
icc_ref_y = pd.read_csv(os.path.join(results_7_dir,'7_icc_references_y.csv'), index_col = 0)
spearman_ref_y = pd.read_csv(os.path.join(results_7_dir,'7_spearman_references_y.csv'), index_col = 0)
multivardc_y_z = pd.read_csv(os.path.join(results_6_dir,'6_dc_fx_y.csv'), index_col = 0)

# For older adults
icc_ref_o = pd.read_csv(os.path.join(results_7_dir,'7_icc_references_o.csv'), index_col = 0)
spearman_ref_o = pd.read_csv(os.path.join(results_7_dir,'7_spearman_references_o.csv'), index_col = 0)
multivardc_o_z = pd.read_csv(os.path.join(results_6_dir,'6_dc_fx_o.csv'), index_col = 0)

# Concatenate data
# Younger adults data
concat_y = pd.concat([np.round(icc_ref_y, 2), pd.DataFrame(np.round(np.sqrt(np.abs(np.diag(multivardc_y_z))), 2), columns=['multivardcor'],  index=icc_ref_y.index)], axis=1)
concat_y.columns = ['ICC 25th', 'ICC 50th', 'ICC 75th', 'multivardcor']
# Older adults data
concat_o = pd.concat([np.round(icc_ref_o, 2), pd.DataFrame(np.round(np.sqrt(np.abs(np.diag(multivardc_o_z)))), columns=['multivardcor'],  index=icc_ref_o.index)], axis=1)
concat_o.columns = ['ICC 25th', 'ICC 50th', 'ICC 75th', 'multivardcor']
# All data
all_cat = pd.concat([concat_y, concat_o], axis=1)
all_cat.to_csv(os.path.join(summary_results_dir, 'reliability_reference.csv'))

print("Results of comparing references in younger adults")
print("25, 50 and 75th percentiles of icc between average (csd for connectivity) and zero reference for young adults", 
      np.percentile(icc_ref_y['50th'], (25, 50, 75)))
print("25, 50 and 75th percentiles of spearman correlations between average (csd for connectivity) and zero reference for young adults", 
      np.percentile(spearman_ref_y['50th'], (25, 50, 75)))
print("25, 50 and 75th percentiles of multivariate correlations between average (csd for connectivity) and zero reference for young adults", 
      np.percentile(np.sqrt(np.abs(np.diagonal(multivardc_y_z))), (25, 50, 75)))
print()   
print("Results of comparing references in older adults")
print("25, 50 and 75th percentiles of icc between average (csd for connectivity) and zero reference for older adults", 
      np.percentile(icc_ref_o['50th'], (25, 50, 75)))
print("25, 50 and 75th percentiles of spearman correlations between average (csd for connectivity) and zero reference for older adults", 
      np.percentile(spearman_ref_o['50th'], (25, 50, 75)))
print("25, 50 and 75th percentiles of multivariate correlations between average (csd for connectivity) and zero reference for older adults", 
      np.percentile(np.sqrt(np.abs(np.diagonal(multivardc_o_z))), (25, 50, 75)))

Results of comparing references in younger adults
25, 50 and 75th percentiles of icc between average (csd for connectivity) and zero reference for young adults [0.56625 0.9195  0.96525]
25, 50 and 75th percentiles of spearman correlations between average (csd for connectivity) and zero reference for young adults [0.67038213 0.90268477 0.97369911]
25, 50 and 75th percentiles of multivariate correlations between average (csd for connectivity) and zero reference for young adults [0.78791708 0.98452908 0.99164168]

Results of comparing references in older adults
25, 50 and 75th percentiles of icc between average (csd for connectivity) and zero reference for older adults [0.6495  0.933   0.97425]
25, 50 and 75th percentiles of spearman correlations between average (csd for connectivity) and zero reference for older adults [0.68355175 0.92554724 0.97835061]
25, 50 and 75th percentiles of multivariate correlations between average (csd for connectivity) and zero reference for older adults [0.8

In [17]:
# Multivariate correlations between features with average(csd) and zero reference

# List of zero ref features
zero_ref_feats = list(multivardc_y_z)
feat_index = [feature.replace(' zero','') for feature in zero_ref_feats]

# Load multivariate data
multivardc_y = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_y.csv'), index_col = 0)
multivardc_o = pd.read_csv(os.path.join(results_5_dir,'5_dc_fx_o.csv'), index_col = 0)

# Find in nonzero
print('Multivariate correlations, all 140 features in young adults')
print('25, 50, 75 percentile between all avg/csd features', np.percentile(np.sqrt(np.abs(multivardc_y.loc[feat_index][feat_index])).values[np.triu_indices(len(feat_index), 1)], (25, 50, 75)))
print('25, 50, 75 percentile between all zero ref features', np.percentile(np.sqrt(np.abs(multivardc_y_z)).values[np.triu_indices(len(feat_index), 1)], (25, 50, 75)))
print()
print('Multivariate correlations, all 140 features in older adults')
print('25, 50, 75 percentile between all avg/csd features', np.percentile(np.sqrt(np.abs(multivardc_o.loc[feat_index][feat_index])).values[np.triu_indices(len(feat_index), 1)], (25, 50, 75)))
print('25, 50, 75 percentile between all zero ref features', np.percentile(np.sqrt(np.abs(multivardc_o_z)).values[np.triu_indices(len(feat_index), 1)], (25, 50, 75)))
print()

Multivariate correlations, all 140 features in young adults
25, 50, 75 percentile between all avg/csd features [0.10197995 0.19607684 0.37514465]
25, 50, 75 percentile between all zero ref features [0.09431465 0.19253163 0.35833837]

Multivariate correlations, all 140 features in older adults
25, 50, 75 percentile between all avg/csd features [0.11562122 0.19598147 0.3474091 ]
25, 50, 75 percentile between all zero ref features [0.10691003 0.17399355 0.33060365]



In [18]:
# List of features showing group effects
sign_feats = data_mwu.index
sign_feat_index = [feature.replace('.csv','') for feature in sign_feats]

# Remove source space features
sign_feat_index = [item for item in sign_feat_index if 's-' not in item]
sign_feat_index = [item for item in sign_feat_index if 'source' not in item]
sign_feat_index_z = [s + ' zero' for s in sign_feat_index]

# Get correlations
# For younger adults
zero_y = np.sqrt(np.abs(multivardc_y_z.loc[sign_feat_index_z][sign_feat_index_z])).values[np.triu_indices(len(sign_feat_index_z),1)]
avg_csd_y = np.sqrt(np.abs(multivardc_y.loc[sign_feat_index][sign_feat_index])).values[np.triu_indices(len(sign_feat_index),1)]

# For older adults
zero_o = np.sqrt(np.abs(multivardc_o_z.loc[sign_feat_index_z][sign_feat_index_z])).values[np.triu_indices(len(sign_feat_index_z),1)]
avg_csd_o = np.sqrt(np.abs(multivardc_o.loc[sign_feat_index][sign_feat_index])).values[np.triu_indices(len(sign_feat_index),1)]


# Find in nonzero
print('Multivariate correlations, 93 features showing group differences in young adults')
print('25, 50, 75 percentile between all avg/csd features', np.percentile(zero_y, (25, 50, 75)))
print('25, 50, 75 percentile between all zero ref features', np.percentile(avg_csd_y, (25, 50, 75)))
print()
print('Multivariate correlations, 93 features showing group differences in older adults')
print('25, 50, 75 percentile between all avg/csd features', np.percentile(zero_o, (25, 50, 75)))
print('25, 50, 75 percentile between all zero ref features', np.percentile(avg_csd_o, (25, 50, 75)))

Multivariate correlations, 93 features showing group differences in young adults
25, 50, 75 percentile between all avg/csd features [0.13137249 0.25170483 0.43610113]
25, 50, 75 percentile between all zero ref features [0.13374477 0.24375938 0.44146497]

Multivariate correlations, 93 features showing group differences in older adults
25, 50, 75 percentile between all avg/csd features [0.12416478 0.21911936 0.40825509]
25, 50, 75 percentile between all zero ref features [0.12327954 0.22861179 0.41491356]
