## 1) Correlations between EEG features and cognitive variables

- Each EEG feature is correlated with each cognitive variable
- P-values are corrected for multiple comparisons (see the 'correlate_eeg_beh' function) separately for each pair of EEG feature and cognitive variable
- .csv files with the results are stored in the '1_correlations_eeg_beh_results' subfolder of 'results_dir'

Gordillo, da Cruz, Moreno, Garobbio, Herzog

In [None]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
from dependencies.multicorr import correlate_eeg_beh, task_eeg_variables

In [None]:
# Fix the seed of NumPy's global random generator for this session.
np.random.seed(234)
# All data/result paths below are resolved relative to the current working directory.
main_dir = os.getcwd()

In [None]:
# Input data and output results live in two sibling folders under main_dir.
data_dir, results_dir = (
    os.path.join(main_dir, folder) for folder in ('data', 'results')
)

In [None]:
# Behavior (cognitive) variables. Each name is passed as `behvar` to the
# correlation helpers and is used as a column label in the result tables.
beh_vars = ["Cvlt_attention_span", "Cvlt_delayed_memory", "Pts-2_subtest_3",
            "Rwt_animal_categories", "Rwt_s_words", "Tap_alertness",
            "Tap_simon_congruent", "Tap_simon_incongruent", "Tap_working_memory",
            "Tmt-A", "Tmt-B", "Vocabulary_test"]

nbeh = len(beh_vars)

# load behavior data (the first CSV column becomes the row index)
beh_tasks = pd.read_csv(os.path.join(data_dir, 'behavior_file.csv'), index_col=0)

# eeg features: one .csv file per EEG feature inside data/csv_data
path_eeg_csv = os.path.join(data_dir, 'csv_data')
# Keep only files whose name ends in '.csv' (a substring test would also pick
# up names such as 'x.csv.bak'). os.listdir returns entries in arbitrary
# order, so sort them to make the row order of the result tables reproducible.
eeg_features = sorted(name for name in os.listdir(path_eeg_csv) if name.endswith('.csv'))
neeg = len(eeg_features)
print('Number of EEG features to analyze:  ', neeg)
print('Number of cognitive variables to analyze:  ', nbeh)

In [None]:
# Pre-allocate the result tables: one row per EEG feature, one column per
# cognitive variable. They are filled in by the main correlation loop.
result_shape = (neeg, nbeh)

# Spearman correlation results
spearman_max_correlation = np.zeros(result_shape)
# object dtype so that a cell can hold an arbitrary Python object
spearman_MASK = np.zeros(result_shape).astype(object)
spearman_n_significant = np.zeros(result_shape)
spearman_nan_out = np.zeros(result_shape)

# Distance correlation results (same layout as the Spearman tables)
distcorr_max_correlation = np.zeros(result_shape)
distcorr_MASK = np.zeros(result_shape).astype(object)
distcorr_n_significant = np.zeros(result_shape)
distcorr_nan_out = np.zeros(result_shape)

In [None]:
# Age group analysed in this run: 'y' = young adults, 'o' = older adults.
# The value is forwarded as `group` to the correlation helpers and is embedded
# in the name of every output file written by this notebook.
idgroup = 'y'

In [None]:
# Single example run: correlate one EEG feature file with one cognitive
# variable using distance correlation, then show how many EEG variables
# the helper reports as significant.
example_csv = os.path.join(path_eeg_csv, 'node str e-icoh theta.csv')
dataeeg = pd.read_csv(example_csv, index_col=0)
task = 'Tap_simon_congruent'
maxcorr, maxcorr_var, n_sig_eegvars, nanout = correlate_eeg_beh(
    dataeeg,
    beh_tasks,
    behvar=task,
    metric='distcorr',
    group=idgroup,
)
print(n_sig_eegvars)

In [None]:
# For every (EEG feature, cognitive variable) pair, run correlate_eeg_beh once
# per metric and store its four outputs in the matching pre-allocated tables.
# Insertion order matters: spearman is evaluated before distcorr for each pair.
metric_tables = {
    'spearman': (spearman_max_correlation, spearman_MASK,
                 spearman_n_significant, spearman_nan_out),
    'distcorr': (distcorr_max_correlation, distcorr_MASK,
                 distcorr_n_significant, distcorr_nan_out),
}

for ieeg, feature_file in enumerate(tqdm(eeg_features)):
    # one CSV file per EEG feature; its first column is the row index
    dataeeg = pd.read_csv(os.path.join(path_eeg_csv, feature_file), index_col=0)
    for itask, task in enumerate(beh_vars):
        for metric_name, tables in metric_tables.items():
            # correlate each electrode with the cognitive variable and
            # correct for multiple comparisons (done inside the helper)
            maxcorr, maxcorr_var, n_sig_eegvars, nanout = correlate_eeg_beh(
                dataeeg,
                beh_tasks,
                behvar=task,
                metric=metric_name,
                group=idgroup,
            )
            # write the outputs to cell [ieeg, itask] of the four tables
            outputs = (maxcorr, maxcorr_var, n_sig_eegvars, nanout)
            for table, value in zip(tables, outputs):
                table[ieeg, itask] = value

In [None]:
# save data
# Everything written by this notebook goes to a dedicated sub-folder of results_dir.
results_1_dir = os.path.join(results_dir, '1_correlations_eeg_beh_results')
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs(results_1_dir, exist_ok=True)
# Row labels of the result tables: EEG feature file names without '.csv'
save_index = [feature.replace('.csv','') for feature in eeg_features]

In [None]:
# Spearman results: wrap each table in a labelled DataFrame
# (rows = EEG features, columns = cognitive variables) ...
# maximum correlation
maxcorrvals_spearman = pd.DataFrame(spearman_max_correlation, index=save_index, columns=beh_vars)
# MASK (the `maxcorr_var` output of correlate_eeg_beh)
mask_spearman = pd.DataFrame(spearman_MASK, index=save_index, columns=beh_vars)
# number of significant variables
nsignificant_spearman = pd.DataFrame(spearman_n_significant, index=save_index, columns=beh_vars)
# `nanout` output of correlate_eeg_beh
nanout_spearman = pd.DataFrame(spearman_nan_out, index=save_index, columns=beh_vars)

# ... and write each one to its own CSV file, tagged with the age group.
spearman_outputs = (
    ('1_maxcorrvals_spearman_', maxcorrvals_spearman),
    ('1_mask_spearman_', mask_spearman),
    ('1_nsignificant_spearman_', nsignificant_spearman),
    ('1_nanout_spearman_', nanout_spearman),
)
for file_prefix, frame in spearman_outputs:
    frame.to_csv(os.path.join(results_1_dir, file_prefix + idgroup + '.csv'))

In [None]:
# Distance correlation results: wrap each table in a labelled DataFrame
# (rows = EEG features, columns = cognitive variables) ...
# maximum correlation
maxcorrvals_distcorr = pd.DataFrame(distcorr_max_correlation, index=save_index, columns=beh_vars)
# MASK (the `maxcorr_var` output of correlate_eeg_beh)
mask_distcorr = pd.DataFrame(distcorr_MASK, index=save_index, columns=beh_vars)
# number of significant variables
nsignificant_distcorr = pd.DataFrame(distcorr_n_significant, index=save_index, columns=beh_vars)
# `nanout` output of correlate_eeg_beh
nanout_distcorr = pd.DataFrame(distcorr_nan_out, index=save_index, columns=beh_vars)

# ... and write each one to its own CSV file, tagged with the age group.
distcorr_outputs = (
    ('1_maxcorrvals_distcorr_', maxcorrvals_distcorr),
    ('1_mask_distcorr_', mask_distcorr),
    ('1_nsignificant_distcorr_', nsignificant_distcorr),
    ('1_nanout_distcorr_', nanout_distcorr),
)
for file_prefix, frame in distcorr_outputs:
    frame.to_csv(os.path.join(results_1_dir, file_prefix + idgroup + '.csv'))

In [None]:
# Within task correlations
# For each cognitive variable, ask task_eeg_variables for the table of EEG
# variables linked to that task plus their pairwise correlation magnitudes and
# p-values, once per metric (spearman first, then distance correlation).
for task in tqdm(beh_vars):
    for requested_metric in ('spearman', 'distcorr'):
        within_taskeeg, magnitude_corr, pvalue_corr, metric = task_eeg_variables(
            results_1_dir,
            path_eeg_csv,
            beh_tasks,
            behvar=task,
            group=idgroup,
            metric=requested_metric,
        )

        # nothing to store when the helper returns an empty table
        if len(within_taskeeg) == 0:
            continue

        # first column is the cognitive variable, the rest are the EEG variables
        within_taskeeg.columns = [task] + list(magnitude_corr.index)

        # file names are tagged with the task, the metric reported by the
        # helper, and the age group
        file_tail = task + '_' + metric + '_' + idgroup + '.csv'

        # df with variables
        within_taskeeg.to_csv(os.path.join(results_1_dir, '1_variables_eeg_' + file_tail))
        # df with correlations between EEG features
        magnitude_corr.to_csv(os.path.join(results_1_dir, '1_correlations_eeg_' + file_tail))
        # df with pvals between EEG features
        pvalue_corr.to_csv(os.path.join(results_1_dir, '1_pvals_eeg_' + file_tail))
    