## 7) Compare EEG references

- Some EEG features have been calculated twice: once using an average/CSD reference and once using a zero reference
- Here, EEG features are compared between these two reference choices using intraclass correlation coefficients (ICC) and Spearman correlations
- Since EEG features differ in their number of electrodes or brain regions, the ICC is calculated separately for each of these variables
- For example, for 'dfa exponent alpha' there will be 61 ICC values (one per electrode)

Gordillo, da Cruz, Moreno, Garobbio & Herzog

In [None]:
import os
import pingouin as pg
from scipy.stats import spearmanr
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Fix the seed of NumPy's global random number generator so that any random
# draws made through np.random are repeatable between runs
np.random.seed(234)

# The data and results folders used in this notebook are resolved relative to
# the current working directory
main_dir = os.getcwd()

In [None]:
# define data directory
data_dir = os.path.join(main_dir, 'data')

# folder holding one CSV file per EEG feature
path_eeg_csv = os.path.join(data_dir, 'csv_data')

# Find EEG features.
# Only names that really end in '.csv' are kept, because later cells strip the
# last four characters to build the feature label. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would otherwise
# make the row order of the saved results differ between machines.
eeg_features = sorted(f for f in os.listdir(path_eeg_csv) if f.endswith('.csv'))

# path to zero ref EEG features
z_path_eeg_csv = os.path.join(data_dir, 'csv_data', 'zero_ref')

# Find zero ref EEG features (same filtering and ordering as above)
z_eeg_features = sorted(f for f in os.listdir(z_path_eeg_csv) if f.endswith('.csv'))

In [None]:
# To store the 25th/50th/75th percentiles of the Spearman and ICC values taken
# across electrodes, brain regions, or microstates parameters.
# One single-row DataFrame is appended per EEG feature.
all_cor = []
all_icc = []
y_cor = []
y_icc = []
o_cor = []
o_icc = []


def _find_zero_ref(feature_file, candidates):
    """Return the zero-reference file that belongs to *feature_file*, or None.

    An identically named file is preferred. Otherwise the first candidate whose
    name contains the feature name (file name without extension) is used.
    """
    if feature_file in candidates:
        return feature_file
    stem = os.path.splitext(feature_file)[0]
    # NOTE(review): substring matching can still be ambiguous when one feature
    # name is contained in another -- confirm against the real file names.
    return next((name for name in candidates if stem in name), None)


def _split_by_group(table):
    """Return (all, young, older) feature tables without demographic columns.

    Rows with Group == 1 form the young table, rows with Group == 2 the older
    table; 'Group', 'Gender' and 'Age' are removed from all three.
    """
    features = table.drop(['Group', 'Gender', 'Age'], axis=1)
    young = features.loc[(table['Group'] == 1).to_numpy()]
    older = features.loc[(table['Group'] == 2).to_numpy()]
    return features, young, older


def _spearman_per_column(ref_1, ref_2):
    """Return Spearman's rho between the two references for every column of *ref_1*."""
    return [spearmanr(ref_1[col], ref_2[col])[0] for col in list(ref_1)]


def _icc_per_column(ref_1, ref_2):
    """Return one ICC value between the two references for every column of *ref_1*.

    Each column is reshaped to long format (Participant, EEG, reference), with
    the reference coded 0 for *ref_1* and 1 for *ref_2*, and passed to
    pingouin's intraclass_corr.
    """
    n_1, n_2 = ref_1.shape[0], ref_2.shape[0]
    values = []
    for col in list(ref_1):
        # NOTE(review): participants are paired by row position, which assumes
        # both files list the same participants in the same order -- confirm.
        long_format = pd.DataFrame(
            data=np.hstack(([np.arange(n_1), ref_1[col].values, np.zeros(n_1)],
                            [np.arange(n_2), ref_2[col].values, np.ones(n_2)])).T,
            columns=['Participant', 'EEG', 'reference'])
        icc = pg.intraclass_corr(data=long_format, targets='Participant',
                                 raters='reference', ratings='EEG').round(3)
        # Row label 1 of the returned table is the ICC that is kept
        # (presumably ICC2 -- TODO confirm against the pingouin documentation)
        values.append(icc['ICC'][1])
    return values


def _percentile_row(values, label):
    """Return a one-row DataFrame with the NaN-ignoring 25th/50th/75th percentiles."""
    return pd.DataFrame(data=[np.nanpercentile(values, [25, 50, 75])],
                        index=[label],
                        columns=['25th', '50th', '75th'])


for k in tqdm(eeg_features):

    # find the same feature with a zero reference; features without one are
    # skipped before any file is read
    zero_file = _find_zero_ref(k, z_eeg_features)
    if zero_file is None:
        continue

    feature_name = os.path.splitext(k)[0]

    dataeeg_1, dataeeg_1y, dataeeg_1o = _split_by_group(
        pd.read_csv(os.path.join(path_eeg_csv, k), index_col=0))
    dataeeg_2, dataeeg_2y, dataeeg_2o = _split_by_group(
        pd.read_csv(os.path.join(z_path_eeg_csv, zero_file), index_col=0))

    # Same analysis for all participants, the young group and the older group
    for ref_1, ref_2, cor_store, icc_store in (
            (dataeeg_1, dataeeg_2, all_cor, all_icc),
            (dataeeg_1y, dataeeg_2y, y_cor, y_icc),
            (dataeeg_1o, dataeeg_2o, o_cor, o_icc)):
        # percentiles of the Spearman correlations across electrodes, brain
        # regions or microstates parameters
        cor_store.append(_percentile_row(_spearman_per_column(ref_1, ref_2), feature_name))
        # percentiles of the ICC values across the same variables
        icc_store.append(_percentile_row(_icc_per_column(ref_1, ref_2), feature_name))
    

In [None]:
# save data
results_dir = os.path.join(main_dir, 'results', '7_icc_references_results')

# Fail with a clear message when the loop above collected nothing
# (pd.concat would otherwise raise a less helpful ValueError on an empty list)
if not all_cor:
    raise ValueError('No EEG feature with a zero-reference counterpart was processed; '
                     'nothing to save.')

# DataFrame.to_csv does not create missing folders, so make sure it exists
os.makedirs(results_dir, exist_ok=True)

# All data
spearman_ref = pd.concat(all_cor)      # Spearman
icc_ref = pd.concat(all_icc)           # ICC

# Younger data
spearman_ref_y = pd.concat(y_cor)      # Spearman
icc_ref_y = pd.concat(y_icc)           # ICC

# Older data
spearman_ref_o = pd.concat(o_cor)      # Spearman
icc_ref_o = pd.concat(o_icc)           # ICC

# Write every table (one row per EEG feature, one column per percentile)
for _table, _file_name in (
        (spearman_ref, '7_spearman_references.csv'),
        (icc_ref, '7_icc_references.csv'),
        (spearman_ref_y, '7_spearman_references_y.csv'),
        (icc_ref_y, '7_icc_references_y.csv'),
        (spearman_ref_o, '7_spearman_references_o.csv'),
        (icc_ref_o, '7_icc_references_o.csv')):
    _table.to_csv(os.path.join(results_dir, _file_name))