In [1]:
import os
import mne
import PyQt6
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from collections import Counter

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, f1_score
from sklearn.model_selection import StratifiedKFold

from scipy import stats
from scipy.stats import ttest_rel
from scipy.stats import false_discovery_control

%matplotlib qt

In [5]:
folders = [x for x in os.listdir('OCD/') if os.path.isdir('OCD/' + x)]
fldr2label = {folders[i]: i for i in range(len(folders))}
fldr2label

{'anxiety_ADD (47)': 0,
 'anxiety_AFD (18)': 1,
 'anxiety_general_AD (16)': 2,
 'bipolar_BPD_1(26)': 3,
 'bipolar_BPD_2(25)': 4,
 'controls (157)': 5,
 'Cyclothymia(10)': 6,
 'depression_mild (29)': 7,
 'depression_moderade (47)': 8,
 'depression_severe (32)': 9,
 'personality_disorder (56)': 10,
 'stress_eating_disorder (14)': 11,
 'stress_neurasthenia (7)': 12,
 'stress_Somatoform_disorder (12)': 13,
 'stress_Stress_disorder (24)': 14}

In [41]:
label2class = {
    0: 0, 1: 0, 2: 0,
    3: 1, 4: 1,
    5: 2,
    6: 3,
    7: 4, 8: 4, 9: 4,
    10: 5,
    11: 6,
    12: 7,
    13: 8,
    14: 9,
}
stress_labels = [11, 12, 13, 14]
stress_classes = [6, 7, 8, 9]

In [None]:
og_files = []
zg_files = []
og_labels = []
zg_labels = []
for fldr in folders:
    pth =  'OCD/' + fldr
    og_pths = [x for x in os.listdir(pth) if x.lower().endswith('.edf') and ('og.' in x.lower() or 'ог.' in x.lower() or 'eo.' in x.lower() or '_eo' in x.lower())]
    zg_pths = [x for x in os.listdir(pth) if x.lower().endswith('.edf') and ('zg.' in x.lower() or 'зг.' in x.lower() or 'ec.' in x.lower() or 'fon.' in x.lower() or '_ec' in x.lower() or 'eс.' in x.lower())]
    left = [x for x in os.listdir(pth) if x not in og_pths and x not in zg_pths]
    if len(left) > 0:
        print(pth, left)
    for f in og_pths:
        og_files.append(pth + '/' + f)
        og_labels.append(fldr2label[fldr])
    for f in zg_pths:
        zg_files.append(pth + '/' + f)
        zg_labels.append(fldr2label[fldr])

print(f'Number of files for open eyes: {len(og_files)}')
print(f'Number of files for closed eyes: {len(zg_files)}')

# ensured that og_files[i] is the pair for zg_files[i]

Number of files for open eyes: 518
Number of files for closed eyes: 518


# Topography features

In [51]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.svm import SVC

In [44]:
def get_frontal_features(ch_names, psd):
    indices = [i for i in range(len(ch_names)) if 'F3' in ch_names[i] or 'Fz' in ch_names[i] or 'F4' in ch_names[i]]
    return psd[indices]

def get_central_features(ch_names, psd):
    indices = [i for i in range(len(ch_names)) if 'C3' in ch_names[i] or 'Cz' in ch_names[i] or 'C4' in ch_names[i]]
    return psd[indices]

def get_parietal_features(ch_names, psd):
    indices = [i for i in range(len(ch_names)) if 'P3' in ch_names[i] or 'Pz' in ch_names[i] or 'P4' in ch_names[i]]
    return psd[indices]

def get_frontal2central_gradient(ch_names, psd):
    frontal_indices = [i for i in range(len(ch_names)) if 'F3' in ch_names[i] or 'Fz' in ch_names[i] or 'F4' in ch_names[i]]
    central_indices = [i for i in range(len(ch_names)) if 'C3' in ch_names[i] or 'Cz' in ch_names[i] or 'C4' in ch_names[i]]
    return np.sum(psd[frontal_indices], axis=0) / np.sum(psd[central_indices], axis=0)

def get_frontal2parietal_gradient(ch_names, psd):
    frontal_indices = [i for i in range(len(ch_names)) if 'F3' in ch_names[i] or 'Fz' in ch_names[i] or 'F4' in ch_names[i]]
    parietal_indices = [i for i in range(len(ch_names)) if 'P3' in ch_names[i] or 'Pz' in ch_names[i] or 'P4' in ch_names[i]]
    return np.sum(psd[frontal_indices], axis=0) / np.sum(psd[parietal_indices], axis=0)

def get_central2parietal_gradient(ch_names, psd):
    central_indices = [i for i in range(len(ch_names)) if 'C3' in ch_names[i] or 'Cz' in ch_names[i] or 'C4' in ch_names[i]]
    parietal_indices = [i for i in range(len(ch_names)) if 'P3' in ch_names[i] or 'Pz' in ch_names[i] or 'P4' in ch_names[i]]
    return np.sum(psd[central_indices], axis=0) / np.sum(psd[parietal_indices], axis=0)

def get_front2back_gradient(ch_names, psd):
    front_indices = [i for i in range(len(ch_names)) if any([x in ch_names[i] for x in ['Fp1', 'Fp2', 'F3', 'Fz', 'F4']])]  
    back_indices = [i for i in range(len(ch_names)) if any([x in ch_names[i] for x in ['O1', 'O2', 'P3', 'Pz', 'P4']])]
    return np.sum(psd[front_indices], axis=0) / np.sum(psd[back_indices], axis=0)

def get_left2right_gradient(ch_names, psd):
    left_indices = [i for i in range(len(ch_names)) if any([x in ch_names[i] for x in ['F3', 'C3', 'P3', 'O1']])]
    right_indices = [i for i in range(len(ch_names)) if any([x in ch_names[i] for x in ['F4', 'C4', 'P4', 'O2']])]
    return np.sum(psd[left_indices], axis=0) / np.sum(psd[right_indices], axis=0)


## Open eyes

In [61]:
channels2use = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'Cz', 'C4', 'T5', 'T6', 'P3', 'Pz', 'P4', 'O1', 'O2']
freq_bands = [(6, 8), (8, 10), (10, 12), (12, 14), (13, 20), (20, 26), (8, 13), (13, 26), (8, 26), (6, 26)]

classifiers = {'SVM': SVC(random_state=92), 'KNN': KNeighborsClassifier()}

topo_features_og = []

for i in tqdm(range(len(og_files))):
    path = og_files[i]
    try:
        sample = mne.io.read_raw_edf(path, verbose=False, preload=True)
    except Exception as e:
        print(f'skipped {path}')
        continue
    # skip faulty data for now
    if 'chan' in sample.ch_names[0].lower():
        print(f'skipped {path}')
        continue
    sample = sample.filter(l_freq=1, h_freq=30, method='iir', verbose=False)
    # get only necessary channels, reorder them
    channels = sample.ch_names
    to_drop = channels[19:]
    sample.drop_channels(to_drop)
    new_idx = []
    skip = False
    for ch in channels2use:
        found = False
        for k in range(19):
            if ch in channels[k]:
                new_idx.append(k)
                found = True
                break
        if not found:
            skip = True
            break
    if skip:
        print(f'skipped {path}')
        continue
    
    data = sample.get_data()[new_idx, :int(13 * s_freq)]
    s_freq = int(sample.info['sfreq'])
    psd, freqs = mne.time_frequency.psd_array_multitaper(data, sfreq=s_freq, fmin=0.5, fmax=30, normalization='length', verbose=False)
    topo_features_og.append({
        'frontal': get_frontal_features(channels, psd),
        'central': get_central_features(channels, psd),
        'parietal': get_parietal_features(channels, psd),
        'frontal2central': get_frontal2central_gradient(channels, psd),
        'frontal2parietal': get_frontal2parietal_gradient(channels, psd),
        'central2parietal': get_central2parietal_gradient(channels, psd),
        'front2back': get_front2back_gradient(channels, psd),
        'left2right': get_left2right_gradient(channels, psd),
        'label': og_labels[i]
    })

  0%|          | 0/518 [00:00<?, ?it/s]

skipped OCD/controls (157)/BORUTTO_JANNA_VLADIMIROVNA_48_EO_free.edf
skipped OCD/controls (157)/Kutuz_f23_contr_og.edf
skipped OCD/controls (157)/MANUILOVA_ELENA_55_og.edf
skipped OCD/controls (157)/Martinenko_m45_og.edf
skipped OCD/controls (157)/Skopincev_20_EO_free.edf
skipped OCD/depression_moderade (47)/FiAV_m50_f32-1_At_Nt_og.edf


In [None]:
frontal_features = [x['frontal'] for x in topo_features_og]
central_features = [x['central'] for x in topo_features_og]
parietal_features = [x['parietal'] for x in topo_features_og]
frontal2central_features = [x['frontal2central'] for x in topo_features_og]
frontal2parietal_features = [x['frontal2parietal'] for x in topo_features_og]
central2parietal_features = [x['central2parietal'] for x in topo_features_og]
front2back_features = [x['front2back'] for x in topo_features_og]
left2right_features = [x['left2right'] for x in topo_features_og]
labels = [x['label'] for x in topo_features_og]

### Features one by one

All labels

In [85]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

for k in topo_features_og[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [x[k].flatten() for x in topo_features_og]
    for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(labels)[train]
        y_test = np.array(labels)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.11250261416492494, std: 0.030606284602774345
central
	F1 score: 0.10486250423676235, std: 0.025602983231276306
parietal
	F1 score: 0.12223074671071998, std: 0.011575335434729935
frontal2central
	F1 score: 0.0774597833455145, std: 0.029984167815477556
frontal2parietal
	F1 score: 0.09779532740214456, std: 0.034392507508768266
central2parietal
	F1 score: 0.09068106494867942, std: 0.027668563019389653
front2back
	F1 score: 0.11498950869912547, std: 0.021684691499209045
left2right
	F1 score: 0.1112333003673633, std: 0.04347262443343937


Broad categories

In [76]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)
classes = [label2class[x] for x in labels]

for k in topo_features_og[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [x[k].flatten() for x in topo_features_og]
    for i, (train, test) in enumerate(kf.split(list(range(len(classes))), classes)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(classes)[train]
        y_test = np.array(classes)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.16016623964228058, std: 0.028209450556244683
central
	F1 score: 0.1949888604748365, std: 0.033500412808975324
parietal
	F1 score: 0.16295284781679323, std: 0.02864178674387819
frontal2central
	F1 score: 0.12691166559053219, std: 0.019388950709221572
frontal2parietal
	F1 score: 0.1596938003769357, std: 0.04683574331080182
central2parietal
	F1 score: 0.15057948800879722, std: 0.04109434929722972
front2back
	F1 score: 0.1613583381796355, std: 0.05536802042520719
left2right
	F1 score: 0.13116597703251426, std: 0.019182017864940903


Stress response removed as class

In [77]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

new_ids = [i for i in range(len(labels)) if labels[i] not in stress_labels]
new_labels = [labels[i] for i in new_ids]

for k in topo_features_og[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [topo_features_og[i][k].flatten() for i in new_ids]
    for i, (train, test) in enumerate(kf.split(list(range(len(new_labels))), new_labels)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(new_labels)[train]
        y_test = np.array(new_labels)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.14228432207718228, std: 0.024001507924142896
central
	F1 score: 0.12802797558151224, std: 0.02408283644427039
parietal
	F1 score: 0.13443983510192622, std: 0.03244637513448802
frontal2central
	F1 score: 0.11751893707541805, std: 0.035525024657985085
frontal2parietal
	F1 score: 0.11519565472775573, std: 0.031896568060066556
central2parietal
	F1 score: 0.1300317287405456, std: 0.05905077833918241
front2back
	F1 score: 0.15250655297972876, std: 0.0370593287612749
left2right
	F1 score: 0.12052739449702003, std: 0.05194412150234549


### All features

In [103]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in topo_features_og[0].keys() if k != 'label']))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.0955267794837589, std: 0.024501477150044748


In [104]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in ['frontal', 'central', 'parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.15253638270687114, std: 0.050638889473513096


In [105]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in ['frontal2central', 'frontal2parietal', 'central2parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.09192030755785453, std: 0.027617024762315912


In [None]:
# gives no improvement
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in ['frontal', 'central', 'parietal', 'central2parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.09068106494867942, std: 0.027668563019389653


In [106]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in ['front2back', 'left2right']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.1284111926958375, std: 0.03340391906428833


In [None]:
# gives no improvement
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_og[i][k].flatten() for k in ['frontal', 'central', 'parietal', 'left2right']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.1112333003673633, std: 0.04347262443343937


### Stat analysis?

## Closed eyes

In [119]:
channels2use = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'Cz', 'C4', 'T5', 'T6', 'P3', 'Pz', 'P4', 'O1', 'O2']
freq_bands = [(6, 8), (8, 10), (10, 12), (12, 14), (13, 20), (20, 26), (8, 13), (13, 26), (8, 26), (6, 26)]

classifiers = {'SVM': SVC(random_state=92), 'KNN': KNeighborsClassifier()}

topo_features_zg = []

to_skip = ['BORUTTO_JANNA_VLADIMIROVNA', 'Kutuz_f23_contr', 'MANUILOVA_ELENA_55', 'Martinenko_m45', 'Skopincev_20', 'FiAV_m50']

for i in tqdm(range(len(zg_files))):
    path = zg_files[i]
    if any([x in path for x in to_skip]):
        continue
    try:
        sample = mne.io.read_raw_edf(path, verbose=False, preload=True)
    except Exception as e:
        print(f'skipped {path}')
        continue
    # skip faulty data for now
    if 'chan' in sample.ch_names[0].lower():
        print(f'skipped {path}')
        continue
    sample = sample.filter(l_freq=1, h_freq=30, method='iir', verbose=False)
    # get only necessary channels, reorder them
    channels = sample.ch_names
    to_drop = channels[19:]
    sample.drop_channels(to_drop)
    new_idx = []
    skip = False
    for ch in channels2use:
        found = False
        for k in range(19):
            if ch in channels[k]:
                new_idx.append(k)
                found = True
                break
        if not found:
            skip = True
            break
    if skip:
        print(f'skipped {path}')
        continue
    
    data = sample.get_data()[new_idx, :int(13 * s_freq)]
    s_freq = int(sample.info['sfreq'])
    psd, freqs = mne.time_frequency.psd_array_multitaper(data, sfreq=s_freq, fmin=0.5, fmax=30, normalization='length', verbose=False)
    topo_features_zg.append({
        'frontal': get_frontal_features(channels, psd),
        'central': get_central_features(channels, psd),
        'parietal': get_parietal_features(channels, psd),
        'frontal2central': get_frontal2central_gradient(channels, psd),
        'frontal2parietal': get_frontal2parietal_gradient(channels, psd),
        'central2parietal': get_central2parietal_gradient(channels, psd),
        'front2back': get_front2back_gradient(channels, psd),
        'left2right': get_left2right_gradient(channels, psd),
        'label': zg_labels[i]
    })

  0%|          | 0/518 [00:00<?, ?it/s]

In [120]:
frontal_features = [x['frontal'] for x in topo_features_zg]
central_features = [x['central'] for x in topo_features_zg]
parietal_features = [x['parietal'] for x in topo_features_zg]
frontal2central_features = [x['frontal2central'] for x in topo_features_zg]
frontal2parietal_features = [x['frontal2parietal'] for x in topo_features_zg]
central2parietal_features = [x['central2parietal'] for x in topo_features_zg]
front2back_features = [x['front2back'] for x in topo_features_zg]
left2right_features = [x['left2right'] for x in topo_features_zg]
labels = [x['label'] for x in topo_features_zg]

### Features one by one

All labels

In [121]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

for k in topo_features_zg[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [x[k].flatten() for x in topo_features_zg]
    for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(labels)[train]
        y_test = np.array(labels)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.12976919313541488, std: 0.03331798632837232
central
	F1 score: 0.14141747662604853, std: 0.016829266786306495
parietal
	F1 score: 0.11723079768003453, std: 0.03701843408594934
frontal2central
	F1 score: 0.09969380533973904, std: 0.037221849556686906
frontal2parietal
	F1 score: 0.11845553740554782, std: 0.031759880712988846
central2parietal
	F1 score: 0.11185274344459609, std: 0.03006002327910206
front2back
	F1 score: 0.12002205312738017, std: 0.025111508041410335
left2right
	F1 score: 0.0620403544033275, std: 0.014041766357170363


Broad categories

In [123]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)
classes = [label2class[x] for x in labels]

for k in topo_features_zg[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [x[k].flatten() for x in topo_features_zg]
    for i, (train, test) in enumerate(kf.split(list(range(len(classes))), classes)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(classes)[train]
        y_test = np.array(classes)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.21859389616061903, std: 0.07933795307490775
central
	F1 score: 0.19436420451362107, std: 0.032912563322366604
parietal
	F1 score: 0.1681550383842781, std: 0.05019505090061975
frontal2central
	F1 score: 0.14823737010722554, std: 0.037935489903403334
frontal2parietal
	F1 score: 0.16686885172225396, std: 0.052130254844792855
central2parietal
	F1 score: 0.16831473771437494, std: 0.031177688869385514
front2back
	F1 score: 0.15557972194257774, std: 0.05544135205729288
left2right
	F1 score: 0.11873261319844786, std: 0.03136688451649841


Stress response removed as class

In [124]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

new_ids = [i for i in range(len(labels)) if labels[i] not in stress_labels]
new_labels = [labels[i] for i in new_ids]

for k in topo_features_zg[0].keys():
    if k == 'label':
        continue
    f1_scores = []
    print(k)
    features = [topo_features_zg[i][k].flatten() for i in new_ids]
    for i, (train, test) in enumerate(kf.split(list(range(len(new_labels))), new_labels)):
        clf = KNeighborsClassifier()
        X_train = np.array(features)[train]
        X_test = np.array(features)[test]
        y_train = np.array(new_labels)[train]
        y_test = np.array(new_labels)[test]
        clf.fit(X_train, y_train)   
        f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

    print(f'\tF1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

frontal
	F1 score: 0.1370273163656369, std: 0.02466818620837538
central
	F1 score: 0.1617249548682112, std: 0.0304164411322227
parietal
	F1 score: 0.13347793362897656, std: 0.016218306070898383
frontal2central
	F1 score: 0.1266377942552916, std: 0.0377354587365152
frontal2parietal
	F1 score: 0.147489159901878, std: 0.04781762248036987
central2parietal
	F1 score: 0.11604610250880217, std: 0.03441600487617146
front2back
	F1 score: 0.15841881215564604, std: 0.035238039123653545
left2right
	F1 score: 0.08711616772518153, std: 0.017850978379902488


### All features

In [125]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in topo_features_zg[0].keys() if k != 'label']))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.106378336735964, std: 0.04928315813869875


In [126]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in ['frontal', 'central', 'parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.1631519287050073, std: 0.029819053871439537


In [127]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in ['frontal2central', 'frontal2parietal', 'central2parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.09402330446309448, std: 0.04581963144160866


In [None]:
# no improvement
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in ['frontal', 'central', 'parietal', 'central2parietal']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.11185274344459609, std: 0.03006002327910206


In [131]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in ['front2back', 'left2right']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.09834593232855646, std: 0.05175936428859388


In [133]:
# no improvement
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

features = []
for i in range(len(labels)):
    features.append(np.concatenate([topo_features_zg[i][k].flatten() for k in ['frontal', 'central', 'parietal', 'left2right']]))
f1_scores = []
for i, (train, test) in enumerate(kf.split(list(range(len(labels))), labels)):
    clf = KNeighborsClassifier()
    X_train = np.array(features)[train]
    X_test = np.array(features)[test]
    y_train = np.array(labels)[train]
    y_test = np.array(labels)[test]
    clf.fit(X_train, y_train)   
    f1_scores.append(f1_score(y_test, clf.predict(X_test), average='macro'))

print(f'F1 score: {np.mean(f1_scores)}, std: {np.std(f1_scores)}')

F1 score: 0.0620403544033275, std: 0.014041766357170363
