In [1]:
import os
import mne
import PyQt6
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from collections import Counter

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, f1_score
from sklearn.model_selection import StratifiedKFold

%matplotlib qt

## Calculate DE features

In [2]:
folders = os.listdir('bipolar/')

fldr2label = {
    'control (160)': 0,
    'Биполярка депресс тип (35)': 4,
    'Биполярка маниакальный тип (36)': 5,
    'мания_психи(30)': 6,
    'циклотимия (13)': 7,
    'depressed_32-0(22)': 1,
    'depressed_32-1(44)': 2,
    'depressed_32-2(22)': 3,
}
og_files = []
zg_files = []
og_labels = []
zg_labels = []
for fldr in folders:
    pth =  'bipolar/' + fldr
    og_pths = [x for x in os.listdir(pth) if x.lower().endswith('.edf') and ('og.' in x.lower() or 'ог.' in x.lower() or '_ог' in x.lower() or 'eo' in x.lower())]
    zg_pths = [x for x in os.listdir(pth) if x.lower().endswith('.edf') and ('zg.' in x.lower() or 'зг.' in x.lower() or '_зг' in x.lower() or 'ec' in x.lower() or 'fon.' in x.lower() or 'eс' in x.lower())]
    for f in og_pths:
        og_files.append(pth + '/' + f)
        og_labels.append(fldr2label[fldr])
    for f in zg_pths:
        zg_files.append(pth + '/' + f)
        zg_labels.append(fldr2label[fldr])

print(f'Number of files for open eyes: {len(og_files)}')
print(f'Number of files for closed eyes: {len(zg_files)}')

Number of files for open eyes: 363
Number of files for closed eyes: 363


In [3]:
cnt_og = Counter(og_labels)
print(cnt_og)

Counter({0: 161, 2: 44, 5: 36, 4: 35, 6: 30, 1: 22, 3: 22, 7: 13})


In [4]:
cnt_zg = Counter(zg_labels)
print(cnt_zg)

Counter({0: 161, 2: 44, 5: 36, 4: 35, 6: 30, 1: 22, 3: 22, 7: 13})


In [5]:
def compute_features(data, s_freq=250, pairs=[], n_channels=19):
    freq_bands = np.array([1, 4, 8, 13, 30])

    psd, freqs = mne.time_frequency.psd_array_welch(data, sfreq=s_freq, fmin=1, fmax=30, n_fft=512, n_per_seg=s_freq, window='hann', average=None, verbose=False)

    # compute ES and DE
    differential_entropy = np.zeros((psd.shape[0], len(freq_bands) - 1, psd.shape[2]))
    energy_spectrum = np.zeros((psd.shape[0], len(freq_bands) - 1, psd.shape[2]))

    for i in range(n_channels):
        for j in range(len(freq_bands) - 1):
            band_indices = np.where((freqs >= freq_bands[j]) & (freqs < freq_bands[j+1]))[0]
            if len(band_indices) > 0:
                band_psd = psd[i, band_indices]
                band_variance = np.mean(band_psd, axis=0)
                differential_entropy[i, j] = 0.5 * np.log(2 * np.pi * np.e * band_variance)
                energy_spectrum[i, j] = band_variance
    # compute asymmetry features
    left = differential_entropy[[x[0] for x in pairs]]
    right = differential_entropy[[x[1] for x in pairs]]
    dasm = left - right
    rasm = left / right
    return energy_spectrum, differential_entropy, dasm, rasm

In [6]:
# calculate features for the first 14s of each recording
n_channels = 19

cleaned_labels_og = []
es_features_og = []
de_features_og = []
dasm_features_og = []
rasm_features_og = []

name_pairs = [('Fp1', 'Fp2'), ('F3', 'F4'), ('F7', 'F8'), ('C3', 'C4'), ('T3', 'T4'), ('P3', 'P4'), ('T5', 'T6'), ('O1', 'O2')]
idx_pairs = [(0, 1), (2, 4), (5, 6), (9, 11), (7, 8), (14, 16), (12, 13), (17, 18)]
channels2use = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'Cz', 'C4', 'T5', 'T6', 'P3', 'Pz', 'P4', 'O1', 'O2']

for i in range(len(og_files)):
    try:
        path = og_files[i]
        sample = mne.io.read_raw_edf(path, verbose=False, preload=True)
        # skip faulty data for now
        if 'chan' in sample.ch_names[0].lower():
            continue

        sample = sample.filter(l_freq=1, h_freq=30, method='iir', verbose=False)
        channels = sample.ch_names
        to_drop = channels[19:]
        sample.drop_channels(to_drop)

        new_idx = []
        skip = False
        for ch in channels2use:
            found = False
            for k in range(19):
                if ch in channels[k]:
                    new_idx.append(k)
                    found = True
                    break
            if not found:
                skip = True
                break
        if skip:
            print(f'skipped {path}')
            continue

        s_freq = int(sample.info['sfreq'])
        data = sample.get_data()[new_idx, :int(14 * s_freq)]

        es, de, dasm, rasm = compute_features(data, s_freq, idx_pairs)
        es_features_og.append(es)
        de_features_og.append(de)
        dasm_features_og.append(dasm)
        rasm_features_og.append(rasm)
        cleaned_labels_og.append(og_labels[i])
    except Exception as e:
        print(e)
        print(path)

skipped bipolar/control (160)/BORUTTO_JANNA_VLADIMIROVNA_48_EO_free.edf
skipped bipolar/control (160)/MANUILOVA_ELENA_55_EO_free.edf
skipped bipolar/control (160)/Martinenko_45_EO.edf
could not convert string to float: '        '
bipolar/control (160)/Skopincev_20_EO_free.edf


In [7]:
# calculate features for the first 14s of each recording
n_channels = 19

cleaned_labels_zg = []
es_features_zg = []
de_features_zg = []
dasm_features_zg = []
rasm_features_zg = []

name_pairs = [('Fp1', 'Fp2'), ('F3', 'F4'), ('F7', 'F8'), ('C3', 'C4'), ('T3', 'T4'), ('P3', 'P4'), ('T5', 'T6'), ('O1', 'O2')]
idx_pairs = [(0, 1), (2, 4), (5, 6), (9, 11), (7, 8), (14, 16), (12, 13), (17, 18)]
channels2use = ['Fp1', 'Fp2', 'F3', 'Fz', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'Cz', 'C4', 'T5', 'T6', 'P3', 'Pz', 'P4', 'O1', 'O2']

to_skip = ['BORUTTO_JANNA_VLADIMIROVNA_48', 'MANUILOVA_ELENA_55', 'Martinenko_45', 'Skopincev_20']

for i in range(len(zg_files)):
    try:
        path = zg_files[i]
        if any([x in path for x in to_skip]):
            continue
        sample = mne.io.read_raw_edf(path, verbose=False, preload=True)
        # skip faulty data for now
        if 'chan' in sample.ch_names[0].lower():
            continue

        sample = sample.filter(l_freq=1, h_freq=30, method='iir', verbose=False)
        channels = sample.ch_names
        to_drop = channels[19:]

        new_idx = []
        skip = False
        for ch in channels2use:
            found = False
            for k in range(19):
                if ch in channels[k]:
                    new_idx.append(k)
                    found = True
                    break
            if not found:
                skip = True
                break
        if skip:
            continue

        s_freq = int(sample.info['sfreq'])
        data = sample.get_data()[new_idx, :int(14 * s_freq)]

        es, de, dasm, rasm = compute_features(data, s_freq, idx_pairs)
        es_features_zg.append(es)
        de_features_zg.append(de)
        dasm_features_zg.append(dasm)
        rasm_features_zg.append(rasm)
        cleaned_labels_zg.append(zg_labels[i])
    except Exception as e:
        print(e)
        print(path)

In [8]:
es_features_og = np.array(es_features_og)
de_features_og = np.array(de_features_og)
dasm_features_og = np.array(dasm_features_og)
rasm_features_og = np.array(rasm_features_og)
es_features_zg = np.array(es_features_zg)
de_features_zg = np.array(de_features_zg)
dasm_features_zg = np.array(dasm_features_zg)
rasm_features_zg = np.array(rasm_features_zg)

cleaned_labels_og = np.array(cleaned_labels_og)
cleaned_labels_zg = np.array(cleaned_labels_zg)

## Models

### Open eyes

In [11]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

classifiers = ["Random Forest", "KNN", "SVM"]

for j, clf in enumerate(classifiers):
    f1_scores_macro = []
    f1_scores_micro = []

    print(f'{clf}')

    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
        X_train = np.concatenate((es_features_og[train].reshape(-1, 19 * 4 * 14), de_features_og[train].reshape(-1, 19 * 4 * 14), dasm_features_og[train].reshape(-1, 8 * 4 * 14), rasm_features_og[train].reshape(-1, 8 * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_og[test].reshape(-1, 19 * 4 * 14), de_features_og[test].reshape(-1, 19 * 4 * 14), dasm_features_og[test].reshape(-1, 8 * 4 * 14), rasm_features_og[test].reshape(-1, 8 * 4 * 14)), axis=1)
        y_train = cleaned_labels_og[train]
        y_test = cleaned_labels_og[test]
        if j == 0:
            clf = RandomForestClassifier(random_state=92)
        elif j == 1:
            clf = KNeighborsClassifier()
        elif j == 2:
            clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('--------------------------------------------')

Random Forest
avg macro-f1: 0.2134123537969546
avg micro-f1: 0.5041079812206573
--------------------------------------------
KNN
avg macro-f1: 0.313935631813156
avg micro-f1: 0.4929577464788732
--------------------------------------------
SVM
avg macro-f1: 0.4320934030407605
avg micro-f1: 0.587793427230047
--------------------------------------------


#### Feature elimination

No asymmetry features

In [12]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
    X_train = np.concatenate((es_features_og[train].reshape(-1, 19 * 4 * 14), de_features_og[train].reshape(-1, 19 * 4 * 14)), axis=1)
    X_test = np.concatenate((es_features_og[test].reshape(-1, 19 * 4 * 14), de_features_og[test].reshape(-1, 19 * 4 * 14)), axis=1)
    y_train = cleaned_labels_og[train]
    y_test = cleaned_labels_og[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.4200857850234449
avg micro-f1: 0.5765649452269171


No energy spectrum

In [13]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
    X_train = np.concatenate((de_features_og[train].reshape(-1, 19 * 4 * 14), dasm_features_og[train].reshape(-1, 8 * 4 * 14), rasm_features_og[train].reshape(-1, 8 * 4 * 14)), axis=1)
    X_test = np.concatenate((de_features_og[test].reshape(-1, 19 * 4 * 14), dasm_features_og[test].reshape(-1, 8 * 4 * 14), rasm_features_og[test].reshape(-1, 8 * 4 * 14)), axis=1)
    y_train = cleaned_labels_og[train]
    y_test = cleaned_labels_og[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.4320934030407605
avg micro-f1: 0.587793427230047


Only differential entropy

In [14]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
    X_train = de_features_og[train].reshape(-1, 19 * 4 * 14)
    X_test = de_features_og[test].reshape(-1, 19 * 4 * 14)
    y_train = cleaned_labels_og[train]
    y_test = cleaned_labels_og[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.4200857850234449
avg micro-f1: 0.5765649452269171


Only energy spectrum

In [15]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
    X_train = es_features_og[train].reshape(-1, 19 * 4 * 14)
    X_test = es_features_og[test].reshape(-1, 19 * 4 * 14)
    y_train = cleaned_labels_og[train]
    y_test = cleaned_labels_og[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.019871794871794874
avg micro-f1: 0.08638497652582158


#### Features by bands

In [16]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for j in range(4):
    print('BAND', j)
    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
        X_train = np.concatenate((de_features_og[train, :, j].reshape(-1, 19 * 14), dasm_features_og[train, :, j].reshape(-1, 8 * 14), rasm_features_og[train, :, j].reshape(-1, 8 * 14)), axis=1)
        X_test = np.concatenate((de_features_og[test, :, j].reshape(-1, 19 * 14), dasm_features_og[test, :, j].reshape(-1, 8 * 14), rasm_features_og[test, :, j].reshape(-1, 8 * 14)), axis=1)
        y_train = cleaned_labels_og[train]
        y_test = cleaned_labels_og[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
        # print(confusion_matrix(y_test, preds))
        # print()

    # print(f1_scores_macro)
    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    # print(f1_scores_micro)
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('--------------------------------------------')

BAND 0
avg macro-f1: 0.33368200755679733
avg micro-f1: 0.4760954616588419
--------------------------------------------
BAND 1
avg macro-f1: 0.354980978955328
avg micro-f1: 0.48456572769953044
--------------------------------------------
BAND 2
avg macro-f1: 0.3649216541490458
avg micro-f1: 0.4873891497130932
--------------------------------------------
BAND 3
avg macro-f1: 0.3528734712174783
avg micro-f1: 0.48249217527386545
--------------------------------------------


#### Features by brain regions

In [17]:
zone2ids = {'frontal': [0, 1, 2, 3, 4, 5, 6], 'temporal': [7, 8, 12, 13], 'central': [9, 10, 11], 'parietal': [14, 15, 16], 'occipital': [17, 18]}
zone2pair = {'frontal': [0, 1, 2], 'temporal': [4, 6], 'central': [3], 'parietal': [5], 'occipital': [7]}

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for zone, ids in zone2ids.items():
    print(f'ZONE: {zone}')
    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og))), cleaned_labels_og)):
        n = len(ids)
        n_pairs = len(zone2pair[zone])
        X_train = np.concatenate((es_features_og[train][:, ids].reshape(-1, n * 4 * 14), de_features_og[train][:, ids].reshape(-1, n * 4 * 14), dasm_features_og[train][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14), rasm_features_og[train][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_og[test][:, ids].reshape(-1, n * 4 * 14), de_features_og[test][:, ids].reshape(-1, n * 4 * 14), dasm_features_og[test][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14), rasm_features_og[test][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14)), axis=1)
        y_train = cleaned_labels_og[train]
        y_test = cleaned_labels_og[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
        # print(confusion_matrix(y_test, preds))
        # print()

    # print(f1_scores_macro)
    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    # print(f1_scores_micro)
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('--------------------------------------------')

ZONE: frontal
avg macro-f1: 0.3745436422473487
avg micro-f1: 0.4929577464788732
--------------------------------------------
ZONE: temporal
avg macro-f1: 0.35901337553433105
avg micro-f1: 0.5013693270735524
--------------------------------------------
ZONE: central
avg macro-f1: 0.3609114468714058
avg micro-f1: 0.4921231090245175
--------------------------------------------
ZONE: parietal
avg macro-f1: 0.35276287533885897
avg micro-f1: 0.4860915492957747
--------------------------------------------
ZONE: occipital
avg macro-f1: 0.3481234367347128
avg micro-f1: 0.47521909233176834
--------------------------------------------


#### Unite classes

In [18]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

label_map = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2, 6: 3, 7: 4}

def map_labels(labels):
    new_labels = np.zeros(len(labels))
    for i in range(len(labels)):
        new_labels[i] = label_map[labels[i]]
    return new_labels

cleaned_labels_og_tmp = map_labels(cleaned_labels_og)

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_og_tmp))), cleaned_labels_og_tmp)):
    X_train = np.concatenate((es_features_og[train].reshape(-1, 19 * 4 * 14), de_features_og[train].reshape(-1, 19 * 4 * 14), dasm_features_og[train].reshape(-1, 8 * 4 * 14), rasm_features_og[train].reshape(-1, 8 * 4 * 14)), axis=1)
    X_test = np.concatenate((es_features_og[test].reshape(-1, 19 * 4 * 14), de_features_og[test].reshape(-1, 19 * 4 * 14), dasm_features_og[test].reshape(-1, 8 * 4 * 14), rasm_features_og[test].reshape(-1, 8 * 4 * 14)), axis=1)
    y_train = cleaned_labels_og_tmp[train]
    y_test = cleaned_labels_og_tmp[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.5288547171229467
avg micro-f1: 0.6296557120500783


### Closed eyes

In [19]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

classifiers = ["Random Forest", "KNN", "SVM"]

for j, clf in enumerate(classifiers):
    f1_scores_macro = []
    f1_scores_micro = []

    print(f'{clf}')

    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
        X_train = np.concatenate((es_features_zg[train].reshape(-1, 19 * 4 * 14), de_features_zg[train].reshape(-1, 19 * 4 * 14), dasm_features_zg[train].reshape(-1, 8 * 4 * 14), rasm_features_zg[train].reshape(-1, 8 * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_zg[test].reshape(-1, 19 * 4 * 14), de_features_zg[test].reshape(-1, 19 * 4 * 14), dasm_features_zg[test].reshape(-1, 8 * 4 * 14), rasm_features_zg[test].reshape(-1, 8 * 4 * 14)), axis=1)
        y_train = cleaned_labels_zg[train]
        y_test = cleaned_labels_zg[test]
        if j == 0:
            clf = RandomForestClassifier(random_state=92)
        elif j == 1:
            clf = KNeighborsClassifier()
        elif j == 2:
            clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('--------------------------------------------')

Random Forest
avg macro-f1: 0.2478204591444444
avg micro-f1: 0.5181533646322378
--------------------------------------------
KNN
avg macro-f1: 0.31527099586747587
avg micro-f1: 0.48200312989045385
--------------------------------------------
SVM
avg macro-f1: 0.4630724787768131
avg micro-f1: 0.6128716744913929
--------------------------------------------


#### Feature elimination

No asymmetry features

In [20]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
    X_train = np.concatenate((es_features_zg[train].reshape(-1, 19 * 4 * 14), de_features_zg[train].reshape(-1, 19 * 4 * 14)), axis=1)
    X_test = np.concatenate((es_features_zg[test].reshape(-1, 19 * 4 * 14), de_features_zg[test].reshape(-1, 19 * 4 * 14)), axis=1)
    y_train = cleaned_labels_zg[train]
    y_test = cleaned_labels_zg[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.46099207551993937
avg micro-f1: 0.6100938967136151


No energy spectrum

In [21]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
    X_train = np.concatenate((de_features_zg[train].reshape(-1, 19 * 4 * 14), dasm_features_zg[train].reshape(-1, 8 * 4 * 14), rasm_features_zg[train].reshape(-1, 8 * 4 * 14)), axis=1)
    X_test = np.concatenate((de_features_zg[test].reshape(-1, 19 * 4 * 14), dasm_features_zg[test].reshape(-1, 8 * 4 * 14), rasm_features_zg[test].reshape(-1, 8 * 4 * 14)), axis=1)
    y_train = cleaned_labels_zg[train]
    y_test = cleaned_labels_zg[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.4630724787768131
avg micro-f1: 0.6128716744913929


Only energy spectrum

In [22]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
    X_train = es_features_zg[train].reshape(-1, 19 * 4 * 14)
    X_test = es_features_zg[test].reshape(-1, 19 * 4 * 14)
    y_train = cleaned_labels_zg[train]
    y_test = cleaned_labels_zg[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.019871794871794874
avg micro-f1: 0.08638497652582158


Only differential entropy

In [23]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
    X_train = de_features_zg[train].reshape(-1, 19 * 4 * 14)
    X_test = de_features_zg[test].reshape(-1, 19 * 4 * 14)
    y_train = cleaned_labels_zg[train]
    y_test = cleaned_labels_zg[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.46099207551993937
avg micro-f1: 0.6100938967136151


#### Features by bands

In [24]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for j in range(4):
    print(f'BAND {j}')
    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
        X_train = np.concatenate((es_features_zg[train, :, j].reshape(-1, 19 * 14), de_features_zg[train, :, j].reshape(-1, 19 * 14), dasm_features_zg[train, :, j].reshape(-1, 8 * 14), rasm_features_zg[train, :, j].reshape(-1, 8 * 14)), axis=1)
        X_test = np.concatenate((es_features_zg[test, :, j].reshape(-1, 19 * 14), de_features_zg[test, :, j].reshape(-1, 19 * 14), dasm_features_zg[test, :, j].reshape(-1, 8 * 14), rasm_features_zg[test, :, j].reshape(-1, 8 * 14)), axis=1)
        y_train = cleaned_labels_zg[train]
        y_test = cleaned_labels_zg[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))

    # print(f1_scores_macro)
    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    # print(f1_scores_micro)
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('------------------------------------------')

BAND 0
avg macro-f1: 0.3458356809028661
avg micro-f1: 0.49581377151799694
------------------------------------------
BAND 1
avg macro-f1: 0.3473792309939817
avg micro-f1: 0.48607198748043823
------------------------------------------
BAND 2
avg macro-f1: 0.3551551123886129
avg micro-f1: 0.49115805946791863
------------------------------------------
BAND 3
avg macro-f1: 0.3587663313639607
avg micro-f1: 0.4992566510172144
------------------------------------------


#### Features by brain regions

In [25]:
zone2ids = {'frontal': [0, 1, 2, 3, 4, 5, 6], 'temporal': [7, 8, 12, 13], 'central': [9, 10, 11], 'parietal': [14, 15, 16], 'occipital': [17, 18]}
zone2pair = {'frontal': [0, 1, 2], 'temporal': [4, 6], 'central': [3], 'parietal': [5], 'occipital': [7]}

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

for zone, ids in zone2ids.items():
    print(f'ZONE: {zone}')
    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
        n = len(ids)
        n_pairs = len(zone2pair[zone])
        X_train = np.concatenate((es_features_zg[train][:, ids].reshape(-1, n * 4 * 14), de_features_zg[train][:, ids].reshape(-1, n * 4 * 14), dasm_features_zg[train][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14), rasm_features_zg[train][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_zg[test][:, ids].reshape(-1, n * 4 * 14), de_features_zg[test][:, ids].reshape(-1, n * 4 * 14), dasm_features_zg[test][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14), rasm_features_zg[test][:, zone2pair[zone]].reshape(-1, n_pairs * 4 * 14)), axis=1)
        y_train = cleaned_labels_zg[train]
        y_test = cleaned_labels_zg[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
        # print(confusion_matrix(y_test, preds))
        # print()

    # print(f1_scores_macro)
    print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
    # print(f1_scores_micro)
    print(f'avg micro-f1: {np.mean(f1_scores_micro)}')
    print('--------------------------------------------')

ZONE: frontal
avg macro-f1: 0.37589617914376977
avg micro-f1: 0.5321987480438185
--------------------------------------------
ZONE: temporal
avg macro-f1: 0.3739683881986582
avg micro-f1: 0.5321009389671362
--------------------------------------------
ZONE: central
avg macro-f1: 0.3643290633990521
avg micro-f1: 0.5218831507563902
--------------------------------------------
ZONE: parietal
avg macro-f1: 0.3776156398555617
avg micro-f1: 0.5265453834115805
--------------------------------------------
ZONE: occipital
avg macro-f1: 0.373235281077944
avg micro-f1: 0.5192801251956182
--------------------------------------------


#### Unite classes

In [26]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=92)

f1_scores_macro = []
f1_scores_micro = []

label_map = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 2, 6: 3, 7: 4}

def map_labels(labels):
    new_labels = np.zeros(len(labels))
    for i in range(len(labels)):
        new_labels[i] = label_map[labels[i]]
    return new_labels

cleaned_labels_zg_tmp = map_labels(cleaned_labels_zg)

for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg_tmp))), cleaned_labels_zg_tmp)):
    X_train = np.concatenate((es_features_zg[train].reshape(-1, 19 * 4 * 14), de_features_zg[train].reshape(-1, 19 * 4 * 14), dasm_features_zg[train].reshape(-1, 8 * 4 * 14), rasm_features_zg[train].reshape(-1, 8 * 4 * 14)), axis=1)
    X_test = np.concatenate((es_features_zg[test].reshape(-1, 19 * 4 * 14), de_features_zg[test].reshape(-1, 19 * 4 * 14), dasm_features_zg[test].reshape(-1, 8 * 4 * 14), rasm_features_zg[test].reshape(-1, 8 * 4 * 14)), axis=1)
    y_train = cleaned_labels_zg_tmp[train]
    y_test = cleaned_labels_zg_tmp[test]
    clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    f1_scores_macro.append(f1_score(y_test, preds, average='macro'))
    f1_scores_micro.append(f1_score(y_test, preds, average='micro'))
    # print(confusion_matrix(y_test, preds))
    # print()

# print(f1_scores_macro)
print(f'avg macro-f1: {np.mean(f1_scores_macro)}')
# print(f1_scores_micro)
print(f'avg micro-f1: {np.mean(f1_scores_micro)}')

avg macro-f1: 0.5423991804343802
avg micro-f1: 0.6435837245696401


## Closed vs open classification quality

In [30]:
# since the number of recordings and classes are in the same order, the approach seems okay?

random_states = [1, 13, 27, 59, 92]

f1_scores_macro_og = []
f1_scores_micro_og = []
f1_scores_macro_zg = []
f1_scores_micro_zg = []

for rs in random_states:
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=rs)

    for i, (train, test) in enumerate(kf.split(list(range(len(cleaned_labels_zg))), cleaned_labels_zg)):
        # open eyes
        X_train = np.concatenate((es_features_og[train].reshape(-1, 19 * 4 * 14), de_features_og[train].reshape(-1, 19 * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_og[test].reshape(-1, 19 * 4 * 14), de_features_og[test].reshape(-1, 19 * 4 * 14)), axis=1)
        y_train = cleaned_labels_og[train]
        y_test = cleaned_labels_og[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro_og.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro_og.append(f1_score(y_test, preds, average='micro'))
        # closed eyes
        X_train = np.concatenate((es_features_zg[train].reshape(-1, 19 * 4 * 14), de_features_zg[train].reshape(-1, 19 * 4 * 14)), axis=1)
        X_test = np.concatenate((es_features_zg[test].reshape(-1, 19 * 4 * 14), de_features_zg[test].reshape(-1, 19 * 4 * 14)), axis=1)
        y_train = cleaned_labels_zg[train]
        y_test = cleaned_labels_zg[test]
        clf = SVC(kernel='linear', class_weight='balanced', random_state=92)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        f1_scores_macro_zg.append(f1_score(y_test, preds, average='macro'))
        f1_scores_micro_zg.append(f1_score(y_test, preds, average='micro'))

In [32]:
print(f'mean f1 for opened eyes: {np.mean(f1_scores_macro_og)}')
print(f'mean f1 for closed eyes: {np.mean(f1_scores_macro_zg)}')

mean f1 for opened eyes: 0.38530730394186646
mean f1 for closed eyes: 0.4146567020111155


In [33]:
from scipy.stats import ttest_rel

ttest_rel(f1_scores_macro_og, f1_scores_macro_zg)

TtestResult(statistic=-2.2014639236538347, pvalue=0.03244283852491652, df=49)