## 1 Hackathon baseline
We provide here a simple pipeline to read the data, train a Tangent Space Classifier and try naive
transfer between sessions.

In [78]:
%matplotlib inline
import os
import mne
import pandas as pd
from mne.externals.pymatreader import read_mat
import numpy as np
import matplotlib.pyplot as plt
import itertools
from glob import glob
import pyriemann
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

Here set the data_path to corresponding path on your computer

In [79]:
data_path = 'C:\\Users\\frank\\code\\NeuroErgonomics_Hackathon_2021'
#'/home/dcas/l.darmet/data/contest/comeptition_done'
n_subs = 4
n_sessions = 2
diff = ['MATBeasy', 'MATBmed', 'MATBdiff']

Read channels names and position

In [80]:
electrodes = pd.read_csv(data_path + '/Electrodes/chan_locs_standard',header=None, sep ='\t', names=['ch_names','x','y','z'])
print(electrodes.head())


   ch_names        x       y       z
0       Fp1 -29.4370  83.917  -6.990
1        Fz   0.3122  58.512  66.462
2        F3 -50.2440  53.111  42.192
3        F7 -70.2630  42.474 -11.420
4       FT9 -84.0760  14.567 -50.429


Covariance estimation For robust covariance estimation, we take advantage of shrinkage. Here
the [Oracle Approximating Shrinkage](https://scikit-learn.org/stable/modules/generated/sklearn.covariance.OAS.html) (OAS) is used. #### Classifier We use a simple Logistic
Regression (with a non-optimized L2 penalty) on [Tangent Space Features](https://hal.archives-ouvertes.fr/hal-00681328/document), extracted with [Pyriemann
toolbox](https://pyriemann.readthedocs.io/en/latest/). #### Channel selection A manual and naive EEG channel selection is performed
to use 13 electrodes, mostly frontal.



In [81]:
lr = LogisticRegression(C=1/10.)
clf = make_pipeline(pyriemann.estimation.Covariances(estimator='oas'),
    pyriemann.classification.TSclassifier(clf=lr))


ch_slice = ['F7', 'F5', 'F3', 'F1', 'F2', 'F4', 'F6', 'AF3', 'AFz', 'AF4','FP1', 'FP2', 'FPz']


## 1.1 Single subject epochs classification

In [82]:
for sub_n, session_n in itertools.product(range(n_subs), range(n_sessions)):
    epochs_data = []
    labels = []
    for lab_idx, level in enumerate(diff):
        sub = 'P{0:02d}'.format(sub_n+1)
        sess = f'S{session_n+1}'
        path = os.path.join(os.path.join(data_path, sub), sess) + f'/eeg/alldata_sbj{str(sub_n+1).zfill(2)}_sess{session_n+1}_{level}.set'
        # Read the epoched data with MNE
        epochs = mne.io.read_epochs_eeglab(path, verbose=False)
        # You could add some pre-processing here with MNE
        # We will just select some channels (mostly frontal ones)
        epochs = epochs.drop_channels(list(set(epochs.ch_names) -set(ch_slice)))

        # Get the data and concatenante with others MATB levels
        tmp = epochs.get_data()
        epochs_data.extend(tmp)
        labels.extend([lab_idx]*len(tmp))
    
    epochs_data = np.array(epochs_data)
    labels = np.array(labels)
    
    # Compute classification accuracy with 5-folds cross validation
    acc = cross_val_score(clf, X=epochs_data, y=labels, cv=5)
#     print(acc)
    print(f'Subject {sub} and session {session_n+1}: mean accuracy of {round(np.mean(acc), 2)} with a standard deviation of {round(np.std(acc), 2)}')

Subject P01 and session 1: mean accuracy of 0.78 with a standard deviation of 0.03
Subject P01 and session 2: mean accuracy of 0.74 with a standard deviation of 0.06
Subject P02 and session 1: mean accuracy of 0.74 with a standard deviation of 0.04
Subject P02 and session 2: mean accuracy of 0.83 with a standard deviation of 0.08
Subject P03 and session 1: mean accuracy of 0.67 with a standard deviation of 0.05
Subject P03 and session 2: mean accuracy of 0.63 with a standard deviation of 0.1
Subject P04 and session 1: mean accuracy of 0.73 with a standard deviation of 0.03
Subject P04 and session 2: mean accuracy of 0.78 with a standard deviation of 0.06


In [91]:
tmp = epochs.get_data()
print(len(tmp))
print(epochs)
print(epochs.get_data().shape)
epochs_data.shape

149
<EpochsEEGLAB |  149 events (all good), 0 - 1.996 sec, baseline off, ~5.7 MB, data loaded,
 'MATBdiff': 149>
(149, 10, 500)


(447, 10, 500)

## 1.2 Transfer from session 1 to session 2 for P01
For subject P01, a model is trained on session 1 and directly used for epochs of session 2

In [65]:
sub_n = 0


In [76]:
session_n = 0
epochs_data = []
labels = []
for lab_idx, level in enumerate(diff):
    sub = 'P{0:02d}'.format(sub_n+1)
    sess = f'S{session_n+1}'
    path = os.path.join(os.path.join(data_path, sub), sess) + f'/eeg/alldata_sbj{str(sub_n+1).zfill(2)}_sess{session_n+1}_{level}.set'
    # Read the epoched data with MNE
    epochs = mne.io.read_epochs_eeglab(path, verbose=False)
    # You could add some pre-processing here with MNE
    # We will just select some channels (mostly frontal ones)
    epochs = epochs.drop_channels(list(set(epochs.ch_names) - set(ch_slice)))

    # Get the data and concatenante with others MATB levels
    tmp = epochs.get_data()
    epochs_data.extend(tmp)
    labels.extend([lab_idx]*len(tmp))

epochs_data = np.array(epochs_data)
labels = np.array(labels)
# Train the model on all epochs from session 1
clf.fit(epochs_data, labels)



Pipeline(steps=[('covariances', Covariances(estimator='oas')),
                ('tsclassifier', TSclassifier(clf=LogisticRegression(C=0.1)))])

In [77]:
session_n = 1
epochs_data = []
labels = []
for lab_idx, level in enumerate(diff):
    sub = 'P{0:02d}'.format(sub_n+1)
    sess = f'S{session_n+1}'
    path = os.path.join(os.path.join(data_path, sub), sess) + f'/eeg/alldata_sbj{str(sub_n+1).zfill(2)}_sess{session_n+1}_{level}.set'
    # Read the epoched data with MNE
    tmp = mne.io.read_epochs_eeglab(path, verbose=False)
    # You could add some pre-processing here with MNE
    # We will just select some channels (mostly frontal ones)
    epochs = epochs.drop_channels(list(set(epochs.ch_names) - set(ch_slice)))
    
    # Get the data and concatenante with others MATB levels
    tmp = epochs.get_data()
    epochs_data.extend(tmp)
    labels.extend([lab_idx]*len(tmp))

epochs_data = np.array(epochs_data)
labels = np.array(labels)

# Use trained model to predict for all epochs of session 2 and compute accuracy
y_pred = clf.predict(epochs_data)
acc = accuracy_score(labels, y_pred)
print(f'Subject {sub} and transfer from session 1 to 2: mean accuracy of {round(acc, 2)}.')

Subject P01 and transfer from session 1 to 2: mean accuracy of 0.33.


In [60]:
submission = pd.DataFrame({'epochID':np.arange(len(y_pred)), 'prediction' : y_pred})
submission.head()

Unnamed: 0,epochID,prediction
0,0,2
1,1,2
2,2,2
3,3,2
4,4,2


In [61]:
submission.to_csv("submission.csv",header=True,index=False)

In [62]:
y_predd

NameError: name 'y_predd' is not defined

In [93]:
epochs_data

array([[-3.43385100e-06, -3.25384760e-06, -3.09842110e-06, ...,
        -8.03505719e-07,  3.11282337e-07,  6.09637380e-07],
       [-4.68323421e-06, -2.03526258e-06,  9.80643332e-07, ...,
         3.26994777e-06,  3.05282140e-06,  2.02247977e-06],
       [-4.08609438e-06, -2.65402174e-06, -7.46625483e-07, ...,
        -3.97810507e-06, -2.48682380e-06, -1.17737222e-06],
       ...,
       [-3.58012199e-06, -2.18071938e-06, -4.29864407e-07, ...,
        -3.92317629e-06, -2.15039706e-06, -8.55384648e-07],
       [-4.96033669e-06, -3.40122247e-06, -3.70908201e-07, ...,
        -6.67918158e-06, -4.01019335e-06, -1.40368879e-06],
       [-3.30757880e-06, -2.32430816e-06, -8.93423736e-07, ...,
        -3.89818478e-06, -2.34262514e-06, -1.10610080e-06]])

In [None]:
epochs_data.shape