## XDAWN Decoding from EEG data
* XDAWN converts channels and events to create feature vectors that can be fed into a logistic regression ([MNE documentation](https://www.martinos.org/mne/stable/auto_examples/decoding/plot_decoding_xdawn_eeg.html))
* This is a first attempt at exploring machine learning with EEG data. This method will produce a confusion matrix of event_ids.
* Note: This code is NOT mine; it is taken from the link above and adjusted for the data used in this project.

In [None]:
# Authors: Alexandre Barachant <alexandre.barachant@gmail.com>
#
# License: BSD (3-clause)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib

from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler

import mne
from mne import io, pick_types, read_events, Epochs
# from mne.datasets import sample
from mne.preprocessing import Xdawn
from mne.decoding import Vectorizer
from mne.viz import tight_layout

import os
import os.path as op

# Notebook-wide matplotlib defaults: large figures and a larger font.
matplotlib.rcParams.update({
    'figure.figsize': (15.0, 10.0),
    'font.size': 15,
})

# Directory holding the data for each run of each subject.
# Raw strings keep the Windows backslashes literal: '\e' and '\i' are not
# valid escape sequences and trigger a warning on recent Python versions.
drive_data_path = r'E:\eeg_data'

# Directory on the external drive where all tests of a subject are stored
# together in a single file.
drive_all_data_path = r'E:\eeg_data\ica_140_500_0.1'



The documentation starts with raw data; however, we already have epoched data with events and event IDs.

In [None]:
# Load the already-epoched recording of a single run and keep its data in
# memory (preload=True).
run1_epochs_file = op.join(drive_data_path, 'Ex10_Suj1_Run1-epo.fif')
epochs = mne.read_epochs(run1_epochs_file, preload=True)
# epochs.info

In [None]:
# The 61st channel (index 60) produces an error when the eigenvalues are
# computed during cross-validation, so only the first 60 channels
# (indices 0-59) are kept.
first_60_channels = epochs.ch_names[:60]
epochs = epochs.pick_channels(first_60_channels)

In [None]:
# Cross-validation scheme: 10 stratified folds, shuffled with a fixed seed so
# the splits are reproducible.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Classification pipeline: Xdawn (3 components) -> Vectorizer -> MinMaxScaler
# -> L2-penalised logistic regression.
clf = make_pipeline(
    Xdawn(n_components=3),
    Vectorizer(),
    MinMaxScaler(),
    LogisticRegression(penalty='l2'),
)

# The class label of each epoch is the last column of the events array.
labels = epochs.events[:, -1]
preds = np.empty(labels.shape[0])

# Refit the pipeline on every training split and store its predictions for
# the matching held-out split; after the loop each epoch has one
# out-of-fold prediction.
for train, test in cv.split(epochs, labels):
    preds[test] = clf.fit(epochs[train], labels[train]).predict(epochs[test])

In [None]:
# Display names for the classes, in the order passed to the report.
target_names = ['after', 'before', 'new', 'scramble']

# Per-class precision / recall / F1 computed from the cross-validated predictions.
report = classification_report(y_true=labels, y_pred=preds,
                               target_names=target_names)
print(report)

In [None]:
# Confusion matrix of true labels (rows) against predictions (columns).
cm = confusion_matrix(labels, preds)

# Divide every row by its total so each row sums to 1.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = cm.astype(float) / row_totals

Low accuracy scores, I must do more testing.

In [None]:
# Heat map of the row-normalised confusion matrix, one tick per class.
n_classes = len(target_names)
tick_marks = np.arange(n_classes)

plt.imshow(cm_normalized, cmap=plt.cm.Blues, interpolation='nearest')
plt.title('Normalized Confusion matrix')
plt.colorbar()

# Class names on both axes; the x labels are rotated by 45 degrees.
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
tight_layout()

plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

In [None]:
# Score of the pipeline on the `test` indices left over from the
# cross-validation loop, i.e. the held-out split of its last iteration only
# (not an average over all folds).
clf.score(X=epochs[test], y=labels[test])

## Random Forests

In [None]:
# Classification pipeline: Xdawn (3 components) -> Vectorizer -> MinMaxScaler
# -> random forest. The forest is seeded so repeated runs of this cell give
# the same predictions; without a random_state its results change from run
# to run even though the CV splits below are fixed.
clf = make_pipeline(Xdawn(n_components=3),
                    Vectorizer(),
                    MinMaxScaler(),
                    RandomForestClassifier(random_state=42))

# Cross-validation scheme: 10 stratified folds, shuffled with a fixed seed.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# The class label of each epoch is the last column of the events array.
labels = epochs.events[:, -1]
preds = np.empty(len(labels))

# Refit on every training split and store the predictions for the matching
# held-out split.
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

In [None]:
# Very low score on the test values. Only the `test` indices left over from
# the cross-validation loop (its last held-out split) are scored here.
clf.score(X=epochs[test], y=labels[test])

## Retry with all results of a subject in one file

In [None]:
# Load the epochs file that combines all runs of subject 1 and keep the data
# in memory (preload=True).
all_runs_file = op.join(drive_all_data_path, 'Subject1_all_runs-epo.fif')
epochs = mne.read_epochs(all_runs_file, preload=True)
# epochs.info

In [None]:
# The 61st channel produces an error when the eigenvalues are computed during
# cross-validation, so it must be excluded.
# NOTE(review): this cell keeps only the first 5 channels (indices 0-4),
# whereas the other cells of this notebook keep the first 60 - confirm that
# the smaller selection is intentional.
first_5_channels = epochs.ch_names[:5]
epochs = epochs.pick_channels(first_5_channels)

# Imported here because only this cell needs the one-vs-rest wrapper.
from sklearn.multiclass import OneVsRestClassifier

# Classification pipeline: Xdawn (3 components) -> Vectorizer -> MinMaxScaler
# -> L1-penalised logistic regression.
# penalty='l1' requires a solver that supports it. The default solver of
# current scikit-learn ('lbfgs') rejects it with a ValueError, so 'liblinear'
# is requested explicitly. liblinear is a binary solver, so it is wrapped in
# OneVsRestClassifier to handle the multi-class labels.
clf = make_pipeline(Xdawn(n_components=3),
                    Vectorizer(),
                    MinMaxScaler(),
                    OneVsRestClassifier(
                        LogisticRegression(penalty='l1', solver='liblinear')))

# Cross-validation scheme: 10 stratified folds, shuffled with a fixed seed.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# The class label of each epoch is the last column of the events array.
labels = epochs.events[:, -1]
preds = np.empty(len(labels))

# Refit on every training split and store the predictions for the matching
# held-out split.
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])
    
# Display names for the classes, in the order passed to the report.
target_names = ['after', 'before', 'new', 'scramble']

# Per-class precision / recall / F1 computed from the cross-validated predictions.
report = classification_report(y_true=labels, y_pred=preds,
                               target_names=target_names)
print(report)

# Confusion matrix of true labels (rows) against predictions (columns).
cm = confusion_matrix(labels, preds)

# Divide every row by its total so each row sums to 1.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = cm.astype(float) / row_totals

# Heat map of the row-normalised confusion matrix, one tick per class.
n_classes = len(target_names)
tick_marks = np.arange(n_classes)

plt.imshow(cm_normalized, cmap=plt.cm.Blues, interpolation='nearest')
plt.title('Normalized Confusion matrix')
plt.colorbar()

# Class names on both axes; the x labels are rotated by 45 degrees.
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
tight_layout()

plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

In [None]:
# Score of the pipeline on the `test` indices left over from the
# cross-validation loop, i.e. the held-out split of its last iteration only
# (not an average over all folds).
clf.score(X=epochs[test], y=labels[test])

## Classify New image versus Scrambled.
Adjust the above code to classify only new versus scrambled images. Converting this to a binary classification problem may be more successful.

In [None]:
# Load all epochs of the run; the selection of the 'scrambled' and 'new'
# events is applied afterwards.
epochs_binary = mne.read_epochs(op.join(drive_data_path, 'Ex10_Suj1_Run1-epo.fif'),
                                preload=True)

# Show (number of epochs, number of channels, number of time points) for
# reference. These are the dimensions of the data array, obtained through the
# public API rather than the private `_data` attribute.
(len(epochs_binary), len(epochs_binary.ch_names), len(epochs_binary.times))

In [None]:
# Display the event_id attribute of the epochs, to look up which codes belong
# to the 'new' and 'scrambled' events.
epochs_binary.event_id

In [None]:
# Keep only the epochs whose event code (last column of the events array) is
# 4 or 5 (new or scrambled).
event_codes = epochs_binary.events[:, -1]
is_new_or_scrambled = (event_codes == 4) | (event_codes == 5)
epochs_binary = epochs_binary[is_new_or_scrambled]

In [None]:
# Number of epochs decreased from 280 to 141 due to filtering event_id's 4 and 5.
# Prints (number of epochs, number of channels, number of time points) using
# the public API rather than the private `_data` attribute.
print((len(epochs_binary), len(epochs_binary.ch_names), len(epochs_binary.times)))

# Also need to exclude channel 61: keep only the first 60 channels
# (indices 0-59).
epochs_binary = epochs_binary.pick_channels(epochs_binary.ch_names[:60])

## Binary classification Results
* Filtering for event_id 4 and 5 greatly improved results.

In [None]:
# Classification pipeline: Xdawn (3 components) -> Vectorizer -> MinMaxScaler
# -> L1-penalised logistic regression.
# penalty='l1' requires a solver that supports it. The default solver of
# current scikit-learn ('lbfgs') rejects it with a ValueError, so 'liblinear'
# (the default of older releases) is requested explicitly.
clf = make_pipeline(Xdawn(n_components=3),
                    Vectorizer(),
                    MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear'))

# Cross-validation scheme: 10 stratified folds, shuffled with a fixed seed.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# The class label of each epoch is the last column of the events array.
labels_binary = epochs_binary.events[:, -1]
preds_binary = np.empty(len(labels_binary))

# Refit on every training split and store the predictions for the matching
# held-out split.
for train, test in cv.split(epochs_binary, labels_binary):
    clf.fit(epochs_binary[train], labels_binary[train])
    preds_binary[test] = clf.predict(epochs_binary[test])
    
# Display names for the two classes, in the order passed to the report.
target_names = ['new', 'scramble']

# Per-class precision / recall / F1 computed from the cross-validated predictions.
report = classification_report(y_true=labels_binary, y_pred=preds_binary,
                               target_names=target_names)
print(report)

# Confusion matrix of true labels (rows) against predictions (columns).
cm = confusion_matrix(labels_binary, preds_binary)

# Divide every row by its total so each row sums to 1.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = cm.astype(float) / row_totals

# Heat map of the row-normalised confusion matrix, one tick per class.
n_classes = len(target_names)
tick_marks = np.arange(n_classes)

plt.imshow(cm_normalized, cmap=plt.cm.Blues, interpolation='nearest')
plt.title('Normalized Confusion matrix')
plt.colorbar()

# Class names on both axes; the x labels are rotated by 45 degrees.
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
tight_layout()

plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()

In [None]:
# Score of the pipeline on the `test` indices left over from the
# cross-validation loop, i.e. the held-out split of its last iteration only
# (not an average over all folds).
clf.score(X=epochs_binary[test], y=labels_binary[test])