# Detecting P300 
Blah.

This notebook and related process is based on the work of Aaron Newman and Colin Conrad. It follows the MIT lisence though, so feel free to use it and change it however you would like -- just make sure you credit this work! 

## Import the necessary libraries
We will need to import a large number of libraries (such as MNE and scikit-learn) as well as datasets. If you are not familiar with Python, don't worry, we will work through these together soon.

In [117]:
#note: using the Python 2.7 kernel
import numpy as np
import matplotlib, mne
from mne.io.reference import set_eeg_reference

from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold

from mne import io, compute_raw_covariance, pick_types, read_events, Epochs
from mne.decoding import Vectorizer, PSDEstimator
from mne.viz import tight_layout
from mne.decoding import CSP 
from mne.time_frequency import psd_multitaper
from mne.filter import filter_data
from mne.preprocessing import Xdawn

data_path = '/eeg_recordings/p300-speller'

subjects = ['01','02','03','04','05','06']

datasets = []

mne.set_log_level('error')

## Generate epochs with no filters

In [23]:
datasets = []

for s in subjects:
    raw_fname = './' + s + '/eeg_recordings/p300-speller-'+ s + '.set' #epochs filename 
    raw = mne.io.read_raw_eeglab(raw_fname, 
                          #eog=['VEOG', 'HEOG']
                          event_id={'Nontarget':1, 'Target':2}, 
                          montage='standard_1020',
                          preload=True) 
    events = mne.find_events(raw) 
    picks_eeg = mne.pick_types(raw.info, eeg=True, eog=True,
                           stim=False, exclude=[]) 
    epochs = mne.Epochs(raw, events, {'Nontarget':1, 'Target':2}, 0, 0.3, proj=False,
                    picks=picks_eeg, baseline=None, preload=True)
    datasets.append(epochs)

Reading ./01/eeg_recordings\p300-speller-01.fdt
Reading 0 ... 138995  =      0.000 ...   271.475 secs...
Used Annotations descriptions: ['1', '2']
720 events found


  preload=True)
  preload=True)


Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped
Reading ./02/eeg_recordings\p300-speller-02.fdt
Reading 0 ... 140055  =      0.000 ...   273.545 secs...


  preload=True)
  preload=True)


Used Annotations descriptions: ['1', '2']
720 events found
Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped
Reading ./03/eeg_recordings\p300-speller-03.fdt
Reading 0 ... 138895  =      0.000 ...   271.279 secs...


  preload=True)
  preload=True)


Used Annotations descriptions: ['1', '2']
720 events found
Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped
Reading ./04/eeg_recordings\p300-speller-04.fdt
Reading 0 ... 138555  =      0.000 ...   270.615 secs...
Used Annotations descriptions: ['1', '2']


  preload=True)
  preload=True)


720 events found
Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped
Reading ./05/eeg_recordings\p300-speller-05.fdt
Reading 0 ... 139255  =      0.000 ...   271.982 secs...
Used Annotations descriptions: ['1', '2']


  preload=True)
  preload=True)


720 events found
Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped
Reading ./06/eeg_recordings\p300-speller-06.fdt
Reading 0 ... 150919  =      0.000 ...   294.764 secs...


  preload=True)
  preload=True)


Used Annotations descriptions: ['1', '2']
720 events found
Event IDs: [1 2]
720 matching events found
No baseline correction applied
Not setting metadata
Loading data for 720 events and 155 original time points ...
0 bad epochs dropped


## Verify the datasets

In [24]:
datasets

[<Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 620
  'Target': 100>,
 <Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 604
  'Target': 116>,
 <Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 623
  'Target': 97>,
 <Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 623
  'Target': 97>,
 <Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 607
  'Target': 113>,
 <Epochs  |   720 events (all good), 0 - 0.300781 sec, baseline off, ~29.0 MB, data loaded,
  'Nontarget': 604
  'Target': 116>]

### Now classify
Based on https://www.martinos.org/mne/stable/auto_examples/decoding/plot_decoding_xdawn_eeg.html#sphx-glr-auto-examples-decoding-plot-decoding-xdawn-eeg-py

This classifier will just go to majority class. We need to implement a subsampler first.

In [38]:
lda_raw_acc = []
for d in datasets:
    
    clf = make_pipeline(Vectorizer(),
                    MinMaxScaler(),
                    LinearDiscriminantAnalysis(n_components=1, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001))

    labels = d.events[:, -1]
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) #split into the n-folds for n-fold cross validation

    preds = np.empty(len(labels))
    
    for train, test in cv.split(d, labels):
        clf.fit(d[train], labels[train])
        preds[test] = clf.predict(d[test])
    
    name = s[0:2]
    
    target_names = ['Subject ' + name + 'Nontarget', 'Subject ' + name+'Target']
    report = classification_report(labels, preds, target_names=target_names)
    acc = accuracy_score(labels, preds)
    matrix = confusion_matrix(labels, preds)
    
    print "Singular value decomposition (SVD) LDA Accuracy for Subject " + name + ": " + str(acc) + "\n"
    print report #use this for a detailed report including precision, recall and f-measure
    print matrix
    lda_raw_acc.append(float(acc))
    
avg_acc = sum(lda_raw_acc)/len(lda_raw_acc)

print "***************************************************"
print "* Average Accuracy Among Subjects: " + str(avg_acc) + " *"
print "***************************************************"

Singular value decomposition (SVD) LDA Accuracy for Subject 06: 0.5958333333333333

                     precision    recall  f1-score   support

Subject 06Nontarget       0.86      0.63      0.73       620
   Subject 06Target       0.14      0.37      0.20       100

          micro avg       0.60      0.60      0.60       720
          macro avg       0.50      0.50      0.47       720
       weighted avg       0.76      0.60      0.66       720

[[392 228]
 [ 63  37]]
Singular value decomposition (SVD) LDA Accuracy for Subject 06: 0.7236111111111111

                     precision    recall  f1-score   support

Subject 06Nontarget       0.83      0.84      0.84       604
   Subject 06Target       0.12      0.11      0.12       116

          micro avg       0.72      0.72      0.72       720
          macro avg       0.48      0.48      0.48       720
       weighted avg       0.72      0.72      0.72       720

[[508  96]
 [103  13]]
Singular value decomposition (SVD) LDA Accuracy 

In [119]:
s = '02'

raw_fname = './' + s + '/eeg_recordings/p300-speller-'+ s + '.set' #epochs filename 

raw = mne.io.read_raw_eeglab(raw_fname,
                             eog=['VEOG', 'HEOG'],
                             event_id={'Nontarget':1, 'Target':2}, 
                             montage='standard_1020',
                             preload=True) 

raw.filter(0.1, 40, 
           l_trans_bandwidth = 'auto', 
           h_trans_bandwidth = 'auto', 
           filter_length= 'auto', 
           method='fft', 
           n_jobs = 4)  

events = mne.find_events(raw) 

picks_eeg = mne.pick_types(raw.info, eeg=True, eog=False, stim=False, exclude=[]) 

epochs = mne.Epochs(raw, events, {'Nontarget':1, 'Target':2}, 0, 0.3, proj=False,
                picks=picks_eeg, baseline=None, preload=True)

d = epochs

clf = make_pipeline(Xdawn(n_components=2),
                    Vectorizer(),
                    MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear',
                                       multi_class='auto'))
labels = d.events[:, -1]

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) #split into the n-folds for n-fold cross validation

preds = np.empty(len(labels))

for train, test in cv.split(d, labels):
    clf.fit(d[train], labels[train])
    preds[test] = clf.predict(d[test])

name = s[0:2]

target_names = ['Subject ' + name + 'Nontarget', 'Subject ' + name+'Target']
report = classification_report(labels, preds, target_names=target_names)
acc = accuracy_score(labels, preds)
matrix = confusion_matrix(labels, preds)

print "Singular value decomposition (SVD) LDA Accuracy for Subject " + name + ": " + str(acc) + "\n"
print report #use this for a detailed report including precision, recall and f-measure
print matrix
lda_raw_acc.append(float(acc))

Singular value decomposition (SVD) LDA Accuracy for Subject 02: 0.8388888888888889

                     precision    recall  f1-score   support

Subject 02Nontarget       0.84      1.00      0.91       604
   Subject 02Target       0.00      0.00      0.00       116

          micro avg       0.84      0.84      0.84       720
          macro avg       0.42      0.50      0.46       720
       weighted avg       0.70      0.84      0.77       720

[[604   0]
 [116   0]]
