### Tutorial #6: Applying Machine Learning Methods to Temporal Dimensions of EEG Data

In this tutorial, the same classification tasks from the last two tutorials will be examined on the temporal dimension of the EEG data.

In this strategy, the selected model is fitted at each time instance, and the performance of the model at each time instance is evaluated on the same time instance in new epochs.

The SlidingEstimator class in the MNE package is provided for this purpose. It takes a model and, optionally, a scoring function as parameters to create a sliding estimator. Once the sliding estimator has been created, it can be fitted to the data, scored on each task, or used to make predictions on new epochs.

In [1]:
import mne
from mne.decoding import SlidingEstimator, cross_val_multiscore
from mne.decoding import Vectorizer

from os.path import isfile, join
from os import listdir
import numpy as np
import statistics

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Models
from sklearn import svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

In [None]:
# Load dataset
data_folder = '../../study1/study1_eeg/epochdata/'
# Keep regular files only and skip macOS '.DS_Store' metadata entries.
files = [join(data_folder, f) for f in listdir(data_folder)
         if isfile(join(data_folder, f)) and '.DS_Store' not in f]
#files = files[:10]
# The participant id is parsed from the two characters that precede the last
# four characters of each path (presumably the file extension -- confirm).
ids = [int(f[-6:-4]) for f in files]


def _as_object_array(pairs):
    """Pack ``[participant_id, Epochs]`` pairs into an (n, 2) object array.

    Calling ``np.array`` directly on such pairs makes NumPy try to descend
    into the Epochs objects (they are sequence-like) next to a plain int,
    which is ragged input that recent NumPy versions refuse to convert.
    Filling a pre-allocated object array element by element stores every
    Epochs object as-is.
    """
    packed = np.empty((len(pairs), 2), dtype=object)
    for row, (participant_id, participant_epochs) in enumerate(pairs):
        packed[row, 0] = participant_id
        packed[row, 1] = participant_epochs
    return packed


# One row per participant; columns hold the epoch counts of the UN, UP and NP subsets.
numberOfEpochs = np.zeros((len(ids), 3))
# Read the EEG epochs:
epochs_all_UN, epochs_all_UP, epochs_all_NP = [], [], []
for f, path in enumerate(files):
    epochs = mne.read_epochs(path, verbose=False)
    # NOTE: these three names are deliberately left as plain loop variables;
    # later cells read `epochs_UP.times` etc. from the last iteration.
    epochs_UN = epochs['FU', 'FN']
    epochs_UP = epochs['FU', 'FP']
    epochs_NP = epochs['FN', 'FP']
    numberOfEpochs[f] = [len(epochs_UN.events),
                         len(epochs_UP.events),
                         len(epochs_NP.events)]
    # Each entry pairs the participant id with that participant's epochs.
    epochs_all_UN.append([ids[f], epochs_UN])
    epochs_all_UP.append([ids[f], epochs_UP])
    epochs_all_NP.append([ids[f], epochs_all_NP and epochs_NP or epochs_NP])

#print(numberOfEpochs)
epochs_all_UN = _as_object_array(epochs_all_UN)
epochs_all_UP = _as_object_array(epochs_all_UP)
epochs_all_NP = _as_object_array(epochs_all_NP)

In [None]:
def getData_labels(epochs):
    """Flatten per-participant epochs into pooled data, label and id arrays.

    Parameters
    ----------
    epochs : iterable
        Each item is indexable: item[0] is the participant id and item[1] is
        an object exposing ``events`` (2-D array) and ``get_data()``.

    Returns
    -------
    tuple of np.ndarray
        ``(data, labels, ids)`` where ``data`` stacks every item yielded by
        ``get_data()``, ``labels`` holds the last column of ``events`` and
        ``ids`` repeats the participant id once per label.
    """
    samples, targets, owners = [], [], []
    for entry in epochs:
        participant_id, participant_epochs = entry[0], entry[1]
        # The last column of the events array is used as the class label.
        event_codes = participant_epochs.events[:, -1]
        targets += list(event_codes)
        owners += [participant_id] * len(event_codes)
        samples += list(participant_epochs.get_data())

    return np.array(samples), np.array(targets), np.array(owners)

### Task #1: Classification of Unpleasant and Pleasant Events

In [None]:
# Build the pooled unpleasant-vs-pleasant dataset
data_UP, labels_UP, ids_UP = getData_labels(epochs_all_UP)

# Sanity check on the labels: "all finite?" followed by "any NaN?"
print(np.isfinite(labels_UP).all())
print(np.isnan(labels_UP).any())

# Same sanity check on the data itself, reported as a single message
if not np.isfinite(data_UP).all() or np.isnan(data_UP).any():
    print('Nan')
else:
    print('not nan')

In [None]:
# Pipeline applied at every time point: flatten -> standardise -> LDA
clf_UP = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LinearDiscriminantAnalysis(solver='svd'),
)
sl_UP = SlidingEstimator(clf_UP, scoring='accuracy')

# Refuse to cross-validate when the data holds NaN or infinite values
if not np.isfinite(data_UP).all() or np.isnan(data_UP).any():
    print('Input contains NaN or infinity!')
else:
    CV_score_time = cross_val_multiscore(sl_UP, data_UP, labels_UP, cv=3)
    print(CV_score_time)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def plotCVScores(times, CV_score_time):
    """Draw cross-validation scores against time and show the figure.

    Parameters
    ----------
    times : array-like
        Values for the x-axis.
    CV_score_time : np.ndarray
        Score array; its transpose is plotted against ``times``.
    """
    figure, axes = plt.subplots()
    figure.suptitle('CV Scores')
    axes.plot(times, CV_score_time.T)
    # Label through the Axes object that was just created
    axes.set_xlabel('Time')
    axes.set_ylabel('Mean CV Accuracy')
    plt.show()


In [None]:
# Plot the unpleasant-vs-pleasant CV scores over time.
# NOTE: `epochs_UP` is the variable left behind by the last iteration of the
# loading loop; only its `.times` attribute is used here for the x-axis.
plotCVScores(epochs_UP.times, CV_score_time)

### Task #2: Classification of Unpleasant and Neutral Events

In [None]:
# Preparing dataset: pool the unpleasant/neutral epochs of all participants
# into data, label (last events column) and participant-id arrays.
data_UN, labels_UN, ids_UN = getData_labels(epochs_all_UN)

In [None]:
# Pipeline applied at every time point: flatten -> standardise -> LDA
clf_UN = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LinearDiscriminantAnalysis(solver='svd'),
)
sl_UN = SlidingEstimator(clf_UN, scoring='accuracy')

# Refuse to cross-validate when the data holds NaN or infinite values
if not np.isfinite(data_UN).all() or np.isnan(data_UN).any():
    print('Input contains NaN or infinity!')
else:
    CV_score_time = cross_val_multiscore(sl_UN, data_UN, labels_UN, cv=3)
    print(CV_score_time)
    plotCVScores(epochs_UN.times, CV_score_time)

### Task #3: Classification of Neutral and Pleasant Events

In [None]:
# Preparing dataset: pool the neutral/pleasant epochs of all participants
# into data, label (last events column) and participant-id arrays.
data_NP, labels_NP, ids_NP = getData_labels(epochs_all_NP)

In [None]:
# Pipeline applied at every time point: flatten -> standardise -> LDA
clf_NP = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    LinearDiscriminantAnalysis(solver='svd'),
)
sl_NP = SlidingEstimator(clf_NP, scoring='accuracy')

# Refuse to cross-validate when the data holds NaN or infinite values
if not np.isfinite(data_NP).all() or np.isnan(data_NP).any():
    print('Input contains NaN or infinity!')
else:
    CV_score_time = cross_val_multiscore(sl_NP, data_NP, labels_NP, cv=3)
    print(CV_score_time)
    plotCVScores(epochs_NP.times, CV_score_time)

#### Analysis on temporal dimensions for each participant separately

1. Prepare data for the analysis

In [3]:
# Folder holding one epochs file per participant
data_folder = '../../study1/study1_eeg/epochdata/'
# Full paths of the regular files in that folder, ignoring '.DS_Store' entries
files = [
    join(data_folder, name)
    for name in listdir(data_folder)
    if '.DS_Store' not in name and isfile(join(data_folder, name))
]

In [4]:
# One Epochs object per participant file
epochs = [mne.read_epochs(path, verbose=False) for path in files]

# Dataset with unpleasant and neutral events
epochs_UN = [subject['FU', 'FN'] for subject in epochs]
data_UN = [subset.get_data() for subset in epochs_UN]
labels_UN = [subset.events[:, -1] for subset in epochs_UN]

# Dataset with unpleasant and pleasant events
epochs_UP = [subject['FU', 'FP'] for subject in epochs]
data_UP = [subset.get_data() for subset in epochs_UP]
labels_UP = [subset.events[:, -1] for subset in epochs_UP]

# Dataset with neutral and pleasant events
epochs_NP = [subject['FN', 'FP'] for subject in epochs]
data_NP = [subset.get_data() for subset in epochs_NP]
labels_NP = [subset.events[:, -1] for subset in epochs_NP]

  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs = [mne.read_epochs(f, verbose=False) for f in files]
  epochs

2. Train classifiers with sliding window estimator on data of each participant separately.

In [None]:
def applyCrossValidation(data, labels, epochs, classifier):
    """Cross-validate a sliding estimator on one dataset and plot the scores.

    Parameters
    ----------
    data : array-like
        Input passed unchanged to ``cross_val_multiscore``
        (assumed to be one participant's epoch data -- confirm with caller).
    labels : array-like
        Targets passed unchanged to ``cross_val_multiscore``.
    epochs : object
        Only its ``times`` attribute is read, for the x-axis of the plot.
    classifier : estimator
        Model wrapped in a ``SlidingEstimator`` scored with accuracy.

    Returns
    -------
    list
        ``[scores]`` holding the array returned by ``cross_val_multiscore``
        (3-fold), or an empty list when ``data`` contains NaN or infinity.
    """
    # np.isfinite is False for NaN as well as +/-inf, so one test covers both.
    if not np.isfinite(data).all():
        print('Input contains NaN or infinity!')
        return []

    # The cross-validation runs exactly once: every call receives the complete
    # `data`, so iterating over its first axis would only repeat the identical
    # computation (and plot) once per epoch.
    sl = SlidingEstimator(classifier, scoring='accuracy')
    cvs_tmp = cross_val_multiscore(sl, data, labels, cv=3)
    plotCVScores(epochs.times, cvs_tmp)
    return [cvs_tmp]

#### Classification Between Unpleasant and Neutral Events

In [None]:
# Collect one score array per participant. Assigning the result directly inside
# the loop would keep only the last participant's scores.
CV_score_time_UN = []
for participant_data, participant_labels, participant_epochs in zip(data_UN, labels_UN, epochs_UN):
    # A fresh pipeline per participant so no fitted state is shared.
    clf = make_pipeline(Vectorizer(), StandardScaler(), LinearDiscriminantAnalysis(solver='svd'))
    participant_scores = applyCrossValidation(participant_data, participant_labels, participant_epochs, clf)
    # applyCrossValidation returns a list that is empty when the data was
    # rejected; its first entry is this participant's score array.
    # NOTE: a rejected participant is skipped, which shifts the positions of
    # the following participants in CV_score_time_UN.
    if len(participant_scores) > 0:
        CV_score_time_UN.append(participant_scores[0])

In [None]:
# Show the raw cross-validation scores collected for the unpleasant-vs-neutral task
print(CV_score_time_UN)

In [None]:
def averageCVScores(CV_score_time):
    """Average cross-validation scores over folds, separately for each entry.

    Parameters
    ----------
    CV_score_time : iterable
        Each item is a 2-D sequence whose rows are folds and whose columns
        are the scored positions (e.g. time points).

    Returns
    -------
    list of list
        One list per item of ``CV_score_time`` holding the column-wise mean
        across that item's rows.
    """
    avg_cv_scores = []
    for cv in CV_score_time:
        n_folds = len(cv)
        # zip(*cv) walks the columns, i.e. groups the folds' scores per position.
        avg_cv_scores.append([sum(column) / n_folds for column in zip(*cv)])
    # Hand the averages back to the caller; without this the result is lost.
    return avg_cv_scores

In [None]:
# Pass the collected unpleasant-vs-neutral CV scores to averageCVScores and
# keep whatever it returns under the name `avg_cv_score_time`.
avg_cv_score_time = averageCVScores(CV_score_time_UN)

In [None]:
# Plot one averaged score curve per participant, each in its own colour.
# The averages are read from `avg_cv_score_time`, the name the previous cell
# assigns; the formerly used `avg_cv_scores` is never defined at this level.
NUM_COLORS = len(avg_cv_score_time)
cm = plt.get_cmap('gist_rainbow')
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(111)
# Spread the colours evenly over the colormap, one per curve.
ax.set_prop_cycle(color=[cm(i / NUM_COLORS) for i in range(NUM_COLORS)])
ax.set_ylabel('Mean CV Accuracy')
ax.set_xlabel('Times')
# Curves, time axes and legend labels are matched by position.
for participant_epochs, participant_scores, participant_id in zip(epochs_UN, avg_cv_score_time, ids):
    ax.plot(participant_epochs.times, participant_scores, label=str(participant_id))
ax.legend(bbox_to_anchor=(1.1, 1.05))
plt.show()