In [None]:
# %%capture
# use %%capture command to suppress output for this cell
import mne
import numpy as np
import matplotlib.pyplot as plt




# Load the EEGBCI recordings subject by subject, cut them into epochs and
# gather everything into three arrays:
#   epochs_total        - full epochs (tmin=-1 s ... tmax=4 s)
#   epochs_train_total  - the 1 s ... 2 s crop of every epoch
#   labels_total        - one label per epoch (event code minus 2)
#
# Per-subject arrays are collected in lists and concatenated once after the
# loop; concatenating inside the loop re-copies all previously loaded data on
# every iteration.
epoch_size = []            # number of epochs contributed by each loaded subject
epochs_parts = []
labels_parts = []
epochs_train_parts = []

# Zero-based loop indices that are skipped (i.e. subjects 88, 92 and 100).
skipped_indices = {87, 91, 99}

# Channel indices are the same for every subject, so they are defined once.
# Electrodes relevant to right hand MI: FC5, FC3, C5, C3, CP5, CP3
right_hand = np.array([0, 1, 7, 8, 14, 15])
# Electrodes relevant to left hand MI: FC2 (or FP2), FC4, FC6, C4
# NOTE(review): the original comments disagree on whether index 23 is FP2 or
# FC2 - check the channel names printed below.
left_hand = np.array([23, 5, 6, 12])

# NOTE(review): the original description says that only runs 6, 10, 14
# (hands vs. feet) are relevant, but the active selection is runs 4, 8, 12,
# which the EEGBCI documentation lists as left vs. right fist imagery.
# Confirm which task the "hands"/"feet" event names below should describe.
for i in range(108):
    print(str(i))
    if i in skipped_indices:
        continue
    subject = i + 1
#     runs = [6, 10, 14]
    runs = [4, 8, 12]

    # the 3 statements below are taken from the mne website:
    # read the EDF files of the selected runs and concatenate them
    raw_fnames = mne.datasets.eegbci.load_data(subject, runs)
    raw_files = [mne.io.read_raw_edf(f, preload=True, stim_channel='auto') for f in
                 raw_fnames]
    raw = mne.io.concatenate_raws(raw_files)
    raw.rename_channels(lambda x: x.strip('.'))

    # Print the names of all electrode channels; only some of them are
    # relevant to motor imagery.
    print(raw.ch_names)

    # Print the selected channel names to verify we got the right ones.
    # (A separate loop variable is used so the subject index `i` is not
    # overwritten.)
    for channel_idx in right_hand:
        print(raw.ch_names[channel_idx])

    for channel_idx in left_hand:
        print(raw.ch_names[channel_idx])

    # Motor imagery is reported to change the mu band (8 - 13 Hz) and the beta
    # band (13 - 25 Hz); the band-pass filter below is currently disabled.
#     raw.filter(7., 30., fir_design='firwin', skip_by_annotation='edge')

    # Cut the raw data into epochs. Stimulus onsets are read from the stim
    # channel; each epoch spans one second before to four seconds after onset.
    events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')
    picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False,
                           exclude='bads')
    tmin, tmax = -1., 4.
    event_id = dict(hands=2, feet=3)
    epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks,
                        baseline=None, preload=True)

    # Right after the stimulus the EEG might not be pure motor imagery, so
    # only the window from 1 s to 2 s after onset is used as training data.
    epochs_train = epochs.copy().crop(tmin=1., tmax=2.)
    epochs_train = epochs_train.get_data()

    epoch_size.append(epochs_train.shape[0])
    # Event codes 2/3 become labels 0/1.
    labels = epochs.events[:, -1] - 2

    epochs_data = epochs.get_data()
    epochs_parts.append(epochs_data)
    labels_parts.append(labels)
    epochs_train_parts.append(epochs_train)

    # Same text the running concatenation used to print: the shape of all
    # training epochs gathered so far.
    print("total epoch shape:  " + str((sum(epoch_size),) + epochs_train.shape[1:]))

# Single concatenation along the epoch axis.
epochs_total = np.concatenate(epochs_parts, axis=0)
labels_total = np.concatenate(labels_parts, axis=0)
epochs_train_total = np.concatenate(epochs_train_parts, axis=0)
del epochs_parts, labels_parts, epochs_train_parts

#printing the shape of the samples and labels to make sure they make sense
print(epochs_total.shape)
print(labels_total.shape)
print(epochs_train_total.shape)


#Please ignore the error message below. This is due to corrupted #89 dataset.


In [None]:
# Print the names of the channels at indices 8, 10 and 12 (the three channels
# the later power/wavelet cells select).
for channel_idx in (8, 10, 12):
    print(raw.ch_names[channel_idx])


In [None]:
# Cumulative epoch offsets: subject k (in loading order) owns rows
# e_index[k]:e_index[k+1] of the concatenated arrays.
e_index = [0]
for e_size in epoch_size:
    e_index.append(e_index[-1] + e_size)
ind = e_index[-1]  # total number of epochs


print(e_index)

#3762 + 941

In [None]:
import sklearn
import sklearn.metrics
import numpy as np
import pyriemann.estimation
import pyriemann.classification
# ShuffleSplit is needed here; previously it was only imported by a later cell,
# so this cell failed on a fresh kernel.
from sklearn.model_selection import ShuffleSplit


# Tangent-space classifier on covariance matrices, evaluated on the first
# n_subset epochs only.
n_subset = 150

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = epochs_train_total[:n_subset]
labels_subset = labels_total[:n_subset]

# Ledoit-Wolf shrinkage covariance estimator ('lwf'); the same estimator
# object is reused for every fold.
cov = pyriemann.estimation.Covariances('lwf')

for train_idx, test_idx in cv.split(labels_subset):
    y_train, y_test = labels_subset[train_idx], labels_subset[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]

    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)

    ts_classifier = pyriemann.classification.TSclassifier(metric='riemann')
    ts_classifier.fit(cov_X_train, y_train)

    y_predict = ts_classifier.predict(cov_X_test)
    scores.append(sklearn.metrics.accuracy_score(y_test, y_predict))


# Chance level = share of the majority class among the labels actually used
# above (previously taken from `labels`, which still held the labels of the
# last loaded subject).
class_balance = np.mean(labels_subset == labels_subset[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


In [None]:
#Below is a function that flattens the off-diagonal half of each matrix into a row
def takeupper(m):
    """Flatten the strictly-lower triangle of every matrix in a stack.

    Parameters
    ----------
    m : ndarray, shape (n_matrices, n, n)
        Stack of square matrices.

    Returns
    -------
    ndarray, shape (n_matrices, n * (n - 1) / 2)
        For each matrix, the entries m[k, i, j] with j < i, ordered by row i
        and then by column j. The diagonal is not included. For a symmetric
        matrix these are the same values as the strictly-upper triangle.

    Notes
    -----
    Prints m.shape as a side effect.
    """
    print(m.shape)
    # k=-1 selects the entries strictly below the main diagonal, in row-major
    # order - the order the nested i/j loops used to visit them.
    rows, cols = np.tril_indices(m.shape[1], k=-1)
    return m[:, rows, cols]



# Linear discriminant analysis on the off-diagonal covariance entries of the
# selected motor-imagery channels.
#
# The imports and `index` are defined here so the cell runs on a fresh kernel;
# previously they only existed after a later cell had been executed.
import sklearn.discriminant_analysis
import sklearn.metrics
from sklearn.model_selection import ShuffleSplit

# Seeded so the reported accuracy is reproducible.
cv = ShuffleSplit(n_splits=10, random_state=42)
scores = []
epochs_data = epochs_train_total
clf = sklearn.discriminant_analysis.LinearDiscriminantAnalysis()
labels = labels_total

# Indices of the selected channels (left-hand set followed by right-hand set).
index = np.concatenate((left_hand, right_hand))

cov = pyriemann.estimation.Covariances('lwf')

for train_idx, test_idx in cv.split(labels):
    y_train, y_test = labels[train_idx], labels[test_idx]
    X_train = epochs_data[train_idx][:, index]
    X_test = epochs_data[test_idx][:, index]

    # Covariance matrix of each epoch; only one triangle is kept to avoid
    # duplicated (symmetric) entries.
    cov_X_train = cov.transform(X_train)
    X_tr = takeupper(cov_X_train)
    cov_X_test = cov.transform(X_test)
    X_ts = takeupper(cov_X_test)

    clf.fit(X_tr, y_train)
    y_predict = clf.predict(X_ts)
    scores.append(sklearn.metrics.accuracy_score(y_test, y_predict))


# Chance level = share of the majority class.
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


In the part below, a CNN is built to detect patterns in the covariance matrices of the EEG signal.

Since only 10 channels are relevant to motor imagery, the input data size is 3115 X 10 X 10 (sample, num_channel, num_channel)

The first two layers are convolutional layers. The kernel size is set to be 3X3. There are 8 filters at each layer. Since each element in the covariance matrix could provide useful information, and the size is only 10 X 10, no max pooling layer is added.

The batch size is set to 128 and the number of training epochs to 4. Tuning the batch size does not seem to improve the result.






In [None]:
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import pyriemann.estimation



K.clear_session()

scores = []


#Using only the selected motor-imagery channels instead of all 64 channels.
index = np.concatenate((left_hand, right_hand))
n_channels = len(index)
num_classes = 2

epochs_data = epochs_train_total
labels = labels_total

print(epochs_data.shape)

# The splitter is defined here explicitly instead of reusing whatever `cv` an
# earlier cell happened to leave behind; seeded for reproducibility.
cv = ShuffleSplit(n_splits=10, random_state=42)

cov = pyriemann.estimation.Covariances('lwf')

for train_idx, test_idx in cv.split(labels):
    y_train, y_test = labels[train_idx], labels[test_idx]
    X_train = epochs_data[train_idx][:, index]
    X_test = epochs_data[test_idx][:, index]
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)

    print("the shape of cov matrix is {}".format(cov_X_train.shape))

    num_train = cov_X_train.shape[0]
    num_test = cov_X_test.shape[0]

    # Add a leading single "image channel" axis: (samples, 1, n_channels, n_channels).
    cov_X_train = cov_X_train.reshape(num_train, 1, n_channels, n_channels)
    cov_X_test = cov_X_test.reshape(num_test, 1, n_channels, n_channels)

    x_train = cov_X_train
    x_test = cov_X_test

    # One-hot encode the labels for the softmax output.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # A fresh model is built for every fold.
    model = Sequential()
    model.add(Conv2D(8, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=(1, n_channels, n_channels),
                     data_format='channels_first'))
    # data_format is given explicitly on this layer as well; without it the
    # layer falls back to the global Keras image format and may treat the
    # filter axis of the previous layer's output as a spatial axis.
    model.add(Conv2D(8, (3, 3), activation='relu', data_format='channels_first'))
#     model.add(MaxPooling2D(pool_size=(1, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(200, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.01),
                  metrics=['accuracy'])

    # NOTE: the held-out fold is used as validation_data for monitoring only;
    # no early stopping or model selection depends on it.
    model.fit(x_train, y_train,
              batch_size=128,
              epochs=4,
              verbose=1,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    scores.append(score)

# Printing the results (column 1 of each score is the accuracy)
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(np.asarray(scores)[:,1]),
                                                          class_balance))


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from sklearn.model_selection import ShuffleSplit  

from keras import backend as K

K.clear_session()

cv = ShuffleSplit(n_splits=2, test_size=0.2, random_state=42)

epochs_data = epochs_train_total[:]
labels = labels_total[:]

# define model parameters
n_features = 1                      # a single EEG channel is fed to the network
time_steps = epochs_data.shape[2]   # number of time samples per epoch (161 in the original run)
channel = 8                         # 8 is for C3

scores = []
for train_idx, test_idx in cv.split(labels):
    y_train, y_test = labels[train_idx], labels[test_idx]

    # Select the single channel and append a feature axis:
    # (samples, time_steps) -> (samples, time_steps, n_features).
    # The sample count is inferred (-1) instead of hard-coding the split
    # sizes, so the cell keeps working when the number of epochs changes.
    X_train = epochs_data[train_idx][:, channel]
    X_train = X_train.reshape((-1, time_steps, n_features))
    X_test = epochs_data[test_idx][:, channel]
    X_test = X_test.reshape((-1, time_steps, n_features))

    # Two stacked LSTM layers (64 and 32 units) with dropout in between and a
    # sigmoid output for the binary label. Trained for 50 epochs.
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=(time_steps, n_features)))
    model.add(Dropout(0.2))
    model.add(LSTM(32, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(X_train.shape)
    print(y_train.shape)
    model.fit(X_train, y_train, batch_size=9, epochs=50)
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    scores.append(score)


# Printing the results (column 1 of each score is the accuracy)
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)

print("Classification accuracy: %f / Chance level: %f" % (np.mean(np.asarray(scores)[:,1]),
                                                          class_balance))

In [None]:
# Raw [loss, accuracy] pairs, followed by the mean accuracy (column 1).
print(scores)
np.asarray(scores)[:, 1].mean()

In [None]:
from mne.decoding import CSP
# Fit a 4-component CSP (log=True) on the current epochs/labels and project
# the same epochs with it.
csp = CSP(n_components=4, reg=None, log=True, norm_trace=False)
X_train = csp.fit(epochs_data, labels).transform(epochs_data)



In [None]:
# Shape of the CSP features, then of the epochs they were computed from.
print(X_train.shape, epochs_data.shape, sep="\n")

In [None]:
from mne.decoding import CSP
from sklearn import svm
import sklearn.metrics

# CSP features + RBF-kernel SVM, with a (currently single-point) grid over the
# SVM hyper-parameters C and gamma.
csp = CSP(n_components=4, reg=None, log=False, norm_trace=False)


cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
# C = [0.01, 0.1, 1, 10, 1000]
C = [1]
# gam = [0.1, 0.2, 0.5, 0.8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
gam = [0.5]

# Rows 45..89 of the concatenated data.
# NOTE(review): presumably meant to be one subject's epochs - confirm against
# epoch_size / e_index, since subjects need not contribute exactly 45 epochs.
epochs_data = epochs_train_total[45:90]
labels = labels_total[45:90]
# 3762  ->   941
#C as 1 amd gamma as 0.5 gives 0.5557917109458024%
max_score = 0.0
for C_ in C:
    for gamma in gam:
        clf = svm.SVC(C=C_, cache_size=200, class_weight=None, coef0=0.0,
            decision_function_shape='ovr', degree=3, gamma=gamma, kernel='rbf',
            max_iter=-1, probability=False, random_state=None, shrinking=True,
            tol=0.001, verbose=False)

        print("now using C as {} and gamma as {}".format(C_, gamma))
        scores = []
        for train_idx, test_idx in cv.split(labels):
            y_train, y_test = labels[train_idx], labels[test_idx]

            # The epochs are passed to CSP as they are. The former
            # reshape((36, 161, 64)) did not transpose the array; it
            # re-interpreted the buffer and mixed channel and time samples.
            X_train = epochs_data[train_idx]
            X_train_csp = csp.fit_transform(X_train, y_train)

            # The spatial filters are learned on the training fold only; the
            # test fold is merely projected. (Re-fitting on the test fold with
            # its labels leaked label information into the features.)
            X_test = epochs_data[test_idx]
            X_test_csp = csp.transform(X_test)

            X = X_train_csp
            y = y_train

            clf.fit(X, y)
            y_predict = clf.predict(X_test_csp)

            fold_accuracy = sklearn.metrics.accuracy_score(y_test, y_predict)
            print(fold_accuracy)
            scores.append(fold_accuracy)


        # Chance level = share of the majority class.
        class_balance = np.mean(labels == labels[0])
        class_balance = max(class_balance, 1. - class_balance)
        print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                                  class_balance))
        if np.mean(scores) > max_score:
            max_score = np.mean(scores)

print(max_score)


In [None]:
# %%capture
from mne.decoding import CSP
from sklearn import svm
import sklearn.metrics

# Per-subject CSP + RBF-SVM: one cross-validated accuracy per subject,
# collected in `s`.
csp = CSP(n_components=4, reg=None, log=False, norm_trace=False)


cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

# Hyper-parameters are set here explicitly; the progress message used to read
# C_ / gamma left over from the loop variables of the previous cell.
#C as 1 amd gamma as 0.5 gives 0.5557917109458024
C_, gamma = 1, 0.5

s = []
# Subject boundaries come from e_index (cumulative epoch counts). Fixed
# 45-epoch windows only line up with subjects if every subject contributed
# exactly 45 epochs.
for start, end in zip(e_index[:-1], e_index[1:]):
    epochs_data = epochs_train_total[start:end]
    labels = labels_total[start:end]

    clf = svm.SVC(C=C_, cache_size=200, class_weight=None, coef0=0.0,
        decision_function_shape='ovr', degree=3, gamma=gamma, kernel='rbf',
        max_iter=-1, probability=False, random_state=None, shrinking=True,
        tol=0.001, verbose=False)

    print("now using C as {} and gamma as {}".format(C_, gamma))
    scores = []
    for train_idx, test_idx in cv.split(labels):
        y_train, y_test = labels[train_idx], labels[test_idx]

        # The epochs are passed to CSP as they are; the former
        # reshape((36, 161, 64)) re-interpreted the buffer instead of
        # transposing it and only worked for exactly 45 epochs per subject.
        X_train = epochs_data[train_idx]
        X_train_csp = csp.fit_transform(X_train, y_train)

        # Project the test fold with the filters learned on the training fold
        # (re-fitting on the test fold leaked its labels into the features).
        X_test = epochs_data[test_idx]
        X_test_csp = csp.transform(X_test)

        X = X_train_csp
        y = y_train

        clf.fit(X, y)

        y_predict = clf.predict(X_test_csp)

        fold_accuracy = sklearn.metrics.accuracy_score(y_test, y_predict)
        print(fold_accuracy)
        scores.append(fold_accuracy)


    # Chance level = share of the majority class for this subject.
    class_balance = np.mean(labels == labels[0])
    class_balance = max(class_balance, 1. - class_balance)
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))
    s.append(np.mean(scores))
    print("start is {}, end is {}, scores are {}".format(start, end, s))


# 8/30/2018

In [None]:
# Work on the first 45 epochs only.
# NOTE(review): presumably one subject's worth - confirm against epoch_size.
first_n = 45
epochs_data, labels = epochs_train_total[:first_n], labels_total[:first_n]

# Channel indices for the three electrodes the code below calls C3, CZ and C4.
channels = [8, 10, 12]
epochs_select_chan = epochs_data[:, channels, :]

print(epochs_select_chan.shape)


def avg_power(epochs_select_chan, labels):
    """Average squared amplitude per class and channel.

    Parameters
    ----------
    epochs_select_chan : ndarray, shape (samples, 3, timesteps)
        Epochs restricted to three channels; channel positions 0, 1 and 2 are
        reported as C3, CZ and C4 respectively.
    labels : sequence
        labels[k] == 0 puts epoch k into the "hands" group, any other value
        into the "feets" group. Only the first `samples` entries are read.

    Returns
    -------
    tuple of six 1-D arrays (length timesteps)
        (C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets): the mean
        over the group's epochs of the squared signal.
    """
    samples, chan_num, timesteps = epochs_select_chan.shape
    sample_ids = range(samples)

    # Split the epochs by label.
    hands = np.asarray([epochs_select_chan[k, :, :]
                        for k in sample_ids if labels[k] == 0])
    feets = np.asarray([epochs_select_chan[k, :, :]
                        for k in sample_ids if not labels[k] == 0])

    def mean_power(trials, channel):
        # Squared amplitude, averaged across the epochs of one group.
        return np.mean(trials[:, channel, :] ** 2, axis=0)

    return (mean_power(hands, 0), mean_power(hands, 1), mean_power(hands, 2),
            mean_power(feets, 0), mean_power(feets, 1), mean_power(feets, 2))


# Per-class, per-channel mean power traces for the selected epochs.
class_channel_power = avg_power(epochs_select_chan, labels)
C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets = class_channel_power

In [None]:
import matplotlib.pyplot as plt
# One figure per class for the C3 power trace: hands first, then feets.
for power_trace in (C3_hands, C3_feets):
    plt.figure()
    plt.plot(power_trace)
    plt.show()


# label 0 means hands, label 1 means feets

In [None]:
import pywt

# Four-level 'db4' wavelet decomposition of the averaged C4 "hands" power trace.
coeffs = pywt.wavedec(C4_hands, 'db4', level=4)
# Original rationale: because the signal is filtered to 7 - 30 Hz, only 4
# decomposition levels can be used.
# NOTE(review): the raw.filter(7., 30., ...) call in the loading cell is
# commented out - confirm that this rationale still applies.

# Uncomment to plot every coefficient array of the decomposition:
# for c in coeffs:
#     plt.figure()
#     plt.plot(c)
#     plt.show()



def make_dwt_epoch(C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets):
    """Build wavelet feature matrices for the two classes.

    Each input trace is decomposed with a 4-level 'db4' wavelet transform and
    only the first coefficient array returned by pywt.wavedec is kept.

    Returns
    -------
    (dwt_hands, dwt_feets) : tuple of ndarray
        One row per channel. Note that the rows are stacked in the order
        C3, C4, CZ, which differs from the C3, CZ, C4 order of the parameters.
    """
    def first_coeffs(trace):
        # Element 0 of the wavedec result.
        return pywt.wavedec(trace, 'db4', level=4)[0]

    dwt_hands = np.asarray([first_coeffs(C3_hands),
                            first_coeffs(C4_hands),
                            first_coeffs(CZ_hands)])
    dwt_feets = np.asarray([first_coeffs(C3_feets),
                            first_coeffs(C4_feets),
                            first_coeffs(CZ_feets)])
    return dwt_hands, dwt_feets


# Wavelet features of the averaged power traces; plot the first "hands" row.
dwt_pair = make_dwt_epoch(C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets)
dwt_hands, dwt_feets = dwt_pair
plt.plot(dwt_hands[0])

In [None]:
def make_epochs(epochs_train_total, labels_total):
    """Build one "hands" and one "feets" wavelet feature matrix per subject.

    Subject boundaries are read from the module-level e_index list. For every
    subject the epochs are restricted to channels 8, 10 and 12, averaged into
    per-class power traces with avg_power and turned into wavelet features
    with make_dwt_epoch.

    Returns
    -------
    (features, targets) : tuple of ndarray
        features alternates hands/feets matrices subject by subject; targets
        holds the matching labels 0 (hands) and 1 (feets).
    """
    selected = [8, 10, 12]
    features, targets = [], []
    for lo, hi in zip(e_index[:-1], e_index[1:]):
        subject_epochs = epochs_train_total[lo:hi][:, selected, :]
        subject_labels = labels_total[lo:hi]
        class_power = avg_power(subject_epochs, subject_labels)
        hands_features, feets_features = make_dwt_epoch(*class_power)

        features.extend((hands_features, feets_features))
        targets.extend((0, 1))

    return np.asarray(features), np.asarray(targets)
        
# Per-subject wavelet features, rearranged to (samples, coefficients, channels).
epochs, labels = make_epochs(epochs_train_total, labels_total)
epochs = epochs.swapaxes(1, 2)
print(epochs.shape)

print(labels[:10])

# Plot the three feature columns of samples 1, 3, 5 and 7. The first sample
# is drawn on the current figure; each following one gets a new figure.
for position, sample_idx in enumerate((1, 3, 5, 7)):
    if position > 0:
        plt.figure()
    for feature_idx in range(3):
        plt.plot(epochs[sample_idx, :, feature_idx])

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from keras import backend as K


# Small dense network on the per-trial wavelet features of a block of subjects.
#
# NOTE: `testing_epochs` is produced by the wavelet feature-extraction cell
# that appears further down in this notebook; that cell has to be run first.
subject = 3

start = e_index[subject-1]
end = e_index[subject+20]

print("start is {}, end is {}".format(start, end))

K.clear_session()

# (trials, channels, coefficients) -> (trials, coefficients, channels).
# The result is stored under a new name so that re-running this cell does not
# swap the axes of `testing_epochs` back again.
testing_epochs_swapped = np.asarray(testing_epochs).swapaxes(1, 2)

time_steps = 16
n_features = 3

# model = Sequential()
# model.add(LSTM(3, return_sequences=True, input_shape=(time_steps, n_features)))
# model.add(Dropout(0.2))
# model.add(LSTM(10, return_sequences=False, input_shape=(time_steps, n_features)))
# model.add(Dense(1, activation='sigmoid'))

# model.compile(loss='binary_crossentropy',
#               optimizer='rmsprop',
#               metrics=['accuracy'])

# Flatten every trial into one feature vector for the dense network.
testing_epochs_ANN = testing_epochs_swapped.reshape(testing_epochs_swapped.shape[0], -1)
print(testing_epochs_ANN[start:end, :].shape)

model = Sequential()
# The input width is taken from the data (48 in the original run).
model.add(Dense(60, input_dim=testing_epochs_ANN.shape[1],
                kernel_initializer='uniform', activation='relu'))
# Sigmoid output: binary cross-entropy expects a probability in (0, 1); the
# former ReLU output could be exactly 0 or larger than 1.
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


# model.fit(np.asarray(testing_epochs)[start:end,:,:], labels_total[start:end], batch_size=1, epochs=20)
model.fit(testing_epochs_ANN[start:end, :], labels_total[start:end], batch_size=1, epochs=20)



# score = model.evaluate(X_test, y_test, verbose=0)

In [None]:
# Refit an RBF-kernel SVM on the X / y arrays and score it on X_test_csp /
# y_test. None of the four inputs is defined in this cell; they are whatever
# the previously executed CSP cell left behind.
svc_settings = dict(C=1, cache_size=200, class_weight=None, coef0=0.0,
                    decision_function_shape='ovr', degree=3, gamma=0.5,
                    kernel='rbf', max_iter=-1, probability=False,
                    random_state=None, shrinking=True, tol=0.001,
                    verbose=False)
clf = svm.SVC(**svc_settings)

clf.fit(X, y)

y_predict = clf.predict(X_test_csp)

print(sklearn.metrics.accuracy_score(y_predict, y_test))



In [None]:
# samples,_,_ = epochs_train_total.shape
# epochs_test = []
# labels_test = []
# for s in range(samples):
#     e = epochs_train_total[s,channels,:].reshape(1,3,161)
#     C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets = avg_power(e,labels_total)
#     dwt_hands, dwt_feets = make_dwt_epoch(C3_hands, CZ_hands, C4_hands, C3_feets, CZ_feets, C4_feets)
#     epochs_test.append(dwt_hands)
#     labels_test.append(0)
#     epochs_test.append(dwt_feets)
#     labels_test.append(1)

# print(epochs_test.shape)

# def make_testing_epoch(epochs_train_total):

import pywt

# Per-trial wavelet features: for every epoch, square the signal and keep the
# first coefficient array of a 4-level 'db4' decomposition for channels
# 8, 10 and 12 (in that order).
samples, chan_num, timesteps = epochs_train_total.shape
epochs_power = epochs_train_total**2

testing_epochs = [
    np.asarray([pywt.wavedec(epochs_power[trial, channel, :], 'db4', level=4)[0]
                for channel in (8, 10, 12)])
    for trial in range(samples)
]

print(np.asarray(testing_epochs).shape)

print(len(labels_total))