# Organize and preprocessing data from matlab files

In [1]:
import numpy as np
import scipy.io

In [None]:
data_structure_file = 'D:\data_structure_ANM210862.tar\data_structure_ANM210862\data_structure_ANM210862_20130628.mat'
label_to_save = 'ANM210862_20130628_labels.npy'
trace_dir = 'D:\\voltage_traces_ANM210862_20130628\\voltage_traces_ANM210862_20130628\\raw_trace_958_trial_'
sparse_epoch_to_save = 'sparse_ANM210862_20130628.npy'

In [None]:
mat_dict = scipy.io.loadmat(data_structure_file)


data = mat_dict['obj']
data = data['trialTypeMat'][0,0]

valid_trials = []
labels = []
for i in range(len(data[0,:])):
    if data[1,i] == 1:
        labels.append(1)
        valid_trials.append(i+1)
    elif data[0,i] == 1:
        labels.append(0)
        valid_trials.append(i+1)
        
print(valid_trials)
print(labels)
# 1 means right, 0 menas left


file = open(label_to_save, 'wb')
np.save(file, labels)
file.close()


In [None]:
# raw_trace_127_trial_5.mat

epochs = []
epochs = np.asarray(epochs)
min_timepoint = 1000000

for t in valid_trials:
    print(t)
    trial = scipy.io.loadmat(trace_dir + str(t) + '.mat')['ch_MUA']
    timepoint, chan = trial.shape
    if min_timepoint > timepoint:
        min_timepoint = timepoint
    
    trial = trial[:min_timepoint, :]
    trial = trial.reshape((1, min_timepoint, chan))
    print(trial.shape)
    if epochs.size == 0:
        epochs = trial
    else:
        epochs = epochs[:, :min_timepoint, :]
        epochs = np.concatenate((epochs, trial), axis = 0)
    print(epochs.shape)




In [None]:
trial, timepoint, channel = epochs.shape
sparse_ind = [i*100 for i in range(timepoint//100) ]
print(len(sparse_ind))
sparse_epochs = epochs[:, sparse_ind, :]
sparse_epochs = np.swapaxes(sparse_epochs,1,2)


file = open(sparse_epoch_to_save, 'wb')
np.save(file, sparse_epochs)
file.close()

# Loading Data

In [2]:
sparse_epochs = np.load('sparse_ANM210861_20130702.npy')
labels = np.load('ANM210861_20130702_labels.npy')
print(sparse_epochs.shape)

# ind = np.linspace(0,3443-1, num = 300)
# ind = [int(np.floor(i)) for i in ind]
# sparse_epochs = sparse_epochs[:,:, ind]
# print(sparse_epochs.shape)



#get rid of flat channel

trial, channel,timepoint = sparse_epochs.shape
flat_chan = [i for i in range(channel) if sparse_epochs[0,i,0] == 0]
good_chan = [i for i in range(0,32) if i not in flat_chan]
print(flat_chan)
sparse_epochs = sparse_epochs[:,good_chan,:]
print(sparse_epochs.shape)


#standardize signal based on each channel

from sklearn.preprocessing import StandardScaler
import copy

epochs_std = copy.copy(sparse_epochs)
sample_num, chan_num, timepoint = sparse_epochs.shape
for c in range(chan_num):
    original_timepoints = sparse_epochs[:,c,:]
    scaler = StandardScaler()
    scaler.fit(original_timepoints)
    chan_std = scaler.transform(original_timepoints)
    epochs_std[:,c,:] = chan_std

(317, 32, 1033)
[0, 2, 3, 7, 10, 14, 15, 20, 22, 23, 25, 27, 30, 31]
(317, 18, 1033)


# Prediction on single session data

In [8]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = epochs_std

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]


    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    print(cov_X_train.shape)
    cov_X_test = cov.transform(X_test)
    
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=sklearn.discriminant_analysis.LinearDiscriminantAnalysis())
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann')

    TSclassifier.fit(cov_X_train, y_train)
    
    
    y_predict = TSclassifier.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(TSclassifier.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

(253, 18, 18)
0.924901185770751
(253, 18, 18)
0.9209486166007905
(253, 18, 18)
0.9288537549407114
(253, 18, 18)
0.9288537549407114
(253, 18, 18)
0.9169960474308301
(253, 18, 18)
0.9367588932806324
(253, 18, 18)
0.932806324110672
(253, 18, 18)
0.9367588932806324
(253, 18, 18)
0.9367588932806324
(253, 18, 18)
0.924901185770751
Classification accuracy: 0.901563 / Chance level: 0.457413


#  PCA across trials

In [None]:
import sklearn
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit  
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

for n_component in range(30,31):
    sparse_epochs_rs = sparse_epochs.reshape((trial, -1))
    pca = PCA(n_components=n_component)
    pca.fit(sparse_epochs_rs)
    total_variance = np.asarray(pca.explained_variance_ratio_)
    print(sum(total_variance))
    new_X = pca.transform(sparse_epochs_rs)

    samples = new_X

    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



#         logistic = linear_model.LogisticRegression(C = 1e-5)
#         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



#         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

#  PCA across channels

In [None]:
import sklearn
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit  

for n_components in range(3,12):
    samples = []
    for s in range(sparse_epochs.shape[0]):
        X = sparse_epochs[s,:,:]
        pca = PCA(n_components=n_components)
        pca.fit(X)
        new_X = pca.transform(X)
        samples.append(new_X)
    samples = np.asarray(samples)
    print(samples.shape)
    samples = samples.reshape((sparse_epochs.shape[0], -1))
    print(samples.shape)


    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



    #         logistic = linear_model.LogisticRegression(C = 1e-5)
    #         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



    #         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

# CSP + LDA

In [69]:
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score

from mne import Epochs, pick_types, find_events
from mne.channels import read_layout
from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci
from mne.decoding import CSP

#you need to make sure there is no flat channel! otherwise covariance matrix is not positive definite

scores = []
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
for train_idx, test_idx in cv.split(epochs_std):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_std[train_idx]
    X_test = epochs_std[test_idx]
    print(X_train.shape)
    
    csp = CSP(n_components=4, reg=None, log=True, norm_trace=False, cov_est = 'epoch', cov_method_params= 'shrinkage')
    new_epochs = csp.fit_transform(X_train,  y_train)
    print(new_epochs.shape)
    lda = LinearDiscriminantAnalysis()
    lda.fit(new_epochs, y_train)
    
#     logistic = linear_model.LogisticRegression(C = 1e-5)
#     logistic.fit(new_epochs, y_train)
    

    y_pred_csp = csp.transform(X_test)
    y_pred = lda.predict(y_pred_csp)
#     y_predict = logistic.predict(y_pred_csp)
    
    print(y_pred)
    print(y_test)
    print(sklearn.metrics.accuracy_score(y_pred, y_test))
    scores.append(sklearn.metrics.accuracy_score(y_pred, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

(262, 19, 300)
(262, 4)
[1 0 1 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 1 1 0 1 1 1
 0 1 0 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1]
[1 0 1 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 1 1
 0 0 0 1 0 1 0 1 1 0 1 1 1 1 0 0 0 1 0 1 1 1 0 0 0 0 1 1 1]
0.803030303030303
(262, 19, 300)
(262, 4)
[1 1 0 1 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0
 1 0 0 1 1 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 0 1 0 0]
[1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 1 1 1 0 0 0
 1 0 0 1 1 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 1 0 1 0 0 1 0]
0.8787878787878788
(262, 19, 300)
(262, 4)
[1 1 1 1 1 1 0 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1
 0 1 1 1 1 1 0 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 0]
[1 1 0 1 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 1 0 1
 0 0 1 1 1 1 0 1 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0]
0.8636363636363636
(262, 19, 300)
(262, 4)
[0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 1 0 1 1

#  Aggregate data across session

In [102]:
from sklearn.preprocessing import StandardScaler
import copy

# sparse_files = ['sparse_ANM210861_20130701.npy', 'sparse_ANM210861_20130702.npy', 
#                'sparse_ANM210861_20130703.npy']

# label_files = ['ANM210861_20130701_labels.npy', 'ANM210861_20130702_labels.npy',
#               'ANM210861_20130703_labels.npy']

sparse_files = ['sparse_ANM210861_20130701.npy', 'sparse_ANM210861_20130702.npy', 
               'sparse_ANM210861_20130703.npy', 'sparse_ANM210862_20130626.npy', 'sparse_ANM210862_20130627.npy', 
               'sparse_ANM210862_20130628.npy']

label_files = ['ANM210861_20130701_labels.npy', 'ANM210861_20130702_labels.npy',
              'ANM210861_20130703_labels.npy', 'ANM210862_20130626_labels.npy', 'ANM210862_20130627_labels.npy',
              'ANM210862_20130628_labels.npy']


agg_spares_epochs = np.load(sparse_files[0])
agg_labels = np.load(label_files[0])
ind = np.linspace(0,3443-1, num = 1033)
ind = [int(np.floor(i)) for i in ind]
agg_spares_epochs = agg_spares_epochs[:,:, ind]


for i in range(1,6):
    new_epochs =  np.load(sparse_files[i])
    agg_spares_epochs = np.concatenate((agg_spares_epochs, new_epochs), axis = 0)
    new_label = np.load(label_files[i])
    agg_labels = np.concatenate((agg_labels, new_label), axis = 0)

print(agg_spares_epochs.shape)
print(agg_labels.shape)



agg_epochs_std = copy.copy(agg_spares_epochs)
sample_num, chan_num, timepoint = agg_spares_epochs.shape
for c in range(chan_num):
    original_timepoints = agg_spares_epochs[:,c,:]
    scaler = StandardScaler()
    scaler.fit(original_timepoints)
    chan_std = scaler.transform(original_timepoints)
    agg_epochs_std[:,c,:] = chan_std



(1669, 32, 1033)
(1669,)


#  Prediction using Riemannian based classifier on mix dataset across session from same subject

In [149]:

def check_good(agg_epochs_std, test_idx):
    cov =  pyriemann.estimation.Covariances('lwf')
    res = [i for i in test_idx if (bad_catcher.predict(cov.transform(agg_epochs_std[[i]]))[0] == 0)]
    return res


import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_epochs_std
labels = agg_labels

for train_idx, test_idx in cv.split(epochs_data):
    print(len(test_idx))
    test_idx = check_good(epochs_data, test_idx)
    print(len(test_idx))
    
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=sklearn.discriminant_analysis.LinearDiscriminantAnalysis())
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100))
     
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=LogisticRegression())

    TSclassifier.fit(cov_X_train, y_train)
    
    
    y_predict = TSclassifier.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(TSclassifier.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))







334
160
(1335, 32, 1033)
0.9820224719101124
334
160
(1335, 32, 1033)
0.9842696629213483
334
150
(1335, 32, 1033)
0.9827715355805243
334
146
(1335, 32, 1033)
0.9820224719101124
334
145
(1335, 32, 1033)
0.9805243445692884
334
154
(1335, 32, 1033)
0.9865168539325843
334
150
(1335, 32, 1033)
0.9805243445692884
334
139
(1335, 32, 1033)
0.9865168539325843
334
145
(1335, 32, 1033)
0.9820224719101124
334
151
(1335, 32, 1033)
0.9835205992509364
Classification accuracy: 0.857189 / Chance level: 0.500899


#  Prediction using PCA + Logistic regression

In [None]:
import sklearn
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit  

trial,_,_ = agg_spares_epochs.shape
for n_component in range(10,30):
    epochs_reshaped = agg_spares_epochs.reshape((trial, -1))
    pca = PCA(n_components=n_component)
    samples = pca.fit_transform(epochs_reshaped)
    total_variance = np.asarray(pca.explained_variance_ratio_)
    print(sum(total_variance))
    

    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



#         logistic = linear_model.LogisticRegression(C = 1e-5)
#         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



#         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

#  Prediction Using PCA + logistic across channels on aggregated dataset

In [None]:
for n_components in range(3,30):
    samples = []
    for s in range(agg_spares_epochs.shape[0]):
        X = agg_spares_epochs[s,:,:]
        pca = PCA(n_components=n_components)
        pca.fit(X)
        new_X = pca.transform(X)
        samples.append(new_X)
    samples = np.asarray(samples)
    print(samples.shape)
    samples = samples.reshape((agg_spares_epochs.shape[0], -1))

    print(samples.shape)


    labels = agg_labels
    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



    #         logistic = linear_model.LogisticRegression(C = 1e-5)
    #         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



    #         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

# Boosting With MDM classifier 

In [17]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  

from sklearn.ensemble import AdaBoostClassifier #For Classification



cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = sparse_epochs

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    
    MDMclassifier = pyriemann.classification.MDM(metric='riemann')
    clf = AdaBoostClassifier(n_estimators=4, base_estimator=MDMclassifier,learning_rate=0.7)
    
    
    clf.fit(cov_X_train, y_train)
    
    
    y_predict = clf.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(clf.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


#MDM with no boosting: 77%

(262, 19, 300)
0.7824427480916031
(262, 19, 300)
0.7977099236641222
(262, 19, 300)
0.7862595419847328
(262, 19, 300)
0.7938931297709924
(262, 19, 300)
0.8053435114503816
(262, 19, 300)
0.7786259541984732
(262, 19, 300)
0.7900763358778626
(262, 19, 300)
0.7633587786259542
(262, 19, 300)
0.7938931297709924
(262, 19, 300)
0.8091603053435115
Classification accuracy: 0.751515 / Chance level: 0.509146


In [23]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  

from sklearn.ensemble import AdaBoostClassifier #For Classification
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC



cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []

epochs_data = agg_epochs_std
labels = agg_labels


for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann')
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True))

    clf = AdaBoostClassifier(n_estimators=5, base_estimator=TSclassifier,learning_rate=1)
#     
    clf.fit(cov_X_train, y_train)
    
    
    y_predict = clf.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(clf.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


#MDM with no boosting: 77%

(513, 32, 1033)
0.9961013645224172
(513, 32, 1033)
0.9980506822612085
(513, 32, 1033)
0.9961013645224172
(513, 32, 1033)
0.9941520467836257
(513, 32, 1033)
0.9980506822612085
(513, 32, 1033)
1.0
(513, 32, 1033)


KeyboardInterrupt: 

#  Bagging

In [22]:
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_epochs_std
labels = agg_labels
for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    n_samples, _,_ = cov_X_train.shape
    ind_set_1 = np.random.randint(0, n_samples, size = n_samples//3 * 2)
    ind_set_2 = np.random.randint(0, n_samples, size = n_samples//3 * 2)
    ind_set_3 = np.random.randint(0, n_samples, size = n_samples//3*2)

    clf_1 = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True))
    clf_2 = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True))
    clf_3 = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True))

    clf_1.fit(cov_X_train[ind_set_1,:,:], y_train[ind_set_1])
    clf_2.fit(cov_X_train[ind_set_2,:,:], y_train[ind_set_2])
    clf_3.fit(cov_X_train[ind_set_3,:,:], y_train[ind_set_3])

    y_predict = scipy.stats.mode([clf_1.predict(cov_X_test), clf_2.predict(cov_X_test), clf_3.predict(cov_X_test)], axis=0).mode[0]
    print(sklearn.metrics.accuracy_score(scipy.stats.mode([clf_1.predict(cov_X_train), clf_2.predict(cov_X_train), clf_3.predict(cov_X_train)], axis=0).mode[0], y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))

class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))



(513, 32, 1033)
0.8966861598440545
(513, 32, 1033)
0.9161793372319688
(513, 32, 1033)
0.8927875243664717
(513, 32, 1033)
0.8947368421052632
(513, 32, 1033)
0.8947368421052632
(513, 32, 1033)
0.8947368421052632
(513, 32, 1033)
0.8732943469785575
(513, 32, 1033)
0.9181286549707602
(513, 32, 1033)
0.8947368421052632
(513, 32, 1033)
0.9122807017543859
Classification accuracy: 0.730233 / Chance level: 0.526480


In [107]:
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_epochs_std
labels = agg_labels

n_estimator = 21
subset_size = n_samples//3 * 2

wrong = []

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    n_samples, _,_ = cov_X_train.shape
    
    ind_sets = [np.random.randint(0, n_samples, size = subset_size) for i in range(n_estimator)]
    
    clfs = [pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True)) for i in range(n_estimator)]

    for i in range(n_estimator):
        clfs[i].fit(cov_X_train[ind_sets[i],:,:], y_train[ind_sets[i]])
    
    y_predict = [clfs[i].predict(cov_X_test) for i in range(n_estimator)]
    y_predict = scipy.stats.mode(y_predict, axis=0).mode[0]
    

    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))
    
    print(sklearn.metrics.accuracy_score(y_predict, y_test))
    
    wrong.append([test_idx[i] for i in range(len(y_test)) if y_predict[i] != y_test[i]])

class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))



0.7694610778443114
0.7844311377245509
0.7844311377245509
0.7934131736526946
0.8173652694610778
0.7694610778443114
0.8053892215568862
0.7904191616766467
0.8053892215568862
0.7934131736526946
Classification accuracy: 0.791317 / Chance level: 0.500899


In [109]:
result = []
result = result + sum(wrong, [])

# temp = wrong
# result = sum(temp, [])
from collections import Counter
print(Counter(result).most_common(40))

[(1344, 5), (599, 5), (989, 5), (1057, 5), (457, 5), (1115, 5), (1485, 5), (785, 4), (1161, 4), (1468, 4), (432, 4), (834, 4), (485, 4), (1648, 4), (806, 4), (822, 4), (810, 4), (1381, 4), (1503, 4), (675, 4), (811, 4), (1131, 4), (1137, 4), (1645, 4), (782, 3), (1535, 3), (650, 3), (744, 3), (1370, 3), (1179, 3), (879, 3), (836, 3), (1260, 3), (724, 3), (1324, 3), (946, 3), (1353, 3), (247, 3), (670, 3), (184, 3)]


In [110]:
print(list(Counter(result).most_common(150)))
l = [i[0] for i in list(Counter(result).most_common(150))]
print(l)

[(1344, 5), (599, 5), (989, 5), (1057, 5), (457, 5), (1115, 5), (1485, 5), (785, 4), (1161, 4), (1468, 4), (432, 4), (834, 4), (485, 4), (1648, 4), (806, 4), (822, 4), (810, 4), (1381, 4), (1503, 4), (675, 4), (811, 4), (1131, 4), (1137, 4), (1645, 4), (782, 3), (1535, 3), (650, 3), (744, 3), (1370, 3), (1179, 3), (879, 3), (836, 3), (1260, 3), (724, 3), (1324, 3), (946, 3), (1353, 3), (247, 3), (670, 3), (184, 3), (839, 3), (997, 3), (1567, 3), (1481, 3), (429, 3), (711, 3), (898, 3), (743, 3), (591, 3), (1376, 3), (795, 3), (996, 3), (1025, 3), (874, 3), (835, 3), (295, 3), (1010, 3), (354, 3), (3, 3), (538, 3), (1422, 3), (900, 3), (1362, 3), (826, 3), (1242, 3), (1104, 3), (1268, 3), (423, 3), (1337, 3), (877, 3), (1049, 3), (746, 3), (691, 3), (851, 3), (1407, 3), (1271, 3), (1408, 3), (936, 3), (831, 3), (1366, 3), (1186, 3), (1350, 3), (1522, 3), (282, 3), (801, 3), (1499, 3), (931, 3), (1157, 2), (1080, 2), (576, 2), (1176, 2), (1375, 2), (425, 2), (1295, 2), (115, 2), (1573, 2

In [111]:
print(clfs[0].predict_proba(cov.transform(agg_epochs_std[[1344]])))
print(clfs[0].predict_proba(cov.transform(agg_epochs_std[[599]])))
print(clfs[0].predict_proba(cov.transform(agg_epochs_std[[989]])))
print(clfs[0].predict_proba(cov.transform(agg_epochs_std[[1057]])))

# print(clfs[1].predict_proba(cov.transform(agg_epochs_std[[1]])))
# print(clfs[1].predict_proba(cov.transform(agg_epochs_std[[2]])))
# print(clfs[1].predict_proba(cov.transform(agg_epochs_std[[3]])))
# print(clfs[1].predict_proba(cov.transform(agg_epochs_std[[4]])))


print(agg_labels[1344])
print(agg_labels[599])
print(agg_labels[989])
print(agg_labels[1057])


# print(agg_labels[1])
# print(agg_labels[2])
# print(agg_labels[3])
# print(agg_labels[4])


[[0.03839222 0.96160778]]
[[0.31764853 0.68235147]]
[[0.82268444 0.17731556]]
[[0.22734746 0.77265254]]
0
0
1
0


# got rid of outliers

In [101]:
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_epochs_std
labels = agg_labels

n_estimator = 21
subset_size = n_samples//2


for train_idx, test_idx in cv.split(epochs_data):
    
    print(len(train_idx))
    train_idx = [i for i in train_idx if i not in l]
    print(len(train_idx))
    
    
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    

    
    n_samples, _,_ = cov_X_train.shape
    
    ind_sets = [np.random.randint(0, n_samples, size = subset_size) for i in range(n_estimator)]
    
    clfs = [pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True)) for i in range(n_estimator)]

    for i in range(n_estimator):
        clfs[i].fit(cov_X_train[ind_sets[i],:,:], y_train[ind_sets[i]])
    
    y_predict = [clfs[i].predict(cov_X_test) for i in range(n_estimator)]
    y_predict = scipy.stats.mode(y_predict, axis=0).mode[0]
    

    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))
    print(scores)

class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


513
399
[0.689922480620155]
513
397
[0.689922480620155, 0.7751937984496124]
513
400
[0.689922480620155, 0.7751937984496124, 0.6666666666666666]
513
404
[0.689922480620155, 0.7751937984496124, 0.6666666666666666, 0.6976744186046512]
513
401
[0.689922480620155, 0.7751937984496124, 0.6666666666666666, 0.6976744186046512, 0.6976744186046512]
Classification accuracy: 0.705426 / Chance level: 0.526480


In [None]:
[(546, 26), (134, 24), (22, 22), (63, 21), (298, 21), (90, 20), (386, 20), (6, 19), (56, 19), (233, 19), (498, 19), (152, 19), (376, 19), (10, 18), (424, 18), (155, 18), (77, 18), (621, 18), (544, 18), (245, 18), (495, 18), (241, 18), (116, 18), (2, 17), (572, 17), (5, 17), (603, 17), (53, 17), (564, 17), (508, 17), (264, 16), (284, 16), (220, 16), (244, 16), (100, 16), (401, 16), (96, 16), (192, 15), (496, 15), (101, 15), (440, 15), (483, 15), (162, 15), (309, 15), (143, 15), (536, 15), (49, 14), (567, 14), (381, 14), (201, 14), (128, 14), (464, 14), (34, 14), (594, 14), (120, 14), (559, 14), (268, 13), (174, 13), (30, 13), (551, 13), (504, 13), (285, 13), (293, 13), (27, 13), (297, 13), (168, 13), (175, 12), (184, 12), (529, 12), (454, 12), (354, 12), (452, 12), (458, 12), (349, 12), (472, 12), (130, 12), (420, 12), (232, 12), (474, 12), (88, 12), (282, 12), (221, 12), (392, 12), (476, 12), (310, 12), (609, 11), (618, 11), (410, 11), (337, 11), (348, 11), (326, 11), (519, 11), (331, 11), (435, 11), (157, 11), (173, 11), (39, 10), (494, 10), (364, 10), (159, 10), (51, 10), (521, 10), (402, 10), (273, 10), (79, 10), (35, 10), (339, 10), (531, 10), (160, 10), (38, 10), (416, 10), (343, 10), (323, 10), (607, 10), (182, 10), (133, 9), (560, 9), (254, 9), (406, 9), (290, 9)]


[(546, 18), (63, 15), (134, 15), (5, 15), (498, 15), (376, 15), (10, 14), (386, 14), (22, 14), (152, 14), (241, 14), (96, 14), (264, 13), (6, 13), (53, 13), (495, 13), (401, 13), (564, 13), (508, 13), (559, 12), (101, 12), (220, 12), (77, 12), (309, 12), (544, 12), (100, 12), (245, 12), (536, 12), (192, 11), (298, 11), (56, 11), (483, 11), (233, 11), (572, 11), (128, 11), (285, 11), (27, 11), (143, 11), (116, 11), (496, 10), (424, 10), (284, 10), (268, 10), (49, 10), (567, 10), (621, 10), (184, 10), (244, 10), (201, 10), (529, 10), (354, 10), (34, 10), (603, 10), (173, 10), (594, 10), (221, 10), (120, 10), (2, 9), (174, 9), (551, 9), (440, 9), (504, 9), (452, 9), (293, 9), (349, 9), (92, 9), (297, 9), (474, 9), (481, 9), (88, 9), (519, 9), (343, 9), (157, 9), (182, 9), (358, 9), (208, 8), (494, 8), (162, 8), (381, 8), (283, 8), (454, 8), (464, 8), (458, 8), (472, 8), (420, 8), (326, 8), (121, 8), (28, 8), (339, 8), (323, 8), (38, 8), (155, 8), (476, 8), (99, 8), (435, 8), (13, 8), (112, 8), (71, 8), (90, 7), (473, 7), (364, 7), (618, 7), (406, 7), (402, 7), (368, 7), (130, 7), (147, 7), (337, 7), (348, 7), (232, 7), (513, 7), (411, 7), (409, 7), (331, 7), (149, 7), (282, 7), (168, 7), (113, 7), (144, 7), (607, 7)]

In [148]:



correct = [i for i in range(700) if i not in l]
bad = l
ind = correct + bad
X = agg_epochs_std[ind]
y = [0 for i in range(len(correct))] + [1 for i in range(len(bad))] 


# cov =  pyriemann.estimation.Covariances('lwf')
# cov_X = cov.transform(X)

    
    
# TSclassifier = pyriemann.classification.TSclassifier(metric='riemann')


# TSclassifier.fit(cov_X, y)


# y_predict = TSclassifier.predict(cov_X)
# print((sklearn.metrics.accuracy_score(y_predict, y)))







cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []

epochs_data = X
labels = y


for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    
    bad_catcher = pyriemann.classification.TSclassifier(metric='riemann')

    bad_catcher.fit(cov_X_train, y_train)
    
    
    y_predict = bad_catcher.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(bad_catcher.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))




(651, 32, 1033)
0.9354838709677419
(651, 32, 1033)
0.9339477726574501
(651, 32, 1033)
0.9493087557603687
(651, 32, 1033)
0.9400921658986175
(651, 32, 1033)
0.9370199692780338
(651, 32, 1033)
0.9385560675883257
(651, 32, 1033)
0.9324116743471582
(651, 32, 1033)
0.9385560675883257
(651, 32, 1033)
0.9416282642089093
(651, 32, 1033)
0.9293394777265745
Classification accuracy: 0.922086 / Chance level: 0.815725


In [135]:
y_predict = bad_catcher.predict(cov.transform(agg_epochs_std[[1344]]))
print(y_predict)


[1]


1