# Loading and concatenante data 

In [1]:
import numpy as np
import scipy.io


from sklearn.preprocessing import StandardScaler
import copy

# sparse_files = ['sparse_ANM210861_20130701.npy', 'sparse_ANM210861_20130702.npy', 
#                'sparse_ANM210861_20130703.npy']

# label_files = ['ANM210861_20130701_labels.npy', 'ANM210861_20130702_labels.npy',
#               'ANM210861_20130703_labels.npy']

sparse_files = ['sparse_ANM210861_20130701.npy', 'sparse_ANM210861_20130702.npy', 
               'sparse_ANM210861_20130703.npy', 'sparse_ANM210862_20130626.npy', 'sparse_ANM210862_20130627.npy', 
               'sparse_ANM210862_20130628.npy']

label_files = ['ANM210861_20130701_labels.npy', 'ANM210861_20130702_labels.npy',
              'ANM210861_20130703_labels.npy', 'ANM210862_20130626_labels.npy', 'ANM210862_20130627_labels.npy',
              'ANM210862_20130628_labels.npy']


agg_spares_epochs = np.load(sparse_files[0])
agg_labels = np.load(label_files[0])
ind = np.linspace(0,3443-1, num = 1033)
ind = [int(np.floor(i)) for i in ind]
agg_spares_epochs = agg_spares_epochs[:,:, ind]


for i in range(1,6):
    new_epochs =  np.load(sparse_files[i])
    agg_spares_epochs = np.concatenate((agg_spares_epochs, new_epochs), axis = 0)
    new_label = np.load(label_files[i])
    agg_labels = np.concatenate((agg_labels, new_label), axis = 0)

print(agg_spares_epochs.shape)
print(agg_labels.shape)



agg_epochs_std = copy.copy(agg_spares_epochs)
sample_num, chan_num, timepoint = agg_spares_epochs.shape
for c in range(chan_num):
    original_timepoints = agg_spares_epochs[:,c,:]
    scaler = StandardScaler()
    scaler.fit(original_timepoints)
    chan_std = scaler.transform(original_timepoints)
    agg_epochs_std[:,c,:] = chan_std


(1669, 32, 1033)
(1669,)


# Tangent Space classifier with single SVM

In [183]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_epochs_std
labels = agg_labels

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=sklearn.discriminant_analysis.LinearDiscriminantAnalysis())
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100))
     
#     TSclassifier = pyriemann.classification.TSclassifier(metric='riemann', clf=LogisticRegression())

    TSclassifier.fit(cov_X_train, y_train)
    
    
    y_predict = TSclassifier.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(y_predict, y_test))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

0.7604790419161677
0.7754491017964071
0.7634730538922155
0.7904191616766467
0.8143712574850299
0.7664670658682635
0.7964071856287425
0.8173652694610778
0.781437125748503
0.7844311377245509
Classification accuracy: 0.785030 / Chance level: 0.500899


# Bagging of 21 SVM base classifiers with fine-tuned parameters

In [100]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=42)
scores = []



n_estimator = 21
n_samples, _,_ = agg_epochs_std.shape
labels = agg_labels[:(n_samples//5 * 4)]

agg_epochs_std_train_test = agg_epochs_std[:(n_samples//5 * 4),:,:]
epochs_data = agg_epochs_std_train_test

wrong = []

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    n_samples, _,_ = cov_X_train.shape
    subset_size = n_samples//3 * 2
    
    ind_sets = [np.random.randint(0, n_samples, size = subset_size) for i in range(n_estimator)]
    
    clfs = [pyriemann.classification.TSclassifier(metric='riemann', clf=SVC(kernel='rbf', random_state=0, gamma= 0.03, C=100, probability=True)) for i in range(n_estimator)]

    for i in range(n_estimator):
        clfs[i].fit(cov_X_train[ind_sets[i],:,:], y_train[ind_sets[i]])
    
    y_predict = [clfs[i].predict(cov_X_test) for i in range(n_estimator)]
    y_predict = scipy.stats.mode(y_predict, axis=0).mode[0]
    

    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))
    print(sklearn.metrics.accuracy_score(y_predict, y_test))
    wrong.append([test_idx[i] for i in range(len(y_test)) if y_predict[i] != y_test[i]])

class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


(1065, 32, 1033)
0.7865168539325843
(1065, 32, 1033)
0.7902621722846442
(1065, 32, 1033)
0.7902621722846442
(1065, 32, 1033)
0.7790262172284644
(1065, 32, 1033)
0.7827715355805244
(1065, 32, 1033)
0.7677902621722846
(1065, 32, 1033)
0.7827715355805244
(1065, 32, 1033)
0.8014981273408239
(1065, 32, 1033)
0.8352059925093633
(1065, 32, 1033)
0.7940074906367042
(1065, 32, 1033)
0.7715355805243446
(1065, 32, 1033)
0.7827715355805244
(1065, 32, 1033)
0.846441947565543
(1065, 32, 1033)
0.8202247191011236
(1065, 32, 1033)
0.7528089887640449
(1065, 32, 1033)
0.8052434456928839
(1065, 32, 1033)
0.7902621722846442
(1065, 32, 1033)
0.8127340823970037
(1065, 32, 1033)
0.8314606741573034
(1065, 32, 1033)
0.7715355805243446
(1065, 32, 1033)
0.7677902621722846
(1065, 32, 1033)
0.8052434456928839
(1065, 32, 1033)
0.8202247191011236
(1065, 32, 1033)
0.8127340823970037
(1065, 32, 1033)
0.8014981273408239
(1065, 32, 1033)
0.8014981273408239
(1065, 32, 1033)
0.7940074906367042
(1065, 32, 1033)
0.8014981273

Pick out the channel that were repeatedly classified wrong

In [101]:
result = []
result = result + sum(wrong, [])
from collections import Counter
l = [i[0] for i in list(Counter(result).most_common(150))]
print(l)



[(608, 21), (1128, 16), (744, 16), (831, 16), (1143, 15), (1090, 15), (1268, 15), (625, 15), (901, 15), (1178, 14), (276, 14), (834, 14), (302, 14), (975, 14), (983, 13), (1243, 13), (879, 13), (1117, 13), (765, 13), (643, 13), (1272, 13), (1176, 13), (1257, 13), (833, 13), (1005, 13), (825, 13), (1155, 13), (658, 13), (656, 13), (847, 13), (296, 12), (1325, 12), (1254, 12), (785, 12), (1051, 12), (931, 12), (1295, 12), (803, 12), (315, 12), (724, 12), (1161, 12), (1201, 12), (1061, 11), (1179, 11), (650, 11), (838, 11), (295, 11), (978, 11), (719, 11), (1115, 11), (711, 11), (835, 11), (37, 11), (680, 11), (836, 11), (760, 11), (538, 11), (429, 11), (1312, 11), (21, 11), (1033, 11), (997, 11), (754, 11), (662, 11), (1043, 11), (1271, 11), (704, 10), (503, 10), (771, 10), (665, 10), (254, 10), (1209, 10), (717, 10), (602, 10), (858, 10), (991, 10), (837, 10), (805, 10), (1180, 10), (472, 10), (903, 10), (697, 10), (1104, 10), (996, 10), (839, 10), (1228, 10), (715, 10), (470, 10), (577

# Training another classifier to classify "good" epochs and "bad" epochs

In [162]:

correct = [i for i in np.random.randint(0,(n_samples//5 * 4),900) if i not in l]
bad = l
ind = correct + bad
X = agg_epochs_std_train_test[ind]
y = [0 for i in range(len(correct))] + [1 for i in range(len(bad))] 


cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = X
labels = y


for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    
    bad_catcher = pyriemann.classification.TSclassifier(metric='riemann')
    bad_catcher.fit(cov_X_train, y_train)
    y_predict = bad_catcher.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(y_predict, y_test))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))


0.9378238341968912
0.9067357512953368
0.9222797927461139
0.8808290155440415
0.9015544041450777
0.8963730569948186
0.9222797927461139
0.8963730569948186
0.9119170984455959
0.9067357512953368
Classification accuracy: 0.908290 / Chance level: 0.844237


# Two stag classifer on validation data

In [192]:
def check_good(epochs):
    cov =  pyriemann.estimation.Covariances('lwf')
    bad = bad_catcher.predict(cov.transform(epochs))
    bad_proba = bad_catcher.predict_proba(cov.transform(epochs))

    for i in range(len(bad)):
        if bad[i] == 1: 
            if bad_proba[i,1] < 0.4:
                bad[i] = 0
                      
    res = [i for i in range(epochs.shape[0]) if bad[i] == 0]

    return res


label_validation = agg_labels[(n_samples//5 * 4):]
X_validation = agg_epochs_std[(n_samples//5 * 4):,:,:]
print(X_validation.shape)
ind = check_good(X_validation)


X_validation = X_validation[ind]
label_validation = label_validation[ind]
print(X_validation.shape)

cov =  pyriemann.estimation.Covariances('lwf')
cov_X_validation= cov.transform(X_validation)


y_predict = [clfs[i].predict(cov_X_validation) for i in range(n_estimator)]
y_predict = scipy.stats.mode(y_predict, axis=0).mode[0]


print(sklearn.metrics.accuracy_score(y_predict,  label_validation))


(817, 32, 1033)
(56, 32, 1033)
0.9107142857142857


Appendix:
    
[(608, 21), (1128, 16), (744, 16), (831, 16), (1143, 15), (1090, 15), (1268, 15), (625, 15), (901, 15), (1178, 14), (276, 14), (834, 14), (302, 14), (975, 14), (983, 13), (1243, 13), (879, 13), (1117, 13), (765, 13), (643, 13), (1272, 13), (1176, 13), (1257, 13), (833, 13), (1005, 13), (825, 13), (1155, 13), (658, 13), (656, 13), (847, 13), (296, 12), (1325, 12), (1254, 12), (785, 12), (1051, 12), (931, 12), (1295, 12), (803, 12), (315, 12), (724, 12), (1161, 12), (1201, 12), (1061, 11), (1179, 11), (650, 11), (838, 11), (295, 11), (978, 11), (719, 11), (1115, 11), (711, 11), (835, 11), (37, 11), (680, 11), (836, 11), (760, 11), (538, 11), (429, 11), (1312, 11), (21, 11), (1033, 11), (997, 11), (754, 11), (662, 11), (1043, 11), (1271, 11), (704, 10), (503, 10), (771, 10), (665, 10), (254, 10), (1209, 10), (717, 10), (602, 10), (858, 10), (991, 10), (837, 10), (805, 10), (1180, 10), (472, 10), (903, 10), (697, 10), (1104, 10), (996, 10), (839, 10), (1228, 10), (715, 10), (470, 10), (577, 10), (1242, 10), (283, 10), (1049, 10), (1015, 10), (667, 10), (701, 10), (750, 10), (1247, 9), (739, 9), (115, 9), (1157, 9), (247, 9), (777, 9), (354, 9), (371, 9), (1057, 9), (505, 9), (1065, 9), (576, 9), (1083, 9), (851, 9), (1080, 9), (359, 9), (936, 9), (679, 9), (1127, 9), (826, 9), (1248, 9), (918, 9), (1148, 9), (892, 9), (743, 9), (898, 9), (989, 9), (670, 9), (287, 9), (726, 9), (981, 9), (1297, 8), (946, 8), (1053, 8), (638, 8), (1098, 8), (797, 8), (1141, 8), (1076, 8), (334, 8), (1170, 8), (676, 8), (580, 8), (655, 8), (599, 8), (1182, 8), (675, 8), (877, 8), (436, 8), (1200, 8), (463, 8), (423, 8), (369, 8), (1153, 8)]
[608, 1128, 744, 831, 1143, 1090, 1268, 625, 901, 1178, 276, 834, 302, 975, 983, 1243, 879, 1117, 765, 643, 1272, 1176, 1257, 833, 1005, 825, 1155, 658, 656, 847, 296, 1325, 1254, 785, 1051, 931, 1295, 803, 315, 724, 1161, 1201, 1061, 1179, 650, 838, 295, 978, 719, 1115, 711, 835, 37, 680, 836, 760, 538, 429, 1312, 21, 1033, 997, 754, 662, 1043, 1271, 704, 503, 771, 665, 254, 1209, 717, 602, 858, 991, 837, 805, 1180, 472, 903, 697, 1104, 996, 839, 1228, 715, 470, 577, 1242, 283, 1049, 1015, 667, 701, 750, 1247, 739, 115, 1157, 247, 777, 354, 371, 1057, 505, 1065, 576, 1083, 851, 1080, 359, 936, 679, 1127, 826, 1248, 918, 1148, 892, 743, 898, 989, 670, 287, 726, 981, 1297, 946, 1053, 638, 1098, 797, 1141, 1076, 334, 1170, 676, 580, 655, 599, 1182, 675, 877, 436, 1200, 463, 423, 369, 1153]