In [1]:
import numpy as np
import scipy.linalg as la
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import geomstats as gs
import geomstats.geometry.spd_matrices as spd
import pickle
from scipy.signal import butter, lfilter
from geomstats.learning.frechet_mean import FrechetMean
from scipy.io import loadmat
import pandas as pd
from copy import deepcopy
from scipy.stats import wilcoxon
from copy import deepcopy


# Show result tables in full (no row/column truncation) and print floats
# with thousands separators and three decimals.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.set_option('display.float_format', "{:,.3f}".format)

INFO: Using numpy backend


# Globally used things

In [2]:
class RCSP:
    """Common Spatial Patterns (CSP) pipeline with a selectable class-mean estimator.

    Per-class mean covariance matrices are computed either as the arithmetic
    mean ("classic") or as a geomstats Frechet mean under the affine-invariant
    ("AIRM"), Log-Euclidean ("LEM") or Bures-Wasserstein ("BW") metric.  The
    spatial filters derived from the two class means turn every trial
    covariance into log-variance features that are fed to the classifier.

    Attributes:
        metric: name of the mean estimator ("classic", "AIRM", "LEM", "BW").
        nchannels: matrix size handed to the geomstats SPD metrics.
        clf: classifier instance created by calling the ``clf`` factory.
        V: spatial filters (one per column) set by ``train``; None before.
        n: number of filters kept from each end of the spectrum; None before.
    """

    def __init__(self, metric, nchannels, clf):
        """Store the configuration and instantiate the classifier via ``clf()``."""
        self.metric = metric
        self.nchannels = nchannels
        self.clf = clf()
        self.V = None
        self.n = None

    def estimateMeans(self, classSpecificCOV):
        """Return one mean covariance matrix per class.

        Args:
            classSpecificCOV: indexable collection whose entries are the
                covariance matrices of one class each.  The "classic" branch
                only reads entries 0 and 1; the geomstats branches iterate
                over every entry.

        Returns:
            list of mean matrices, in class order.

        Raises:
            ValueError: if ``self.metric`` is not a supported name.
        """
        if self.metric == "classic":
            return [sum(covs) / len(covs)
                    for covs in (classSpecificCOV[0], classSpecificCOV[1])]
        elif self.metric == "AIRM":
            estimator = FrechetMean(spd.SPDMetricAffine(n=self.nchannels), max_iter=64)
        elif self.metric == "LEM":
            estimator = FrechetMean(spd.SPDMetricLogEuclidean(n=self.nchannels), max_iter=64)
        elif self.metric == "BW":
            estimator = FrechetMean(spd.SPDMetricBuresWasserstein(n=self.nchannels), max_iter=64)  # doesn't work yet
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Not implemented metric")

        means = []
        for COV in classSpecificCOV:
            estimator.fit(COV)
            means.append(estimator.estimate_)
        return means

    def separate_classes(self, X, Y):
        """Split ``X`` into the entries labelled 0 and the entries labelled 1.

        Args:
            X: array indexed by trial along its first axis.
            Y: array of labels aligned with ``X``; only 0 and 1 are selected.

        Returns:
            ``[X[Y == 0], X[Y == 1]]``.
        """
        return [X[np.where(Y == label)[0]] for label in range(2)]

    def CSP(self, means, n):
        """Compute the spatial filters from the two class means.

        Solves the generalized eigenproblem
        ``means[0] v = w (means[0] + means[1]) v`` with ``scipy.linalg.eigh``
        and keeps the first ``n`` and the last ``n`` eigenvectors.

        Args:
            means: the two class mean matrices.
            n: number of eigenvectors to keep from each end; must be >= 1.

        Returns:
            array whose columns are the selected eigenvectors.

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            # V[:, -0:] would silently select *every* column.
            raise ValueError("n must be a positive integer")
        _, V = la.eigh(means[0], means[0] + means[1])
        return np.concatenate((V[:, :n], V[:, -n:]), axis=1)

    def applyCSP(self, trial, V):
        """Project one trial covariance onto ``V`` and return log-variance features.

        Each feature is the log of a diagonal entry of ``V.T @ trial @ V``
        divided by the trace of that projected matrix.
        """
        projected = np.dot(np.dot(V.T, trial), V)
        return np.log(np.diagonal(projected) / np.trace(projected))

    def _features(self, COVs, V):
        """Stack the ``applyCSP`` features of every trial in ``COVs`` row-wise."""
        features = np.empty((len(COVs), V.shape[1]))
        for i, trial in enumerate(COVs):
            features[i] = self.applyCSP(trial, V)
        return features

    def train(self, trainCOV, trainLabels, n=3):
        """Estimate the spatial filters and fit the classifier.

        Args:
            trainCOV: array of trial covariance matrices (trial on the first axis).
            trainLabels: array of 0/1 labels aligned with ``trainCOV``.
            n: number of filters kept from each end of the spectrum.
        """
        classSpecificCOV = self.separate_classes(trainCOV, trainLabels)
        means = self.estimateMeans(classSpecificCOV)

        V = self.CSP(means, n)
        self.V = V
        self.n = n

        self.clf.fit(self._features(trainCOV, V), trainLabels)

    def predict(self, testCOV):
        """Classify every trial covariance in ``testCOV``.

        Returns:
            whatever ``self.clf.predict`` returns for the extracted features.

        Raises:
            RuntimeError: if the model has not been trained yet.
        """
        if self.V is None or self.n is None:
            # RuntimeError is still an Exception, so existing handlers keep working.
            raise RuntimeError('Train the model first')

        return self.clf.predict(self._features(testCOV, self.V))

In [3]:
# Base classifiers: one RCSP per mean estimator, each for 20 channels and
# each creating its own LDA instance.
c_csp, airm_csp, lem_csp = (
    RCSP(metric_name, 20, LDA) for metric_name in ('classic', 'AIRM', 'LEM')
)

def count_accuracy(predicted, true, dec_places=2):
    """Return the percentage of positions where ``predicted`` matches ``true``.

    Args:
        predicted: indexable sequence of predicted labels.
        true: indexable sequence of reference labels; its length defines how
            many positions are compared.
        dec_places: number of decimals the percentage is rounded to.

    Returns:
        accuracy in percent, rounded to ``dec_places`` decimals.
    """
    total = len(true)
    misses = sum(1 for k in range(total) if predicted[k] != true[k])
    return round((1 - misses / total) * 100, dec_places)

# Load the per-subject covariance matrices and labels of both sessions.
# The files are opened in a `with` block so the handles are closed right away.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# pickles produced by a trusted source.
with open('datasets/54COVSess01.pickle', 'rb') as infile:
    SubjectsCOV1, SubjectsY1 = pickle.load(infile)
with open('datasets/54COVSess02.pickle', 'rb') as infile:
    SubjectsCOV2, SubjectsY2 = pickle.load(infile)

# Majority Vote

In [4]:
def majority_vote(SubjectsCOV, SubjectsY, n):
    """Leave-one-subject-out comparison of classic, AIRM and LEM CSP plus a vote.

    For every subject, the three module-level models (``c_csp``, ``airm_csp``,
    ``lem_csp``) are trained on all other subjects and evaluated on the
    held-out one.  The "Majority Vote" column combines the AIRM and LEM
    predictions.

    Args:
        SubjectsCOV: sequence with one array of trial covariances per subject.
        SubjectsY: sequence with one label array per subject, aligned with
            ``SubjectsCOV``.
        n: forwarded to ``RCSP.train`` (filters kept from each end).

    Returns:
        DataFrame indexed by subject number (1-based) followed by 'Average'
        and 'p-value'.  The p-value row holds the Wilcoxon signed-rank
        p-value of each method against classic CSP (None for classic itself).
    """
    n_subjects = len(SubjectsCOV)
    columns = ['Classic CSP', 'AIRM CSP', 'LEM CSP', 'Majority Vote']
    # Index follows the actual number of subjects instead of a fixed 54.
    df = pd.DataFrame(index=list(range(1, n_subjects + 1)) + ['Average', 'p-value'],
                      columns=columns)

    c_results, a_results, l_results, m_results = [], [], [], []

    for i in range(n_subjects):
        testCOV, testY = SubjectsCOV[i], SubjectsY[i]

        # Training set = every subject except the held-out one.  The inputs
        # are only read, so no deep copy of the whole dataset is needed.
        train_ids = [j for j in range(n_subjects) if j != i]
        trainCOV = np.concatenate([SubjectsCOV[j] for j in train_ids])
        trainY = np.concatenate([SubjectsY[j] for j in train_ids])

        c_csp.train(trainCOV, trainY, n=n)
        c_res = c_csp.predict(testCOV)

        airm_csp.train(trainCOV, trainY, n=n)
        a_res = airm_csp.predict(testCOV)

        lem_csp.train(trainCOV, trainY, n=n)
        l_res = lem_csp.predict(testCOV)

        # NOTE(review): only two voters take part (AIRM and LEM); when they
        # disagree, argmax over the tied counts picks the smaller label.
        # Confirm whether the classic model was meant to vote as well.
        res = np.column_stack((a_res, l_res))
        majority = np.array([np.argmax(np.bincount(row)) for row in res], dtype='uint8')

        c_results.append(count_accuracy(c_res, testY))
        a_results.append(count_accuracy(a_res, testY))
        l_results.append(count_accuracy(l_res, testY))
        m_results.append(count_accuracy(majority, testY))

    per_method = dict(zip(columns, (c_results, a_results, l_results, m_results)))
    for column, scores in per_method.items():
        # Classic CSP is the baseline, so it has no p-value of its own.
        p_value = None if scores is c_results else wilcoxon(scores, c_results)[1]
        df[column] = scores + [sum(scores) / len(scores), p_value]

    return df

## Session 1

In [5]:
# Session 1, n=2: leave-one-subject-out accuracies per method.
dfmv1_2 = majority_vote(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=2)
dfmv1_2.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,70.0,77.5,76.5,77.0
2,50.0,50.5,50.5,50.5
3,67.5,84.0,84.5,85.0
4,54.0,62.0,63.5,62.5
5,71.5,81.5,81.5,81.0
6,70.5,72.5,72.5,72.5
7,47.0,46.5,46.5,46.5
8,59.5,62.0,63.0,63.0
9,80.0,72.5,75.0,75.0
10,60.0,60.5,62.5,60.5


In [6]:
# Session 1, n=3: leave-one-subject-out accuracies per method.
dfmv1_3 = majority_vote(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=3)
dfmv1_3.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,71.5,78.0,76.0,77.5
2,50.0,50.5,50.5,50.5
3,70.0,84.0,84.5,85.0
4,60.0,61.5,61.5,62.5
5,71.5,81.5,81.0,80.5
6,69.5,71.5,73.5,71.0
7,48.0,47.5,46.5,47.5
8,61.0,64.5,59.5,64.5
9,79.0,71.5,73.5,74.0
10,59.5,60.5,62.5,60.5


In [7]:
# Session 1, n=4: leave-one-subject-out accuracies per method.
dfmv1_4 = majority_vote(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=4)
dfmv1_4.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,73.5,78.0,73.0,72.5
2,50.0,50.5,50.0,50.0
3,63.0,86.5,85.0,88.5
4,52.0,65.5,61.0,64.5
5,72.0,83.0,82.0,83.0
6,70.0,70.5,70.0,69.0
7,47.0,47.5,47.0,47.5
8,61.5,61.5,66.0,61.0
9,83.5,79.0,82.5,83.0
10,62.5,53.0,54.5,53.0


In [8]:
# Session 1, n=5: leave-one-subject-out accuracies per method.
dfmv1_5 = majority_vote(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=5)
dfmv1_5.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,71.5,71.5,69.5,69.5
2,50.0,50.5,50.0,50.0
3,66.5,88.5,91.0,91.0
4,52.5,63.0,61.5,64.5
5,72.5,82.0,83.0,82.0
6,68.0,70.5,70.5,70.5
7,47.0,46.5,45.5,46.5
8,63.0,61.0,65.5,60.5
9,82.0,80.5,82.5,81.5
10,56.5,54.0,54.5,54.0


In [9]:
# Persist the session-1 majority-vote tables (n = 2..5) as one pickled list.
ResultsSess01 = [dfmv1_2, dfmv1_3, dfmv1_4, dfmv1_5]

filename = 'datasets/MVResultsSess01.pickle'
# `with` closes the file even if pickling fails part-way.
with open(filename, 'wb') as outfile:
    pickle.dump(ResultsSess01, outfile)


## Session 2

In [5]:
# Session 2, n=2: leave-one-subject-out accuracies per method.
dfmv2_2 = majority_vote(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=2)
dfmv2_2.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,77.0,77.0,78.0,77.0
2,76.5,68.0,69.0,69.5
3,92.0,93.5,93.5,93.5
4,52.5,54.5,54.5,54.5
5,81.5,79.0,81.5,81.5
6,71.0,74.0,73.5,74.0
7,48.0,48.5,48.5,48.5
8,70.5,69.5,69.5,70.0
9,69.0,65.0,69.5,69.5
10,52.0,56.0,58.0,56.0


In [6]:
# Session 2, n=3: leave-one-subject-out accuracies per method.
dfmv2_3 = majority_vote(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=3)
dfmv2_3.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,76.0,75.0,78.0,75.0
2,78.5,68.0,70.5,70.5
3,93.0,95.0,94.5,94.5
4,56.0,57.5,57.0,57.0
5,82.0,78.5,80.5,80.5
6,71.0,72.5,74.0,72.5
7,49.5,48.5,48.5,48.5
8,72.5,72.0,69.5,72.0
9,73.0,69.5,72.0,72.0
10,53.0,56.5,59.5,56.5


In [7]:
# Session 2, n=4: leave-one-subject-out accuracies per method.
dfmv2_4 = majority_vote(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=4)
dfmv2_4.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,78.0,76.5,78.0,76.5
2,78.5,71.0,71.0,71.0
3,93.5,94.0,94.5,94.0
4,57.5,54.5,56.0,56.0
5,80.5,80.5,82.0,82.0
6,70.0,73.5,73.5,73.5
7,48.0,50.0,50.0,50.0
8,71.0,69.0,67.5,69.0
9,67.0,73.5,73.5,73.5
10,51.0,59.5,58.5,59.5


In [8]:
# Session 2, n=5: leave-one-subject-out accuracies per method.
dfmv2_5 = majority_vote(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=5)
dfmv2_5.head(56)

Unnamed: 0,Classic CSP,AIRM CSP,LEM CSP,Majority Vote
1,76.5,74.0,73.0,72.5
2,80.5,73.5,76.0,76.0
3,94.5,94.5,93.5,94.5
4,50.0,49.0,49.0,49.0
5,83.5,84.0,85.0,85.0
6,70.0,73.0,72.0,71.5
7,50.0,50.0,50.0,50.5
8,69.5,68.0,67.5,68.0
9,77.5,71.5,72.0,72.0
10,53.0,55.0,53.0,53.0


In [9]:
# Persist the session-2 majority-vote tables (n = 2..5) as one pickled list.
ResultsSess02 = [dfmv2_2, dfmv2_3, dfmv2_4, dfmv2_5]

filename = 'datasets/MVResultsSess02.pickle'
# `with` closes the file even if pickling fails part-way.
with open(filename, 'wb') as outfile:
    pickle.dump(ResultsSess02, outfile)

# Bootstrap Mean Estimation

In [10]:
class RCSP_bootstrap(RCSP):
    """RCSP variant whose class means are averaged over bootstrap resamples.

    Only ``train`` is overridden; prediction and feature extraction come from
    ``RCSP``.
    """

    def train(self, trainCOV, trainLabels, n=3, btsp_size=0.6, btsp_n=10, random_state=None):
        """Estimate bootstrapped class means, derive the filters and fit the classifier.

        Args:
            trainCOV: array of trial covariance matrices (trial on the first axis).
            trainLabels: array of 0/1 labels aligned with ``trainCOV``.
            n: number of filters kept from each end of the spectrum.
            btsp_size: size of each resample as a fraction of the number of trials.
            btsp_n: number of bootstrap resamples.
            random_state: optional seed for reproducible resampling.  When
                None (default) the global ``np.random`` state is used, exactly
                as before this parameter existed.
        """
        n_trials = len(trainCOV)
        sample_size = int(btsp_size * n_trials)
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        # btsp_means[c, i] is the mean of class c estimated on resample i.
        btsp_means = np.zeros((2, btsp_n, self.nchannels, self.nchannels))

        for i in range(btsp_n):
            # Draw trial indices with replacement.
            idx = rng.choice(n_trials, sample_size)
            classSpecificCOV = self.separate_classes(trainCOV[idx], trainLabels[idx])
            means = self.estimateMeans(classSpecificCOV)
            btsp_means[0, i, :, :] = means[0]
            btsp_means[1, i, :, :] = means[1]

        # Mean of the per-resample means, using the same estimator again.
        class_means = self.estimateMeans(btsp_means)

        V = self.CSP(class_means, n)
        self.V = V
        self.n = n

        train_features = np.empty((n_trials, 2 * n))
        for i, trial in enumerate(trainCOV):
            train_features[i] = self.applyCSP(trial, V)

        self.clf.fit(train_features, trainLabels)

In [11]:
# Bootstrapped counterparts of the base classifiers: one per mean estimator,
# each for 20 channels and each creating its own LDA instance.
c_rcsp_b, a_rcsp_b, l_rcsp_b = (
    RCSP_bootstrap(metric_name, 20, LDA) for metric_name in ("classic", "AIRM", "LEM")
)

In [12]:
def bootstrap_estimation(SubjectsCOV, SubjectsY, n, btsp_size, btsp_n):
    """Leave-one-subject-out comparison of the bootstrapped CSP models plus a vote.

    For every subject, the three module-level models (``c_rcsp_b``,
    ``a_rcsp_b``, ``l_rcsp_b``) are trained on all other subjects and
    evaluated on the held-out one.  The "Majority Vote" column combines the
    AIRM and LEM predictions.

    Args:
        SubjectsCOV: sequence with one array of trial covariances per subject.
        SubjectsY: sequence with one label array per subject, aligned with
            ``SubjectsCOV``.
        n: forwarded to ``train`` (filters kept from each end).
        btsp_size: forwarded to ``train`` (resample size as a fraction).
        btsp_n: forwarded to ``train`` (number of resamples).

    Returns:
        DataFrame indexed by subject number (1-based) followed by 'Average'
        and 'p-values'.  The p-values row holds the Wilcoxon signed-rank
        p-value of each method against bootstrapped classic CSP (None for the
        classic column itself).
    """
    n_subjects = len(SubjectsCOV)
    # The comma between the last two names matters: without it Python
    # concatenates the adjacent string literals into one bogus column.
    columns = ['Bootstraped Classic CSP', 'Bootstraped AIRM CSP',
               'Bootstraped LEM CSP', 'Majority Vote']
    # Index follows the actual number of subjects instead of a fixed 54.
    df = pd.DataFrame(index=list(range(1, n_subjects + 1)) + ['Average', 'p-values'],
                      columns=columns)

    c_results, a_results, l_results, m_results = [], [], [], []

    for i in range(n_subjects):
        testCOV, testY = SubjectsCOV[i], SubjectsY[i]

        # Training set = every subject except the held-out one.  The inputs
        # are only read, so no deep copy of the whole dataset is needed.
        train_ids = [j for j in range(n_subjects) if j != i]
        trainCOV = np.concatenate([SubjectsCOV[j] for j in train_ids])
        trainY = np.concatenate([SubjectsY[j] for j in train_ids])

        c_rcsp_b.train(trainCOV, trainY, n, btsp_size, btsp_n)
        c_res = c_rcsp_b.predict(testCOV)

        a_rcsp_b.train(trainCOV, trainY, n, btsp_size, btsp_n)
        a_res = a_rcsp_b.predict(testCOV)

        l_rcsp_b.train(trainCOV, trainY, n, btsp_size, btsp_n)
        l_res = l_rcsp_b.predict(testCOV)

        # NOTE(review): only two voters take part (AIRM and LEM); when they
        # disagree, argmax over the tied counts picks the smaller label.
        # Confirm whether the classic model was meant to vote as well.
        res = np.column_stack((a_res, l_res))
        majority = np.array([np.argmax(np.bincount(row)) for row in res], dtype='uint8')

        c_results.append(count_accuracy(c_res, testY))
        a_results.append(count_accuracy(a_res, testY))
        l_results.append(count_accuracy(l_res, testY))
        m_results.append(count_accuracy(majority, testY))

    per_method = dict(zip(columns, (c_results, a_results, l_results, m_results)))
    for column, scores in per_method.items():
        # The classic column is the baseline, so it has no p-value of its own.
        p_value = None if scores is c_results else wilcoxon(scores, c_results)[1]
        df[column] = scores + [sum(scores) / len(scores), p_value]

    return df
        

In [13]:
# Bootstrap settings shared by all runs below: each resample draws 60% of the
# training trials, and 10 resamples are drawn per model.
btsp_size, btsp_n = 0.6, 10

## Session 1

In [None]:
# Session 1, n=2: leave-one-subject-out accuracies of the bootstrapped models.
dfbt1_2 = bootstrap_estimation(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=2,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt1_2.head(56)

In [None]:
# Session 1, n=3: leave-one-subject-out accuracies of the bootstrapped models.
dfbt1_3 = bootstrap_estimation(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=3,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt1_3.head(56)

In [None]:
# Session 1, n=4: leave-one-subject-out accuracies of the bootstrapped models.
dfbt1_4 = bootstrap_estimation(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=4,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt1_4.head(56)

In [None]:
# Session 1, n=5: leave-one-subject-out accuracies of the bootstrapped models.
dfbt1_5 = bootstrap_estimation(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1, n=5,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt1_5.head(56)

In [None]:
# Persist the session-1 bootstrap tables (n = 2..5) as one pickled list.
# NOTE(review): this rebinds ResultsSess01, which earlier held the
# majority-vote tables of session 1.
ResultsSess01 = [dfbt1_2, dfbt1_3, dfbt1_4, dfbt1_5]

filename = 'datasets/BTResultsSess01.pickle'
# `with` closes the file even if pickling fails part-way.
with open(filename, 'wb') as outfile:
    pickle.dump(ResultsSess01, outfile)

## Session 2

In [None]:
# Session 2, n=2: leave-one-subject-out accuracies of the bootstrapped models.
dfbt2_2 = bootstrap_estimation(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=2,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt2_2.head(56)

In [None]:
# Session 2, n=3: leave-one-subject-out accuracies of the bootstrapped models.
dfbt2_3 = bootstrap_estimation(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=3,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt2_3.head(56)

In [None]:
# Session 2, n=4: leave-one-subject-out accuracies of the bootstrapped models.
dfbt2_4 = bootstrap_estimation(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=4,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt2_4.head(56)

In [None]:
# Session 2, n=5: leave-one-subject-out accuracies of the bootstrapped models.
dfbt2_5 = bootstrap_estimation(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2, n=5,
                               btsp_size=btsp_size, btsp_n=btsp_n)
dfbt2_5.head(56)

In [None]:
# Persist the session-2 bootstrap tables (n = 2..5) as one pickled list.
# NOTE(review): the list holds session-2 results but is bound to the name
# ResultsSess01 (likely a copy-paste slip).  The name is kept so that any
# later cell relying on it still works; the output file name is correct.
ResultsSess01 = [dfbt2_2, dfbt2_3, dfbt2_4, dfbt2_5]

filename = 'datasets/BTResultsSess02.pickle'
# `with` closes the file even if pickling fails part-way.
with open(filename, 'wb') as outfile:
    pickle.dump(ResultsSess01, outfile)