In [None]:
%env GEOMSTATS_BACKEND=numpy
%env NUMEXPR_MAX_THREADS=12 

import pickle
import pandas as pd
import numpy as np

import geomstats.backend as gs
import geomstats.geometry.spd_matrices as spd
from geomstats.learning.frechet_mean import FrechetMean
from scipy.stats import wilcoxon

# Lift pandas' display truncation so the full result tables are rendered
# (None disables the column/row limits).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
class MDM:
    """Two-class minimum-distance-to-mean classifier for square matrices.

    Training computes one mean matrix per class; prediction assigns each
    trial to the class whose mean is nearest under the selected metric.

    Parameters
    ----------
    nchannels : int
        Side length of the (nchannels x nchannels) input matrices.
    metric : str
        'Euclidean' (arithmetic mean + Frobenius distance), 'AIRM'
        (geomstats ``SPDMetricAffine``) or 'LEM' (geomstats
        ``SPDMetricLogEuclidean``).

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """

    def __init__(self, nchannels, metric):
        self.nchannels = nchannels
        self.n_classes = 2
        # One mean matrix per class, overwritten in place by train().
        self.means = np.zeros((self.n_classes, nchannels, nchannels))

        if metric == "Euclidean":
            # None marks the flat case: no geomstats metric object is needed.
            self.Rmethod = None
        elif metric == 'AIRM':
            self.Rmethod = spd.SPDMetricAffine(n=self.nchannels)
        elif metric == 'LEM':
            self.Rmethod = spd.SPDMetricLogEuclidean(n=self.nchannels)
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError(
                "Not implemented metric: %r (expected 'Euclidean', 'AIRM' or 'LEM')" % (metric,)
            )

    def separate_classes(self, COV, Y):
        """Group the matrices in ``COV`` by their label in ``Y``.

        Returns a list with ``n_classes`` entries; entry ``c`` is the list of
        ``COV[j]`` for every ``j`` with ``Y[j] == c``, in original order.
        """
        return [
            [COV[j] for j, label in enumerate(Y) if label == c]
            for c in range(self.n_classes)
        ]

    def calculate_means(self, classSpecificCOV):
        """Return one mean matrix per class.

        The Euclidean case uses the arithmetic mean; otherwise a geomstats
        ``FrechetMean`` estimator (``max_iter=64``) is fitted per class.

        Raises
        ------
        ValueError
            If any class has no samples (the mean would be undefined).
        """
        for label, members in enumerate(classSpecificCOV):
            if len(members) == 0:
                raise ValueError('class %d has no training samples' % label)

        if self.Rmethod is None:
            return [sum(members)/len(members) for members in classSpecificCOV]

        estimator = FrechetMean(self.Rmethod, max_iter=64)
        means = []
        for members in classSpecificCOV:
            estimator.fit(members)
            means.append(estimator.estimate_)
        return means

    def train(self, COVtrain, Ytrain):
        """Compute the class means from labelled data and store them in ``self.means``."""
        classSpecificCOV = self.separate_classes(COVtrain, Ytrain)
        means = self.calculate_means(classSpecificCOV)
        for c in range(self.n_classes):
            self.means[c] = means[c]

    def frob_distance(self, matrix1, matrix2):
        """Frobenius norm of ``matrix1 - matrix2``."""
        return np.linalg.norm(matrix1-matrix2, ord = 'fro')

    def predict(self, COVtest):
        """Return the index of the nearest class mean for every trial.

        ``COVtest`` is any indexable sequence of matrices. The result is a
        float array of shape ``(len(COVtest), 1)``.
        """
        N = len(COVtest)
        prediction = np.empty((N, 1))

        for i in range(N):
            trial = COVtest[i]
            if self.Rmethod is None:
                dist = [self.frob_distance(trial, mean) for mean in self.means]
            else:
                dist = [self.Rmethod.dist(trial, mean) for mean in self.means]
            # First minimum wins, so ties resolve to the lower class index.
            prediction[i] = dist.index(min(dist))
        return prediction

    def score(self, prediction, Ytest):
        """Accuracy in percent: share of ``prediction[i]`` equal to ``Ytest[i]``.

        Raises
        ------
        ValueError
            If ``Ytest`` is empty (accuracy is undefined).
        """
        N = len(Ytest)
        if N == 0:
            raise ValueError('cannot score an empty test set')
        error = sum(1 for i in range(N) if prediction[i] != Ytest[i])
        return 100*(1-error/N)

In [None]:
# Each pickle holds a (per-subject matrices, per-subject labels) pair.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# dataset files that were produced locally / come from a trusted source.
with open('datasets/54COV7Sess01.pickle', 'rb') as infile:
    SubjectsCOV1, SubjectsY1 = pickle.load(infile)
with open('datasets/54COV7Sess02.pickle', 'rb') as infile:
    SubjectsCOV2, SubjectsY2 = pickle.load(infile)

# Subject-dependent

In [None]:
# One shared 20-channel classifier per distance; the evaluation functions
# below retrain these same three instances for every fold / subject.
c_clf, a_clf, l_clf = (MDM(20, metric) for metric in ('Euclidean', 'AIRM', 'LEM'))

In [None]:
def cross_validate(COV, Y, k):
    """k-fold evaluation of the module-level c_clf, a_clf and l_clf classifiers.

    Folds are contiguous, unshuffled slices of ``int(len(Y)/k)`` samples; any
    remainder samples at the end are used for training only. In every fold
    each classifier is retrained and scored, and a per-trial majority vote
    over the three predictions is scored as well.

    Returns
    -------
    tuple of 4 floats
        Mean fold accuracy (percent) for the Euclidean, AIRM and LEM
        classifiers and for the majority vote, in that order.
    """
    n_samples = len(Y)
    fold_len = int(n_samples/k)

    classifiers = (c_clf, a_clf, l_clf)
    # history[0..2] follow `classifiers`; history[3] is the majority vote
    history = [[] for _ in range(len(classifiers) + 1)]

    for fold in range(k):
        start, stop = fold*fold_len, (fold+1)*fold_len
        testCOV, testY = COV[start:stop], Y[start:stop]

        trainCOV = np.concatenate((COV[:start], COV[stop:]), axis=0)
        trainY = np.concatenate((Y[:start], Y[stop:]), axis=0)

        fold_preds = []
        for clf, accs in zip(classifiers, history):
            clf.train(trainCOV, trainY)
            pred = clf.predict(testCOV)
            accs.append(clf.score(pred, testY))
            fold_preds.append(pred)

        # one row per trial, one column per classifier
        votes = np.column_stack(fold_preds).astype('int8')
        majority = np.array([np.argmax(np.bincount(row)) for row in votes], dtype='int8')

        # score() does not depend on the metric, so any classifier can grade the vote
        history[-1].append(c_clf.score(majority, testY))

    return tuple(sum(accs)/len(accs) for accs in history)
        

In [None]:
def subject_dependent(SubjectsCOV, SubjectsY, k):
    """Run k-fold cross-validation per subject and tabulate the accuracies.

    Parameters
    ----------
    SubjectsCOV, SubjectsY : sequences of equal length
        Per-subject data and labels; entry ``i`` is passed to
        ``cross_validate`` unchanged.
    k : int
        Number of folds forwarded to ``cross_validate``.

    Returns
    -------
    pandas.DataFrame
        One row per subject (index 1..n), an 'Average' row and a 'p-values'
        row. The p-values come from one-sided Wilcoxon signed-rank tests
        (``alternative='less'``) of the Euclidean accuracies against each
        other column; the Euclidean column has no p-value.

    Raises
    ------
    ValueError
        If no subjects are given.
    """
    columns = ['Euclidean distance', 'AIRM distance', 'LEM distance', 'Majority Vote']
    n_subjects = len(SubjectsCOV)
    if n_subjects == 0:
        raise ValueError('subject_dependent needs at least one subject')

    # One (Euclidean, AIRM, LEM, majority) accuracy tuple per subject.
    per_subject = [cross_validate(SubjectsCOV[i], SubjectsY[i], k) for i in range(n_subjects)]
    c_results, a_results, l_results, m_results = (list(col) for col in zip(*per_subject))

    # Index is sized from the data instead of assuming exactly 54 subjects.
    df = pd.DataFrame(index=list(range(1, n_subjects + 1))+['Average', 'p-values'], columns=columns)

    df['Euclidean distance'] = c_results + [sum(c_results)/len(c_results), None]
    for column, results in zip(columns[1:], (a_results, l_results, m_results)):
        # H1: Euclidean accuracies tend to be lower than this column's.
        _, pval = wilcoxon(c_results, results, alternative='less')
        df[column] = results + [sum(results)/len(results), pval]

    return df
    

## Session 1

In [None]:
# 5-fold subject-dependent evaluation on the session-1 data.
df1 = subject_dependent(SubjectsCOV1, SubjectsY1, k=5)

In [None]:
# Display only: df1 itself keeps full precision.
df1.round(decimals=3)

## Session 2

In [None]:
# 5-fold subject-dependent evaluation on the session-2 data.
df2 = subject_dependent(SubjectsCOV2, SubjectsY2, k=5)

In [None]:
# Display only: df2 itself keeps full precision.
df2.round(decimals=3)

In [None]:
# Persist the session-1 subject-dependent table; the context manager closes
# the file even if pickling fails.
filename = 'results/MDM/sess01_dependent.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(df1, outfile)

In [None]:
# Persist the session-2 subject-dependent table; the context manager closes
# the file even if pickling fails.
filename = 'results/MDM/sess02_dependent.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(df2, outfile)

# Subject-Independent

In [None]:
from copy import deepcopy

In [None]:
def subject_independent(SubjectsCOV, SubjectsY):
    """Leave-one-subject-out evaluation of the module-level c_clf, a_clf and l_clf.

    For every subject, the three classifiers are retrained on the
    concatenated data of all other subjects (in subject order) and scored on
    the held-out subject; a per-trial majority vote over the three
    predictions is scored as well.

    Parameters
    ----------
    SubjectsCOV, SubjectsY : sequences of equal length
        Per-subject arrays of matrices and labels. The inputs are only read,
        never modified.

    Returns
    -------
    pandas.DataFrame
        One row per subject (index 1..n), an 'Average' row and a 'p-values'
        row. The p-values come from one-sided Wilcoxon signed-rank tests
        (``alternative='less'``) of the Euclidean accuracies against each
        other column; the Euclidean column has no p-value.

    Raises
    ------
    ValueError
        If fewer than two subjects are given (no training data would remain).
    """
    columns = ['Euclidean distance', 'AIRM distance', 'LEM distance', 'Majority Vote']
    n_subjects = len(SubjectsCOV)
    if n_subjects < 2:
        raise ValueError('subject_independent needs at least two subjects')

    classifiers = (c_clf, a_clf, l_clf)
    # results[0..2] follow `classifiers`; results[3] is the majority vote
    results = [[] for _ in columns]

    for held_out in range(n_subjects):
        testCOV = SubjectsCOV[held_out]
        testY = SubjectsY[held_out]

        # A single concatenate builds a fresh training array, so no deep copy
        # of the whole dataset is needed to keep the inputs untouched.
        rest = [j for j in range(n_subjects) if j != held_out]
        trainCOV = np.concatenate([SubjectsCOV[j] for j in rest])
        trainY = np.concatenate([SubjectsY[j] for j in rest])

        fold_preds = []
        for clf, accs in zip(classifiers, results):
            clf.train(trainCOV, trainY)
            pred = clf.predict(testCOV)
            accs.append(clf.score(pred, testY))
            fold_preds.append(pred)

        # one row per trial, one column per classifier
        preds = np.column_stack(fold_preds).astype('int8')
        majority = np.array([np.argmax(np.bincount(row)) for row in preds], dtype='int8')

        # score() does not depend on the metric, so any classifier can grade the vote
        results[-1].append(c_clf.score(majority, testY))

    c_results = results[0]
    # Index is sized from the data instead of assuming exactly 54 subjects.
    df = pd.DataFrame(index=list(range(1, n_subjects + 1))+['Average', 'p-values'], columns=columns)

    df['Euclidean distance'] = c_results + [sum(c_results)/len(c_results), None]
    for column, accs in zip(columns[1:], results[1:]):
        # H1: Euclidean accuracies tend to be lower than this column's.
        _, pval = wilcoxon(c_results, accs, alternative='less')
        df[column] = accs + [sum(accs)/len(accs), pval]

    return df

## Session 1

In [None]:
# Leave-one-subject-out evaluation on the session-1 data.
df1_i = subject_independent(SubjectsCOV=SubjectsCOV1, SubjectsY=SubjectsY1)

In [None]:
# Rounds in place of the original table, so the pickle saved from df1_i
# holds values rounded to 3 decimals.
df1_i = df1_i.round(decimals=3)

In [None]:
# Persist the session-1 subject-independent table; the context manager
# closes the file even if pickling fails.
filename = 'results/MDM/sess01_7_independent.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(df1_i, outfile)

## Session 2

In [None]:
# Leave-one-subject-out evaluation on the session-2 data.
df2_i = subject_independent(SubjectsCOV=SubjectsCOV2, SubjectsY=SubjectsY2)

In [None]:
# Rounds in place of the original table, so the pickle saved from df2_i
# holds values rounded to 3 decimals.
df2_i = df2_i.round(decimals=3)

In [None]:
# Persist the session-2 subject-independent table; the context manager
# closes the file even if pickling fails.
filename = 'results/MDM/sess02_7_independent.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(df2_i, outfile)