In [1]:
%env GEOMSTATS_BACKEND=numpy

%env NUMEXPR_MAX_THREADS=12 

import pickle
import nu_smrutils as u
import pandas as pd
import numpy as np

import geomstats.backend as gs
import geomstats.geometry.spd_matrices as spd
from geomstats.learning.frechet_mean import FrechetMean
from geomstats.learning.preprocessing import ToTangentSpace

# Lift pandas' display limits so DataFrames are shown without truncating
# columns or rows. NOTE(review): displaying a large frame will print all of it.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

env: GEOMSTATS_BACKEND=numpy
env: NUMEXPR_MAX_THREADS=12


INFO: Using numpy backend


In [2]:
class DataPreparation:
    """Turn per-subject recordings into per-epoch covariance matrices.

    Parameters
    ----------
    directory : str
        Path template with two ``{}`` placeholders, filled with the subject
        number and the split name (``'train'`` / ``'test'``).
    conditions : sequence
        Condition names; a condition is encoded as its position in this
        sequence.
    epochs : int
        Number of epochs per subject; epoch ids ``0 .. epochs-1`` are read.
    """

    def __init__(self, directory, conditions, epochs):
        self.conditions = conditions
        self.epochs = epochs
        self.directory = directory

    def loadConcat(self, subject, test_epoch_offset=200):
        """Load the train and test recordings of one subject as a single frame.

        Parameters
        ----------
        subject : int
            Subject number substituted into the ``directory`` template.
        test_epoch_offset : int, default 200
            Value added to the test split's ``epoch`` column so its epoch ids
            do not collide with the train split's.
            NOTE(review): the default presumably equals the number of train
            epochs per subject -- confirm against the data.

        Returns
        -------
        pandas.DataFrame
            Train rows followed by the (re-numbered) test rows.
        """
        train_f = self.directory.format(subject, 'train')
        test_f = self.directory.format(subject, 'test')

        # u.loaddat is a project helper; its result must expose to_data_frame().
        tr_df = u.loaddat(train_f).to_data_frame()
        ts_df = u.loaddat(test_f).to_data_frame()

        ts_df['epoch'] += test_epoch_offset
        return pd.concat([tr_df, ts_df])

    def _encode_label(self, label):
        """Return the index of ``label`` in ``self.conditions``.

        A label that matches no condition is returned unchanged.
        """
        for code, name in enumerate(self.conditions):
            if label == name:
                return code
        return label

    def convertToSPD(self, df, normalize=True):
        """Compute one covariance matrix and one label per epoch.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain ``'epoch'`` and ``'condition'`` columns. Every column
            from the fourth onward is treated as a signal column.
            NOTE(review): assumes the first three columns are metadata
            (e.g. time / condition / epoch) -- confirm for the loader used.
        normalize : bool, default True
            If true, each signal column is z-scored within the epoch before
            the covariance is computed.

        Returns
        -------
        (list of numpy.ndarray, list)
            Covariance matrices and the encoded labels, in epoch order.

        Raises
        ------
        IndexError
            If an epoch id in ``range(self.epochs)`` has no rows in ``df``.
        """
        SPD = []
        labels = []
        for i in range(self.epochs):
            df_slice = df.loc[df['epoch'] == i, :]
            if df_slice.empty:
                # Same exception type the original positional lookup produced,
                # but with a message that names the missing epoch.
                raise IndexError("no rows found for epoch {}".format(i))

            matrix = df_slice.iloc[:, 3:]
            if normalize:
                matrix = (matrix - matrix.mean()) / matrix.std()

            # All rows of an epoch share one condition; read it from the first row.
            label = self._encode_label(df_slice['condition'].iloc[0])

            SPD.append(matrix.cov().to_numpy())
            labels.append(label)
        return SPD, labels

    def generateSPDDataset(self, r=(1, 54), normalize=True):
        """Build the covariance dataset for an inclusive range of subjects.

        Parameters
        ----------
        r : sequence of two ints, default (1, 54)
            First and last subject number (both included).
        normalize : bool, default True
            Forwarded to :meth:`convertToSPD`.

        Returns
        -------
        (list of numpy.ndarray, list)
            Matrices and labels of all subjects, concatenated subject by subject.
        """
        SPDDataset = []
        labels = []
        for subject in range(r[0], r[1] + 1):
            df = self.loadConcat(subject)
            SPD, subject_labels = self.convertToSPD(df, normalize)
            SPDDataset.extend(SPD)
            labels.extend(subject_labels)

        return SPDDataset, labels
        
        

In [3]:
class RMLNM:
    """Nearest-class-mean classifier on SPD matrices, evaluated leave-one-subject-out.

    Two variants are run side by side: one using the log-Euclidean metric
    ("LE") and one using the affine-invariant metric ("AI"), both taken from
    ``geomstats.geometry.spd_matrices``.

    Parameters
    ----------
    subjects : int
        Number of subjects in the dataset.
    epochs : int
        Number of consecutive samples belonging to each subject.
    channels : int
        Matrix size ``n`` passed to the geomstats metric constructors.
    n_classes : int
        Number of classes; labels are expected to be ``0 .. n_classes-1``.
    """

    def __init__(self, subjects, epochs, channels, n_classes):
        self.subjects = subjects
        self.epochs = epochs
        self.n_classes = n_classes

        self.LE_metric = spd.SPDMetricLogEuclidean(n=channels)
        self.AI_metric = spd.SPDMetricAffine(n=channels)

        self.LE_mean = FrechetMean(self.LE_metric)
        self.AI_mean = FrechetMean(self.AI_metric)

    def outer_LOSO(self, d, l):
        """Run leave-one-subject-out evaluation.

        ``d`` and ``l`` are sequences of matrices and labels laid out subject
        by subject, ``self.epochs`` entries per subject. For each subject the
        class means are fitted on all other subjects and the held-out
        subject's samples are classified.

        Returns
        -------
        list of [float, float]
            ``[LE_accuracy, AI_accuracy]`` for every subject, in order.
        """
        subject_acc = []
        for i in range(self.subjects):
            start = i * self.epochs
            stop = (i + 1) * self.epochs

            d_test = d[start:stop]
            l_test = l[start:stop]

            # list(...) so the fold is built by concatenation even if d / l are
            # arrays, for which "+" would add element-wise instead.
            d_train = list(d[:start]) + list(d[stop:])
            l_train = list(l[:start]) + list(l[stop:])

            print("Subject: "+str(i+1))
            classSpecificSPD = self.separate_classes(d_train, l_train)
            LE_SPDk, AI_SPDk = self.compute_SPDk(classSpecificSPD)
            LE_acc, AI_acc = self.compute_accuracy(d_test, l_test, LE_SPDk, AI_SPDk)

            print("Accuracy with LE: "+str(LE_acc))
            print("Accuracy with AI: "+str(AI_acc))
            print("\n")

            subject_acc.append([LE_acc, AI_acc])

        return subject_acc

    def compute_accuracy(self, d, l, LE_SPDk, AI_SPDk):
        """Classify each sample by its nearest class mean and score the result.

        Parameters
        ----------
        d, l : sequences
            Test samples and their true labels.
        LE_SPDk, AI_SPDk : sequences
            Per-class means for the log-Euclidean and affine-invariant metric,
            indexed by class label.

        Returns
        -------
        (float, float)
            Fraction of samples in ``d`` predicted correctly with the LE and
            the AI metric respectively.

        Raises
        ------
        ValueError
            If ``d`` is empty (accuracy is undefined).
        """
        n_samples = len(d)
        if n_samples == 0:
            raise ValueError("compute_accuracy needs at least one test sample")

        LE_error = 0
        AI_error = 0

        for i in range(n_samples):
            LE_dist = [self.LE_metric.dist(d[i], LE_SPDk[j])
                       for j in range(self.n_classes)]
            AI_dist = [self.AI_metric.dist(d[i], AI_SPDk[j])
                       for j in range(self.n_classes)]

            # Predicted class = index of the closest mean (first one on ties).
            LE_prediction = LE_dist.index(min(LE_dist))
            AI_prediction = AI_dist.index(min(AI_dist))

            if LE_prediction != l[i]:
                LE_error += 1
            if AI_prediction != l[i]:
                AI_error += 1

        # Normalise by the number of samples actually scored, not by
        # self.epochs, so the result is right for any test-set size.
        LE_accuracy = 1 - LE_error / n_samples
        AI_accuracy = 1 - AI_error / n_samples
        print("Compute Accuracy: Done!")
        return LE_accuracy, AI_accuracy

    def compute_SPDk(self, classSpecificSPD):
        """Fit one Frechet mean per class under each metric.

        Parameters
        ----------
        classSpecificSPD : sequence of sequences
            ``classSpecificSPD[k]`` holds the training matrices of class ``k``.

        Returns
        -------
        (list, list)
            Per-class means for the LE metric and for the AI metric.
        """
        LE_SPDk = []
        AI_SPDk = []

        for SPD in classSpecificSPD:
            # The estimators are reused; estimate_ is read right after each fit.
            self.LE_mean.fit(SPD)
            LE_SPDk.append(self.LE_mean.estimate_)

            self.AI_mean.fit(SPD)
            AI_SPDk.append(self.AI_mean.estimate_)
        print("Compute SPDk: Done!")
        return LE_SPDk, AI_SPDk

    def separate_classes(self, d, l):
        """Group samples by label.

        Returns a list of length ``self.n_classes`` whose ``k``-th entry is the
        list of samples in ``d`` whose label equals ``k`` (original order kept).
        """
        classSpecificSPD = []
        for k in range(self.n_classes):
            classSpecificSPD.append(
                [sample for sample, label in zip(d, l) if label == k]
            )
        print('Separate Classes: Done!')
        return classSpecificSPD

In [4]:
# --- Experiment configuration -------------------------------------------
# Number of subjects passed to the classifier for leave-one-subject-out.
subjects = 54
epochs = 400 #total epochs in one subject
# NOTE(review): `points` is not referenced by any cell visible here;
# presumably the number of time samples per epoch -- confirm or remove.
points = 512
# Matrix size handed to the geomstats SPD metrics.
channels = 62
# Path template: first {} is the subject number, second {} is 'train' or 'test'.
directory = 'datasets/54subjects/Subject{}_{}.pickle'
# Condition names; a condition's position in this list is its integer label.
conditions=['left','right']
subjectRange=[1,54] #for which subjects generate SPD dataset

# load=True  -> read the cached dataset from `filename`;
# load=False -> rebuild it from the raw files (and write the cache if dump=True).
load=True
dump=False
filename='datasets/SPDDataset.pickle'

In [5]:
# Dataset builder; only used further down when the cached dataset is not loaded.
a=DataPreparation(directory, conditions, epochs)

In [6]:
# Obtain the dataset as two parallel lists: d (covariance matrices) and
# l (integer labels). Either read the cached pickle or rebuild from raw files.
if load:
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # cache files that were produced by this notebook.
    with open(filename, 'rb') as infile:  # closed even if unpickling fails
        data = pickle.load(infile)
    d, l = data[0], data[1]
else:
    d, l = a.generateSPDDataset(subjectRange)
    data = [d, l]
    if dump:
        # Cache the freshly generated dataset for later runs.
        with open(filename, 'wb') as outfile:
            pickle.dump(data, outfile)

In [7]:
# Run the leave-one-subject-out evaluation; `results` holds one
# [LE_accuracy, AI_accuracy] pair per subject.
b=RMLNM(subjects, epochs, channels, len(conditions))
results=b.outer_LOSO(d, l)

Subject: 1
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.5
Accuracy with AI: 0.495


Subject: 2
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.5449999999999999
Accuracy with AI: 0.5525


Subject: 3
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.515
Accuracy with AI: 0.505


Subject: 4
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.36750000000000005
Accuracy with AI: 0.35


Subject: 5
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.5575
Accuracy with AI: 0.56


Subject: 6
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.6325000000000001
Accuracy with AI: 0.6074999999999999


Subject: 7
Separate Classes: Done!
Compute SPDk: Done!
Compute Accuracy: Done!
Accuracy with LE: 0.5025
Accuracy with AI: 0.505


Subject: 8
Separate Classes: Done!
Compute SP

In [9]:
# Persist the per-subject accuracies so they can be analysed without re-running
# the evaluation. The context manager closes the file even if pickling fails.
results_dump='datasets/resultsNM.pickle'
with open(results_dump, 'wb') as outfile:
    pickle.dump(results, outfile)