In [1]:
%env GEOMSTATS_BACKEND=numpy

#delete this if you don't have 12 CPU cores on your machine
%env NUMEXPR_MAX_THREADS=12 

import nu_smrutils as u
import pandas as pd
import numpy as np
import pickle

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression

#from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
#from tensorflow.keras import Sequential
#from tensorflow.keras.layers import Dense
#from tensorflow.keras.regularizers import L1L2

import geomstats.backend as gs
import geomstats.geometry.spd_matrices as spd
from geomstats.learning.preprocessing import ToTangentSpace

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

env: GEOMSTATS_BACKEND=numpy
env: NUMEXPR_MAX_THREADS=12


INFO: Using numpy backend


In [2]:
class DataPreparation:
    def __init__(self, directory, conditions, epochs):
        self.conditions=conditions
        self.epochs=epochs
        self.directory=directory
    
    def loadConcat(self, subject):
        train_f=self.directory.format(subject, 'train')
        test_f=self.directory.format(subject, 'test')

        train = u.loaddat(train_f)
        tr_df=train.to_data_frame()

        test = u.loaddat(test_f)
        ts_df=test.to_data_frame()
        ts_df['epoch']+=200

        return pd.concat([tr_df, ts_df])

    def convertToSPD(self, df, normalize=True):
        SPD = []
        labels = [] 
        for i in range(self.epochs):
            df_slice=df.loc[df['epoch']==i, :]
            matrix=df_slice.iloc[:, 3:]
            
            if normalize:
                matrix=(matrix-matrix.mean())/matrix.std()
            
            label=df_slice['condition'].iloc[0]

            for j in range(len(self.conditions)):
                if label==self.conditions[j]:
                    label=j #encoding of conditions to integers
                    break

            covmat=matrix.cov().to_numpy()
            SPD.append(covmat)
            labels.append(label) 
        return [SPD, labels]

    def generateSPDDataset(self, r=[0,53], normalize=True):
        SPDDataset=[]
        for i in range(r[0]+1,r[1]+2):
            df=self.loadConcat(i)
            SPDDataset.append(self.convertToSPD(df,normalize))

        return SPDDataset
    
    def generateTrainTest(self, SPDDataset, testindex):
        
        d=SPDDataset.copy()
        
        [testX, testY] = d.pop(testindex)
        trainX=[]
        trainY=[]
        
        for i in range(len(SPDDataset)):
            [SPD, labels] = SPDDataset[i]
            
            trainX += SPD
            trainY += labels
        
        return trainX, trainY, testX, testY
        

In [3]:
subjects = 54
epochs = 400 #total in train and test files
points = 512
channels = 62
directory = 'datasets/54subjects/Subject{}_{}.pickle'
conditions=['left','right']

a=DataPreparation(directory, conditions, epochs)

manifold = spd.SPDMatrices(n=channels)
metric = spd.SPDMetricLogEuclidean(n=channels)

pipeline = make_pipeline(
    ToTangentSpace(metric), 
    LogisticRegression(C=3, max_iter=2000)
)

subjectRange=[0,53] #for which subjects generate SPD dataset
testingRange=[0,53] #which subjects use for testing. index is relative to subjectRange

load=True
dump=False
filename='datasets/SPDDataset1.pickle'

In [4]:
if load:
    infile=open(filename,'rb')
    SPDData=pickle.load(infile)
else:
    SPDData = a.generateSPDDataset(subjectRange)
    if dump:
        outfile = open(filename,'w+b')
        pickle.dump(SPDData, outfile)
        outfile.close()

In [5]:
def subjectIndependent(a, pipeline, SPDData, testingRange):
    results=[]
    for i in range(testingRange[0], testingRange[1]+1):
        trainX, trainY, testX, testY = a.generateTrainTest(SPDData, i)
        pipeline.fit(trainX, trainY)
        result=pipeline.score(testX, testY)
        results.append(result)
        print(i)
        print(result)
        print('\n')
        
    return results    

In [6]:
results = subjectIndependent(a, pipeline, SPDData, testingRange)

0
0.635


1
0.6075


2
0.69


3
0.6175


4
0.6525


5
0.7475


6
0.63


7
0.6575


8
0.625


9
0.61


10
0.5525


11
0.555


12
0.5275


13
0.5825


14
0.64


15
0.605


16
0.59


17
0.625


18
0.6425


19
0.625


20
0.7375


21
0.605


22
0.6125


23
0.5225


24
0.745


25
0.635


26
0.62


27
0.795


28
0.905


29
0.6525


30
0.6125


31
0.7075


32
0.77


33
0.5525


34
0.7225


35
0.8975


36
0.65


37
0.6075


38
0.6825


39
0.64


40
0.5575


41
0.6025


42
0.6575


43
0.705


44
0.715


45
0.6425


46
0.6975


47
0.65


48
0.6225


49
0.5375


50
0.6375


51
0.63


52
0.5625


53
0.5625




In [8]:
results_dump='datasets/results.pickle'
outfile = open(results_dump,'w+b')
pickle.dump(results, outfile)
outfile.close()