To use it, you need to define 2 paths:
- project_path (path to the folder named project_eval in git) **in cell 3**
- data_path (path to the data on your computer) **in cell 12**

# Loading EEG eval

In [1]:
from importlib.util import spec_from_file_location, module_from_spec

In [2]:
import os

In [3]:
project_path = r'C:\Users\Antoine CHEHIRE\IdeaProjects\IFT6269_Project'

In [4]:
eeg_eval_path = os.path.join(project_path, 'Evaluation.py')
predictor_path = os.path.join(project_path, 'Predictors.py')

In [5]:
# Load Evaluation.py as a module directly from its file path, so the project
# folder does not need to be importable (on sys.path).
spec = spec_from_file_location('EEG eval', eeg_eval_path)
eeg_eval = module_from_spec(spec)
# Actually runs the file's top-level code and populates `eeg_eval`.
spec.loader.exec_module(eeg_eval)

# Same procedure for Predictors.py; `spec` is deliberately reused.
spec = spec_from_file_location('Predictors', predictor_path)
preds = module_from_spec(spec)
spec.loader.exec_module(preds)

# Loading the filter

In [6]:
import numpy as np

In [18]:
class KalmanFilter:
    """Linear Kalman filter applied row by row to a 2-D data array.

    The state has one component per data column. ``generate_features``
    runs one predict/update cycle per row and returns the filtered state
    after each row as the new feature matrix.
    """

    # Display name of the algorithm (presumably used by the evaluation code
    # to label results -- confirm against Evaluation.py).
    name = "Kalman Filter 2"

    # Value given to every state component by init_states().
    starting_value = 0.001
    # Value placed on the diagonal of the initial state covariance.
    default_state_diagonal = 0.05

    def __init__(self, verbose=1):
        """Create an un-initialised filter.

        Args:
            verbose: truthy to print progress in ``generate_features``.
        """
        self.features = None
        self.state_estimate = None
        self.state_cov = None
        self.transition_matrix = None
        self.noise_cov = None
        self.obs_model = None
        self.obs_cov = None
        self.verbose = verbose

    def init_states(self, n_features, sfreq_times_noise_sensitivity=256, trans_param=0.000001):
        """(Re)initialise every matrix of the filter for ``n_features`` states.

        Args:
            n_features: dimension of the state (and of each observation).
            sfreq_times_noise_sensitivity: divisor applied to the identity
                to build the process-noise covariance.
            trans_param: off-diagonal value of the transition matrix (the
                diagonal is 1).
        """
        self.features = n_features

        # Column vector of shape (n_features, 1).
        self.state_estimate = (np.ones(self.features) * KalmanFilter.starting_value).reshape(self.features, 1)
        self.state_cov = np.diag([KalmanFilter.default_state_diagonal] * self.features)

        # Identity on the diagonal, trans_param everywhere else.
        identity = np.eye(self.features)
        off_diagonal = np.ones(shape=(self.features, self.features)) - identity
        self.transition_matrix = identity + off_diagonal * trans_param

        self.noise_cov = np.eye(self.features) / sfreq_times_noise_sensitivity

        # The state is observed directly, with unit observation noise.
        self.obs_model = np.eye(self.features)
        self.obs_cov = np.eye(self.features)

    def predict(self):
        """Return the a-priori ``(state_estimate, state_cov)``.

        The filter's own attributes are not modified.
        """
        state_estimate = self.transition_matrix @ self.state_estimate
        state_cov = self.transition_matrix @ self.state_cov @ self.transition_matrix.T + self.noise_cov
        return state_estimate, state_cov

    def update(self, data):
        """Correct the current state with one observation.

        Args:
            data: observation as a column vector of shape (n_features, 1).

        Returns:
            ``(updated_state, updated_state_cov, innovation_prefit,
            innovation_postfit)``. The filter's own attributes are not
            modified.
        """
        innovation_prefit = data - self.obs_model @ self.state_estimate
        innovation_covariance = self.obs_cov + self.obs_model @ self.state_cov @ self.obs_model.T

        Kalman_gain = self.state_cov @ self.obs_model.T @ np.linalg.inv(innovation_covariance)

        updated_state = self.state_estimate + Kalman_gain @ innovation_prefit
        state_cov_helper = np.identity(Kalman_gain.shape[0]) - Kalman_gain @ self.obs_model

        # Joseph form: (I - KH) P (I - KH)^T + K R K^T.
        updated_state_cov = (
            state_cov_helper @ self.state_cov @ state_cov_helper.T
            + Kalman_gain @ self.obs_cov @ Kalman_gain.T
        )
        innovation_postfit = data - self.obs_model @ updated_state

        return updated_state, updated_state_cov, innovation_prefit, innovation_postfit

    def generate_features(self, data):
        """Filter ``data`` row by row and return the filtered estimates.

        The filter is re-initialised on every call and its state is seeded
        with the first row of ``data``.

        Args:
            data: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array with the same shape as ``data`` holding the state estimate
            after each row. The dtype is that of ``data`` when it is already
            floating/complex, otherwise float64 so that estimates computed
            from integer data are not truncated.
        """
        data = np.asarray(data)
        N_iter, n_features = data.shape

        self.init_states(n_features)

        # np.zeros_like(data) would inherit an integer dtype and silently
        # truncate the (float) estimates on assignment.
        out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64
        estimates = np.zeros(data.shape, dtype=out_dtype)
        if N_iter == 0:
            # Nothing to filter; avoids indexing data[0] on empty input.
            return estimates

        self.state_estimate = data[0].reshape(self.features, 1)

        # Report roughly ten times per run; at least 1 so that short inputs
        # (fewer than 10 rows) do not trigger a modulo by zero.
        progress_step = max(1, N_iter // 10)
        for i in range(N_iter):
            if self.verbose and i % progress_step == 0:
                print(i, "/", N_iter)
            self.state_estimate, self.state_cov = self.predict()
            self.state_estimate, self.state_cov, prefit, postfit = self.update(
                data[i].reshape(self.features, 1)
            )
            estimates[i] = self.state_estimate.reshape(self.features)
        return estimates

In [19]:
algorithm = KalmanFilter(0)

# Loading predictor

In [9]:
pred = preds.LogReg()

No need to optimize the hyper-parameters yet: it takes too long and barely improves the results. So let's impose the sklearn default.

In [10]:
pred.hyper_parameters_grid = {'C': [1]}

# Evaluating

In [11]:
eeg = eeg_eval.EEGEval()

In [12]:
data_path = r'D:\Scolaire\UdeM\IFT_6269\PROJECT\data\kaggle_small'

In [13]:
from time import time

In [20]:
t0 = time()
eeg.evaluate(data_path, algorithm, pred, cv_fold=1, sub_select=1, verbose=2)
time() - t0

Generating features...
Scoring 1 out of 6...
Best params obtained: {'C': 1}
Scoring 2 out of 6...
Best params obtained: {'C': 1}
Scoring 3 out of 6...
Best params obtained: {'C': 1}
Scoring 4 out of 6...
Best params obtained: {'C': 1}
Scoring 5 out of 6...
Best params obtained: {'C': 1}
Scoring 6 out of 6...
Best params obtained: {'C': 1}


730.6448423862457

In [14]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Raw data - LogReg,10.62,53.34,54.06,44.41,48.76,55.5,56.36,48.77,52.29,57.73,...,51.28,52.52,54.8,54.01,64.64,58.85,52.46,52.18,58.97,55.37


In [31]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Kalman Filter - LogReg,328.67,53.36,56.02,31.25,40.12,59.4,60.89,52.53,56.4,56.44,...,53.68,54.38,55.37,54.57,64.05,58.93,53.1,53.02,54.43,53.72


In [21]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Kalman Filter 2 - LogReg,652.62,54.0,56.51,34.76,43.05,59.27,60.77,52.33,56.24,57.31,...,52.94,54.06,54.86,54.15,63.4,58.41,52.28,52.18,54.67,53.39


The algo time is not zero, since we need to load the data from the data path into memory, which takes a few minutes.

As we can see, the features can't be used directly: the classifier works only slightly better than a person making a random guess.

# Saving it

Saving the result is crucial: it lets us compare different models without re-running the whole pipeline, which takes ages.

In [32]:
path_to_save = os.path.join(project_path, 'Results')

In [33]:
file_name = eeg.result.index[0]

In [34]:
eeg.save_json(os.path.join(path_to_save, file_name+'.json'))

# Random forest 

In [19]:
pred = preds.RandForest()

No need to optimize the hyper-parameters yet: it takes too long and barely improves the results. So let's impose a single fixed value (see the next cell) rather than searching a grid.

In [21]:
pred.hyper_parameters_grid = {'n_estimators': [200]}

In [22]:
t0 = time()
eeg.evaluate(data_path, algorithm, pred, cv_fold=1, sub_select=1, verbose=2)
time() - t0

Generating features...
Scoring 1 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 2 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 3 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 4 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 5 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 6 out of 6...
Best params obtained: {'n_estimators': 200}


370.3781638145447

In [23]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Raw data - RandForest,10.64,50.52,50.87,30.46,38.1,54.33,57.54,33.03,41.97,55.8,...,45.86,49.86,52.38,52.92,43.21,47.57,52.11,52.84,39.33,45.1


The scores still remain barely above what a random classifier would achieve.

In [24]:
file_name = eeg.result.index[0]

In [25]:
eeg.save_json(os.path.join(path_to_save, file_name+'.json'))

# Finer control (if necessary)

As the labels are not balanced, you may want to balance them manually to help the classifier.

In [None]:
features = eeg.generate_features(data_path, algorithm)

You may modify the features, but please note that the score function needs y_train and y_test to be vectors.

Thus you still need to do as in the evaluation protocol:

In [None]:
# Keep the full label matrices around so another task can be selected later.
y_train = features['y_train']
y_test = features['y_test']

# The score function expects label vectors, so pick a single task (column).
# Let's say you want to see the scores for the 1st task:
j = 0
features['y_train'] = y_train[:, j]
features['y_test'] = y_test[:, j]

In [None]:
scores, best_params = eeg.score_features(features, pred, cv_fold=1, verbose=2)