To use it, you need to define 2 paths:
- project_path (path to the folder named project_eval in git), **in the cell that assigns `project_path`**
- data_path (path to the data on your computer), **in the cell that assigns `data_path`**

# Loading EEG eval

In [1]:
from importlib.util import spec_from_file_location, module_from_spec

In [2]:
import os

In [3]:
project_path = r'C:\Users\Antoine CHEHIRE\IdeaProjects\IFT6269_Project'

In [4]:
eeg_eval_path = os.path.join(project_path, 'Evaluation.py')
predictor_path = os.path.join(project_path, 'Predictors.py')

In [5]:
# Load Evaluation.py as a module straight from its file path
# (module name 'EEG eval', bound here to `eeg_eval`).
spec = spec_from_file_location('EEG eval', eeg_eval_path)
eeg_eval = module_from_spec(spec)
# exec_module actually runs the file's top-level code inside the new module object.
spec.loader.exec_module(eeg_eval)

# Same procedure for Predictors.py, bound to `preds`; `spec` is rebound here.
spec = spec_from_file_location('Predictors', predictor_path)
preds = module_from_spec(spec)
spec.loader.exec_module(preds)

# Loading the filter

In [6]:
import numpy as np

In [23]:
class KalmanFilter:
    """
    Linear Kalman filter used to turn a multi-sensor time series into features.

    ``predict`` and ``update`` are the two filter steps, exposed as static
    helpers. ``generate_features`` runs them over a whole time series with
    fixed, hard-coded model matrices.
    """

    name = "Kalman Filter"

    @staticmethod
    def predict(state_estimate, state_cov, transition_matrix, noise_cov, control_mat, control_vec):
        """
        Prediction step: x <- F x + B u and P <- F P F^T + Q.

        :param np.ndarray state_estimate: state x; a 1-D array is reshaped to a column
        :param np.ndarray state_cov: state covariance P
        :param np.ndarray transition_matrix: state transition matrix F
        :param np.ndarray noise_cov: process noise covariance Q
        :param np.ndarray control_mat: control matrix B
        :param np.ndarray control_vec: control vector u
        :return: tuple (predicted state, predicted state covariance)
        """
        # Compare by value: `is` on an int literal tests object identity, which
        # is implementation-dependent and triggers a SyntaxWarning.
        if state_estimate.ndim == 1:
            state_estimate = state_estimate.reshape(-1, 1)
        predicted_state = transition_matrix @ state_estimate + control_mat @ control_vec
        predicted_cov = transition_matrix @ (state_cov @ transition_matrix.T) + noise_cov
        return predicted_state, predicted_cov

    @staticmethod
    def update(state_estimate, state_covariance, observation_data, observation_covariance, observation_noise):
        """
        Measurement update (correction) step.

        Despite its name, ``observation_covariance`` is used as the observation
        *matrix* H_k: it multiplies the state to project it into measurement
        space. The parameter name is kept so existing callers keep working.

        :param np.ndarray state_estimate: predicted state x_k
        :param np.ndarray state_covariance: predicted state covariance P_k
        :param np.ndarray observation_data: measurement z_k
        :param np.ndarray observation_covariance: observation matrix H_k (see above)
        :param np.ndarray observation_noise: measurement noise covariance R_k
        :return: tuple (updated state, updated covariance,
                 pre-fit innovation, post-fit innovation)
        """
        obs_matrix = observation_covariance

        innovation_prefit = observation_data - obs_matrix @ state_estimate
        innovation_covariance = observation_noise + obs_matrix @ (state_covariance @ obs_matrix.T)

        kalman_gain = state_covariance @ (obs_matrix.T @ np.linalg.inv(innovation_covariance))
        updated_state_estimate = state_estimate + kalman_gain @ innovation_prefit

        # Joseph form of the covariance update: (I - K H) P (I - K H)^T + K R K^T.
        joseph_factor = np.identity(kalman_gain.shape[0]) - kalman_gain @ obs_matrix
        updated_state_covariance = (
            joseph_factor @ (state_covariance @ joseph_factor.T)
            + kalman_gain @ (observation_noise @ kalman_gain.T)
        )

        innovation_postfit = observation_data - obs_matrix @ updated_state_estimate
        return updated_state_estimate, updated_state_covariance, innovation_prefit, innovation_postfit

    def generate_features(self, time_series):
        """
        generate features from a time_series

        Row ``i`` of the result is the filter state after a predict/update
        cycle whose measurement is sample ``i - 1`` (for ``i == 0`` the
        measurement is the initial state itself). The last sample is
        therefore never fed to the filter.
        NOTE(review): this one-sample lag is kept as found - confirm it is intended.

        :param np.ndarray time_series: nb_of_observations x nb_of_sensors matrix
        :return np.ndarray feature_matrix: matrix of same shape
        """
        n_iter, n_features = time_series.shape

        # Initialization of state matrices
        state_estimate = np.full(n_features, 0.001)
        state_cov = np.eye(n_features) * 0.005

        # Identity on the diagonal, 1e-5 coupling between every pair of sensors.
        transition_matrix = np.full((n_features, n_features), 0.00001)
        np.fill_diagonal(transition_matrix, 1.0)

        noise_cov = np.eye(n_features) / 500

        # The control input is all zeros, so it contributes nothing to the prediction.
        control_model = np.eye(n_features)
        control_vec = np.zeros((n_features, 1))

        # Measurement matrices: every state component is observed directly,
        # with identity measurement noise.
        observed_value = state_estimate.reshape(n_features, 1)
        observation_model = np.eye(n_features)
        measurement_noise = np.eye(n_features)

        feature_matrix = np.empty((n_iter, n_features))

        # Applying the Kalman Filter
        for i in range(n_iter):
            state_estimate, state_cov = self.predict(
                state_estimate, state_cov, transition_matrix, noise_cov, control_model, control_vec
            )
            state_estimate, state_cov, _, _ = self.update(
                state_estimate, state_cov, observed_value, observation_model, measurement_noise
            )
            feature_matrix[i] = state_estimate[:, 0]

            # The current sample only becomes the measurement of the NEXT iteration.
            observed_value = time_series[i].reshape(n_features, 1)

        return feature_matrix

In [24]:
algorithm = KalmanFilter()

# Loading predictor

In [25]:
pred = preds.LogReg()

No need to optimize the hyper-parameters yet: it takes too long and doesn't improve the results much. So let's fix C to 1, which is the scikit-learn default.

In [26]:
pred.hyper_parameters_grid = {'C': [1]}

# Evaluating

In [27]:
eeg = eeg_eval.EEGEval()

In [28]:
data_path = r'D:\Scolaire\UdeM\IFT_6269\PROJECT\data\kaggle_small'

In [29]:
from time import time

In [30]:
t0 = time()
eeg.evaluate(data_path, algorithm, pred, cv_fold=1, sub_select=1, verbose=2)
time() - t0

Generating features...
Scoring 1 out of 6...
Best params obtained: {'C': 1}
Scoring 2 out of 6...
Best params obtained: {'C': 1}
Scoring 3 out of 6...
Best params obtained: {'C': 1}
Scoring 4 out of 6...
Best params obtained: {'C': 1}
Scoring 5 out of 6...
Best params obtained: {'C': 1}
Scoring 6 out of 6...
Best params obtained: {'C': 1}


353.08979320526123

In [14]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Raw data - LogReg,10.62,53.34,54.06,44.41,48.76,55.5,56.36,48.77,52.29,57.73,...,51.28,52.52,54.8,54.01,64.64,58.85,52.46,52.18,58.97,55.37


In [31]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Kalman Filter - LogReg,328.67,53.36,56.02,31.25,40.12,59.4,60.89,52.53,56.4,56.44,...,53.68,54.38,55.37,54.57,64.05,58.93,53.1,53.02,54.43,53.72


Algo time is not zero since we need to load the data from the data path into memory, which takes a few minutes.

As we can see, the features can't be used directly: the classifier performs only slightly better than a person making a random guess.

# Saving it

Saving the result is crucial: it makes it easier to compare different models without re-running the whole pipeline, which takes ages.

In [32]:
path_to_save = os.path.join(project_path, 'Results')

In [33]:
file_name = eeg.result.index[0]

In [34]:
eeg.save_json(os.path.join(path_to_save, file_name+'.json'))

# Random forest 

In [19]:
pred = preds.RandForest()

No need to optimize the hyper-parameters yet: it takes too long and doesn't improve the results much. So let's impose a single fixed value, n_estimators=200 (note that this is not the scikit-learn default).

In [21]:
pred.hyper_parameters_grid = {'n_estimators': [200]}

In [22]:
t0 = time()
eeg.evaluate(data_path, algorithm, pred, cv_fold=1, sub_select=1, verbose=2)
time() - t0

Generating features...
Scoring 1 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 2 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 3 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 4 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 5 out of 6...
Best params obtained: {'n_estimators': 200}
Scoring 6 out of 6...
Best params obtained: {'n_estimators': 200}


370.3781638145447

In [23]:
eeg.result

Unnamed: 0,Algo time,Accuracy 0,Precision 0,Recall 0,F1-score 0,Accuracy 1,Precision 1,Recall 1,F1-score 1,Accuracy 2,...,Recall 3,F1-score 3,Accuracy 4,Precision 4,Recall 4,F1-score 4,Accuracy 5,Precision 5,Recall 5,F1-score 5
Raw data - RandForest,10.64,50.52,50.87,30.46,38.1,54.33,57.54,33.03,41.97,55.8,...,45.86,49.86,52.38,52.92,43.21,47.57,52.11,52.84,39.33,45.1


The scores still remain barely above what a random classifier would achieve.

In [24]:
file_name = eeg.result.index[0]

In [25]:
eeg.save_json(os.path.join(path_to_save, file_name+'.json'))

# Finer control (if necessary)

As the labels are not balanced, you may want to balance them manually to help the classifier

In [None]:
features = eeg.generate_features(data_path, algorithm)

You may modify the features. Though please note that the score function needs y_train and y_test as a vector.

Thus you still need to do as in the evaluation protocol:

In [None]:
# Keep references to the full label arrays before overwriting the dict entries.
y_train = features['y_train']
y_test = features['y_test']

# Let's say you want to see the scores for the 1st task:
j = 0
# The score function needs label vectors, so keep only column j.
# (Indexing with j rather than a literal 0 means changing j above is enough
# to score another task.)
features['y_train'] = y_train[:, j]
features['y_test'] = y_test[:, j]

In [None]:
scores, best_params = eeg.score_features(features, pred, cv_fold=1, verbose=2)