# Project Code Draft ##

## Import Statements ##

In [23]:
import scipy.io as sio
import mne
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
sns.set()

## Data Pre-processing Methods ##

In [2]:
#function-type alias for use in type hints
function = type(lambda x: x)
#Data Dimensionality Reduction Methods
def sliding_window(X:np.ndarray, window_length:int, step_size:int, step:int):
    w_start_idx = step_size*step
    w_end_idx = min( [X.shape[1]-1, w_start_idx + window_length] )
    return X[:, w_start_idx:w_end_idx, :]

def calc_lrp(X:np.ndarray): 
    X_reduced = X[23,:,:] - X[7,:,:]
    return X_reduced.T

def sliding_lrp(X:np.ndarray, window_length:int, step_size:int, step:int):
    return calc_lrp(sliding_window(X, window_length, step_size, step))

def csp_transform(X:np.ndarray, y:np.ndarray, n_components:int):
    dim = X.shape
    X_shaped = X.reshape(dim[2], dim[0], dim[1])
    csp = mne.decoding.CSP(n_components)
    csp.fit(X_shaped, y)
    return csp.transform(X_shaped)
    
def sliding_csp_transform(X:np.ndarray,
                          y:np.ndarray, 
                          n_components:int,
                          window_length:int, 
                          step_size:int,
                          step:int):
    X_window = sliding_window(X, window_length, step_size, step)
    return csp_transform(X_window, y, n_components)
    

def Format(X:np.ndarray, 
           y:np.ndarray,  
           reduction_method:function, 
           reduction_method_args=None,
           trials_in_A:int=100):
    if type(reduction_method_args) == dict:
        X_reduced = reduction_method(X, **reduction_method_args)
    elif type(reduction_method_args) == list:
        X_reduced = reduction_method(X, *reduction_method_args)
    else:
        X_reduced = reduction_method(X)
    condition_A_data_dict, condition_B_data_dict = condition_split(X_reduced, y, trials_in_A)
    return condition_A_data_dict, condition_B_data_dict

def condition_split(X:np.ndarray, y:np.ndarray, trials_in_A:int):
    AX, BX = X[:trials_in_A], X[trials_in_A:]
    Ay, By = y[:trials_in_A], y[trials_in_A:]
    A_dict = {'X': AX, 'y': Ay}
    B_dict = {'X': BX, 'y': By}
    return A_dict, B_dict


## Hyperparameter Tuning ##

#### Read in Hyperparameter Tuning Data ####

In [3]:
data = sio.loadmat('../Data/data_cube_subject1.mat')
channel_labels = sio.loadmat('../Data/channel_label.mat')
data_cube = data['data_cube']
data_labels = data['event_label'].ravel()

#### Hyperparameter Tuning Constants ####

In [4]:
#Hyperparameter dictionaries for use in grid search
ada_params = {
    'n_estimators': [50, 100, 200, 500],
    'learning_rate': [x/20 for x in range(1,21)],
    'algorithm': ['SAMME', 'SAMME.R']
}

rf_params = {
    'n_estimators': [50, 100, 200, 500],
    'criterion': ['gini', 'entropy'],
    'max_depth': (None, 10, 50, 100, 500),
    'min_samples_split': [2, 4, 5],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ('auto', 'sqrt', 'log2', None),
    'min_impurity_split': (None, 0.5)
}

#Dictionary of classifiers and hyperparemeter selections
clfs = {
    'RandomForestClassifier': RandomForestClassifier(),
    'AdaBoostClassifier': AdaBoostClassifier()
}

params = {
    'RandomForestClassifier': rf_params,
    'AdaBoostClassifier': ada_params
}

#data dimensionality reduction methods to opitmize for
    #note that csp transform will use n_components=4 for model hyperparameter tuning
    # n_components will be finalized in another round of cross-validation
    # runtime to validate all combinations of n_components and model hyperparameters too long
    # for our current computational capacity so this will have to do
reduction_methods = {
    'lsp': calc_lrp,
    'csp': lambda X: csp_transform(X, data_labels, 4)
}

In [5]:

def tune_multiple_models(data_matrix:np.ndarray, 
                         data_labels:np.ndarray, 
                         trials_in_A:int=100, 
                         clf_list:list=None, 
                         reduction_method_list:list=None):
    if clf_list==None:
        clf_list = [key for key in clfs]
    if reduction_method_list==None:
        reduction_method_list = [key for key in reduction_methods]
    tuned_models = []
    for clf_ident in clf_list:
        for reduction_method_ident in reduction_method_list:
            tuned_models.append(tune_model(data_matrix, 
                                           data_labels, 
                                           trials_in_A,
                                           clf_ident, 
                                           reduction_method_ident) )
    return tuned_models

def tune_model(data_matrix:np.ndarray, 
               data_labels:np.ndarray,
               trials_in_A:int,
               clf_ident:str, 
               reduction_method_ident:str):
    #obtain classifier, hyperparameters from corresponding dictionaries
    clf = clfs[clf_ident]
    hyperparam_dict = params[clf_ident]
    reduction_method = reduction_methods[reduction_method_ident]
    #apply dimensionality reduction to data and split into different experiment classes
    A_dict, B_dict = Format(data_matrix, data_labels, reduction_method, trials_in_A=trials_in_A)
    #tune model
    #For model hyperparameter tuning, use both A and B
    #TODO: When we have more data, it might be better to tune separate models for A and B classes
        #At the moment there is not enough data to tune each class independently
    X = np.vstack( [A_dict['X'], B_dict['X']] )
    y = np.hstack( [A_dict['y'], B_dict['y']] )
    clf_mod = GridSearchCV(clf, hyperparam_dict, n_jobs=7)
    clf_mod.fit(X,y)
    return {
        'clf_type': clf_ident,
        'reduction_method': reduction_method_ident,
        'clf': clf_mod
    }




In [6]:
#Tune hyperparameters. WARNING: This code block takes approximatley 5 hours to complete on 7 cores
%time trained_clfs = tune_multiple_models(data_cube, data_labels)
with open('./bin/clfs.p','wb') as clf_pickle_file:
    pickle.dump(trained_clfs, clf_pickle_file)

Computing rank from data with rank=None
    Using tolerance 1.5e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)
    Estimated rank (mag): 64
    MAG: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 1.6e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)
    Estimated rank (mag): 64
    MAG: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 1.5e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)
    Estimated rank (mag): 64
    MAG: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 1.6e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)


PicklingError: Can't pickle <function <lambda> at 0x7f381c1dc9d0>: attribute lookup <lambda> on __main__ failed

In [22]:
trained_clfs[3]

{'clf_type': 'AdaBoostClassifier',
 'reduction_method': 'csp',
 'clf': GridSearchCV(estimator=AdaBoostClassifier(), n_jobs=7,
              param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                          'learning_rate': [0.05, 0.1, 0.15, 0.2, 0.25, 0.3,
                                            0.35, 0.4, 0.45, 0.5, 0.55, 0.6,
                                            0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
                                            0.95, 1.0],
                          'n_estimators': [50, 100, 200, 500]})}

In [18]:
trained_clfs[3]['clf'].best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.75, 'n_estimators': 100}

#### Tune n_components for CSP Data Reduction ###

In [None]:
csp_tuned_models = [ x for x in trained_clfs if x['reduction_method'] == 'csp']
n_component_candidates = [x for x in range(1,10)]

def tune_csp_args(X:np.ndarray=data_cube, 
                  y:np.ndarray=data_labels, 
                  csp_models:list=csp_tuned_models, 
                  n_component_list:list=n_component_candidates):
    for clf_dict in csp_tuned_models:
        best_n = None
        best_acc = -1
        for n_components in n_component_candidates:
            acc_list = []
            cv = KFold(shuffle=True)
            csp_args = {'y': data_labels, 'n_components': n_components}
            A_dict, B_dict = Format(X, y, csp_transform, csp_args)
            X_formatted = np.vstack( [A_dict['X'], B_dict['X']] )
            y_formatted = np.hstack( [A_dict['y'], B_dict['y']] )        
            for train_idx, test_idx in cv.split(X):
                X_tr, y_tr = X_formatted[train_idx], y_formatted[train_idx]
                X_ts, y_ts = X_formatted[test_idx], y_formatted[test_idx]
                mod = clf_dict['clf']
                mod.fit(X_tr, y_tr)
                y_pred = mod.predict(X_ts)
                trial_acc = accuracy_score(y_ts, y_pred)
                acc_list.append(trial_acc)
            ave_acc = sum(acc_list)/len(acc_list)
            if ave_acc > best_acc:
                best_acc = ave_acc
                best_n = n_components
        clf_dict['csp_n_components'] = best_n
    return clf_dict

In [None]:
%time best_csp_models = tune_csp_args()
best_csp_models

Computing rank from data with rank=None
    Using tolerance 1.5e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)
    Estimated rank (mag): 64
    MAG: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating covariance using EMPIRICAL
Done.
Computing rank from data with rank=None
    Using tolerance 1.6e+02 (2.2e-16 eps * 64 dim * 1.1e+16  max singular value)
    Estimated rank (mag): 64
    MAG: rank 64 computed from 64 data channels with 0 projectors
Reducing data rank from 64 -> 64
Estimating covariance using EMPIRICAL
Done.




## Model Evaluation And Selection ##