In [1]:
# import local libraries using host specific paths
import socket, sys
hostname = socket.gethostname().split('.')[0]
# set local path settings based on computer host
if hostname == 'PFC':
    pylibrary = '/Users/connylin/Dropbox/Code/proj'
elif hostname == 'Angular-Gyrus':
    pylibrary = '/Users/connylin/Code/proj'
else:
    assert False, 'host computer not regonized'
# load local libraries
if pylibrary not in sys.path:
    sys.path.insert(1, pylibrary)
# import brainstation_capstone
# import other standard paths and local variables
from brainstation_capstone.system import host_paths
localpaths = host_paths.get(hostname)
datapath = localpaths['datapath']

# import standard libraries
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


getting host computer specific paths


## no tuning

no feature reduction

In [2]:
# get machine learning input data
from brainstation_capstone.etl.datatransform import Nutcracker
X_train, X_test, y_train, y_test = Nutcracker(localpaths['datapath']).mldata(feature_reduction='None')
print(f'X has {X_train.shape[1]} features')
# run untuned model
from sklearn.linear_model import RidgeClassifier
clf = RidgeClassifier().fit(X_train, y_train)
testscore_fullfeature = clf.score(X_test, y_test)
trainscore_fullfeature = clf.score(X_train, y_train)
print(f'test score: {testscore_fullfeature}')
print(f'train score: {trainscore_fullfeature}')

None
X has 21 features
test score: 0.8664475
train score: 0.866919375


reduced to 18 features (remove time, persistence, and orient)

In [3]:
# get machine learning input data
from brainstation_capstone.etl.datatransform import Nutcracker
X_train, X_test, y_train, y_test = Nutcracker(localpaths['datapath']).mldata(feature_reduction='standard')
print(f'X has {X_train.shape[1]} features')
# run untuned model
from sklearn.linear_model import RidgeClassifier
clf = RidgeClassifier().fit(X_train, y_train)
testscore_18feature = clf.score(X_test, y_test)
trainscore_18feature = clf.score(X_train, y_train)
print(f'test score: {testscore_fullfeature}')
print(f'train score: {trainscore_fullfeature}')

standard
X has 18 features
test score: 0.8664475
train score: 0.866919375


In [5]:
# comparison of full vs 18 features
train_feature_reduction_acc_gain = trainscore_18feature - trainscore_fullfeature
test_feature_reduction_acc_gain = testscore_18feature - testscore_fullfeature
print(f'reduce to 18 feature accuracy gain (train): {train_feature_reduction_acc_gain*100:.3f}%')
print(f'reduce to 18 feature accuracy gain (test): {test_feature_reduction_acc_gain*100:.3f}%')

reduce to 18 feature accuracy gain (train): -0.216%
reduce to 18 feature accuracy gain (test): -0.218%


reducing 18 features loses accuracy

## Run rough hyperparameter tuning

In [10]:
# example of grid searching key hyperparametres for ridge classifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeClassifier

# import data
X_train, _, y_train, _ = Nutcracker(datapath).mldata(feature_reduction='None')

# define models and parameters
model = RidgeClassifier()
alpha = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
cv = 5 #cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# define grid search
grid = dict(alpha=alpha)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, 
                           cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X_train, y_train)

# summarize results
from brainstation_capstone.ml import GridSearchCVHelper
mean, stdev, param = GridSearchCVHelper.print_summary(grid_result)
print('done')

None
Best: 0.864768 using {'alpha': 0.7}
0.864767 (0.000452) with: {'alpha': 0.1}
0.864767 (0.000452) with: {'alpha': 0.2}
0.864767 (0.000452) with: {'alpha': 0.3}
0.864767 (0.000452) with: {'alpha': 0.4}
0.864767 (0.000452) with: {'alpha': 0.5}
0.864767 (0.000452) with: {'alpha': 0.6}
0.864768 (0.000451) with: {'alpha': 0.7}
0.864768 (0.000451) with: {'alpha': 0.8}
0.864768 (0.000451) with: {'alpha': 0.9}
0.864768 (0.000451) with: {'alpha': 1.0}
done


not much difference with different alpha. try different solve and set random state to 318

In [3]:
# example of grid searching key hyperparametres for ridge classifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeClassifier

# import data
from brainstation_capstone.etl.datatransform import Nutcracker
X_train, _, y_train, _ = Nutcracker(datapath).mldata(feature_reduction='None')

# define models and parameters
model = RidgeClassifier()
alpha = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
solver = ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
random_state = [318]

grid = dict(alpha=alpha, solver=solver, random_state=random_state)

# grid search setting
cv = 5 #cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, 
                           cv=cv, scoring='accuracy',error_score=0)
# run gridsearch
grid_result = grid_search.fit(X_train, y_train)

# summarize results
from brainstation_capstone.ml import GridSearchCVHelper
mean, stdev, param = GridSearchCVHelper.print_summary(grid_result)
print('done')

None
None
Best: 0.866908 using {'alpha': 0.1, 'random_state': 318, 'solver': 'sag'}
0.866904 (0.000446) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'svd'}
0.866904 (0.000446) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'cholesky'}
0.866898 (0.000456) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'lsqr'}
0.866902 (0.000439) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'sparse_cg'}
0.866908 (0.000451) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'sag'}
0.866906 (0.000441) with: {'alpha': 0.1, 'random_state': 318, 'solver': 'saga'}
0.866904 (0.000446) with: {'alpha': 0.2, 'random_state': 318, 'solver': 'svd'}
0.866904 (0.000446) with: {'alpha': 0.2, 'random_state': 318, 'solver': 'cholesky'}
0.866898 (0.000457) with: {'alpha': 0.2, 'random_state': 318, 'solver': 'lsqr'}
0.866903 (0.000440) with: {'alpha': 0.2, 'random_state': 318, 'solver': 'sparse_cg'}
0.866908 (0.000451) with: {'alpha': 0.2, 'random_state': 318, 'solver': 'sag'}
0.866905 (0.000442) wi