In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.inf)

# Sklearn model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

# Sklearn prebuilt models
from sklearn.svm import SVC

# Sklearn metrics
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report


In [2]:
train = pd.read_csv('../Reformatted EMG Data/training_set.csv')
val = pd.read_csv('../Reformatted EMG Data/validation_set.csv')
test = pd.read_csv('../Reformatted EMG Data/test_set.csv')

In [3]:
print('Train shape: ', train.shape)
print('Val shape: ', val.shape)
print('Test shape: ', test.shape)

Train shape:  (1866, 57)
Val shape:  (622, 57)
Test shape:  (622, 57)


In [4]:
# NOTE: In this version we first combine all three sets before randomly repartitioning into new train/test sets:
# Append validation to the end of training set using pandas.DataFrame.append 
# Then append the test set again to finish combining all three sets
df = train.append(val, ignore_index=True)
df = df.append(test, ignore_index=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,47,48,49,50,51,52,53,54,55,Gesture
0,12.800,5.625,3.350,7.525,15.750,30.925,13.000,11.350,797.0,379.0,...,0.135896,-0.676306,-0.651367,-0.331830,-0.208513,-0.188145,-0.391191,-0.183349,-0.262518,3
1,11.775,12.050,4.450,4.050,9.925,28.075,9.450,10.150,781.0,797.0,...,0.145100,-0.872881,-0.404738,-0.312142,-0.141769,-0.253684,-0.285637,0.037007,-0.185160,2
2,16.050,8.225,4.025,12.150,12.550,32.525,12.975,19.700,1037.0,565.0,...,0.009826,-0.552480,-0.417388,-0.145595,-0.009078,-0.447030,-0.370000,-0.309961,-0.249044,3
3,14.350,3.225,2.400,2.725,20.425,20.975,2.150,6.450,930.0,212.0,...,-0.372802,-0.348524,-0.133006,-0.095131,-0.226875,-0.348008,0.012600,-0.288841,-0.523217,1
4,12.375,2.525,1.600,2.500,14.825,11.700,1.625,4.000,783.0,149.0,...,-0.209545,-0.184494,-0.285429,-0.240479,-0.186715,-0.061272,-0.243753,-0.110633,-0.239182,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3105,14.600,11.375,6.300,7.075,11.525,6.650,19.975,12.625,987.0,688.0,...,-0.362923,-0.429516,-0.239828,-0.221800,-0.480777,-0.694733,-0.420748,-0.245940,-0.179027,0
3106,5.800,2.050,2.525,3.400,13.650,23.675,5.925,8.000,361.0,121.0,...,-0.091187,-0.297904,-0.198859,-0.136441,-0.121303,-0.138884,-0.015662,0.004741,-0.127008,3
3107,13.350,3.400,2.275,2.625,16.275,14.600,2.325,6.625,894.0,222.0,...,-0.485066,-0.199362,-0.113962,-0.248865,0.093129,-0.284206,-0.134567,-0.192715,-0.134633,1
3108,4.575,2.300,2.350,2.825,14.850,23.550,4.475,7.275,314.0,131.0,...,-0.169328,-0.364084,-0.071852,-0.251953,-0.268409,-0.403572,-0.334678,-0.476653,-0.492596,3


In [5]:
# Create independent and dependent variables
X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

# Randomly partition the set into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    test_size=0.20, 
                                                    shuffle=True)

# Show resulting dimensions
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2488, 56)
(2488,)
(622, 56)
(622,)


### Non Cross Validation

In [6]:
# Build, fit and score SVC
svc = SVC()
svc.fit(X_train, y_train)
train_score = svc.score(X_train, y_train)

# Make predictions on the test set
test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Train accuracy: ', round(train_score, 4))
print('Test score: ', round(test_accuracy, 4))

Train accuracy:  0.994
Test score:  0.9952


### Cross Validation

In [7]:
# Build, fit and score SVC using k-fold CV
svc = SVC()
svc.fit(X_train, y_train)
kfold = KFold(n_splits=10,shuffle=True)
cv = cross_val_score(svc, X_train, y_train, cv=kfold, scoring='accuracy')

test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Cross val scores: ', cv)
print('Average CV Score: ', round(np.average(cv), 4))
print('Test score: ', round(test_accuracy, 4))

Cross val scores:  [0.99196787 0.99598394 1.         0.98795181 0.98795181 0.99196787
 0.98795181 1.         0.99193548 1.        ]
Average CV Score:  0.9936
Test score:  0.9952


### Hyperparameter tuning: GridSearchCV

In [8]:
param_grid = [{'C': [1, 10, 100, 1000],
               'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
               'degree': [1, 3, 5, 7],
               'gamma': ['scale', 'auto']}]

gs_cv = GridSearchCV(SVC(), param_grid, scoring='accuracy', cv=10, n_jobs=-1, verbose=1)
gs_cv.fit(X_train, y_train)

Fitting 10 folds for each of 128 candidates, totalling 1280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 452 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done 832 tasks      | elapsed:   25.3s
[Parallel(n_jobs=-1)]: Done 1249 out of 1280 | elapsed:   39.1s remaining:    1.0s
[Parallel(n_jobs=-1)]: Done 1280 out of 1280 | elapsed:   40.8s finished


GridSearchCV(cv=10, estimator=SVC(), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000], 'degree': [1, 3, 5, 7],
                          'gamma': ['scale', 'auto'],
                          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}],
             scoring='accuracy', verbose=1)

In [9]:
print("Best parameters set found on validation set:")
print()
print(gs_cv.best_params_)
print()
print("Best score on validation set: %0.4f" % gs_cv.best_score_)
print()
print("Grid scores on validation set:")
print()
means = gs_cv.cv_results_['mean_test_score']
stds = gs_cv.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, gs_cv.cv_results_['params']):
    print("%0.4f (+/-%0.04f) for %r"
          % (mean, std * 2, params))

Best parameters set found on validation set:

{'C': 100, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf'}

Best score on validation set: 0.9960

Grid scores on validation set:

0.9932 (+/-0.0108) for {'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'linear'}
0.9908 (+/-0.0096) for {'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'}
0.9932 (+/-0.0102) for {'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf'}
0.8368 (+/-0.0591) for {'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.9932 (+/-0.0108) for {'C': 1, 'degree': 1, 'gamma': 'auto', 'kernel': 'linear'}
0.9936 (+/-0.0090) for {'C': 1, 'degree': 1, 'gamma': 'auto', 'kernel': 'poly'}
0.2544 (+/-0.0039) for {'C': 1, 'degree': 1, 'gamma': 'auto', 'kernel': 'rbf'}
0.2544 (+/-0.0039) for {'C': 1, 'degree': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}
0.9932 (+/-0.0108) for {'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'linear'}
0.9835 (+/-0.0127) for {'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
0.9932 (

In [10]:
test_pred = gs_cv.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Test score: ', round(test_accuracy, 4))
print(classification_report(y_test, test_pred))

Test score:  0.9968
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       145
           1       1.00      0.99      1.00       167
           2       1.00      0.99      1.00       151
           3       0.99      1.00      0.99       159

    accuracy                           1.00       622
   macro avg       1.00      1.00      1.00       622
weighted avg       1.00      1.00      1.00       622

