In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sklearn model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Sklearn prebuilt models
from sklearn.svm import SVC

# Sklearn metrics
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load the training, validation and test splits from their CSV files (read in that order).
train, val, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Reformatted EMG Data/training_set.csv',
        '../Reformatted EMG Data/validation_set.csv',
        '../Reformatted EMG Data/test_set.csv',
    )
)

In [3]:
# Quick sanity check on the load: report (rows, columns) for each split.
for label, frame in (('Train shape: ', train), ('Val shape: ', val), ('Test shape: ', test)):
    print(label, frame.shape)

Train shape:  (1866, 57)
Val shape:  (622, 57)
Test shape:  (622, 57)


In [4]:
# Combine training and validation sets into a single training frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the
# validation rows are stacked under the training rows with pd.concat instead.
# Dimensions are (1866, 57) and (622, 57) respectively, so the result should be
# of shape (1866 + 622, 57) = (2488, 57).
# ignore_index=True renumbers the rows so indices from the validation set do not
# carry over to the new dataframe.
df = pd.concat([train, val], ignore_index=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,47,48,49,50,51,52,53,54,55,Gesture
0,12.800,5.625,3.350,7.525,15.750,30.925,13.000,11.350,797.0,379.0,...,0.135896,-0.676306,-0.651367,-0.331830,-0.208513,-0.188145,-0.391191,-0.183349,-0.262518,3
1,11.775,12.050,4.450,4.050,9.925,28.075,9.450,10.150,781.0,797.0,...,0.145100,-0.872881,-0.404738,-0.312142,-0.141769,-0.253684,-0.285637,0.037007,-0.185160,2
2,16.050,8.225,4.025,12.150,12.550,32.525,12.975,19.700,1037.0,565.0,...,0.009826,-0.552480,-0.417388,-0.145595,-0.009078,-0.447030,-0.370000,-0.309961,-0.249044,3
3,14.350,3.225,2.400,2.725,20.425,20.975,2.150,6.450,930.0,212.0,...,-0.372802,-0.348524,-0.133006,-0.095131,-0.226875,-0.348008,0.012600,-0.288841,-0.523217,1
4,12.375,2.525,1.600,2.500,14.825,11.700,1.625,4.000,783.0,149.0,...,-0.209545,-0.184494,-0.285429,-0.240479,-0.186715,-0.061272,-0.243753,-0.110633,-0.239182,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2483,14.000,12.350,4.250,3.875,12.525,18.850,2.850,7.625,1004.0,768.0,...,-0.381901,-0.187739,-0.190132,-0.035732,-0.314430,-0.190161,0.003855,-0.218739,-0.040739,2
2484,4.800,2.625,2.775,3.025,17.150,19.625,4.925,10.375,316.0,137.0,...,-0.351091,-0.272144,-0.117917,-0.240737,-0.339937,-0.323283,-0.208709,-0.295625,-0.514628,3
2485,18.750,14.850,4.700,5.100,17.150,26.475,10.825,13.000,1252.0,879.0,...,0.045476,-0.149866,-0.262809,-0.146483,-0.188831,-0.752618,-0.663131,-0.386512,-0.311171,2
2486,12.800,3.025,3.475,3.625,25.675,25.875,2.600,7.350,807.0,192.0,...,-0.525611,-0.422638,-0.173903,-0.420056,-0.446556,-0.502039,-0.409440,-0.424476,-0.437964,1


In [5]:
# Only keep AR features: discard the first 24 columns (selected by position,
# dropped by label) from both the combined training frame and the test frame.
# NOTE: df and test are rebound here, so re-running this cell removes a further
# 24 columns each time -- restart and run from the top instead of re-executing it.
df = df.drop(columns=df.columns[:24])
test = test.drop(columns=test.columns[:24])

df

Unnamed: 0,24,25,26,27,28,29,30,31,32,33,...,47,48,49,50,51,52,53,54,55,Gesture
0,-0.429038,-0.177309,-0.264952,-0.085118,-0.533972,-0.080851,-0.055747,-0.303062,-0.309590,-0.358098,...,0.135896,-0.676306,-0.651367,-0.331830,-0.208513,-0.188145,-0.391191,-0.183349,-0.262518,3
1,-0.485369,-0.321078,0.050176,-0.088444,-0.609718,-0.442237,-0.378759,-0.100552,-0.281609,-0.005296,...,0.145100,-0.872881,-0.404738,-0.312142,-0.141769,-0.253684,-0.285637,0.037007,-0.185160,2
2,-0.452043,-0.325093,-0.279130,-0.325090,-0.613625,-0.274775,-0.335741,-0.284871,-0.262131,-0.250311,...,0.009826,-0.552480,-0.417388,-0.145595,-0.009078,-0.447030,-0.370000,-0.309961,-0.249044,3
3,-0.375450,-0.205289,-0.161271,-0.268460,-0.574741,-0.418102,-0.393743,-0.382298,-0.755330,-0.460810,...,-0.372802,-0.348524,-0.133006,-0.095131,-0.226875,-0.348008,0.012600,-0.288841,-0.523217,1
4,-0.468571,-0.559280,-0.107318,-0.325788,-0.242452,-0.274439,-0.110095,-0.349018,-0.186697,-0.287307,...,-0.209545,-0.184494,-0.285429,-0.240479,-0.186715,-0.061272,-0.243753,-0.110633,-0.239182,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2483,-0.740764,-0.246944,-0.032165,-0.109689,-0.361168,-0.456378,-0.253161,-0.368771,-0.322351,-0.041660,...,-0.381901,-0.187739,-0.190132,-0.035732,-0.314430,-0.190161,0.003855,-0.218739,-0.040739,2
2484,-0.215285,0.023565,0.006707,-0.190818,-0.355389,-0.457538,-0.375693,-0.689898,-0.269667,-0.474772,...,-0.351091,-0.272144,-0.117917,-0.240737,-0.339937,-0.323283,-0.208709,-0.295625,-0.514628,3
2485,-0.432050,-0.013301,-0.011798,-0.074540,-0.637726,-0.629187,-0.555086,-0.470188,-0.721811,-0.821664,...,0.045476,-0.149866,-0.262809,-0.146483,-0.188831,-0.752618,-0.663131,-0.386512,-0.311171,2
2486,-0.475304,-0.263320,-0.296725,-0.471287,-0.334812,-0.286712,-0.001553,-0.178823,-0.191408,-0.196575,...,-0.525611,-0.422638,-0.173903,-0.420056,-0.446556,-0.502039,-0.409440,-0.424476,-0.437964,1


In [6]:
# Split each frame into a feature matrix (every column except the last) and a
# target vector (the last column), both as NumPy arrays.
X_train, y_train = df.iloc[:, :-1].values, df.iloc[:, -1].values
X_test, y_test = test.iloc[:, :-1].values, test.iloc[:, -1].values

# Show resulting dimensions, in the order X_train, y_train, X_test, y_test
for arr in (X_train, y_train, X_test, y_test):
    print(arr.shape)

(2488, 32)
(2488,)
(622, 32)
(622,)


### Without Cross Validation

In [7]:
# Baseline: a default-parameter SVC fitted once on the full training data.
svc = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Accuracy on the same data the model was fitted on
train_score = svc.score(X_train, y_train)

# Accuracy of the predictions on the held-out test set
test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Train accuracy: ', round(train_score, 4))
print('Test score: ', round(test_accuracy, 4))

Train accuracy:  0.8449
Test score:  0.7186


### Cross Validation

In [8]:
# Score a default SVC with shuffled 10-fold cross-validation on the training
# data, then evaluate a model fitted on the full training data on the test set.

# Fixing random_state makes the shuffled fold assignment -- and therefore the
# reported CV scores -- repeatable across kernel restarts. Without it every run
# produces different folds and different numbers.
CV_RANDOM_STATE = 42
kfold = KFold(n_splits=10, shuffle=True, random_state=CV_RANDOM_STATE)

# cross_val_score fits a fresh clone of the estimator for each fold, so the
# estimator is passed in unfitted; the CV run does not fit `svc` itself.
svc = SVC()
cv = cross_val_score(svc, X_train, y_train, cv=kfold, scoring='accuracy')

# Fit on the full training data for the test evaluation. The CV folds above do
# not influence this model, so the test score is that of a plain single fit.
svc.fit(X_train, y_train)
test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Cross val scores: ', cv)
print('Average CV Score: ', round(np.average(cv), 4))
print('Test score: ', round(test_accuracy, 4))

Cross val scores:  [0.67871486 0.75502008 0.73092369 0.69477912 0.70682731 0.71485944
 0.73493976 0.68273092 0.69354839 0.69354839]
Average CV Score:  0.7086
Test score:  0.7186
