In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sklearn model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Sklearn prebuilt models
from sklearn.svm import SVC

# Sklearn metrics
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load the pre-split training, validation and test tables from CSV
train, val, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Reformatted EMG Data/training_set.csv',
        '../Reformatted EMG Data/validation_set.csv',
        '../Reformatted EMG Data/test_set.csv',
    )
)

In [3]:
# Report the (rows, columns) shape of each loaded split
for caption, frame in (('Train shape: ', train), ('Val shape: ', val), ('Test shape: ', test)):
    print(caption, frame.shape)

Train shape:  (1866, 57)
Val shape:  (622, 57)
Test shape:  (622, 57)


In [4]:
# Combine training and validation sets:
# Stack the validation rows below the training rows with pandas.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0).
# ignore_index=True gives the result a fresh 0..n-1 index so validation indices do not carry over.
df = pd.concat([train, val], ignore_index=True)

# The result must have one row per input row and no extra columns; extra columns
# would mean the two frames' column labels did not line up and NaNs were introduced.
assert df.shape == (len(train) + len(val), train.shape[1]), (
    'train and validation sets have mismatched columns'
)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,47,48,49,50,51,52,53,54,55,Gesture
0,12.800,5.625,3.350,7.525,15.750,30.925,13.000,11.350,797.0,379.0,...,0.135896,-0.676306,-0.651367,-0.331830,-0.208513,-0.188145,-0.391191,-0.183349,-0.262518,3
1,11.775,12.050,4.450,4.050,9.925,28.075,9.450,10.150,781.0,797.0,...,0.145100,-0.872881,-0.404738,-0.312142,-0.141769,-0.253684,-0.285637,0.037007,-0.185160,2
2,16.050,8.225,4.025,12.150,12.550,32.525,12.975,19.700,1037.0,565.0,...,0.009826,-0.552480,-0.417388,-0.145595,-0.009078,-0.447030,-0.370000,-0.309961,-0.249044,3
3,14.350,3.225,2.400,2.725,20.425,20.975,2.150,6.450,930.0,212.0,...,-0.372802,-0.348524,-0.133006,-0.095131,-0.226875,-0.348008,0.012600,-0.288841,-0.523217,1
4,12.375,2.525,1.600,2.500,14.825,11.700,1.625,4.000,783.0,149.0,...,-0.209545,-0.184494,-0.285429,-0.240479,-0.186715,-0.061272,-0.243753,-0.110633,-0.239182,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2483,14.000,12.350,4.250,3.875,12.525,18.850,2.850,7.625,1004.0,768.0,...,-0.381901,-0.187739,-0.190132,-0.035732,-0.314430,-0.190161,0.003855,-0.218739,-0.040739,2
2484,4.800,2.625,2.775,3.025,17.150,19.625,4.925,10.375,316.0,137.0,...,-0.351091,-0.272144,-0.117917,-0.240737,-0.339937,-0.323283,-0.208709,-0.295625,-0.514628,3
2485,18.750,14.850,4.700,5.100,17.150,26.475,10.825,13.000,1252.0,879.0,...,0.045476,-0.149866,-0.262809,-0.146483,-0.188831,-0.752618,-0.663131,-0.386512,-0.311171,2
2486,12.800,3.025,3.475,3.625,25.675,25.875,2.600,7.350,807.0,192.0,...,-0.525611,-0.422638,-0.173903,-0.420056,-0.446556,-0.502039,-0.409440,-0.424476,-0.437964,1


In [5]:
# Remove AR features (columns 24-55)
def drop_ar_features(frame):
    """Return a copy of ``frame`` without the feature columns at positions 24-55.

    The last column is treated as the label and is excluded before slicing, so
    it can never be dropped. This makes the cell safe to re-run: once the AR
    columns are gone the slice is empty and the frame is returned unchanged,
    whereas slicing the full frame by position would then hit the label column.
    """
    feature_columns = frame.columns[:-1]
    return frame.drop(columns=feature_columns[24:56])


df = drop_ar_features(df)
test = drop_ar_features(test)

df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,Gesture
0,12.800,5.625,3.350,7.525,15.750,30.925,13.000,11.350,797.0,379.0,...,668.0,19.0,20.0,15.0,16.0,19.0,18.0,18.0,12.0,3
1,11.775,12.050,4.450,4.050,9.925,28.075,9.450,10.150,781.0,797.0,...,559.0,18.0,16.0,16.0,14.0,17.0,27.0,22.0,17.0,2
2,16.050,8.225,4.025,12.150,12.550,32.525,12.975,19.700,1037.0,565.0,...,1254.0,19.0,19.0,14.0,13.0,17.0,14.0,17.0,17.0,3
3,14.350,3.225,2.400,2.725,20.425,20.975,2.150,6.450,930.0,212.0,...,419.0,18.0,16.0,15.0,20.0,19.0,17.0,13.0,17.0,1
4,12.375,2.525,1.600,2.500,14.825,11.700,1.625,4.000,783.0,149.0,...,232.0,20.0,15.0,10.0,14.0,18.0,15.0,15.0,13.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2483,14.000,12.350,4.250,3.875,12.525,18.850,2.850,7.625,1004.0,768.0,...,460.0,23.0,16.0,14.0,12.0,19.0,19.0,11.0,18.0,2
2484,4.800,2.625,2.775,3.025,17.150,19.625,4.925,10.375,316.0,137.0,...,636.0,16.0,12.0,14.0,12.0,20.0,18.0,14.0,19.0,3
2485,18.750,14.850,4.700,5.100,17.150,26.475,10.825,13.000,1252.0,879.0,...,824.0,18.0,14.0,19.0,14.0,23.0,16.0,17.0,16.0,2
2486,12.800,3.025,3.475,3.625,25.675,25.875,2.600,7.350,807.0,192.0,...,474.0,17.0,13.0,18.0,16.0,20.0,14.0,14.0,20.0,1


In [6]:
# Split each frame into a feature matrix (every column except the last)
# and a label vector (the last column), both as NumPy arrays
X_train, y_train = df.iloc[:, :-1].values, df.iloc[:, -1].values
X_test, y_test = test.iloc[:, :-1].values, test.iloc[:, -1].values

# Print the shape of each resulting array
for split_array in (X_train, y_train, X_test, y_test):
    print(split_array.shape)

(2488, 24)
(2488,)
(622, 24)
(622,)


### Without Cross-Validation

In [7]:
# Train an SVC with default hyperparameters on the combined training data
# (fit returns the estimator itself, so construction and fitting can be chained)
svc = SVC().fit(X_train, y_train)

# Accuracy on the data the model was fitted on
train_score = svc.score(X_train, y_train)

# Accuracy of the predictions on the held-out test set
test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Train accuracy: ', round(train_score, 4))
print('Test score: ', round(test_accuracy, 4))

Train accuracy:  0.994
Test score:  0.9952


### Cross Validation

In [8]:
# Score an SVC with 10-fold cross-validation on the combined training data.
# shuffle=True randomises the fold assignment, so random_state is fixed to make
# the folds -- and therefore the reported scores -- reproducible across runs.
svc = SVC()
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
cv = cross_val_score(svc, X_train, y_train, cv=kfold, scoring='accuracy')

# cross_val_score fits clones of the estimator and leaves `svc` itself unfitted,
# so fit it on the full training data before predicting on the test set.
svc.fit(X_train, y_train)
test_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, test_pred)

print('Cross val scores: ', cv)
print('Average CV Score: ', round(np.average(cv), 4))
print('Test score: ', round(test_accuracy, 4))

Cross val scores:  [0.99598394 0.99598394 1.         0.99196787 0.98795181 0.99598394
 0.98795181 0.98795181 0.99596774 0.99596774]
Average CV Score:  0.9936
Test score:  0.9952
