# 1. Read collected data & pre-process

In [29]:
# Import relevant dependencies
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [30]:
# Resolve the project root: the parent folder of the notebook's working directory
path = os.getcwd()
main_directory = os.path.dirname(path)

# Read the combined .csv dataset produced by the previous data-collection step.
# os.path.join assembles the path portably instead of hard-coding '/' separators.
df = pd.read_csv(os.path.join(main_directory, 'dataset', 'keypoints_combined.csv'))

In [31]:
# Separate the label column from the rest of the table:
# `y` holds the 'class' target, `x` holds every remaining column as features.
y = df['class']
x = df.drop(columns='class')

In [32]:
# Hold out 20% of the rows for testing and keep the remaining 80% for training.
# Rows are shuffled before the split; the fixed random_state makes the split
# reproducible across re-runs.
# NOTE(review): consider stratify=y so every class is represented proportionally
# in both parts — confirm each class has enough samples first.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Inspect the held-out labels; a bare last expression lets the notebook render
# the Series itself instead of routing it through print()
y_test

301    R
39     C
338    T
410    X
155    I
      ..
449    Z
137    H
93     F
57     D
154    I
Name: class, Length: 91, dtype: object


# 2. Train machine learning classification model

In [65]:
# import relevant dependencies and model libraries
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.svm import SVC, NuSVC, LinearSVC

In [75]:
# Initialize training pipelines: each one standardizes the features and then
# fits a support-vector classifier.
# With probability=True scikit-learn fits its probability estimates using an
# internal, randomly shuffled cross-validation, so random_state is pinned (same
# seed as the train/test split) to keep predict_proba reproducible across re-runs.
pipelines = {
    'SVM': make_pipeline(StandardScaler(), SVC(probability=True, kernel='poly', random_state=42)),
    'NuSVC': make_pipeline(StandardScaler(), NuSVC(probability=True, random_state=42)),
}

In [76]:
# Train every candidate pipeline on the training split and collect the fitted
# estimators in a dict keyed by algorithm name.
fit_models = {}
for algo in pipelines:
    pipeline = pipelines[algo]
    model = pipeline.fit(x_train, y_train)  # fit() hands back the fitted pipeline
    fit_models.update({algo: model})

In [77]:
# NOTE(review): leftover inspection cell. `pipeline` is the loop variable that
# leaks out of the training loop, so this refers to whichever pipeline was
# iterated last. The expression only displays the bound `fit` method; it does
# not call it. The recorded output lists no StandardScaler step even though the
# pipelines cell builds one — presumably stale; re-run to refresh or drop the cell.
pipeline.fit

<bound method Pipeline.fit of Pipeline(steps=[('nusvc', NuSVC(probability=True))])>

# 3. Evaluate model

In [78]:
# Import dependencies
from sklearn.metrics import accuracy_score # Accuracy metrics 

In [79]:
# Report each fitted pipeline's accuracy on the held-out test split
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    test_accuracy = accuracy_score(y_test, yhat)
    print(algo, test_accuracy)

SVM 0.978021978021978
NuSVC 0.978021978021978


In [80]:
# Display the labels the fitted SVM pipeline predicts for the test features
svm_model = fit_models['SVM']
svm_model.predict(x_test)

array(['R', 'C', 'T', 'X', 'I', 'O', 'O', 'F', 'X', 'B', 'E', 'L', 'I',
       'U', 'S', 'D', 'J', 'N', 'Q', 'E', 'E', 'H', 'S', 'G', 'W', 'M',
       'E', 'Z', 'A', 'P', 'V', 'O', 'Y', 'G', 'Z', 'E', 'Q', 'X', 'H',
       'E', 'Q', 'F', 'Y', 'E', 'I', 'E', 'E', 'S', 'X', 'W', 'T', 'B',
       'O', 'S', 'J', 'P', 'A', 'W', 'A', 'Z', 'L', 'A', 'B', 'Q', 'U',
       'D', 'Y', 'P', 'R', 'L', 'X', 'W', 'B', 'L', 'S', 'Y', 'C', 'P',
       'Z', 'G', 'K', 'Q', 'Y', 'B', 'C', 'Z', 'Z', 'H', 'F', 'D', 'I'],
      dtype=object)

In [81]:
# Display the true test labels so they can be compared by eye with the SVM
# predictions shown in the previous cell
y_test

301    R
39     C
338    T
410    X
155    I
      ..
449    Z
137    H
93     F
57     D
154    I
Name: class, Length: 91, dtype: object

# 4. Serialize/Export model

In [82]:
# import dependencies
import pickle 

In [83]:
# Export the trained SVM pipeline into <main_directory>/model as a .pkl file.
# The folder is created when missing so a fresh checkout does not fail with
# FileNotFoundError, and os.path.join keeps the path portable.
model_directory = os.path.join(main_directory, 'model')
os.makedirs(model_directory, exist_ok=True)
with open(os.path.join(model_directory, 'svm_trained_classifier.pkl'), 'wb') as f:
    pickle.dump(fit_models['SVM'], f)