# 1. Read collected data & pre-process

In [8]:
# Import relevant dependencies
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [9]:
# Resolve the project root: the parent folder of the current working directory
path = os.getcwd()
main_directory = os.path.dirname(path)

# Load the combined keypoints .csv written by the earlier data-collection step
dataset_file = main_directory + '/dataset/keypoints_combined.csv'
df = pd.read_csv(dataset_file)

In [10]:
# Separate the label column from the remaining (feature) columns
y = df['class']  # target value: the 'class' column
x = df.drop(columns='class')  # features: every column except 'class'

In [11]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state keeps the train/test partition identical between runs.
# NOTE(review): consider stratify=y so every class is represented
# proportionally in the test split -- confirm class balance first.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Sanity check: show the labels (and original row indices) held out for testing
print(y_test)

172    K
137    I
126    I
94     F
72     E
      ..
152    J
5      A
45     C
93     F
284    R
Name: class, Length: 82, dtype: object


# 2. Train machine learning classification model

In [13]:
# import relevant dependencies and model libraries
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.svm import SVC, NuSVC, LinearSVC

In [14]:
# Initialize training pipelines.
# Every pipeline standardizes the features and then fits an SVM classifier.
# probability=True makes the SVMs fit an extra internal calibration step that
# shuffles the data randomly; random_state pins that shuffling so probability
# estimates are reproducible from one run to the next.
pipelines = {
    'SVM': make_pipeline(
        StandardScaler(),
        SVC(probability=True, kernel='linear', random_state=42),
    ),
    'NuSVC': make_pipeline(
        StandardScaler(),
        NuSVC(probability=True, random_state=42),
    ),
}

In [15]:
# Fit every candidate pipeline on the training split and collect the results
# in a dict keyed by algorithm name.
fit_models = {}
for algo in pipelines:
    pipeline = pipelines[algo]
    # Keep whatever fit() hands back as the trained model for this algorithm
    model = pipeline.fit(x_train, y_train)
    fit_models[algo] = model

In [16]:
# Inspection only: displays the bound `fit` method of `pipeline`, the loop
# variable left over from the training loop (its last value, the NuSVC
# pipeline). Nothing is called or re-fitted here.
pipeline.fit

<bound method Pipeline.fit of Pipeline(steps=[('standardscaler', StandardScaler()),
                ('nusvc', NuSVC(probability=True))])>

In [17]:
# Display the dict of fitted pipelines, keyed by algorithm name
fit_models

{'SVM': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc', SVC(kernel='linear', probability=True))]),
 'NuSVC': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('nusvc', NuSVC(probability=True))])}

# 3. Evaluate model

In [18]:
# Import dependencies
from sklearn.metrics import accuracy_score # Accuracy metrics 

In [19]:
# Score each fitted pipeline on the held-out test split and print its accuracy
for algo in fit_models:
    model = fit_models[algo]
    yhat = model.predict(x_test)
    score = accuracy_score(y_test, yhat)
    print(algo, score)

SVM 0.9878048780487805
NuSVC 0.9634146341463414


In [20]:
# Show the linear-SVM pipeline's predicted labels for the test split, for a
# side-by-side comparison with the true labels in y_test
fit_models['SVM'].predict(x_test)

array(['K', 'I', 'I', 'F', 'E', 'B', 'Y', 'N', 'V', 'O', 'I', 'N', 'S',
       'V', 'K', 'F', 'W', 'E', 'A', 'E', 'T', 'O', 'B', 'E', 'M', 'D',
       'X', 'M', 'R', 'G', 'Z', 'N', 'O', 'A', 'P', 'A', 'S', 'U', 'G',
       'Q', 'F', 'N', 'Y', 'X', 'D', 'U', 'V', 'W', 'C', 'V', 'E', 'C',
       'F', 'B', 'M', 'J', 'K', 'W', 'B', 'O', 'D', 'Z', 'W', 'E', 'V',
       'D', 'L', 'B', 'B', 'K', 'Q', 'B', 'R', 'Z', 'M', 'X', 'U', 'J',
       'A', 'C', 'F', 'R'], dtype=object)

In [21]:
# True labels of the test split, for comparison with the predictions above
y_test

172    K
137    I
126    I
94     F
72     E
      ..
152    J
5      A
45     C
93     F
284    R
Name: class, Length: 82, dtype: object

# 4. Serialize/Export model

In [22]:
# import dependencies
import pickle 

In [23]:
# Export the trained linear-SVM pipeline into the project's model directory
# as a .pkl file.
model_file = main_directory+'/model/svm_trained_classifier.pkl'

# Create the target directory first: open(..., 'wb') raises FileNotFoundError
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(model_file), exist_ok=True)

with open(model_file, 'wb') as f:
    pickle.dump(fit_models['SVM'], f)