# 1. Read collected data & pre-process

In [1]:
# Import relevant dependencies
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [2]:
# Initialize main directory location
path = os.getcwd()
main_directory = os.path.dirname(path)

# Read combined .csv dataset file from previous data collection
df = pd.read_csv(main_directory + '/dataset/keypoints_combined.csv')

In [3]:
# Initialize dataset feature and target values
x = df.drop('class', axis=1) # features
y = df['class'] # target value

In [4]:
# Split data into 80/20 for training part and testing part
# Randomize dataset contents (to avoid possible over-fitting)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)

In [5]:
print(y_test)

172    K
137    I
126    I
94     F
72     E
33     B
379    Y
223    N
341    V
227    O
132    I
222    N
286    S
331    V
157    K
90     F
357    W
78     E
9      A
77     E
312    T
238    O
30     B
73     E
203    M
56     D
368    X
193    M
281    R
104    G
397    Z
211    N
229    O
15     A
248    P
0      A
302    S
316    U
110    G
266    Q
84     F
Name: class, dtype: object


# 2. Train machine learning classification model

In [6]:
# import relevant dependencies and model libraries
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.svm import SVC, NuSVC, LinearSVC

In [7]:
# Initialize training pipelines
pipelines = {
    'SVM':make_pipeline(StandardScaler(), SVC(probability=True, kernel='linear')),
    'NuSVC':make_pipeline(StandardScaler(), NuSVC(probability=True))
    
}

In [8]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(x_train, y_train)
    fit_models[algo] = model

In [9]:
pipeline.fit

<bound method Pipeline.fit of Pipeline(steps=[('standardscaler', StandardScaler()),
                ('nusvc', NuSVC(probability=True))])>

In [10]:
fit_models

{'SVM': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc', SVC(kernel='linear', probability=True))]),
 'NuSVC': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('nusvc', NuSVC(probability=True))])}

# 3. Evaluate model

In [11]:
# Import dependencies
from sklearn.metrics import accuracy_score # Accuracy metrics 

In [12]:
# Run training evaluation
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    print(algo, accuracy_score(y_test, yhat))

SVM 1.0
NuSVC 1.0


In [13]:
fit_models['SVM'].predict(x_test)

array(['K', 'I', 'I', 'F', 'E', 'B', 'Y', 'N', 'V', 'O', 'I', 'N', 'S',
       'V', 'K', 'F', 'W', 'E', 'A', 'E', 'T', 'O', 'B', 'E', 'M', 'D',
       'X', 'M', 'R', 'G', 'Z', 'N', 'O', 'A', 'P', 'A', 'S', 'U', 'G',
       'Q', 'F'], dtype=object)

In [14]:
y_test

172    K
137    I
126    I
94     F
72     E
33     B
379    Y
223    N
341    V
227    O
132    I
222    N
286    S
331    V
157    K
90     F
357    W
78     E
9      A
77     E
312    T
238    O
30     B
73     E
203    M
56     D
368    X
193    M
281    R
104    G
397    Z
211    N
229    O
15     A
248    P
0      A
302    S
316    U
110    G
266    Q
84     F
Name: class, dtype: object

# 4. Serialize/Export model

In [15]:
# import dependencies
import pickle 

In [16]:
# Export trained model into specified directory as .pkl file
with open(main_directory+'/model/svm_trained_classifier.pkl', 'wb') as f:
    pickle.dump(fit_models['SVM'], f)