# 1. Read collected data & pre-process

In [1]:
# Import relevant dependencies
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [2]:
# Initialize main directory location
path = os.getcwd()
main_directory = os.path.dirname(path)

# Read combined .csv dataset file from previous data collection
df = pd.read_csv(main_directory + '/dataset/keypoints_combined.csv')

In [3]:
# Initialize dataset feature and target values
x = df.drop('class', axis=1) # features
y = df['class'] # target value

In [6]:
# Split data into 80/20 for training part and testing part
# Randomize dataset contents (to avoid possible over-fitting)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [7]:
print(y_test)

301    R
39     C
338    T
410    X
155    I
      ..
195    L
211    M
26     B
7      A
431    Y
Name: class, Length: 137, dtype: object


# 2. Train machine learning classification model

In [8]:
# import relevant dependencies and model libraries
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.svm import SVC, NuSVC, LinearSVC

In [9]:
# Initialize training pipelines
pipelines = {
    'SVM':make_pipeline(StandardScaler(), SVC(probability=True, kernel='linear')),
    'NuSVC':make_pipeline(StandardScaler(), NuSVC(probability=True))
    
}

In [10]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(x_train, y_train)
    fit_models[algo] = model

In [11]:
pipeline.fit

<bound method Pipeline.fit of Pipeline(steps=[('standardscaler', StandardScaler()),
                ('nusvc', NuSVC(probability=True))])>

In [12]:
fit_models

{'SVM': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc', SVC(kernel='linear', probability=True))]),
 'NuSVC': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('nusvc', NuSVC(probability=True))])}

# 3. Evaluate model

In [14]:
# Import dependencies
from sklearn.metrics import accuracy_score # Accuracy metrics 

In [15]:
# Run training evaluation
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    print(algo, accuracy_score(y_test, yhat))

SVM 0.9927007299270073
NuSVC 0.9781021897810219


In [16]:
fit_models['SVM'].predict(x_test)

array(['R', 'C', 'T', 'X', 'I', 'O', 'O', 'F', 'X', 'B', 'E', 'L', 'I',
       'U', 'S', 'D', 'J', 'N', 'Q', 'E', 'E', 'H', 'S', 'G', 'W', 'M',
       'E', 'Z', 'A', 'P', 'V', 'O', 'Y', 'G', 'Z', 'E', 'Q', 'X', 'H',
       'E', 'Q', 'F', 'Y', 'E', 'I', 'E', 'E', 'N', 'X', 'W', 'T', 'B',
       'O', 'S', 'J', 'P', 'A', 'W', 'A', 'Z', 'L', 'A', 'B', 'Q', 'U',
       'D', 'Y', 'P', 'R', 'L', 'X', 'W', 'B', 'L', 'N', 'Y', 'C', 'P',
       'Z', 'G', 'K', 'Q', 'Y', 'B', 'C', 'Z', 'Z', 'H', 'F', 'D', 'I',
       'B', 'A', 'D', 'V', 'O', 'B', 'E', 'S', 'H', 'U', 'U', 'F', 'O',
       'T', 'X', 'T', 'K', 'G', 'V', 'A', 'C', 'K', 'H', 'W', 'Q', 'A',
       'Y', 'V', 'S', 'A', 'B', 'Z', 'D', 'M', 'N', 'V', 'D', 'G', 'Q',
       'O', 'B', 'L', 'M', 'B', 'A', 'Y'], dtype=object)

In [17]:
y_test

301    R
39     C
338    T
410    X
155    I
      ..
195    L
211    M
26     B
7      A
431    Y
Name: class, Length: 137, dtype: object

# 4. Serialize/Export model

In [18]:
# import dependencies
import pickle 

In [19]:
# Export trained model into specified directory as .pkl file
with open(main_directory+'/model/svm_trained_classifier.pkl', 'wb') as f:
    pickle.dump(fit_models['SVM'], f)