# 1. Read collected data & pre-process

In [1]:
# Import relevant dependencies
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [2]:
# Resolve the project root as the parent of the current working directory.
# NOTE(review): assumes the notebook is launched from a direct sub-folder of
# the project root — confirm before running from elsewhere.
path = os.getcwd()
main_directory = os.path.dirname(path)

# Read the combined .csv dataset file from the previous data collection step.
# os.path.join uses the platform's separator instead of a hard-coded '/'.
df = pd.read_csv(os.path.join(main_directory, 'dataset', 'keypoints_combined_v2.csv'))

In [3]:
# Separate the label column from the rest of the frame:
#   x -> every column except 'class' (model inputs)
#   y -> the 'class' column (value to predict)
x = df.drop(columns='class')
y = df.loc[:, 'class']

In [39]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# A fixed random_state keeps the split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [40]:
# Print the test-split labels to sanity-check the split
print(y_test)

478    J
721    O
312    G
660    N
823    Q
      ..
49     A
844    Q
413    I
297    F
86     B
Name: class, Length: 130, dtype: object


# 2. Train machine learning classification model

In [41]:
# import relevant dependencies and model libraries
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.svm import SVC, NuSVC

In [42]:
# Build one training pipeline per candidate classifier. Each pipeline
# standardizes the features first and then fits the estimator; all estimators
# are created with probability=True.
pipelines = {
    label: make_pipeline(StandardScaler(), estimator)
    for label, estimator in (
        ('SVM', SVC(kernel='linear', probability=True)),
        ('RBF_SVM', SVC(kernel='rbf', probability=True)),
        ('NuSVC', NuSVC(probability=True)),
    )
}

In [43]:
# Fit every pipeline on the training split and keep the fitted result
# under the same key it has in `pipelines`.
fit_models = {}
for algo, pipeline in pipelines.items():
    fit_models[algo] = model = pipeline.fit(x_train, y_train)

In [44]:
# Leftover inspection cell: evaluates to the bound `fit` method of `pipeline`
# (the last loop variable from the training loop). Nothing is called or
# trained here.
pipeline.fit

<bound method Pipeline.fit of Pipeline(steps=[('standardscaler', StandardScaler()),
                ('nusvc', NuSVC(probability=True))])>

In [45]:
# Display the dict of fitted pipelines, keyed by algorithm name
fit_models

{'SVM': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc', SVC(kernel='linear', probability=True))]),
 'RBF_SVM': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc', SVC(probability=True))]),
 'NuSVC': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('nusvc', NuSVC(probability=True))])}

# 3. Evaluate model

In [46]:
# Import dependencies
from sklearn.metrics import accuracy_score # Accuracy metrics 

In [47]:
# Score each fitted pipeline on the held-out test split and print
# "<name> <accuracy>" for it.
for algo in fit_models:
    model = fit_models[algo]
    yhat = model.predict(x_test)
    test_accuracy = accuracy_score(y_test, yhat)
    print(algo, test_accuracy)

SVM 0.9769230769230769
RBF_SVM 0.8692307692307693
NuSVC 0.9307692307692308


In [13]:
# Predicted labels from the linear-kernel 'SVM' pipeline for every row of x_test
fit_models['SVM'].predict(x_test)

array(['J', 'O', 'G', 'N', 'Q', 'O', 'S', 'M', 'W', 'C', 'Q', 'Y', 'M',
       'N', 'R', 'E', 'X', 'O', 'K', 'D', 'M', 'U', 'V', 'V', 'R', 'A',
       'P', 'J', 'Q', 'J', 'Z', 'A', 'W', 'K', 'I', 'U', 'T', 'S', 'M',
       'V', 'N', 'B', 'F', 'X', 'X', 'E', 'B', 'R', 'M', 'J', 'Q', 'Y',
       'V', 'G', 'M', 'S', 'N', 'A', 'B', 'Z', 'N', 'B', 'U', 'A', 'A',
       'O', 'J', 'I', 'K', 'Y', 'Z', 'J', 'B', 'E', 'L', 'W', 'F', 'L',
       'V', 'E', 'H', 'K', 'M', 'E', 'F', 'M', 'A', 'R', 'W', 'T', 'B',
       'Y', 'R', 'I', 'L', 'V', 'L', 'Q', 'Q', 'F', 'F', 'Q', 'O', 'S',
       'J', 'I', 'U', 'U', 'H', 'E', 'E', 'S', 'J', 'L', 'E', 'B', 'O',
       'U', 'X', 'Z', 'V', 'U', 'H', 'T', 'J', 'A', 'Q', 'I', 'F', 'B',
       'C', 'Y', 'I', 'G', 'Z', 'L', 'K', 'X', 'C', 'H', 'K', 'D', 'Q',
       'X', 'H', 'I', 'Y', 'P', 'Z', 'A', 'F', 'O', 'A', 'G', 'U', 'M',
       'Y', 'B', 'Z', 'M', 'Z', 'F', 'P', 'F', 'R', 'T', 'L', 'A', 'C',
       'D', 'M', 'I', 'N', 'M', 'Q', 'Z', 'N', 'I', 'C', 'E', 'U

In [14]:
# Display the true test labels (presumably to compare against the predictions).
# NOTE(review): the saved output for this cell reports Length: 260, while the
# 80/20 split cell printed Length: 130 — this output looks stale; re-run the
# notebook top to bottom to confirm.
y_test

478    J
721    O
312    G
660    N
823    Q
      ..
306    G
199    D
296    F
286    F
826    Q
Name: class, Length: 260, dtype: object

# 4. Serialize/Export model

In [19]:
# import dependencies
import pickle 

In [20]:
# Export the trained model into the project's model directory as a .pkl file.
# The linear-kernel 'SVM' pipeline is exported because it has the highest test
# accuracy in the evaluation output (0.977, vs 0.931 for NuSVC and 0.869 for
# RBF_SVM) and matches the "svm_trained_classifier" file name.
# NOTE(review): this cell used to pickle 'RBF_SVM', the lowest-scoring model —
# confirm that 'SVM' is the intended export.
model_dir = os.path.join(main_directory, 'model')
os.makedirs(model_dir, exist_ok=True)  # open() would fail if the folder is missing
with open(os.path.join(model_dir, 'svm_trained_classifier_v2_20percent.pkl'), 'wb') as f:
    pickle.dump(fit_models['SVM'], f)