# <center style="color:#145277">Model Training</center>

### <span style="color:#145277">Importing Libraries</span>

In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

### <span style="color:#145277">Loading & Exploring Dataset </span>

In [2]:
# Read the sign dataset from CSV and preview its first rows
DATA_FILE = 'sign_data.csv'
df = pd.read_csv(DATA_FILE)
df.head()

Unnamed: 0,Distance_0,Distance_1,Distance_2,Distance_3,Distance_4,Distance_5,Distance_6,Distance_7,Distance_8,Distance_9,...,Distance_201,Distance_202,Distance_203,Distance_204,Distance_205,Distance_206,Distance_207,Distance_208,Distance_209,Sign
0,0.105151,0.219548,0.313035,0.392161,0.32936,0.431127,0.475016,0.495393,0.327917,0.443504,...,0.18401,0.140056,0.108465,0.089051,0.142206,0.188338,0.053337,0.099607,0.046278,A
1,0.111101,0.252695,0.364716,0.45107,0.315518,0.388219,0.298722,0.253134,0.30757,0.383007,...,0.126662,0.062835,0.046759,0.070189,0.016089,0.04727,0.077674,0.114872,0.038531,A
2,0.111975,0.252069,0.366455,0.457079,0.314929,0.395829,0.30184,0.248627,0.308029,0.393448,...,0.142677,0.069461,0.045488,0.074016,0.021795,0.055818,0.083826,0.126208,0.043605,A
3,0.110323,0.250238,0.365462,0.456366,0.315646,0.396813,0.300209,0.246396,0.309401,0.396331,...,0.143852,0.069471,0.046139,0.073823,0.023312,0.056232,0.084579,0.126099,0.042901,A
4,0.091807,0.205298,0.303033,0.379785,0.270043,0.342094,0.258915,0.21295,0.267259,0.342078,...,0.131304,0.064672,0.043861,0.066647,0.01821,0.053566,0.078263,0.118124,0.040814,A


In [3]:
# dataset dimensions as (number of rows, number of columns)
(len(df.index), len(df.columns))

(200, 211)

In [4]:
# number of missing values in each column
df.isna().sum()

Distance_0      0
Distance_1      0
Distance_2      0
Distance_3      0
Distance_4      0
               ..
Distance_206    0
Distance_207    0
Distance_208    0
Distance_209    0
Sign            0
Length: 211, dtype: int64

### <span style="color:#145277">Data Preprocessing & Model Building</span>

In [8]:
# Encode the sign letters 'A', 'B', 'C', 'D' as the integer classes 0, 1, 2, 3.
SIGN_TO_LABEL = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

# Only encode while the column still holds the raw letters: mapping an
# already-encoded column would turn every label into NaN, so this guard keeps
# the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df['Sign']):
    encoded_sign = df['Sign'].map(SIGN_TO_LABEL)
    # .map() yields NaN for anything missing from the dictionary; fail loudly
    # instead of passing NaN labels on to the classifiers.
    unmapped = df.loc[encoded_sign.isna(), 'Sign'].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped sign labels: {sorted(map(str, unmapped))}")
    df['Sign'] = encoded_sign.astype('int64')

In [9]:
# splitting dataset into training and testing sets
y = df['Sign']              # target: the sign label column
X = df.drop(columns=['Sign'])  # features: every remaining column

# 80/20 hold-out split with a fixed seed for reproducibility.
# stratify=y keeps the class proportions the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True, stratify=y
)

In [10]:
# model pipelines: every candidate classifier sits behind the same scaler
def scaled_pipeline(classifier):
    """Return a Pipeline that standardizes the features, then applies `classifier`.

    The steps are named 'scaler' and 'classifier'.
    """
    return Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', classifier),
    ])


pipelines = {
    'svm': scaled_pipeline(SVC()),
    'knn': scaled_pipeline(KNeighborsClassifier()),
    # fixed seed so the forest gives the same result on every run
    'rf': scaled_pipeline(RandomForestClassifier(random_state=42)),
}

In [11]:
# 5-fold cross-validated accuracy of every candidate, computed on the training split
cv_scores = {}
for model_name, model in pipelines.items():
    fold_accuracies = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores[model_name] = fold_accuracies
    print(f"{model_name} CV Accuracy: {fold_accuracies.mean():.4f} ± {fold_accuracies.std():.4f}")

# fit each candidate on the training split and score it on the held-out test split
for model_name, model in pipelines.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Test Accuracy: {test_accuracy:.4f}")


svm CV Accuracy: 0.9938 ± 0.0125
knn CV Accuracy: 0.9938 ± 0.0125
rf CV Accuracy: 0.9938 ± 0.0125
svm Test Accuracy: 1.0000
knn Test Accuracy: 1.0000
rf Test Accuracy: 1.0000


In [12]:
# pick the candidate with the highest mean cross-validated accuracy
# (on a tie, the first entry in cv_scores wins)
mean_cv_accuracy = {model_name: np.mean(fold_scores) for model_name, fold_scores in cv_scores.items()}
best_model_name = max(mean_cv_accuracy, key=mean_cv_accuracy.get)
best_model = pipelines[best_model_name]
print(f"The best model is {best_model_name} with a cross-validated accuracy of {mean_cv_accuracy[best_model_name]:.4f}")

The best model is svm with a cross-validated accuracy of 0.9938


### <span style="color:#145277">Saving Model</span>

In [13]:
# Persist the pipeline selected by cross-validation instead of a hard-coded one.
# `pipeline` stays bound to the saved model for any later cell that uses that name.
pipeline = best_model

# Train the model on the training split
pipeline.fit(X_train, y_train)

# Evaluate the model on the held-out test split, labelled with the model actually used
y_pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"{best_model_name} Test Accuracy: {test_accuracy:.4f}")

# Save the fitted pipeline (scaler + classifier).
# NOTE: pickle files can execute arbitrary code when loaded; only load
# 'sign_gesture.pkl' from a trusted source.
with open('sign_gesture.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

KNN Test Accuracy: 1.0000
