In [1]:
# importing libraries
import pickle

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Read sign_data.csv into a DataFrame and preview its last five rows.
df = pd.read_csv(filepath_or_buffer='sign_data.csv')
df.tail(n=5)

Unnamed: 0,Distance_0,Distance_1,Distance_2,Distance_3,Distance_4,Distance_5,Distance_6,Distance_7,Distance_8,Distance_9,...,Distance_201,Distance_202,Distance_203,Distance_204,Distance_205,Distance_206,Distance_207,Distance_208,Distance_209,Sign
2695,0.088972,0.130166,0.162504,0.200254,0.173446,0.235127,0.280532,0.320568,0.165033,0.239816,...,0.113226,0.075261,0.043884,0.06365,0.102741,0.137906,0.039173,0.07441,0.035296,Space
2696,0.095095,0.128824,0.158698,0.197312,0.170525,0.229949,0.272802,0.309916,0.164182,0.238616,...,0.106958,0.069399,0.039574,0.064194,0.102857,0.137429,0.038798,0.073588,0.034854,Space
2697,0.075805,0.107031,0.137145,0.175597,0.147967,0.206562,0.252867,0.293611,0.147785,0.212464,...,0.101876,0.06692,0.033984,0.052835,0.087759,0.120721,0.03498,0.067956,0.032981,Space
2698,0.08238,0.121251,0.153004,0.191029,0.158623,0.224033,0.273687,0.3168,0.156625,0.230912,...,0.111972,0.074774,0.041023,0.059216,0.096474,0.130556,0.037281,0.071387,0.034115,Space
2699,0.086183,0.122475,0.153708,0.192051,0.166552,0.227843,0.276621,0.318729,0.163669,0.235324,...,0.109894,0.072633,0.038696,0.059989,0.097266,0.131361,0.037321,0.071411,0.034102,Space


In [4]:
# Encode the string labels in 'Sign' as integer class ids:
# 'A'..'Z' -> 0..25 and 'Space' -> 26.
sign_to_id = {chr(ord('A') + idx): idx for idx in range(26)}
sign_to_id['Space'] = 26

# Guard on dtype so this cell is idempotent: re-running it on labels that are
# already encoded would otherwise map every integer to NaN.
if not pd.api.types.is_numeric_dtype(df['Sign']):
    encoded_sign = df['Sign'].map(sign_to_id)

    # Fail loudly on labels that are missing from the mapping instead of
    # silently carrying NaN targets into the train/test split.
    unmapped = df.loc[encoded_sign.isna(), 'Sign'].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped values in 'Sign': {list(unmapped)}")

    df['Sign'] = encoded_sign.astype('int64')

In [5]:
# Separate the target column from the feature columns, then hold out 20% of
# the rows as a test set (shuffled, with a fixed seed for a repeatable split).
y = df['Sign']
X = df.drop('Sign', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
    test_size=0.2,
)

In [6]:
# Candidate models, keyed by the short name used in the reports below.
# The random forest is seeded (same seed as the train/test split) so that its
# scores are reproducible across kernel restarts.
classifiers = {
    'svm': SVC(),
    'knn': KNeighborsClassifier(),
    'rf': RandomForestClassifier(random_state=42),
}

# Every candidate shares the same two-step pipeline: standardise the features,
# then classify. Each pipeline gets its own StandardScaler instance.
pipelines = {
    model_name: Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', classifier),
    ])
    for model_name, classifier in classifiers.items()
}

In [7]:
# Per-model array of 5-fold cross-validation accuracies on the training split.
cv_scores = {}

# Pass 1: cross-validate every pipeline and report mean ± std accuracy.
for name in pipelines:
    pipeline = pipelines[name]
    scores = cross_val_score(
        estimator=pipeline,
        X=X_train,
        y=y_train,
        scoring='accuracy',
        cv=5,
    )
    cv_scores[name] = scores
    mean_acc = np.mean(scores)
    std_acc = np.std(scores)
    print(f"{name} CV Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")

# Pass 2: refit every pipeline on the full training split and score it on the
# held-out test split. Kept as a separate loop so all CV lines print first.
for name in pipelines:
    pipeline = pipelines[name]
    y_pred = pipeline.fit(X_train, y_train).predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Test Accuracy: {test_accuracy:.4f}")


svm CV Accuracy: 0.9986 ± 0.0019
knn CV Accuracy: 0.9981 ± 0.0017
rf CV Accuracy: 0.9954 ± 0.0021
svm Test Accuracy: 0.9981
knn Test Accuracy: 0.9981
rf Test Accuracy: 0.9963


In [8]:
# Rank the candidates by mean cross-validated accuracy and keep the winner.
# On a tie, the model that appears first in cv_scores is selected.
mean_cv_accuracy = {
    candidate: np.mean(fold_scores)
    for candidate, fold_scores in cv_scores.items()
}
best_model_name = max(mean_cv_accuracy, key=mean_cv_accuracy.get)
best_model = pipelines[best_model_name]
print(f"The best model is {best_model_name} with a cross-validated accuracy of {mean_cv_accuracy[best_model_name]:.4f}")

The best model is svm with a cross-validated accuracy of 0.9986


## Saving Model

In [10]:
# Create the final pipeline: standardise the features, then classify with an
# SVM (the classifier used here is SVC, not KNN).
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', SVC())
])

# Train the model on the training split
pipeline.fit(X_train, y_train)

# Save the fitted pipeline (scaler + classifier together) so that inference
# reuses the exact preprocessing fitted here.
# NOTE: only unpickle this file from a trusted source; pickle.load can execute
# arbitrary code.
with open('sign_gesture.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

# Evaluate the saved model on the held-out test split
y_pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"SVM Test Accuracy: {test_accuracy:.4f}")

KNN Test Accuracy: 0.9981
