# Hyper-parameter Tuning

In [1]:
import os

import joblib
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# Read the hand-sign dataset from the CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer="handSign.csv")

In [4]:
# Every column except the last is used as a feature; the last column is the
# target that the classifiers below are fitted against.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

## KNeighbors Classifier

In [5]:
# Hold out 20% of the samples for testing. The split is stratified on the
# labels and seeded, so the same partition is produced on every run. The
# resulting train/test sets are reused by the later model sections.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Base estimator whose hyper-parameters are tuned below.
knn = KNeighborsClassifier()

# Candidates: k from 1 to 20, each with uniform and distance-based weighting.
param_grid = dict(
    n_neighbors=np.arange(1, 21),
    weights=['uniform', 'distance'],
)

# Exhaustive search over the grid, scored by accuracy with 5-fold
# cross-validation on the training set. fit() returns the search object itself.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Best Parameters: ", grid_search.best_params_)
print("Best Cross-Validated Accuracy: {:.2f}".format(grid_search.best_score_))

# Score the best estimator found by the search on the held-out test set.
best_knn = grid_search.best_estimator_
test_accuracy = best_knn.score(X_test, y_test)
print("Test set accuracy: {:.2f}".format(test_accuracy))

Best Parameters:  {'n_neighbors': 1, 'weights': 'uniform'}
Best Cross-Validated Accuracy: 1.00
Test set accuracy: 1.00


In [8]:
# Save the fitted KNN search (the whole GridSearchCV object, not only the best model).
#
# `grid_search` is rebound by every model section of this notebook, so guard
# against out-of-order execution writing a different model under this filename.
assert isinstance(grid_search.estimator, KNeighborsClassifier), (
    "grid_search does not hold the KNN search; re-run the KNeighbors cell first"
)

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs("GridSearchCV", exist_ok=True)
joblib.dump(grid_search, "GridSearchCV/knn_numbers.pkl")

['GridSearchCV/knn_numbers.pkl']

## Decision Tree Classifier

In [9]:
# Create a Decision Tree classifier.
# The estimator is seeded (same seed as the train/test split) because tree
# construction is randomized -- explicitly so for splitter='random', which is
# part of the grid below. Without a seed the cross-validation scores, the
# selected parameters and the saved model can change from run to run.
dt_classifier = DecisionTreeClassifier(random_state=42)

# Define the parameter grid to search
param_grid = {'criterion': ['gini', 'entropy'],
                'splitter': ['best', 'random'],
                'max_depth': [None, 10, 20, 30, 40, 50],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4]
                }

# Create the GridSearchCV object: exhaustive search over the grid, scored by
# accuracy with 5-fold cross-validation.
grid_search = GridSearchCV(dt_classifier, param_grid, cv=5, scoring='accuracy')

# Fit the grid search to the training data produced by the earlier split
grid_search.fit(X_train, y_train)

# Print the best parameters and corresponding mean cross-validated accuracy
print("Best Parameters: ", grid_search.best_params_)
print("Best Cross-Validated Accuracy: {:.2f}".format(grid_search.best_score_))

# Evaluate the best estimator found by the search on the held-out test set
best_dt_classifier = grid_search.best_estimator_
test_accuracy = best_dt_classifier.score(X_test, y_test)
print("Test set accuracy: {:.2f}".format(test_accuracy))

Best Parameters:  {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'splitter': 'random'}
Best Cross-Validated Accuracy: 1.00
Test set accuracy: 1.00


In [10]:
# Save the fitted Decision Tree search (the whole GridSearchCV object).
#
# `grid_search` is rebound by every model section of this notebook, so guard
# against out-of-order execution writing a different model under this filename.
assert isinstance(grid_search.estimator, DecisionTreeClassifier), (
    "grid_search does not hold the Decision Tree search; re-run the Decision Tree cell first"
)

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs("GridSearchCV", exist_ok=True)
joblib.dump(grid_search, "GridSearchCV/tree_numbers.pkl")

['GridSearchCV/tree_numbers.pkl']

## Support Vector Machine

In [11]:
# Base support vector classifier whose hyper-parameters are tuned below.
svm_classifier = SVC()

# Candidates: regularisation strength C, kernel type and kernel coefficient gamma.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto'],
)

# Exhaustive search over the grid, scored by accuracy with 5-fold
# cross-validation on the training set. fit() returns the search object itself.
grid_search = GridSearchCV(
    estimator=svm_classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
).fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Best Parameters: ", grid_search.best_params_)
print("Best Cross-Validated Accuracy: {:.2f}".format(grid_search.best_score_))

# Score the best estimator found by the search on the held-out test set.
best_svm_classifier = grid_search.best_estimator_
test_accuracy = best_svm_classifier.score(X_test, y_test)
print("Test set accuracy: {:.2f}".format(test_accuracy))

Best Parameters:  {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}
Best Cross-Validated Accuracy: 1.00
Test set accuracy: 1.00


In [12]:
# Save the fitted SVM search (the whole GridSearchCV object).
#
# `grid_search` is rebound by every model section of this notebook, so guard
# against out-of-order execution writing a different model under this filename.
assert isinstance(grid_search.estimator, SVC), (
    "grid_search does not hold the SVM search; re-run the Support Vector Machine cell first"
)

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs("GridSearchCV", exist_ok=True)
joblib.dump(grid_search, "GridSearchCV/svm_numbers.pkl")

['GridSearchCV/svm_numbers.pkl']