In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, recall_score, precision_score
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

## Data Cleaning

In [2]:
df = pd.read_csv('../final_datasetBSL.csv')
df.columns = [i for i in range(df.shape[1])]
df = df.rename(columns={42: 'Output'})
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.655702,0.925128,1.050000e-06,0.682345,0.758577,-0.176642,0.649095,0.588104,-0.213796,0.655571,...,0.344230,0.579110,0.105051,0.412619,0.584032,0.089767,0.424564,0.626385,0.093815,uu
1,0.679762,0.926130,1.070000e-06,0.750432,0.768064,-0.162872,0.748235,0.583428,-0.197232,0.770389,...,0.423689,0.542225,0.080406,0.492506,0.567158,0.074484,0.491576,0.615617,0.082858,uu
2,0.603220,0.947350,1.410000e-06,0.679967,0.786868,-0.165490,0.679376,0.594078,-0.193451,0.679339,...,0.392822,0.580250,0.135206,0.464227,0.594151,0.120891,0.481252,0.633894,0.114396,uu
3,0.638866,0.913687,1.150000e-06,0.674359,0.739793,-0.176041,0.646234,0.569926,-0.213120,0.657998,...,0.335804,0.563531,0.126427,0.403648,0.562292,0.109442,0.419350,0.597216,0.111975,uu
4,0.660101,0.857356,9.900000e-07,0.702825,0.687018,-0.139452,0.682981,0.513898,-0.177975,0.693476,...,0.370128,0.543506,0.047628,0.443109,0.549186,0.041145,0.464826,0.590265,0.052348,uu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12282,0.511694,0.638892,-1.040000e-06,0.402440,0.568709,-0.029674,0.336785,0.456135,-0.043737,0.331267,...,0.613433,0.396834,-0.067425,0.603184,0.458521,-0.052200,0.591756,0.509309,-0.028870,e
12283,0.338759,0.520325,-9.640000e-07,0.256694,0.469144,-0.010858,0.207957,0.384196,-0.018250,0.197506,...,0.405297,0.328453,-0.062364,0.393271,0.374995,-0.050687,0.388584,0.420478,-0.032457,e
12284,0.404412,0.527195,-8.960000e-07,0.323074,0.469202,-0.017836,0.277114,0.375911,-0.028632,0.268199,...,0.494190,0.334309,-0.061868,0.485649,0.376657,-0.048413,0.473534,0.416851,-0.029561,e
12285,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,e


In [3]:
print("Uncleaned dataset shape =", df.shape)

Uncleaned dataset shape = (12287, 64)


In [4]:
# removing null values from our dataset

all_null_values = df[df.iloc[:, 0] == 0]
print("Number of null values =", len(all_null_values.index))

Number of null values = 1131


In [5]:
# dropping those null values from our dataset

df.drop(all_null_values.index, inplace=True)

In [6]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.655702,0.925128,1.050000e-06,0.682345,0.758577,-0.176642,0.649095,0.588104,-0.213796,0.655571,...,0.344230,0.579110,0.105051,0.412619,0.584032,0.089767,0.424564,0.626385,0.093815,uu
1,0.679762,0.926130,1.070000e-06,0.750432,0.768064,-0.162872,0.748235,0.583428,-0.197232,0.770389,...,0.423689,0.542225,0.080406,0.492506,0.567158,0.074484,0.491576,0.615617,0.082858,uu
2,0.603220,0.947350,1.410000e-06,0.679967,0.786868,-0.165490,0.679376,0.594078,-0.193451,0.679339,...,0.392822,0.580250,0.135206,0.464227,0.594151,0.120891,0.481252,0.633894,0.114396,uu
3,0.638866,0.913687,1.150000e-06,0.674359,0.739793,-0.176041,0.646234,0.569926,-0.213120,0.657998,...,0.335804,0.563531,0.126427,0.403648,0.562292,0.109442,0.419350,0.597216,0.111975,uu
4,0.660101,0.857356,9.900000e-07,0.702825,0.687018,-0.139452,0.682981,0.513898,-0.177975,0.693476,...,0.370128,0.543506,0.047628,0.443109,0.549186,0.041145,0.464826,0.590265,0.052348,uu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12281,0.367536,0.623313,-9.080000e-07,0.286119,0.546593,-0.013862,0.242720,0.457763,-0.024391,0.245307,...,0.448097,0.422324,-0.065120,0.427464,0.476553,-0.051281,0.418729,0.523540,-0.031376,e
12282,0.511694,0.638892,-1.040000e-06,0.402440,0.568709,-0.029674,0.336785,0.456135,-0.043737,0.331267,...,0.613433,0.396834,-0.067425,0.603184,0.458521,-0.052200,0.591756,0.509309,-0.028870,e
12283,0.338759,0.520325,-9.640000e-07,0.256694,0.469144,-0.010858,0.207957,0.384196,-0.018250,0.197506,...,0.405297,0.328453,-0.062364,0.393271,0.374995,-0.050687,0.388584,0.420478,-0.032457,e
12284,0.404412,0.527195,-8.960000e-07,0.323074,0.469202,-0.017836,0.277114,0.375911,-0.028632,0.268199,...,0.494190,0.334309,-0.061868,0.485649,0.376657,-0.048413,0.473534,0.416851,-0.029561,e


In [7]:
print("Cleaned dataset shape =", df.shape)

Cleaned dataset shape = (11156, 64)


## Data Preparation

In [8]:
X = df.iloc[:, :63]
print("Features shape =", X.shape)

Y = df.iloc[:, 63]
print("Labels shape =", Y.shape)

Features shape = (11156, 63)
Labels shape = (11156,)


## Data Split

In [9]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

## Model Making

In [10]:
svm = SVC(C=50, gamma=0.1, kernel='rbf')

In [11]:
svm.fit(x_train, y_train)

SVC(C=50, gamma=0.1)

In [12]:
print("Training score =", svm.score(x_train, y_train))

Training score = 0.9992155983863739


In [13]:
y_pred = svm.predict(x_test)

In [14]:
print("Testing score =", accuracy_score(y_test, y_pred))

Testing score = 0.9991039426523297


## Visualization

In [15]:
cf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
precision = precision_score(y_test, y_pred, average='micro')

In [16]:
f1, recall, precision

(0.9991039426523297, 0.9991039426523297, 0.9991039426523297)

In [18]:
labels = sorted(list(set(df['Output'])))
labels = [x.upper() for x in labels]

fig, ax = plt.subplots(figsize=(9, 9))

ax.set_title("Confusion Matrix - Indian Sign Language")

maping = sns.heatmap(cf_matrix, 
                     annot=True,
                     cmap = plt.cm.Blues, 
                     linewidths=.2,
                     xticklabels=labels,
                     yticklabels=labels, vmax=8,
                     ax=ax
                    )
maping

AttributeError: 'float' object has no attribute 'upper'

In [19]:
maping.figure.savefig("output.png")
import joblib

NameError: name 'maping' is not defined

In [18]:
joblib.dump(svm,'../model.pkl')

['../model.pkl']

In [17]:
import joblib