In [100]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, recall_score, precision_score
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import seaborn as sns
import matplotlib.pyplot as plt

## Data Cleaning

In [101]:
df = pd.read_csv('../final_datasetBSL.csv')
df.columns = [i for i in range(df.shape[1])]
df = df.rename(columns={42: 'Output'})
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
0,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1,,
1,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1,,
2,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1,,
3,0.503749,0.750119,-6.770000e-07,0.389167,0.670613,-0.084230,0.329903,0.553373,-0.145092,0.409233,...,-0.184079,0.796939,0.562214,-0.166877,0.731328,0.595657,-0.139746,1,,
4,0.444042,0.705644,-6.390000e-07,0.357433,0.604926,-0.046315,0.335348,0.507655,-0.072823,0.431247,...,-0.093008,0.696466,0.534845,-0.068607,0.652367,0.562143,-0.044197,1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11349,0.762818,0.469212,4.110000e-07,0.716889,0.400375,-0.007419,0.629883,0.359263,-0.015385,0.553812,...,-0.060937,0.472606,0.522587,-0.056836,0.440707,0.517673,-0.052005,6,,
11350,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,6,,
11351,0.220359,0.698914,2.320000e-07,0.267864,0.630138,-0.041846,0.358711,0.615007,-0.066627,0.443233,...,-0.114574,0.549138,0.790407,-0.118746,0.598481,0.776460,-0.116412,6,,
11352,0.272194,0.677432,-6.360000e-08,0.319713,0.601632,0.022952,0.400660,0.567798,0.024836,0.469747,...,-0.047339,0.555062,0.713081,-0.022709,0.554903,0.702406,-0.001947,6,,


In [102]:
print("Uncleaned dataset shape =", df.shape)

Uncleaned dataset shape = (11354, 66)


In [103]:
# removing null values from our dataset

all_null_values = df[df.iloc[:, 0] == 0]
print("Number of null values =", len(all_null_values.index))

Number of null values = 5866


In [104]:
# dropping those null values from our dataset

df.drop(all_null_values.index, inplace=True)

In [105]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
3,0.503749,0.750119,-6.770000e-07,0.389167,0.670613,-0.084230,0.329903,0.553373,-0.145092,0.409233,...,-0.184079,0.796939,0.562214,-0.166877,0.731328,0.595657,-0.139746,1,,
4,0.444042,0.705644,-6.390000e-07,0.357433,0.604926,-0.046315,0.335348,0.507655,-0.072823,0.431247,...,-0.093008,0.696466,0.534845,-0.068607,0.652367,0.562143,-0.044197,1,,
5,0.492974,0.867944,-4.370000e-07,0.381305,0.803903,-0.057753,0.328445,0.691551,-0.077181,0.414042,...,-0.101114,0.607647,0.662273,-0.094596,0.578731,0.681397,-0.082558,1,,
8,0.483482,0.943150,1.280000e-07,0.404109,0.838192,-0.098903,0.404909,0.679281,-0.142251,0.528050,...,-0.141401,0.797331,0.658915,-0.123175,0.747692,0.685885,-0.088171,1,,
11,0.377854,0.894570,-1.180000e-06,0.278479,0.718604,-0.035467,0.267706,0.597904,-0.054504,0.388219,...,-0.064081,0.693981,0.591330,-0.034705,0.618633,0.640284,-0.008541,1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11348,0.340367,0.588146,-6.030000e-07,0.393775,0.500936,0.006349,0.479433,0.474874,0.008889,0.542152,...,-0.018614,0.582816,0.637757,-0.001801,0.556686,0.635143,0.011711,6,,
11349,0.762818,0.469212,4.110000e-07,0.716889,0.400375,-0.007419,0.629883,0.359263,-0.015385,0.553812,...,-0.060937,0.472606,0.522587,-0.056836,0.440707,0.517673,-0.052005,6,,
11351,0.220359,0.698914,2.320000e-07,0.267864,0.630138,-0.041846,0.358711,0.615007,-0.066627,0.443233,...,-0.114574,0.549138,0.790407,-0.118746,0.598481,0.776460,-0.116412,6,,
11352,0.272194,0.677432,-6.360000e-08,0.319713,0.601632,0.022952,0.400660,0.567798,0.024836,0.469747,...,-0.047339,0.555062,0.713081,-0.022709,0.554903,0.702406,-0.001947,6,,


In [106]:
print("Cleaned dataset shape =", df.shape)

Cleaned dataset shape = (5488, 66)


## Data Preparation

In [107]:
X = df.iloc[:, :-1]
print("Features shape =", X.shape)

Y = df.iloc[:, -1]
print("Labels shape =", Y.shape)

Features shape = (5488, 65)
Labels shape = (5488,)


## Data Split

In [108]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

## Model Making

In [109]:
svm = SVC(C=50, gamma=0.1, kernel='rbf')

In [110]:
svm.fit(x_train, y_train)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [95]:
print("Training score =", svm.score(x_train, y_train))

Training score = 0.9982738780207134


In [96]:
y_pred = svm.predict(x_test)

In [97]:
print("Testing score =", accuracy_score(y_test, y_pred))

Testing score = 0.9942462600690449


## Visualization

In [98]:
cf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='micro')
recall = recall_score(y_test, y_pred, average='micro')
precision = precision_score(y_test, y_pred, average='micro')

In [99]:
f1, recall, precision

(0.9942462600690449, 0.9942462600690449, 0.9942462600690449)

In [101]:
labels = sorted(list(set(df['Output'])))
labels = [x.upper() for x in labels]

fig, ax = plt.subplots(figsize=(12, 12))

ax.set_title("Confusion Matrix - American Sign Language")

maping = sns.heatmap(cf_matrix, 
                     annot=True,
                     cmap = plt.cm.Blues, 
                     linewidths=.2,
                     xticklabels=labels,
                     yticklabels=labels, vmax=8,
                     fmt='g',
                     ax=ax
                    )
maping

AttributeError: 'float' object has no attribute 'upper'

In [102]:
maping.figure.savefig("output.png")

NameError: name 'maping' is not defined

In [103]:
import joblib

In [104]:
joblib.dump(svm,'model.pkl')

['model.pkl']

In [52]:
import numpy as np

In [53]:
np.isnan(df.any())

0     False
1     False
2     False
3     False
4     False
      ...  
61    False
62    False
63    False
64    False
65    False
Length: 66, dtype: bool

In [55]:
np.isfinite(df.all())

0     True
1     True
2     True
3     True
4     True
      ... 
61    True
62    True
63    True
64    True
65    True
Length: 66, dtype: bool

In [99]:
np.isnan(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
8,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
11,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11348,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
11349,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
11351,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
11352,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
