In [None]:
import os

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


In [None]:
# Dataset locations, relative to the notebook's working directory.
# test_dir: separate test image folder (not read by any cell shown here).
test_dir = './data/asl_alphabet_test'
# train_dir: contains one sub-folder of images per class; read by load_data().
train_dir = './data/asl_alphabet_train'

In [None]:
# Class name -> integer id. The 26 letters take ids 0-25 in alphabetical
# order, followed by the three special classes 'space', 'del' and 'nothing'.
labels_dict = {
    class_name: class_id
    for class_id, class_name in enumerate(
        [*'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'space', 'del', 'nothing']
    )
}

def load_data(test_size=0.1, random_state=42):
    """Load the images under ``train_dir`` and split them into train/test sets.

    Each sub-folder of ``train_dir`` whose name is a key of ``labels_dict`` is
    treated as one class. Every readable image in it is resized to 64x64,
    scaled to the [0, 1] range as float32 and paired with the folder's
    integer label, which is then one-hot encoded.

    Entries that are not directories, directories that are not in
    ``labels_dict`` and files OpenCV cannot decode are skipped (a short notice
    is printed) instead of aborting the whole load.

    Args:
        test_size: Fraction (or absolute number) of samples held out for
            testing; forwarded to ``train_test_split``.
        random_state: Seed for the shuffle/split. Pass ``None`` for a
            different random split on every call.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)`` where the ``x`` arrays
        hold the images and the ``y`` arrays hold the one-hot label rows.

    Raises:
        ValueError: If no readable image was found under ``train_dir``.
    """
    size = (64, 64)
    images_list = []
    labels_list = []
    unreadable = 0

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded split) reproducible across machines.
    for folder in sorted(os.listdir(train_dir)):
        folder_path = f'{train_dir}/{folder}'
        if not os.path.isdir(folder_path):
            # stray files such as .DS_Store are not class folders
            continue
        if folder not in labels_dict:
            print(f'Skipping folder with unknown label: {folder_path}')
            continue

        label = labels_dict[folder]
        for imgfile in sorted(os.listdir(folder_path)):
            curr_img = cv2.imread(f'{folder_path}/{imgfile}')
            if curr_img is None:
                # cv2.imread signals a missing/corrupt/non-image file with None
                unreadable += 1
                continue
            images_list.append(cv2.resize(curr_img, size))
            labels_list.append(label)

    if unreadable:
        print(f'Skipped {unreadable} unreadable file(s)')
    if not images_list:
        raise ValueError(f'No readable images found under {train_dir}')

    # convert once to float32, then normalize in place to the [0, 1] range
    images_arr = np.array(images_list, dtype='float32')
    images_arr /= 255.0

    # Fix the one-hot width to the full label set so the encoding has one
    # column per known class even if the highest-numbered classes are absent.
    num_classes = max(labels_dict.values()) + 1
    labels_arr = keras.utils.to_categorical(labels_list, num_classes=num_classes)

    # split into training and testing data
    x_train, x_test, y_train, y_test = train_test_split(
        images_arr, labels_arr, test_size=test_size, random_state=random_state
    )

    print(f'Loaded {len(x_train)} images for training')
    print(f'Training data shape: {x_train.shape}')
    print(f'Training labels shape: {y_train.shape}\n')
    print(f'Loaded {len(x_test)} images for testing')
    print(f'Testing data shape: {x_test.shape}')
    print(f'Testing labels shape: {y_test.shape}')

    return x_train, x_test, y_train, y_test

In [None]:
# Load, normalize and split the dataset once; y_train / y_test hold one-hot label rows.
x_train, x_test, y_train, y_test = load_data()

In [None]:
# Collapse each one-hot label row back to its integer class id
# (the column index of the largest entry in the row).
y_train_int = y_train.argmax(axis=1)
y_test_int = y_test.argmax(axis=1)

In [None]:
# Flatten every image into a single feature vector so the scikit-learn
# estimators below receive a 2-D (samples, features) matrix.
# The 4-way unpacking also fails loudly if an array is not 4-dimensional.
nsamples, nx, ny, rgb = x_train.shape
testsamples, tx, ty, trgb = x_test.shape

# -1 lets NumPy infer the flattened length (nx*ny*rgb resp. tx*ty*trgb).
new_x_train = x_train.reshape(nsamples, -1)
new_x_test = x_test.reshape(testsamples, -1)

In [None]:
#k-nearest neighbors
# Evaluate KNN for several neighborhood sizes and report train/test metrics.
neighbors = [5, 10, 20]
for i in neighbors:
    knn = KNeighborsClassifier(n_neighbors=i)
    # Fit on the integer class ids, not the one-hot rows: a 2-D target makes
    # scikit-learn treat every column as an independent binary output, so a
    # sample without a majority class is predicted as an all-zero row, which
    # argmax then silently turns into class 0.
    knn = knn.fit(new_x_train, y_train_int)

    knntrain_int = knn.predict(new_x_train)
    print(f'KNN (k={i}) - training set')
    print(classification_report(y_train_int, knntrain_int, digits=4))

    # after the loop, knntest_int holds the test predictions of the last k
    knntest_int = knn.predict(new_x_test)
    print(f'KNN (k={i}) - test set')
    print(classification_report(y_test_int, knntest_int, digits=4))


In [None]:
# Confusion matrix of the test predictions left over from the last KNN fit.
# `labels` must be passed by keyword: it is keyword-only in current scikit-learn.
cm = confusion_matrix(y_test_int, knntest_int, labels=list(range(0, 29)))
# Tick labels: class names ordered by their integer id.
class_names = sorted(labels_dict, key=labels_dict.get)
# Named cm_display so it does not shadow IPython's built-in display().
cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=class_names)
fig, ax = plt.subplots(figsize=(15, 15))
cm_display.plot(ax=ax)
ax.set_title(f'KNN (k={knn.n_neighbors}) - test set confusion matrix');

In [None]:
# Random Forest
# Evaluate forests of increasing size and report train/test metrics.

trees = [5, 10, 20]
for i in trees:
    # fixed seed so the reported numbers are reproducible between runs
    rf = RandomForestClassifier(n_estimators=i, random_state=42)
    # Fit on the integer class ids, not the one-hot rows: a 2-D target is
    # handled as multi-output classification, where a predicted row can be
    # all zeros (argmax -> class 0) or contain several ones.
    rf = rf.fit(new_x_train, y_train_int)

    rftrain_int = rf.predict(new_x_train)
    print(f'Random Forest ({i} trees) - training set')
    print(classification_report(y_train_int, rftrain_int, digits=4))

    # after the loop, rftest_int holds the test predictions of the last forest
    rftest_int = rf.predict(new_x_test)
    print(f'Random Forest ({i} trees) - test set')
    print(classification_report(y_test_int, rftest_int, digits=4))

In [None]:
# Confusion matrix of the test predictions left over from the last Random Forest fit.
# `labels` must be passed by keyword: it is keyword-only in current scikit-learn.
cm = confusion_matrix(y_test_int, rftest_int, labels=list(range(0, 29)))
# Tick labels: class names ordered by their integer id.
class_names = sorted(labels_dict, key=labels_dict.get)
# Named cm_display so it does not shadow IPython's built-in display().
cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=class_names)
fig, ax = plt.subplots(figsize=(15, 15))
cm_display.plot(ax=ax)
ax.set_title(f'Random Forest ({rf.n_estimators} trees) - test set confusion matrix');

In [None]:
# Decision Tree with AdaBoost
# Boost decision trees for several ensemble sizes and report train/test metrics.

base = [5, 10, 20]
for i in base:
    # fixed seeds so the reported numbers are reproducible between runs
    model = DecisionTreeClassifier(random_state=42)
    # The weak learner is passed positionally: its keyword was renamed from
    # `base_estimator` to `estimator` in newer scikit-learn releases, while
    # the first positional slot works with both.
    abc = AdaBoostClassifier(model, n_estimators=i, random_state=42)
    abc = abc.fit(new_x_train, y_train_int)

    # The model was fitted on integer class ids, so predict() already returns
    # a 1-D array of ids; taking argmax over axis=1 of it raises an AxisError.
    abctrain = abc.predict(new_x_train)
    print(f'AdaBoost ({i} estimators) - training set')
    print(classification_report(y_train_int, abctrain, digits=4))

    # after the loop, abctest holds the test predictions of the last ensemble
    abctest = abc.predict(new_x_test)
    print(f'AdaBoost ({i} estimators) - test set')
    print(classification_report(y_test_int, abctest, digits=4))

In [None]:
# Confusion matrix of the test predictions left over from the last AdaBoost fit.
# `labels` must be passed by keyword: it is keyword-only in current scikit-learn.
cm = confusion_matrix(y_test_int, abctest, labels=list(range(0, 29)))
# Tick labels: class names ordered by their integer id.
class_names = sorted(labels_dict, key=labels_dict.get)
# Named cm_display so it does not shadow IPython's built-in display().
cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=class_names)
fig, ax = plt.subplots(figsize=(15, 15))
cm_display.plot(ax=ax)
ax.set_title(f'AdaBoost ({abc.n_estimators} estimators) - test set confusion matrix');