In [None]:
import kaggle as kg
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import to_categorical

from keras.layers import Flatten, Dense
from keras.applications import VGG16
from keras.models import Model
from keras.optimizers import SGD

In [None]:
# Kaggle credentials are taken from the environment (or asked for interactively)
# instead of being committed with the notebook. The API key that used to be
# hard-coded in this cell has been exposed and should be revoked on kaggle.com.
# NOTE(review): the kaggle package may already try to authenticate when it is
# imported; if so, export these variables before starting the kernel -- confirm.
import getpass

if not os.environ.get("KAGGLE_USERNAME"):
    os.environ["KAGGLE_USERNAME"] = input("Kaggle username: ")

if not os.environ.get("KAGGLE_KEY"):
    # getpass keeps the key out of the cell output and the saved notebook.
    os.environ["KAGGLE_KEY"] = getpass.getpass("Kaggle API key: ")

In [None]:
# Authenticate the Kaggle API client.
# Presumably this picks up the KAGGLE_USERNAME / KAGGLE_KEY environment variables -- confirm.
kg.api.authenticate()

In [None]:
# Download the "ayuraj/asl-dataset" Kaggle dataset into the local "dataset"
# directory and unzip it there.
kg.api.dataset_download_files(dataset="ayuraj/asl-dataset",
                              path="dataset",unzip=True)

In [None]:
def train_test_df(path):
    """Index every ``*.jpeg`` image stored one directory level below ``path``.

    Each immediate sub-directory of ``path`` is treated as one class: its name
    becomes the label of the ``*.jpeg`` files directly inside it. A
    sub-directory named ``asl_dataset`` is skipped.

    Parameters
    ----------
    path : str or os.PathLike
        Directory whose sub-directories hold the images of one class each.

    Returns
    -------
    pandas.DataFrame
        Columns ``img_path`` (image path as ``str``) and ``label`` (name of
        the containing directory), ordered by directory name and then by file
        name. The frame is empty when no image is found.
    """
    img_path = []
    img_label = []

    # Sorted traversal keeps the row order (and every split derived from it)
    # independent of the order in which the filesystem lists entries.
    for class_dir in sorted(pathlib.Path(path).glob("*")):

        # Path.name instead of splitting the string on "/" so the check also
        # works with Windows path separators.
        if class_dir.name == "asl_dataset" or not class_dir.is_dir():
            continue

        for image_file in sorted(class_dir.glob("*.jpeg")):
            img_path.append(str(image_file))
            img_label.append(class_dir.name)

    return pd.DataFrame(data={"img_path": img_path, "label": img_label})

In [None]:
# Directory that is scanned for per-class image folders.
data_path = "dataset/asl_dataset"

In [None]:
# Build the (img_path, label) index of every image found under data_path.
data = train_test_df(data_path)

In [None]:
# Display the image index.
data

In [None]:
# Report how many images were indexed for each label, in order of first appearance.
for label, n_images in data.groupby("label", sort=False).size().items():
    print("There are {} number of Images for alphabet {}".format(n_images, label))

In [None]:
# Start both splits as empty frames with the same two columns as `data`.
training_data = pd.DataFrame(data={"img_path":[],"label":[]})
testing_data = pd.DataFrame(data={"img_path":[],"label":[]})

In [None]:
# Per-class split: the first 60 rows of every label go to the training split,
# the remaining rows of that label go to the testing split.
#
# Both splits are rebuilt from `data` on every run, so executing this cell
# again does not append the same rows a second time, and the pieces are
# concatenated once instead of growing a frame inside the loop.
train_parts = []
test_parts = []

for label, class_rows in data.groupby("label", sort=False):
    train_parts.append(class_rows.iloc[:60])
    test_parts.append(class_rows.iloc[60:])

# pd.concat rejects an empty list, so fall back to an empty slice of `data`.
training_data = pd.concat(train_parts, axis=0) if train_parts else data.iloc[0:0].copy()
testing_data = pd.concat(test_parts, axis=0) if test_parts else data.iloc[0:0].copy()

In [None]:
# Display the training split.
training_data

In [None]:
# Display the testing split.
testing_data

In [None]:
# Renumber the training rows 0..n-1, discarding the index inherited from `data`.
training_data = training_data.reset_index(drop=True)

In [None]:
# Display the training split after the index reset.
training_data

In [None]:
# Renumber the testing rows 0..n-1, discarding the index inherited from `data`.
testing_data = testing_data.reset_index(drop=True)

In [None]:
# Display the testing split after the index reset.
testing_data

In [None]:
# Map every label to an integer id, numbered in order of first appearance
# in the training split.
character2int = {
    character: index
    for index, character in enumerate(training_data["label"].unique())
}

In [None]:
def _encode_label(label):
    """Return the integer id of ``label``; values without a mapping pass through.

    Passing unknown values through keeps this cell re-runnable: labels that
    were already converted to integers are left untouched.
    """
    return character2int.get(label, label)


# Assign the encoded column back instead of calling an in-place method on
# `frame["label"]`: that selection can be a temporary copy, in which case the
# in-place replacement never reaches the frame.
# The explicit int64 cast guarantees an integer dtype; it raises if a
# non-numeric label is left unmapped.
training_data["label"] = training_data["label"].map(_encode_label).astype("int64")
testing_data["label"] = testing_data["label"].map(_encode_label).astype("int64")

In [None]:
# Display the training split with integer-encoded labels.
training_data

In [None]:
# Display the testing split with integer-encoded labels.
testing_data

In [None]:
# Show the image whose path is stored in the second row (first column) of the training split.
plt.imshow(plt.imread(training_data.iloc[1,0]))

In [None]:
# Show the image whose path is stored in the first row (first column) of the testing split.
plt.imshow(plt.imread(testing_data.iloc[0,0]))

In [None]:
# One-hot encode the integer labels of both splits into 36 columns.
# The labels are passed positionally because the name of the first parameter
# of to_categorical differs between Keras releases (`y` vs `x`).
Y_true_train = to_categorical(training_data["label"].to_numpy(), num_classes=36)
Y_true_test = to_categorical(testing_data["label"].to_numpy(), num_classes=36)

In [None]:
def multiclass_cnn(num_classes=36, input_shape=(400,400,3)):
    """Build a frozen VGG16 feature extractor topped with a softmax classifier.

    Parameters
    ----------
    num_classes : int, default 36
        Number of units of the final softmax layer.
    input_shape : tuple of int, default (400, 400, 3)
        Shape of one input image, passed to ``VGG16(input_shape=...)``.

    Returns
    -------
    keras.models.Model
        Model mapping an image batch to ``num_classes`` class probabilities.
        The VGG16 base is marked non-trainable, so only the final Dense layer
        has trainable weights.

    Notes
    -----
    NOTE(review): the ImageNet weights are normally paired with
    ``keras.applications.vgg16.preprocess_input``; nothing in this function
    applies it, so callers presumably need to preprocess the images - confirm.
    """
    vgg16 = VGG16(include_top=False,input_shape=input_shape,weights="imagenet",pooling="avg")
    vgg16.trainable = False

    # Use the model-level input/output tensors rather than reaching into
    # individual layers, and hand them to Model as plain tensors (not
    # one-element lists) so calling the model returns a single tensor.
    class_probabilities = Dense(units=num_classes,activation="softmax")(vgg16.output)

    return Model(inputs=vgg16.input,outputs=class_probabilities)

In [None]:
# Build a model only to print its layer summary; the instance is not kept.
multiclass_cnn().summary()

In [None]:
def custom_data_generator(data_df, Y_true, mb_size, featurewise_center=False, samplewise_center=False,
    featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, zca_epsilon=1e-06,
    rotation_range=0, width_shift_range=0.0, height_shift_range=0.0, brightness_range=None, shear_range=0.0,
    zoom_range=0.0, channel_shift_range=0.0, fill_mode='nearest', cval=0.0, horizontal_flip=False, vertical_flip=False,
    rescale=None, preprocessing_function=None, data_format=None, validation_split=0.0, interpolation_order=1,
    dtype=None, image_loader=None):
    """Yield ``(X_mb, Y_true_mb)`` mini-batches, loading the images lazily.

    Rows are consumed in order, ``mb_size`` at a time. When the number of rows
    is not a multiple of ``mb_size`` the last batch is shorter, so every
    sample is yielded exactly once per pass.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame whose first column holds the image paths.
    Y_true : sliceable
        Targets aligned row-by-row with ``data_df``.
    mb_size : int
        Number of images per mini-batch; must be positive.
    rescale : float, optional
        If given (and non-zero), every image is multiplied by this factor
        after ``preprocessing_function`` has been applied.
    preprocessing_function : callable, optional
        Applied to every loaded image; its return value replaces the image.
    dtype : numpy dtype, optional
        dtype of the stacked image batch; inferred by NumPy when ``None``.
    image_loader : callable, optional
        ``path -> array`` loader, defaulting to ``plt.imread``.
    featurewise_center, samplewise_center, featurewise_std_normalization,
    samplewise_std_normalization, zca_whitening, zca_epsilon, rotation_range,
    width_shift_range, height_shift_range, brightness_range, shear_range,
    zoom_range, channel_shift_range, fill_mode, cval, horizontal_flip,
    vertical_flip, data_format, validation_split, interpolation_order
        Accepted but not used by this generator.

    Yields
    ------
    tuple
        ``(X_mb, Y_true_mb)`` where ``X_mb`` is ``np.array`` of the loaded
        images and ``Y_true_mb`` is the matching slice of ``Y_true``.

    Raises
    ------
    ValueError
        If ``mb_size`` is not positive (raised when iteration starts).
    """
    if mb_size <= 0:
        raise ValueError("mb_size must be a positive integer, got {!r}".format(mb_size))

    if image_loader is None:
        image_loader = plt.imread

    n_samples = data_df.shape[0]

    # Stepping through the rows (instead of looping n_samples // mb_size times)
    # also yields the trailing partial batch rather than silently dropping it.
    for start in range(0, n_samples, mb_size):
        stop = start + mb_size
        X_mb = list()

        for img_path in data_df.iloc[start:stop,0]:

            img_np_array = image_loader(img_path)

            if preprocessing_function is not None:
                img_np_array = preprocessing_function(img_np_array)
            if rescale:
                img_np_array = img_np_array * rescale

            X_mb.append(img_np_array)

        # dtype=None lets NumPy infer the dtype from the images.
        X_mb = np.array(X_mb, dtype=dtype)
        Y_true_mb = Y_true[start:stop]

        yield X_mb, Y_true_mb

In [None]:
epochs = 50  # number of passes over the training split
training_data_mb_size = 2  # images per training mini-batch
testing_data_mb_size = 5  # images per testing mini-batch

In [None]:
# Instantiate the model used by the training and testing steps and print its layers.
model = multiclass_cnn()
model.summary()

In [None]:
def loss_fn(Y_true_mb,Y_pred_mb):
    """Return the mean categorical cross-entropy of one mini-batch.

    ``Y_true_mb`` holds the one-hot targets and ``Y_pred_mb`` the predicted
    class probabilities; the cross-entropy values computed by Keras are
    averaged into a single scalar tensor.
    """
    cross_entropy = tf.keras.losses.categorical_crossentropy(
        y_true=Y_true_mb,
        y_pred=Y_pred_mb,
    )
    return tf.reduce_mean(cross_entropy)


# SGD with the library's default settings.
optimizer = SGD()

In [None]:
@tf.function
def training_step(X_train_mb,Y_true_train_mb):
    """Run one optimisation step on a mini-batch and return its loss.

    Performs the forward pass in training mode, applies the gradients of the
    loss to the model's trainable weights with the module-level ``optimizer``
    and feeds the predictions to ``train_acc_metric``.
    """
    weights = model.trainable_weights

    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        predictions = model(X_train_mb, training=True)
        training_loss = loss_fn(Y_true_train_mb, predictions)

    gradients = tape.gradient(training_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    train_acc_metric.update_state(Y_true_train_mb, predictions)

    return training_loss

In [None]:
@tf.function
def testing_forward_pass(X_test_mb,Y_true_test_mb):
    """Evaluate one mini-batch in inference mode and return its loss.

    No weights are updated; the predictions are only used to compute the loss
    and to update ``test_acc_metric``.
    """
    predictions = model(X_test_mb, training=False)
    test_acc_metric.update_state(Y_true_test_mb, predictions)

    return loss_fn(Y_true_test_mb, predictions)

In [None]:
# Accuracy accumulators; both are cleared at the end of every epoch so each
# reported value covers exactly one pass over the corresponding split.
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

for epoch in range(epochs):

    # A generator is exhausted after one pass, so a new one is created per epoch.
    training_data_generator = custom_data_generator(training_data,Y_true_train,training_data_mb_size)

    for time_step, (X_train_mb, Y_true_train_mb) in enumerate(training_data_generator):
        training_loss = training_step(X_train_mb,Y_true_train_mb)

        if (time_step+1) % 50 == 0:
            print("Epoch %d, Time Step %d, Training loss for one mini batch: %.4f"
            % (epoch+1, time_step+1, float(training_loss)))

    training_acc = train_acc_metric.result()
    print("Epoch %d, Training Accuracy: %.2f" % (epoch+1,float(training_acc)))
    # reset_state() replaces the deprecated reset_states() spelling.
    train_acc_metric.reset_state()

    testing_data_generator = custom_data_generator(testing_data,Y_true_test,testing_data_mb_size)

    # Stays None when the testing split yields no batch, so the print below
    # cannot hit an unbound name.
    testing_loss = None

    for X_test_mb, Y_true_test_mb in testing_data_generator:
        testing_loss = testing_forward_pass(X_test_mb,Y_true_test_mb)

    if testing_loss is not None:
        print("\nEpoch %d, Testing Loss for last mini batch: %.4f" % (epoch+1,float(testing_loss)))
    testing_acc = test_acc_metric.result()
    print("Epoch %d, Testing Accuracy: %.2f" % (epoch+1,float(testing_acc)))
    test_acc_metric.reset_state()

    print("\n\n")