In [None]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Sequential, Model
from keras.applications.vgg16 import VGG16

## Gesture Recognition

American Sign Language is a natural language serving as the sign language of Deaf communities in the United States of America, as well as Anglophone Canada.  As with any sign language, it employs hand movements combined with facial expressions and body posture to convey expressions, words, and ideas.

This notebook's goal is to create a Convolutional Neural Network that can identify signs made in ASL.  The specific set of signs focused on here is the English alphabet, which is pictured below.

<img src = 'notebook/alphabet_image.png' width="300">

## Data

The following code cell sets the dimensions, hyperparameters, and folders for training and testing the models.

The data is not included in the repository.  Either acquire the same data, as specified in the README, or modify the path strings below to point to your own copy.

In [None]:
# Training hyperparameters.
epochs = 3
batch_size = 32

# Images are resized to a square of this size before being fed to the models.
img_height = img_width = 200

# Dataset locations, relative to the notebook's working directory.
data_dir = "data/asl_alphabet_train"
test_dir = "data/asl_alphabet_test/asl_alphabet_test"

### Preprocessing

Both the training and validation data are loaded and split using the Keras `image_dataset_from_directory` preprocessing function.

The split is 80 / 20, using a fixed (arbitrarily chosen) seed so that it is reproducible.

In [None]:
# Training subset: the 80% side of the 80/20 split. The seed and split
# fraction are the same values used for the validation subset.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  directory=data_dir,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  validation_split=0.2,
  subset="training",
  seed=123,
)

In [None]:
# Class names reported by the training dataset; each name's position in this
# list is later used as its integer label when labelling the test images.
classes = train_ds.class_names

In [None]:
# Validation subset: the 20% side of the 80/20 split. The seed and split
# fraction are the same values used for the training subset.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  directory=data_dir,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  validation_split=0.2,
  subset="validation",
  seed=123,
)

Data is normalized using the keras Rescaling function.

In [None]:
# Multiply every pixel by 1/255 before it reaches the model.
normalization_layer = tf.keras.layers.Rescaling(1./255)


def _rescale_batch(images, labels):
  """Rescale a batch of images and pass its labels through unchanged."""
  return normalization_layer(images), labels


# NOTE: both names are rebound to their mapped versions, so re-running this
# cell without re-creating the datasets applies the rescale a second time.
train_ds = train_ds.map(_rescale_batch)
val_ds = val_ds.map(_rescale_batch)

### Test Data Breakdown

Unlike the training data, the test data used for this notebook is not labeled in a way that the Keras preprocessing function can process.

This code iterates through all the files in the test directory and derives each image's label from its filename, using the dictionary built from the training set classes.

In [None]:
# Lookup from class name to the integer label index used by the training set.
class_dict = {name: index for index, name in enumerate(classes)}

image_array = []
label_array = []

# Sort the directory listing so the test set is built in a reproducible order.
for item in sorted(os.listdir(test_dir)):
  if not item.lower().endswith('.jpg'):
    continue

  # The label is the filename token before the first underscore, so that
  # multi-character class names (e.g. "nothing_test.jpg") resolve correctly.
  # Fall back to the first character for names such as "A1.jpg".
  stem = os.path.splitext(item)[0]
  label = class_dict.get(stem.split('_')[0])
  if label is None:
    label = class_dict.get(item[0])
  if label is None:
    # A None label would turn label_array into an object array and break
    # model.evaluate(), so report and skip files that match no class.
    print(f"Skipping {item!r}: filename does not match any training class")
    continue

  image = tf.io.read_file(os.path.join(test_dir, item))
  image = tf.image.decode_image(image, channels=3)
  # Converting to float32 scales the decoded pixels into [0, 1], in line with
  # the 1/255 rescale applied to the training and validation datasets.
  image = tf.image.convert_image_dtype(image, tf.float32)
  # Use the configured size so the test images match the training input size.
  image = tf.image.resize(image, size=(img_height, img_width))

  image_array.append(image)
  label_array.append(label)

In [None]:
# Convert the accumulated Python lists into NumPy arrays for model.evaluate().
image_array, label_array = (
  np.asarray(collected) for collected in (image_array, label_array)
)

### Plotting Function

Plots the training and validation curves.

In [None]:
def plot_his(history):
  """Draw train-vs-validation curves, first for loss and then for accuracy.

  Args:
    history: Object whose ``history`` attribute is a mapping containing the
      keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy', each holding a
      sequence of per-epoch values (such as the value returned by
      ``model.fit``).
  """
  curves = history.history
  for key, label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    # One figure per metric: training curve first, validation curve second.
    plt.plot(curves[key])
    plt.plot(curves['val_' + key])
    plt.title('Model ' + label)
    plt.ylabel(label)
    plt.xlabel('Epochs')
    plt.legend(['train', 'validation'])
    plt.show()

## CNN Model

### Model Construction

This is a small model to test the capabilities of a CNN model in identifying signs.

The model is kept relatively small for the purposes of keeping runtime low.

In [None]:
# Small CNN: two strided convolution/pooling stages followed by a dense
# classifier. The input size and the number of output units are taken from the
# configuration and from the training classes rather than hard-coded, so the
# network stays valid if the image size or the set of classes changes.
num_classes = len(classes)

model = Sequential([
  tf.keras.Input(shape=(img_height, img_width, 3)),

  tf.keras.layers.Conv2D(32, (5, 5),
                         strides=3,
                         padding='same',
                         activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.BatchNormalization(),

  tf.keras.layers.Conv2D(32, (3, 3),
                         strides=2,
                         padding='same',
                         activation='relu'),
  tf.keras.layers.MaxPooling2D(),

  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(108, activation='relu'),
  tf.keras.layers.Dense(108, activation='relu'),
  tf.keras.layers.Dense(59, activation='relu'),

  # One raw logit per class; the loss below is configured with
  # from_logits=True, so no softmax is applied here.
  tf.keras.layers.Dense(num_classes),
])

model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

### Model Training

In [None]:
# Checkpoint callback: writes the model to `filepath` only when the monitored
# validation accuracy improves on the best value seen so far.
# NOTE(review): newer Keras releases may reject checkpoint paths that do not
# end in `.keras` or `.h5` - confirm `.hdf5` is accepted by the installed version.
filepath = "data/best_model.hdf5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
  filepath,
  monitor='val_accuracy',
  save_best_only=True,
  verbose=1,
)
callbacks_list = [checkpoint]

# Train the CNN for the configured number of epochs, validating after each one.
history_3 = model.fit(
  train_ds,
  epochs=epochs,
  validation_data=val_ds,
  callbacks=callbacks_list,
)

In [None]:
# Loss and accuracy curves for the CNN training run above.
plot_his(history_3)

In [None]:
# Score the CNN as it stands after training (the saved checkpoint is not
# reloaded here) on the separately loaded test images and labels.
model.evaluate(image_array, label_array)

The simple model above displays a high degree of capability in identifying the set of signs.
A larger model should be able to do much better, and these models should be applicable to a larger set of ASL.

## Transfer Learning

Training a large model from scratch for this task is redundant when an already available pretrained model can be adapted to it.  For this task, the VGG16 pretrained model from the Keras library will be reused for the ASL English alphabet.

In [None]:
# Pretrained VGG16 convolutional base without its ImageNet classifier head.
# The input size follows the configured image size so it always matches the
# images produced by the data pipeline.
# NOTE(review): the ImageNet VGG16 weights are normally paired with the
# `keras.applications.vgg16.preprocess_input` transform, whereas the datasets
# here are scaled to [0, 1] - confirm this difference is intended.
base_model = VGG16(weights='imagenet',
                   include_top=False,
                   input_shape=(img_height, img_width, 3),
                   pooling=None)

base_model.summary()

In [None]:
# Freeze the base model: list each pretrained layer and mark it as
# non-trainable so that only the layers added afterwards are trained.
for pretrained_layer in base_model.layers:
    print(pretrained_layer)
    pretrained_layer.trainable = False

In [None]:
# Two hidden layers and an output layer are added for training.
# The output layer has one raw logit per training class (not a hard-coded 26),
# so it always covers every label index produced by the dataset.
num_classes = len(classes)

x = tf.keras.layers.Flatten()(base_model.output)
for hidden_units in (108, 108):
    x = tf.keras.layers.Dense(hidden_units, activation='relu')(x)
x = tf.keras.layers.Dense(num_classes)(x)

In [None]:
# Join the VGG16 base and the new classification head into a single model.
# This rebinds `model`, replacing the CNN defined earlier in the notebook.
model = Model(inputs=base_model.input, outputs=x)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Checkpoint file names embed the epoch number and the validation accuracy; a
# file is written only when validation accuracy improves.
# NOTE(review): newer Keras releases may reject checkpoint paths that do not
# end in `.keras` or `.h5` - confirm `.hdf5` is accepted by the installed version.
filepath = "pweights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

history_p = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    callbacks=callbacks_list,
)

In [None]:
# Loss and accuracy curves for the transfer-learning training run above.
plot_his(history_p)

In [None]:
# Score the transfer-learning model as it stands after training (no checkpoint
# is reloaded here) on the separately loaded test images and labels.
model.evaluate(image_array, label_array)