In [1]:
# Libraries for data storage and image processing
'''!python --version
!pip install numpy
!pip install pandas
!pip install pillow
!pip install tensorflow
!pip install keras
!pip install matplotlib
!pip install seaborn
!pip install sklearn
!pip install opencv-python '''

import numpy as np
import pandas as pd
import os, re
from PIL import Image, ImageEnhance

# TensorFlow/Keras
import tensorflow as tf

from tensorflow import keras
from keras.layers import Dense, Input
from keras.models import Sequential
from keras import metrics, models, layers
from keras.preprocessing import image
import matplotlib.pyplot as plt
import seaborn as sns

import cv2

# SKLearn Libraries
from sklearn.model_selection import train_test_split

Now we read in the data from the local directory where it is stored, and process the images.

In [2]:
# We will define the paths to our files, each of these sets are for a different training environment
# Current directory structure: train has subdirectories A-Z, each with images stored inside,
# test just has the files with the letter, label is in file name.

# train_images = '../dataset/asl_alphabet_train/asl_alphabet_train'
# test_images = '../dataset/asl_alphabet_test/asl_alphabet_test'

# train_images = '/content/drive/My Drive/dataset/asl_alphabet_train/asl_alphabet_train'
# test_images = '/content/drive/My Drive/dataset/asl_alphabet_test/asl_alphabet_test'

train_images = '/lustre/isaac/scratch/jdosch1/dataset_COSC307/asl_alphabet_train/asl_alphabet_train'
test_images = '/lustre/isaac/scratch/jdosch1/dataset_COSC307/asl_alphabet_test/asl_alphabet_test'

In [3]:
!nvidia-smi

'nvidia-smi' is not recognized as an internal or external command,
operable program or batch file.


# Load Data #

Read in the data from the local directory and store it in a pandas DataFrame. We will display the first 5 rows with df.head() to validate our result.

In [4]:
train_data = pd.DataFrame()
categories = []
data = []
exclude_columns = {'del', 'asl_alphabet_train', 'nothing', 'space', 'J', 'Z'}

# For dataset restriction
include_columns = ['O', 'W', 'C', 'L', 'Y']

for subdir, dirs, files in os.walk(train_images, topdown=True):
    directory_name = os.path.basename(subdir)
    # Don't include the directory we are currently in, trying to get letter categories.
    if(directory_name in include_columns):
        categories.append(directory_name)
        directory_data = []
        for file in files:
          directory_data.append(file)

        data.append(directory_data)

sorted_categories_indices = sorted(range(len(categories)), key=lambda i: categories[i])
categories = [categories[i] for i in sorted_categories_indices]
data = [data[i] for i in sorted_categories_indices]

data = [directory for directory in data if directory] # Remove null elements
df = pd.DataFrame(data, categories)

print(df)

Empty DataFrame
Columns: []
Index: []


Some information about our dataset

In [5]:
print(f"Dataset shape: {df.shape}")
print(f"Dataset size: {df.size}")
print(f"Data type: {df.dtypes}")
print(f"Dataset columns: {df.columns}")

Dataset shape: (0, 0)
Dataset size: 0
Data type: Series([], dtype: object)
Dataset columns: RangeIndex(start=0, stop=0, step=1)


#### Preprocess data if necessary (i.e. drop certain columns we are not using). Since pandas DataFrames do not support images, we will read the data into a numpy array. Since the dataset is considerably large, we will use a Keras function to support batching ####

In [6]:
# Read dataset into a dataloader
directory = train_images
training_images, validation_images = keras.utils.image_dataset_from_directory(
    directory,
    labels="inferred",
    label_mode="int",
    class_names=include_columns,
    color_mode="grayscale",
    batch_size=128,
    image_size=(128, 128),
    seed=42,
    shuffle=True,
    validation_split=0.2,
    subset='both', # we will use 20% data for validation
)

class_names = training_images.class_names
print("Class names: ", class_names)

# Progress bar for augmentation
from tqdm.auto import tqdm

# Standardize dataset
means = []
stds = []

for images, _ in tqdm(training_images.unbatch()):
    images = images.numpy()
    images = images.flatten()
    means.append(images.mean())
    stds.append(images.std())


means = np.mean(means)
stds = np.std(stds)
print("Mean: ", means)
print("std: ", stds)

# Check current datatype
for images, _ in training_images.take(1): 
    print(type(images[0]))

NotFoundError: Could not find directory /lustre/isaac/scratch/jdosch1/dataset_COSC307/asl_alphabet_train/asl_alphabet_train

In [None]:
print(training_images)

brightness_layer = tf.keras.layers.RandomBrightness(factor=(0.0, 0.3)) # factor is the range for adjustment
contrast_layer = tf.keras.layers.RandomContrast(factor=(0.6)) # factor determines contrast adjustment range
gaussian_noise_layer = tf.keras.layers.GaussianNoise(stddev=0.35)
rotation_layer = tf.keras.layers.RandomRotation(factor=(-1/36, 1/36), fill_mode='reflect')
translation_layer = tf.keras.layers.RandomTranslation(height_factor=(-0.20, 0.20), width_factor=(-0.20, 0.20), fill_mode='reflect')
zoom_layer = tf.keras.layers.RandomZoom(height_factor=(-0.10, 0.10), width_factor=(-0.10, 0.10), fill_mode='reflect', interpolation='bilinear')

# Augment the data to make the model more generalized by adding:
# brightness, contrast, blur, rotation, translation, and zoom at random
augmentation = tf.keras.Sequential([
    brightness_layer,
    contrast_layer,
    gaussian_noise_layer,
    rotation_layer,
    translation_layer, 
    zoom_layer
]) 

In [None]:
def augmentation_process(image, label):
    image = augmentation(image)
    return image, label

training_images_aug = training_images.map(lambda x, y: (augmentation(x, training=True), y))
    
# Standardization
def standardize(image, label):
    image = (image - means) / stds
    return image, label

training_images_aug = training_images_aug.map(standardize)
validation_images = validation_images.map(standardize)

# Check current datatype
for images, _ in training_images_aug.take(1): 
    print(type(images[0]))
    
for images, _ in training_images_aug.take(1):  # Take a single batch

    plt.figure(figsize=(10, 10))
    for i in range(len(images)):
        # Display augmented images
        ax = plt.subplot(16, 16, i * 2 + 1)
        plt.imshow(images[i].numpy(), cmap='gray')
        plt.title("Original")
        plt.axis("off")


    plt.show() 

### Define the model: we will use a sequential model, with a relu activation function and MaxPooling 2D layers, to extract the image features, then  ###

Conv2D documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D
    We will use the default kernel initializer and no bias for now, but can change later for optimization.
MaxPooling2D documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D
    

In [None]:
# Mimicking the VGG16 architecture with grayscale inputs
model = models.Sequential()
model.add(Input(shape=(128, 128, 1)))


# model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))

# model.add(layers.Conv2D(16, (3, 3), activation='relu'))
model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))

# model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))

model.add(layers.Conv2D(256, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(256, (3, 3), activation='relu', padding='same'))
model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))

model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))
'''
model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
'''
model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))


model.add(layers.Flatten())
model.add(layers.Dense(4096, activation='relu'))
#model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))

model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.2))

# Using softmax because of this article, could change later: https://emeritus.org/blog/cnn-neural-network/#:~:text=The%20Fully%20Connected%20Layer:%20Making,applications%20such%20as%20image%20recognition.
model.add(layers.Dense(5, activation='softmax'))

model.summary()

In [None]:
from keras.optimizers import SGD
from keras.callbacks import ReduceLROnPlateau
callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min') # could use restore_best_weights, reference: https://keras.io/api/callbacks/early_stopping/

# SGD chosen to adapt learning rate to break through plateaus
model.compile(loss='sparse_categorical_crossentropy', optimizer=SGD(learning_rate=0.0001, momentum=0.9), metrics=['accuracy'])


# Set save model weights callback in case we need to stop and restart training
save_callback = tf.keras.callbacks.ModelCheckpoint(
   filepath = "./VGGweights.11_27.weights.h5", verbose=1, save_weights_only=True,
   save_freq='epoch')

# Set learning rate scheduler to prevent stagnation during training
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Set custom early stop function
class MyThresholdCallback(tf.keras.callbacks.Callback):
    def __init__(self, threshold):
        super(MyThresholdCallback, self).__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None): 
        val_acc = logs["val_accuracy"]
        if val_acc >= self.threshold:
            self.model.stop_training = True

# Reference for custom callback: https://stackoverflow.com/questions/59563085/how-to-stop-training-when-it-hits-a-specific-validation-accuracy
custom_callback = MyThresholdCallback(threshold=0.97)
EPOCHS = 1000 # Can modify later

# Fit the model to training data
history = model.fit(x=training_images_aug, validation_data=validation_images, epochs=EPOCHS, batch_size = 128, callbacks=[custom_callback, lr_scheduler])

Epoch 1/1000
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10387s[0m 23s/step - accuracy: 0.0551 - loss: 3.4578 - val_accuracy: 0.2515 - val_loss: 2.9044 - learning_rate: 1.0000e-04
Epoch 2/1000
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10399s[0m 23s/step - accuracy: 0.1728 - loss: 2.8281 - val_accuracy: 0.5978 - val_loss: 1.7604 - learning_rate: 1.0000e-04
Epoch 3/1000
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10322s[0m 23s/step - accuracy: 0.4218 - loss: 1.8916 - val_accuracy: 0.8060 - val_loss: 0.8605 - learning_rate: 1.0000e-04
Epoch 4/1000
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10344s[0m 23s/step - accuracy: 0.6265 - loss: 1.1770 - val_accuracy: 0.8827 - val_loss: 0.4904 - learning_rate: 1.0000e-04
Epoch 5/1000
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10364s[0m 23s/step - accuracy: 0.7424 - loss: 0.8025 - val_accuracy: 0.9290 - val_loss: 0.3062 - learning_rate: 1.0000e-04
Epoch 6/1000
[1m450

In [None]:
# Plot the val loss and train loss to determine if the model is overfitting
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Save the model weights
model.save('./COSC307_canny_edge.keras')