In [62]:
# Libraries for data storage and image processing
import numpy as np
import pandas as pd
import os, re
from PIL import Image

# TensorFlow/Keras
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Input
from keras.models import Sequential
from keras import metrics
from keras.preprocessing import image
# TODO: figure out layers and method of training

# SKLearn Libraries
from sklearn.model_selection import train_test_split

Now we read in the data from the local directory where it is stored, and process the images.

In [63]:
# We will define the paths to our files
# Current directory structure: train has subdirectories A-Z, each with images stored inside,
# test just has the files with the letter, label is in file name.
train_images = '../dataset/asl_alphabet_train/asl_alphabet_train'
test_images = '../dataset/asl_alphabet_test/asl_alphabet_test'

# Load Data #

Read in the data from the local directory and store it in a pandas DataFrame. We will display the first 5 rows with df.head() to validate our result.

In [64]:
train_data = pd.DataFrame()
categories = []
data = []

for subdir, dirs, files in os.walk(train_images, topdown=True):
    directory_name = os.path.basename(subdir).split("\\")[0]
    # Don't include the directory we are currently in, trying to get letter categories.
    if(directory_name != 'asl_alphabet_train'):
        categories.append(os.path.basename(subdir).split("\\")[0])
    directory_data = []
    for file in files:
        directory_data.append(file)

    data.append(directory_data)

data = [directory for directory in data if directory] # Remove null elements
df = pd.DataFrame(data, categories)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2990,2991,2992,2993,2994,2995,2996,2997,2998,2999
A,A1.jpg,A10.jpg,A100.jpg,A1000.jpg,A1001.jpg,A1002.jpg,A1003.jpg,A1004.jpg,A1005.jpg,A1006.jpg,...,A990.jpg,A991.jpg,A992.jpg,A993.jpg,A994.jpg,A995.jpg,A996.jpg,A997.jpg,A998.jpg,A999.jpg
B,B1.jpg,B10.jpg,B100.jpg,B1000.jpg,B1001.jpg,B1002.jpg,B1003.jpg,B1004.jpg,B1005.jpg,B1006.jpg,...,B990.jpg,B991.jpg,B992.jpg,B993.jpg,B994.jpg,B995.jpg,B996.jpg,B997.jpg,B998.jpg,B999.jpg
C,C1.jpg,C10.jpg,C100.jpg,C1000.jpg,C1001.jpg,C1002.jpg,C1003.jpg,C1004.jpg,C1005.jpg,C1006.jpg,...,C990.jpg,C991.jpg,C992.jpg,C993.jpg,C994.jpg,C995.jpg,C996.jpg,C997.jpg,C998.jpg,C999.jpg
D,D1.jpg,D10.jpg,D100.jpg,D1000.jpg,D1001.jpg,D1002.jpg,D1003.jpg,D1004.jpg,D1005.jpg,D1006.jpg,...,D990.jpg,D991.jpg,D992.jpg,D993.jpg,D994.jpg,D995.jpg,D996.jpg,D997.jpg,D998.jpg,D999.jpg
del,del1.jpg,del10.jpg,del100.jpg,del1000.jpg,del1001.jpg,del1002.jpg,del1003.jpg,del1004.jpg,del1005.jpg,del1006.jpg,...,del990.jpg,del991.jpg,del992.jpg,del993.jpg,del994.jpg,del995.jpg,del996.jpg,del997.jpg,del998.jpg,del999.jpg


Some information about our dataset

In [65]:
print(f"Dataset shape: {df.shape}")
print(f"Dataset size: {df.size}")
print(f"Data type: {df.dtypes}")
print(f"Dataset columns: {df.columns}")

Dataset shape: (29, 3000)
Dataset size: 87000
Data type: 0       object
1       object
2       object
3       object
4       object
         ...  
2995    object
2996    object
2997    object
2998    object
2999    object
Length: 3000, dtype: object
Dataset columns: RangeIndex(start=0, stop=3000, step=1)


#### Preprocess data if necessary (i.e. drop certain columns we are not using). Since pandas DataFrames do not support images, we will read the data into a numpy array. Since the dataset is considerably large, we will use a Keras function to support batching ####

In [66]:
'''train_images_list = []
for index, row in df.iterrows():
    train_images_dir = f"{train_images}/{index}"
    check = True
    for value in row:
        image_path = f"{train_images_dir}/{value}"
        img = image.load_img(image_path, target_size=(200, 200))
        img = image.img_to_array(img)
        train_images_list.append(img)

training_images = np.array(train_images_list)'''
directory = train_images
training_images, validation_images = keras.utils.image_dataset_from_directory(
    directory,
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    seed=42,
    shuffle=True,
    validation_split=0.2,
    subset='both', # we will use 10% data for validation
)
    

Found 87000 files belonging to 29 classes.
Using 78300 files for training.
Using 8700 files for validation.


In [67]:
print(training_images)



<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


### Define the model: we will use a sequential model, with a relu activation function and MaxPooling 2D layers, to extract the image features, then  ###

Conv2D documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D
    We will use the default kernel initializer and no bias for now, but can change later for optimization.
MaxPooling2D documentation: https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D
    

In [68]:
model = models.Sequential()
model.add(Input(shape=(256, 256, 3)))
model.add(layers.Conv2D(256, (3, 3), activation='relu'))
# For pooling layer, downscale by a factor of 2
model.add(layers.MaxPool2D(pool_size=(2, 2)))
model.add(layers.Conv2D(256, (3, 3), activation='relu'))
model.add(layers.MaxPool2D(pool_size=(2, 2)))
model.add(layers.Conv2D(256, (3, 3), activation='relu'))

# Perform classification by feeding final output tensor into dense layers
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))

# Using softmax because of this article, could change later: https://emeritus.org/blog/cnn-neural-network/#:~:text=The%20Fully%20Connected%20Layer:%20Making,applications%20such%20as%20image%20recognition.
model.add(layers.Dense(29, activation='softmax'))

model.summary()

In [None]:
# We will use the Adam optimizer, for the performance reasons discussed in the following link: https://www.geeksforgeeks.org/adam-optimizer/
# Can potentially use other loss functions later for performance boost, this one chosen because
# of how the labels are meant to be classified (one-hot encoded 1-29)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

EPOCHS = 1 # Can modify later
# Fit the model to training data
model.fit(x=training_images, validation_data=validation_images, epochs=EPOCHS)

Epoch 1/200
[1m   1/2447[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m47:12:46[0m 69s/step - accuracy: 0.0312 - loss: 14.9456