In [None]:
# Import Library
!pip install split-folders

import os
import numpy
import shutil
import filecmp
import splitfolders
import tensorflow as tf
import matplotlib.pyplot as plt

from zipfile import ZipFile
from google.colab import drive
from tensorflow.keras.layers import Input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
# Mount Google Drive and make the Kaggle folder on it the working directory
drive.mount('/content/gdrive')
# Point the Kaggle tooling at the Kaggle folder on Drive
# (presumably where the kaggle.json credentials live — verify).
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/My Drive/Kaggle"
# NOTE(review): this cell spells the Drive root 'My Drive' while every later
# cell uses 'MyDrive' — confirm both resolve to the same folder in this runtime.
%cd /content/gdrive/My Drive/Kaggle


In [None]:
# Download the four sign-language dataset archives with the Kaggle CLI.
# The extraction cells later expect the resulting .zip files under
# /content/gdrive/MyDrive/Kaggle, so run this from that working directory.
!kaggle datasets download -d muhammadkhalid/sign-language-for-alphabets
!kaggle datasets download -d grassknoted/asl-alphabet
!kaggle datasets download -d ahmedkhanak1995/sign-language-gesture-images-dataset
!kaggle datasets download -d kapillondhe/american-sign-language

In [None]:
# Root folders used by the rest of the notebook:
#   raw_data  - where the downloaded archives are extracted
#   home_dir  - merged dataset, one sub-folder per class
#   final_dir - output of the train/validation split
raw_data = '/content/gdrive/MyDrive/Kaggle/raw_data'
home_dir = '/content/gdrive/MyDrive/Kaggle/dataset'
final_dir = '/content/gdrive/MyDrive/Kaggle/final'

# Create each folder once; report the ones that are already present.
for folder_name, folder_path in (('raw_data', raw_data),
                                 ('home_dir', home_dir),
                                 ('final_dir', final_dir)):
    if os.path.exists(folder_path):
        print(f'Folder {folder_name} already exist')
        continue
    os.mkdir(folder_path)
    print(f'Folder {folder_name} created')


In [None]:
# Unpack the first dataset archive into the raw_data folder
zip_loc1 = '/content/gdrive/MyDrive/Kaggle/sign-language-for-alphabets.zip'
with ZipFile(zip_loc1) as zip1:  # read mode is the default
    zip1.extractall(raw_data)
print('Done extracting first dataset')


In [None]:
# Unpack the second dataset archive into the raw_data folder
zip_loc2 = '/content/gdrive/MyDrive/Kaggle/asl-alphabet.zip'
with ZipFile(zip_loc2) as zip2:  # read mode is the default
    zip2.extractall(raw_data)
print('Done extracting second dataset')


In [None]:
# Unpack the third dataset archive into the raw_data folder
zip_loc3 = '/content/gdrive/MyDrive/Kaggle/sign-language-gesture-images-dataset.zip'
with ZipFile(zip_loc3) as zip3:  # read mode is the default
    zip3.extractall(raw_data)
print('Done extracting third dataset')

In [None]:
# Unpack the fourth dataset archive into the raw_data folder
zip_loc4 = '/content/gdrive/MyDrive/Kaggle/american-sign-language.zip'
with ZipFile(zip_loc4) as zip4:  # read mode is the default
    zip4.extractall(raw_data)
print('Done extracting fourth dataset')

In [None]:
# Delete folders that are not needed for the alphabet dataset
# (non-letter classes, test splits, pre-processed copies and digit folders).
raw_root = '/content/gdrive/MyDrive/Kaggle/raw_data'
unwanted = (
    'asl_alphabet_train/asl_alphabet_train/del',
    'asl_alphabet_train/asl_alphabet_train/nothing',
    'asl_alphabet_train/asl_alphabet_train/space',
    'Sign Language for Alphabets/unknown',
    'asl_alphabet_test',
    'Gesture Image Pre-Processed Data',
    'ASL_Dataset/Test',
    'Gesture Image Data/_',
    'ASL_Dataset/Train/Nothing',
    'ASL_Dataset/Train/Space',
)
del_list = [os.path.join(raw_root, relative) for relative in unwanted]

# The digit folders 0..9 of the gesture dataset are removed as well.
gesture_root = '/content/gdrive/MyDrive/Kaggle/raw_data/Gesture Image Data'
del_list.extend(os.path.join(gesture_root, str(digit)) for digit in range(10))

for folder in del_list:
    if os.path.exists(folder):
        shutil.rmtree(folder)
        continue
    print(f'Folder {folder} not found')


In [None]:
# Source folders of the four extracted datasets that will be merged
dataset1_dir = '/content/gdrive/MyDrive/Kaggle/raw_data/Sign Language for Alphabets'
dataset2_dir = '/content/gdrive/MyDrive/Kaggle/raw_data/asl_alphabet_train/asl_alphabet_train'
dataset3_dir = '/content/gdrive/MyDrive/Kaggle/raw_data/Gesture Image Data'
dataset4_dir = '/content/gdrive/MyDrive/Kaggle/raw_data/ASL_Dataset/Train'

# The 26 target classes: 'A' through 'Z'
class_list = [chr(code) for code in range(ord('A'), ord('Z') + 1)]

# Make one destination folder per class inside home_dir
for letter in class_list:
    class_dir = os.path.join(home_dir, letter)
    if os.path.exists(class_dir):
        print(f'Folder {letter} already exist')
    else:
        os.mkdir(class_dir)

print('Done')


In [None]:
# Copy the first dataset into the merged class folders (files get an 'AA' prefix)
# The destination class is taken from the name of the top-level sub-folder a
# file was found in (case-insensitive). os.walk gives no guarantee about the
# order in which sub-folders are visited, so deriving the class from a running
# counter would silently mislabel images whenever the order is not exactly A..Z.
# NOTE(review): assumes each class sub-folder is named after its letter — confirm
# against the extracted data.
for (dirpath, dirnames, filenames) in os.walk(dataset1_dir):
    dirnames.sort()  # visit sub-folders in a reproducible order
    print('Copy ==> ' + dirpath)
    # First path component below the dataset root; '.' for the root itself
    label = os.path.relpath(dirpath, dataset1_dir).split(os.sep)[0].upper()
    if label not in class_list:
        # Dataset root or a folder that is not one of the letter classes
        if filenames:
            print(f'Skip {dirpath}: {len(filenames)} file(s) without a matching class')
        continue
    for i in filenames:
        src = os.path.join(dirpath, i)
        dst = os.path.join(home_dir, label, 'AA' + i)
        # Copy when the target is missing or differs from the source
        if not os.path.exists(dst) or not filecmp.cmp(src, dst):
            shutil.copyfile(src, dst)
        else:
            print(f'File {i} already exist')


In [None]:
# Copy the second dataset into the merged class folders (files get a 'BB' prefix)
# The destination class is taken from the name of the top-level sub-folder a
# file was found in (case-insensitive). os.walk gives no guarantee about the
# order in which sub-folders are visited, so deriving the class from a running
# counter would silently mislabel images whenever the order is not exactly A..Z.
# NOTE(review): assumes each class sub-folder is named after its letter — confirm
# against the extracted data.
for (dirpath, dirnames, filenames) in os.walk(dataset2_dir):
    dirnames.sort()  # visit sub-folders in a reproducible order
    print('Copy ==> ' + dirpath)
    # First path component below the dataset root; '.' for the root itself
    label = os.path.relpath(dirpath, dataset2_dir).split(os.sep)[0].upper()
    if label not in class_list:
        # Dataset root or a folder that is not one of the letter classes
        if filenames:
            print(f'Skip {dirpath}: {len(filenames)} file(s) without a matching class')
        continue
    for i in filenames:
        src = os.path.join(dirpath, i)
        dst = os.path.join(home_dir, label, 'BB' + i)
        # Copy when the target is missing or differs from the source
        if not os.path.exists(dst) or not filecmp.cmp(src, dst):
            shutil.copyfile(src, dst)
        else:
            print(f'File {i} already exist')


In [None]:
# Copy the third dataset into the merged class folders (files get a 'CC' prefix)
# The destination class is taken from the name of the top-level sub-folder a
# file was found in (case-insensitive). os.walk gives no guarantee about the
# order in which sub-folders are visited, so deriving the class from a running
# counter would silently mislabel images whenever the order is not exactly A..Z.
# NOTE(review): assumes each class sub-folder is named after its letter — confirm
# against the extracted data.
for (dirpath, dirnames, filenames) in os.walk(dataset3_dir):
    dirnames.sort()  # visit sub-folders in a reproducible order
    print('Copy ==> ' + dirpath)
    # First path component below the dataset root; '.' for the root itself
    label = os.path.relpath(dirpath, dataset3_dir).split(os.sep)[0].upper()
    if label not in class_list:
        # Dataset root or a folder that is not one of the letter classes
        if filenames:
            print(f'Skip {dirpath}: {len(filenames)} file(s) without a matching class')
        continue
    for i in filenames:
        src = os.path.join(dirpath, i)
        dst = os.path.join(home_dir, label, 'CC' + i)
        # Copy when the target is missing or differs from the source
        if not os.path.exists(dst) or not filecmp.cmp(src, dst):
            shutil.copyfile(src, dst)
        else:
            print(f'File {i} already exist')

In [None]:
# Copy the fourth dataset into the merged class folders (files get a 'DD' prefix)
# The destination class is taken from the name of the top-level sub-folder a
# file was found in (case-insensitive). os.walk gives no guarantee about the
# order in which sub-folders are visited, so deriving the class from a running
# counter would silently mislabel images whenever the order is not exactly A..Z.
# NOTE(review): assumes each class sub-folder is named after its letter — confirm
# against the extracted data.
for (dirpath, dirnames, filenames) in os.walk(dataset4_dir):
    dirnames.sort()  # visit sub-folders in a reproducible order
    print('Copy ==> ' + dirpath)
    # First path component below the dataset root; '.' for the root itself
    label = os.path.relpath(dirpath, dataset4_dir).split(os.sep)[0].upper()
    if label not in class_list:
        # Dataset root or a folder that is not one of the letter classes
        if filenames:
            print(f'Skip {dirpath}: {len(filenames)} file(s) without a matching class')
        continue
    for i in filenames:
        src = os.path.join(dirpath, i)
        dst = os.path.join(home_dir, label, 'DD' + i)
        # Copy when the target is missing or differs from the source
        if not os.path.exists(dst) or not filecmp.cmp(src, dst):
            shutil.copyfile(src, dst)
        else:
            print(f'File {i} already exist')

In [None]:
# Split the merged dataset (home_dir) into final_dir using a 0.8 / 0.2 ratio.
# The fixed seed keeps the split reproducible between runs.
splitfolders.ratio(
    home_dir,
    output=final_dir,
    seed=796234751384,
    ratio=(0.8, 0.2),
    group_prefix=None,
)

In [None]:
# Delete stray Jupyter checkpoint folders from the split output so they are
# not picked up as an extra class when the images are loaded.
# The folders are only removed when present: an unconditional rmtree raises
# FileNotFoundError on a clean run where no checkpoint folder was created.
for split_dir in ('/content/gdrive/MyDrive/Kaggle/final/train',
                  '/content/gdrive/MyDrive/Kaggle/final/val'):
    checkpoint_dir = os.path.join(split_dir, '.ipynb_checkpoints')
    if os.path.isdir(checkpoint_dir):
        shutil.rmtree(checkpoint_dir)
    else:
        print(f'Folder {checkpoint_dir} not found')

In [None]:
# Image generators: augmented for training, rescale-only for validation
train_dir = '/content/gdrive/MyDrive/Kaggle/final/train'
valid_dir = '/content/gdrive/MyDrive/Kaggle/final/val'

# Random transformations applied to training images only
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=(0.5, 1.0),
    shear_range=0.2,
    fill_mode='nearest',
)

# Both generators scale pixel values by 1/255
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Loading options shared by the training and validation generators
flow_options = dict(
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=256,
    class_mode='categorical',
)

# Generator for training
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Generator for validation
valid_generator = valid_datagen.flow_from_directory(valid_dir, **flow_options)


In [None]:
# Callback that ends training once accuracy is good enough
class myCallbacks(tf.keras.callbacks.Callback):
    """Stop training when both training and validation accuracy reach a threshold.

    Args:
        threshold: value that both the 'accuracy' and 'val_accuracy' entries
            of the epoch logs must reach before training is stopped.
            Defaults to 0.92.
    """

    def __init__(self, threshold=.92):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch metrics and request a stop when both pass the threshold.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dictionary for the epoch; may be None or lack a metric.
        """
        logs = logs or {}
        accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')
        # A metric is absent when, for example, no validation data was given;
        # comparing None with a float would raise TypeError, so just keep training.
        if accuracy is None or val_accuracy is None:
            return
        if val_accuracy >= self.threshold and accuracy >= self.threshold:
            self.model.stop_training = True
            print('Accuracy Complete')

callbacks = myCallbacks()


In [None]:
# Model creation
# Transfer learning: an ImageNet-pretrained ResNet152V2 without its top,
# followed by a small convolutional + dense classification head.
backbone = ResNet152V2(
    weights="imagenet",
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)
# Freeze the pretrained backbone so only the new head is trained
backbone.trainable = False

model = tf.keras.Sequential([
    backbone,
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    # One output per class in class_list (26 letters)
    tf.keras.layers.Dense(26, activation='softmax'),
])


In [None]:
# Compile with Adam and categorical cross-entropy, tracking accuracy
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer-by-layer summary of the compiled model
model.summary()


In [None]:
# Train for up to 100 epochs; the callback may stop the run earlier
history = model.fit(
    train_generator,
    epochs=100,
    validation_data=valid_generator,
    callbacks=[callbacks],
    verbose=1,
)


In [None]:
# Plot training vs. validation accuracy (left) and loss (right) per epoch.
# Each panel carries its own title and axis labels so the figure can be read
# on its own; without them the two panels are indistinguishable.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 7))
fig.suptitle('Loss and Accuracy')

ax1.plot(history.history['accuracy'], label='Train')
ax1.plot(history.history['val_accuracy'], label='Validation')
ax1.set_title('Accuracy')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()

ax2.plot(history.history['loss'], label='Train')
ax2.plot(history.history['val_loss'], label='Validation')
ax2.set_title('Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()
plt.show()


In [None]:
# Convert the trained Keras model to the TFLite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to model.tflite in the working directory
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Save the architecture as JSON
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

# Save the weights separately in HDF5 format
model.save_weights("weights.h5")
print("Saved model to disk")