# Final Deep Learning Project - University of Brasília
## Students: Luis Gustavo Avelino(15/0016310) and Matheus Felizola(17/0019098)

The goal of this project is to build a classifier for American Sign Language (ASL) digits (0 to 9) that identifies the hand gestures regardless of skin color or background.

In [4]:
# Clone the dataset repository into ./Sign-Language-Digits-Dataset;
# the cells below read the example pictures and the per-class image folders from it.
!git clone https://github.com/ardamavi/Sign-Language-Digits-Dataset.git

Cloning into 'Sign-Language-Digits-Dataset'...
remote: Enumerating objects: 2089, done.[K
remote: Total 2089 (delta 0), reused 0 (delta 0), pack-reused 2089[K
Receiving objects: 100% (2089/2089), 15.07 MiB | 6.29 MiB/s, done.
Resolving deltas: 100% (658/658), done.


Let's install tqdm, a progress bar library, to help us visualize the progress of the code while it runs.

In [3]:
!pip3 install tqdm

Collecting tqdm
[?25l  Downloading https://files.pythonhosted.org/packages/bb/62/6f823501b3bf2bac242bd3c320b592ad1516b3081d82c77c1d813f076856/tqdm-4.39.0-py2.py3-none-any.whl (53kB)
[K     |████████████████████████████████| 61kB 597kB/s eta 0:00:011
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.39.0


In [5]:
# Importing useful libraries: keras, numpy, tqdm, matplotlib...

import os, cv2, math
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
import numpy as np
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from shutil import copyfile
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

Using TensorFlow backend.


TypeError: __new__() got an unexpected keyword argument 'serialized_options'

In [6]:
# Creating the example images for the numbers (10 numbers in total - 2 x 5)
number_of_rows, number_of_columns = 2, 5
# figsize is (width, height) in inches: the width follows the number of columns and the
# height the number of rows, so every subplot gets roughly a 3 x 3 inch cell.
plt.rcParams['figure.figsize'] = (number_of_columns * 3, number_of_rows * 3)

for row in range(number_of_rows):
    for column in range(number_of_columns):
        # Row-major position in the grid; it is also the digit shown in that position
        image_index = row * number_of_columns + column
        # Loading the example picture of this digit from the dataset, resized to 64 x 64
        digit_image = image.load_img('Sign-Language-Digits-Dataset/Examples/example_' + str(image_index) + '.JPG', target_size = (64, 64))
        # Subplot positions are 1-based, hence the + 1
        plt.subplot(number_of_rows, number_of_columns, image_index + 1)
        plt.imshow(digit_image)
        plt.title(image_index)
        plt.axis('off')

NameError: name 'plt' is not defined

In [15]:
# Path to the folder of the downloaded dataset that holds one sub-folder of images per class
PATH_TO_DATASET = 'Sign-Language-Digits-Dataset/Dataset/'

## Dividing the dataset into three: train, validation and test.

This is a dataset preparation step required to train and validate the network afterwards.

In [16]:
# Initialising the empty dictionaries; each maps a class name to its list of image files
train_dataset = {}
validation_dataset = {}
test_dataset = {}

# Fixed seed so that re-running the notebook reproduces exactly the same split
SPLIT_SEED = 42
# Folders that the copy cells below create inside PATH_TO_DATASET; they are not classes
GENERATED_DIRS = {'training_dataset', 'validation_dataset', 'test_dataset'}

for cat in sorted(os.listdir(PATH_TO_DATASET)):
    cat_dir = os.path.join(PATH_TO_DATASET, cat) # e.g. PATH_TO_DATASET/'0'
    # Skip stray files and the split folders left behind by a previous run
    if cat in GENERATED_DIRS or not os.path.isdir(cat_dir):
        continue
    # os.listdir returns entries in arbitrary order; sorting makes the seeded split deterministic
    cat_files = sorted(os.listdir(cat_dir))

    # Size of training dataset will be 70% of the total dataset
    train_list, test_list = train_test_split(cat_files, test_size = 0.3, random_state = SPLIT_SEED)

    # The remaining 30% is halved: validation and test are 15% of the total dataset each
    validation_list, test_list = train_test_split(test_list, test_size = 0.5, random_state = SPLIT_SEED)

    train_dataset[cat] = train_list
    validation_dataset[cat] = validation_list
    test_dataset[cat] = test_list

### Train

In [17]:
# Copy every training image into PATH_TO_DATASET/training_dataset/class_0<digit>/
for cat in tqdm(train_dataset.keys()):
    # One folder per class, to accommodate the directory format required by the
    # flow_from_directory method in keras
    cat_dir = os.path.join(PATH_TO_DATASET, 'training_dataset', 'class_0' + str(cat))
    # exist_ok lets the cell be re-run without raising FileExistsError
    os.makedirs(cat_dir, exist_ok=True)
    # Remove files copied by an earlier run so images from a previous split
    # cannot linger here and leak between the sets
    for stale_file in os.listdir(cat_dir):
        os.remove(os.path.join(cat_dir, stale_file))
    for file in train_dataset[cat]:
        # src path is PATH_TO_DATASET/'0'/file
        src = os.path.join(PATH_TO_DATASET, cat, file)
        # dest path is PATH_TO_DATASET/'training_dataset'/'class_00'/file
        dest = os.path.join(cat_dir, file)
        copyfile(src, dest)

100%|██████████| 10/10 [00:01<00:00,  5.58it/s]


### Validation

In [18]:
# Copy every validation image into PATH_TO_DATASET/validation_dataset/class_0<digit>/
for cat in tqdm(validation_dataset.keys()):
    # One folder per class, to accommodate the directory format required by the
    # flow_from_directory method in keras
    cat_dir = os.path.join(PATH_TO_DATASET, 'validation_dataset', 'class_0' + str(cat))
    # exist_ok lets the cell be re-run without raising FileExistsError
    os.makedirs(cat_dir, exist_ok=True)
    # Remove files copied by an earlier run so images from a previous split
    # cannot linger here and leak between the sets
    for stale_file in os.listdir(cat_dir):
        os.remove(os.path.join(cat_dir, stale_file))
    for file in validation_dataset[cat]:
        # src path is PATH_TO_DATASET/'0'/file
        src = os.path.join(PATH_TO_DATASET, cat, file)
        # dest path is PATH_TO_DATASET/'validation_dataset'/'class_00'/file
        dest = os.path.join(cat_dir, file)
        copyfile(src, dest)

100%|██████████| 10/10 [00:00<00:00, 26.93it/s]


### Test

In [19]:
# Copy every test image into PATH_TO_DATASET/test_dataset/class_0<digit>/
for cat in tqdm(test_dataset.keys()):
    # One folder per class, to accommodate the directory format required by the
    # flow_from_directory method in keras
    cat_dir = os.path.join(PATH_TO_DATASET, 'test_dataset', 'class_0' + str(cat))
    # exist_ok lets the cell be re-run without raising FileExistsError
    os.makedirs(cat_dir, exist_ok=True)
    # Remove files copied by an earlier run so images from a previous split
    # cannot linger here and leak between the sets
    for stale_file in os.listdir(cat_dir):
        os.remove(os.path.join(cat_dir, stale_file))
    for file in test_dataset[cat]:
        # src path is PATH_TO_DATASET/'0'/file
        src = os.path.join(PATH_TO_DATASET, cat, file)
        # dest path is PATH_TO_DATASET/'test_dataset'/'class_00'/file
        dest = os.path.join(cat_dir, file)
        copyfile(src, dest)

100%|██████████| 10/10 [00:00<00:00, 19.28it/s]


In [21]:
# Report, for each of the ten digits, how many images ended up in each split
size_report = "0{} : Training size({}) Validation size({}) Test size({})"
for index in range(10):
    key = str(index)  # the split dictionaries are keyed by the digit as a string
    train_size, validation_size, test_size = (
        len(split[key]) for split in (train_dataset, validation_dataset, test_dataset)
    )
    print(size_report.format(index, train_size, validation_size, test_size))

00 : Training size(143) Validation size(31) Test size(31)
01 : Training size(144) Validation size(31) Test size(31)
02 : Training size(144) Validation size(31) Test size(31)
03 : Training size(144) Validation size(31) Test size(31)
04 : Training size(144) Validation size(31) Test size(32)
05 : Training size(144) Validation size(31) Test size(32)
06 : Training size(144) Validation size(31) Test size(32)
07 : Training size(144) Validation size(31) Test size(31)
08 : Training size(145) Validation size(31) Test size(32)
09 : Training size(142) Validation size(31) Test size(31)


## Data Augmentation

To help the network learn to classify the data correctly even when the images are distorted, we'll apply some data augmentation.

In [None]:
# Performing data augmentation on training dataset: besides rescaling the pixel values
# by 1/255, apply random shear, random zoom and random horizontal flips
train_datagenerator = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

# For validation dataset, only rescale the pictures
validation_datagenerator = ImageDataGenerator(rescale = 1./255)

# For test dataset, only rescale the pictures
test_datagenerator = ImageDataGenerator(rescale = 1./255)

# The folder names must match the ones created by the copy cells above
# (PATH_TO_DATASET/'training_dataset', 'validation_dataset' and 'test_dataset'),
# each holding one 'class_0<digit>' sub-folder per class.
training_data = train_datagenerator.flow_from_directory(os.path.join(PATH_TO_DATASET, 'training_dataset'),
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

validation_data = validation_datagenerator.flow_from_directory(os.path.join(PATH_TO_DATASET, 'validation_dataset'),
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

test_data = test_datagenerator.flow_from_directory(os.path.join(PATH_TO_DATASET, 'test_dataset'),
                                            target_size = (64, 64),
                                            batch_size = 32,
                                            class_mode = 'categorical')