In [1]:
"""
    Import all the important required Deep Learning Libraries to train the gestures model.
    
    Keras is an Application Programming Interface (API) which can run on top of tensorflow.
    
    tensorflow will be the main deep learning module we will use to build our deep learning model.
    
    from tensorflow we will be referring to a pre-trained model called VGG-16. We will be 
    using VGG-16 with custom convolutional neural networks (CNN's) i.e. We will be using 
    our transfer learning model VGG-16 alongside our own custom model to train an overall 
    accurate model. The VGG-16 model in keras is pretrained with the imagenet weights.
    
    The ImageDataGenerator is used for Data augmentation where the model can see more copies of 
        the model. Data Augmentation is used for creating replications of the original images 
        and using those transformations in each epoch.
        
    The layers for training which will be used are as follows:
    1. Input = The input layer which we pass the input shape.
    2. Conv2D = The Convoluional layer combined with Input to provide a output of tensors
    3. Maxpool2D = Downsampling the Data from the convolutional layer.
    4. Batch normalization = It is a technique for training very deep neural networks that standardizes 
       the inputs to a layer for each mini-batch. This has the effect of stabilizing the learning 
       process and dramatically reducing the number of training epochs required to train deep networks.
    5. Dropout = Dropout is a technique where randomly selected neurons are ignored during training. 
                 They are “dropped-out” randomly and this prevents over-fitting.
    6. Dense = Fully Connected layers.
    7. Flatten = Flatten the entire structure to a 1-D array.
    
    The Models can be built in a model like structure as shown in this particular model or can be built 
        in a sequential manner.
        
    Use of l2 regularization for fine tuning.
    
    The optimizer used will be Adam as it is performs better than the other optimizers on this model.
    
    We are also importing the os module to make it compatible with the windows environment. 

"""

import keras
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Dropout, BatchNormalization, Dense, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
import os

In [4]:


"""
    Specify the train and the validation directory for the stored images.
    train_dir is the directory which will contain the set of images for training.
    validation_dir is the directory will contain the set of validation images.

"""

train_dir = "C:/Users/dell/Desktop/PROJECT DATA/asl_alphabet_train/asl_alphabet_train/train1/"
validation_dir = "C:/Users/dell/Desktop/PROJECT DATA/asl_alphabet_train/asl_alphabet_train/validation1/"



In [5]:
"""
    We have 4 classes of gestures which is namely punch, Victory, Super 
    and Loser.
    
    Each of the images have a height and width of 200 as well as it is a 
    RGB image i.e. a 3-Dimensional image.
    
    We will be using a batch_size of 128 for the image Data Augmentation.

"""

num_classes = 4

Img_Height = 200
Img_width = 200

batch_size = 128

In [6]:
"""
    Reference: https://keras.io/api/preprocessing/image/
    
    The ImageDataGenerator is used here to make copies of the image. 
    This modification will be applied to each epoch as well .

    
    train_datagen.flow_from_directory Takes the path to a directory & generates batches of 
    augmented data.

"""

train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   shear_range=0.3,
                                   zoom_range=0.3,
                                   width_shift_range=0.4,
                                   height_shift_range=0.4,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(Img_Height, Img_width),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)

validation_generator = validation_datagen.flow_from_directory(validation_dir,
                                                             target_size=(Img_Height, Img_width),
                                                             batch_size=batch_size,
                                                             class_mode='categorical',
                                                             shuffle=True)

Found 9600 images belonging to 4 classes.
Found 2400 images belonging to 4 classes.


In [7]:
""" 
    We will be importing VGG-16 model and the VGG-16 has no top layer,
    we are going to add custom layers into the VGG16. 
    To Avoid training VGG16 Layers we give the command below:
    layers.trainable = False


"""

VGG16_MODEL = VGG16(input_shape=(Img_width, Img_Height, 3), include_top=False, weights='imagenet')

for layers in VGG16_MODEL.layers: 
    layers.trainable=False

for layers in VGG16_MODEL.layers:
    print(layers.trainable)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [8]:
"""
    The Finger Gesture Model we are building will be trained by using 
    transfer learning. We will be using VGG-16 model with no top layer.
    We will be adding custom layers to the top layer of the VGG-16 model 
    and then we will use this transfer learning model for prediction of
    the finger gestures.
    
    The Custom layer consists of the input layer which is basically the 
    output of the VGG-16 Model. We add a convolutional layer with 32 filters,
    kernel_size of (3,3) and default strides of (1,1) and we use activation
    as relu with he_normal as initializer.
    
    We will be using the pooling layer to downsampled the layers from the
    convolutional layer.
    
    The 2 fully connected layers are used with activation as relu i.e. a 
    Dense architecture after the sample being passed through a flatten
    layer.
    
    The output layer has a softmax activation with num_classes is 4 that
    predicts the probabilies for the num_classes namely Punch, Super, 
    Victoy and Loser.
    
    The final Model takes the input as the start of the VGG-16 model
    and outputs as the final output layer.

"""

# Input layer
input_layer = VGG16_MODEL.output

# Convolutional Layer
Conv1 = Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), padding='valid',
               data_format='channels_last', activation='relu', 
               kernel_initializer=keras.initializers.he_normal(seed=0), 
               name='Conv1')(input_layer)

# MaxPool Layer
Pool1 = MaxPool2D(pool_size=(2,2),strides=(2,2),padding='valid', 
                  data_format='channels_last',name='Pool1')(Conv1)

# Flatten
flatten = Flatten(data_format='channels_last',name='Flatten')(Pool1)

# Fully Connected layer-1
FC1 = Dense(units=30, activation='relu', 
            kernel_initializer=keras.initializers.glorot_normal(seed=32), 
            name='FC1')(flatten)

# Fully Connected layer-2
FC2 = Dense(units=30, activation='relu', 
            kernel_initializer=keras.initializers.glorot_normal(seed=33),
            name='FC2')(FC1)

# Output layer
Out = Dense(units=num_classes, activation='softmax', 
            kernel_initializer=keras.initializers.glorot_normal(seed=3), 
            name='Output')(FC2)

model1 = Model(inputs=VGG16_MODEL.input,outputs=Out)

In [9]:
model1.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 200, 200, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 200, 200, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 200, 200, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 100, 100, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 100, 100, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 100, 100, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 50, 50, 128)       0     

In [10]:
from tensorflow import keras
from keras.utils.vis_utils import plot_model
from keras.utils import np_utils

keras.utils.plot_model(model1, to_file='model1.png', show_layer_names=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [13]:


from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import TensorBoard

checkpoint = ModelCheckpoint("gesturefinal.h5", monitor='accuracy', verbose=1,
    save_best_only=True, mode='auto', save_freq=1)

reduce = ReduceLROnPlateau(monitor='accuracy', factor=0.2, patience=5, min_lr=0.00001, verbose = 1)

logdir='logsgesture'
tensorboard_Visualization = TensorBoard(log_dir=logdir, histogram_freq=True)



In [14]:
model1.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy']
              )

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [15]:
train_samples = 9600
validation_samples = 2400

epochs = 50

batch_size = 128

model1.fit(train_generator,
           steps_per_epoch = train_samples//batch_size,
           epochs = epochs,
           callbacks = [checkpoint, reduce, tensorboard_Visualization],
           validation_data = validation_generator,
           validation_steps = validation_samples//batch_size)

Epoch 1/50
 1/75 [..............................] - ETA: 24:13 - loss: 1.8412 - accuracy: 0.1953
Epoch 00001: accuracy improved from -inf to 0.19531, saving model to gesturefinal.h5
 2/75 [..............................] - ETA: 17:30 - loss: 1.6408 - accuracy: 0.2070
Epoch 00001: accuracy improved from 0.19531 to 0.20703, saving model to gesturefinal.h5
 3/75 [>.............................] - ETA: 19:22 - loss: 1.5599 - accuracy: 0.2266
Epoch 00001: accuracy improved from 0.20703 to 0.22656, saving model to gesturefinal.h5
 4/75 [>.............................] - ETA: 18:29 - loss: 1.5178 - accuracy: 0.2305
Epoch 00001: accuracy improved from 0.22656 to 0.23047, saving model to gesturefinal.h5
 5/75 [=>............................] - ETA: 18:18 - loss: 1.4981 - accuracy: 0.2359
Epoch 00001: accuracy improved from 0.23047 to 0.23594, saving model to gesturefinal.h5
 6/75 [=>............................] - ETA: 18:04 - loss: 1.4794 - accuracy: 0.2383
Epoch 00001: accuracy improved from 

<tensorflow.python.keras.callbacks.History at 0x2084f5f3d88>