# Gesture Recognition
In this project, we you aim to play with both a 3D Conv model and a 2D Conv + RNN architecture that will be able to predict the 5 gestures correctly.

In [1]:
# Importing necessary libraries
import numpy as np
import os
# from scipy.misc import imread, imresize
from imageio import imread
import skimage
from skimage.transform import resize
import datetime
import os

We set the random seed so that the results don't vary drastically.

In [2]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(30)

In [3]:
# For mounting google drive 
"""from google.colab import drive 
drive.mount('/content/drive')"""

Mounted at /content/drive


In [None]:
# Required to extract uploaded zip file
"""!pip install unrar"""

Collecting unrar
  Downloading https://files.pythonhosted.org/packages/bb/0b/53130ccd483e3db8c8a460cb579bdb21b458d5494d67a261e1a5b273fbb9/unrar-0.4-py3-none-any.whl
Installing collected packages: unrar
Successfully installed unrar-0.4


In [None]:
# For extracting the dataset zip file
"""import zipfile
path_to_zip_file = '/content/drive/MyDrive/Dataset/Project_data.zip'
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall('/content/drive/MyDrive/Dataset/Gesture_Recognition/')"""

In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [4]:
train_doc = np.random.permutation(open('/content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/val.csv').readlines())
batch_size = 20 #experiment with the batch size

In [None]:
tf.__version__

'2.4.0'

## Generator
With the generator, we are going to preprocess the images as we have images of 2 different dimensions as well as create a batch of video frames. We have to experiment with `img_idx`, `y`,`z` and normalization to get good validation scores. 

In [5]:
# Dimensions
x = 30 # Sequence Size 
y = 100 # Image height
z = 100 # Image Width
def generator(source_path, folder_list, batch_size):
    print( '\nSource path = ', source_path, '; batch size =', batch_size)
    img_idx = [x for x in range(0,30)]#create a list of image numbers you want to use for a particular video
    while True:
        t = np.random.permutation(folder_list)
        num_batches = int(len(folder_list) / batch_size) # calculate the number of batches
        remaining = len(folder_list) % batch_size
        print("\nNumber of batches: ", num_batches)
        for batch in range(num_batches): # we iterate over the number of batches
            batch_data = np.zeros((batch_size,x,y,z,3), dtype = int) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
            batch_labels = np.zeros((batch_size,5), dtype = int) # batch_labels is the one hot representation of the output
            # print("\nBatch Number: ", batch, "\n")
            for folder in range(batch_size): # iterate over the batch_size
                imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) # read all the images in the folder
                for idx,item in enumerate(img_idx): #  Iterate iver the frames/images of a folder to read them in
                    try:
                      image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item], pilmode="RGB").astype(np.float32)
                    except: 
                      print("\n\nSome issue with the image: \t", source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item])
                      continue    
                    #print("Processing image: ", source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item])
                    #crop the images and resize them. Note that the images are of 2 different shape
                    resized = skimage.transform.resize(image, (y,z))
                    # cropped = resized[:,20:140] 
                    #and the conv3D will throw error if the inputs in a batch have different shapes
                    
                    batch_data[folder,idx,:,:,0] = resized[:,:,0]/255 #normalise and feed in the image
                    batch_data[folder,idx,:,:,1] = resized[:,:,1]/255 #normalise and feed in the image
                    batch_data[folder,idx,:,:,2] = resized[:,:,2]/255 #normalise and feed in the image
                    
                batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            yield batch_data, batch_labels #you yield the batch_data and the batch_labels, remember what does yield do

        
        # write the code for the remaining data points which are left after full batches
        if remaining != 0:
          batch_data = np.zeros((remaining,x,y,z,3), dtype = int) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
          batch_labels = np.zeros((remaining,5), dtype = int)
          remaining_starter = len(folder_list) - remaining + 1
          # print("remaining_starter: ", remaining_starter)
          folder_list2 = folder_list[remaining_starter-1:]
          # print("len(folder_list2): ", len(folder_list2))
          t2 = np.random.permutation(folder_list2)
          # print("t2: ", t2)
          for i, folder in enumerate(range(len(folder_list2)-1)):
            imgs = os.listdir(source_path+'/'+ t2[folder].split(';')[0]) # read all the images in the folder
            # print("len(imgs):", len(imgs))
            for idx,item in enumerate(img_idx): #  Iterate over the frames/images of a folder to read them in
                  image = imread(source_path+'/'+ t2[folder].strip().split(';')[0]+'/'+imgs[item], pilmode="RGB").astype(np.float32)
                  
                  #crop the images and resize them. Note that the images are of 2 different shape
                  # if image.shape[0] < 224:
                    # image = upscale(image)
                  resized = skimage.transform.resize(image, (y,z))  
                  # resized = skimage.transform.resize(image, (120,160)) #224x224
                  # cropped = resized[:,20:140] 
                  #and the conv3D will throw error if the inputs in a batch have different shapes
                  
                  batch_data[folder,idx,:,:,0] = resized[:,:,0]/255 #normalise and feed in the image
                  batch_data[folder,idx,:,:,1] = resized[:,:,1]/255 #normalise and feed in the image
                  batch_data[folder,idx,:,:,2] = resized[:,:,2]/255 #normalise and feed in the image
            # if (i == 22):
              # break       

            batch_labels[folder, int(t2[folder].strip().split(';')[2])] = 1
            yield batch_data, batch_labels #you yield the batch_data and the batch_labels, remember what does yield do


Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

In [6]:
curr_dt_time = datetime.datetime.now()
train_path = '/content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/train/'
val_path = '/content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/val/'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
num_epochs = 10 # choose the number of epochs
print ('# epochs =', num_epochs)

# training sequences = 663
# validation sequences = 100
# epochs = 10


## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `Maxpooling2D` for a 3D convolution model. You would want to use `TimeDistributed` while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model is able to give good accuracy on the least number of parameters so that it can fit in the memory of the webcam.

In [8]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, GlobalAveragePooling2D, Dropout, LSTM, Lambda, Reshape
from keras.layers.convolutional import Conv3D, Conv2D, MaxPooling3D, MaxPooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, Callback
from keras import optimizers
from keras.applications.resnet50 import ResNet50
from tensorflow.keras import Input

## Model 1

In [None]:
# Model - 1
# Transfer Learning With ResNet-50
res_50 = ResNet50(include_top = False, weights = "imagenet", input_tensor = Input(shape=(y,z,3)))
# for layer in res_50.layers[:143]:
  # layer.trainable = False
# Adding our own layers, removing some dropouts
cnn_1 = Sequential()
cnn_1.add(res_50)
# cnn_5.add(Dense(256, activation='relu'))
# cnn_5.add(BatchNormalization())
# cnn_5.add(Dense(128, activation='relu'))
# cnn_5.add(BatchNormalization())
#cnn_5.add(Dense(64, activation='relu'))
#cnn_5.add(Dropout(0.25))
#cnn_5.add(BatchNormalization())
cnn_1.add(Flatten())
rnn_1 = Sequential()    # add the convnet with (30, 100, 100, 3) shape
rnn_1.add(TimeDistributed(cnn_1, input_shape=(30, y, z, 3)))    # here, you can also use GRU or LSTM
rnn_1.add(LSTM(256))    # and finally, we make a decision network
rnn_1.add(Dense(1024, activation='relu'))
rnn_1.add(Dense(512, activation='relu'))
rnn_1.add(Dense(128, activation='relu'))
rnn_1.add(Dropout(.25))
rnn_1.add(Dense(64, activation='relu'))
rnn_1.add(Dense(5, activation='softmax'))

#optimiser = optimizers.SGD(learning_rate=0.1)
#rnn_5.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [None]:
optimiser = optimizers.SGD(learning_rate=0.1)
rnn_1.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print (rnn_1.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 30, 32768)         23587712  
_________________________________________________________________
lstm (LSTM)                  (None, 256)               33817600  
_________________________________________________________________
dense (Dense)                (None, 1024)              263168    
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               65664     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)               

Let us create the `train_generator` and the `val_generator` which will be used in `.fit_generator`.

In [9]:
train_generator = generator(train_path, train_doc, batch_size)
val_generator = generator(val_path, val_doc, batch_size)

In [None]:
train_generator

<generator object generator at 0x7ff265ea85c8>

In [10]:
# Model saved with checkpoints and leveraging callbacks list
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'
    
if not os.path.exists(model_name):
    os.mkdir(model_name)
        
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=0.001)
callbacks_list = [checkpoint, LR]



The `steps_per_epoch` and `validation_steps` are used by `fit_generator` to decide the number of next() calls it need to make.

In [11]:
if (num_train_sequences%batch_size) == 0:
    #steps_per_epoch = int(num_train_sequences/batch_size)
    steps_per_epoch = num_train_sequences/batch_size
else:
    steps_per_epoch = num_train_sequences//batch_size + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = num_val_sequences/batch_size
else:
    validation_steps = num_val_sequences//batch_size + 1

Let us now fit the model. This will start training the model and with the help of the checkpoints, you'll be able to save the model at the end of each epoch.

In [None]:
rnn_1.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Source path =  /content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/train/ ; batch size = 20

Number of batches:  33

Batch Number:  0 

Epoch 1/10

Batch Number:  1 

 1/34 [..............................] - ETA: 9:59 - loss: 1.5785 - categorical_accuracy: 0.3000
Batch Number:  2 

 2/34 [>.............................] - ETA: 1:09:14 - loss: 1.5719 - categorical_accuracy: 0.3250
Batch Number:  3 

 3/34 [=>............................] - ETA: 1:07:54 - loss: 1.5733 - categorical_accuracy: 0.3056
Batch Number:  4 

 4/34 [==>...........................] - ETA: 1:07:39 - loss: 1.5872 - categorical_accuracy: 0.2885
Batch Number:  5 

 5/34 [===>..........................] - ETA: 1:04:36 - loss: 1.5921 - categorical_accuracy: 0.2868
Batch Number:  6 

 6/34 [====>.........................] - ETA: 1:02:22 - loss: 1.5979 - categorical_accuracy: 0.2849
Batch Number:  7 

 7/34 [=====>........................] - ETA: 59:54 - loss: 1.6030 - categorical_accuracy: 0.2809  
Batch Num

<tensorflow.python.keras.callbacks.History at 0x7fde50eb9358>

In [None]:
!nvidia-smi

Sat Dec 26 15:45:59 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.27.04    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   76C    P0    35W /  70W |  14577MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
tf.test.gpu_device_name()

'/device:GPU:0'

## Model - 2

In [None]:
# Model-2: Modified #Epochs, Froze resnet layers 
# Transfer Learning With ResNet-50
res_50 = ResNet50(include_top = False, weights = "imagenet", input_tensor = Input(shape=(y,z,3)))
for layer in res_50.layers[:143]:
  layer.trainable = False
# Adding our own layers, removing some dropouts
cnn_2 = Sequential()
cnn_2.add(res_50)
cnn_2.add(Dense(256, activation='relu'))
cnn_2.add(BatchNormalization())
cnn_2.add(Dense(128, activation='relu'))
cnn_2.add(BatchNormalization())
cnn_2.add(Dense(64, activation='relu'))
cnn_2.add(Dropout(0.25))
cnn_2.add(BatchNormalization())
cnn_2.add(Flatten())
rnn_2 = Sequential()    
rnn_2.add(TimeDistributed(cnn_2, input_shape=(30, y, z, 3))) # add the convnet with (30, 100, 100, 3) shape  
rnn_2.add(LSTM(256))     # GRU can also be used
rnn_2.add(Dense(1024, activation='relu'))  # we make a decision network
rnn_2.add(Dense(512, activation='relu'))
rnn_2.add(Dense(128, activation='relu'))
rnn_2.add(Dropout(.25))
rnn_2.add(Dense(64, activation='relu'))
rnn_2.add(Dense(5, activation='softmax'))

optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta")
rnn_2.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_2.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=20, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Batch Number:  26 

Epoch 1/20

Batch Number:  27 

 1/34 [..............................] - ETA: 3:38 - loss: 1.5724 - categorical_accuracy: 0.3500
Batch Number:  28 

 2/34 [>.............................] - ETA: 3:11 - loss: 1.5860 - categorical_accuracy: 0.3250
Batch Number:  29 

 3/34 [=>............................] - ETA: 3:02 - loss: 1.5983 - categorical_accuracy: 0.2944
Batch Number:  30 

 4/34 [==>...........................] - ETA: 2:55 - loss: 1.6009 - categorical_accuracy: 0.2865
Batch Number:  31 

 5/34 [===>..........................] - ETA: 2:51 - loss: 1.6005 - categorical_accuracy: 0.2852
Batch Number:  32 

Number of batches:  33

Batch Number:  0 

Batch Number:  1 

Batch Number:  2 

Batch Number:  3 

Batch Number:  4 

Batch Number:  5 

Batch Number:  6 

Some issue with the image: 	 /content/drive/MyDrive/Dataset/Gesture_Recognition/Project_data/train//WIN_20180907_15_35_09_Pro_Right Swipe_new/WIN_20180907_15_35_09_Pro_00030.png

Batch Number:  7 

Batch N

<tensorflow.python.keras.callbacks.History at 0x7fda5aabc518>

## Model - 3

In [None]:
# Model-3: Removed additional layers, added reshape layer
res50_v2 = ResNet50(include_top=False, weights='imagenet', pooling = 'avg')
rnn_3 = Sequential()    
rnn_3.add(TimeDistributed(res50_v2, input_shape=(30, y, z, 3)))
rnn_3.add(Dropout(0.25))
rnn_3.add(Reshape((30,2048)))
rnn_3.add(LSTM(256))
rnn_3.add(Dense(5, activation='softmax'))

rnn_3.compile(optimizer=optimizers.SGD(learning_rate=0.1), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_3.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=10, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Batch Number:  7 

Epoch 1/10

Batch Number:  8 

 1/34 [..............................] - ETA: 4:37 - loss: 1.7944 - categorical_accuracy: 0.1000
Batch Number:  9 

 2/34 [>.............................] - ETA: 3:28 - loss: 1.8142 - categorical_accuracy: 0.1250
Batch Number:  10 

 3/34 [=>............................] - ETA: 3:53 - loss: 1.8162 - categorical_accuracy: 0.1278
Batch Number:  11 

 4/34 [==>...........................] - ETA: 3:40 - loss: 1.8005 - categorical_accuracy: 0.1333
Batch Number:  12 

 5/34 [===>..........................] - ETA: 3:30 - loss: 1.7884 - categorical_accuracy: 0.1387
Batch Number:  13 

 6/34 [====>.........................] - ETA: 3:11 - loss: 1.7805 - categorical_accuracy: 0.1419
Batch Number:  14 

 7/34 [=====>........................] - ETA: 3:05 - loss: 1.7707 - categorical_accuracy: 0.1451
Batch Number:  15 

Batch Number:  16 

Batch Number:  17 

Batch Number:  18 

Batch Number:  19 

Batch Number:  20 

Batch Number:  21 

Batch Numbe

<tensorflow.python.keras.callbacks.History at 0x7fda581abf28>

So, we can a significant improvement in accuracy here (~70%). Let's fine-tune it further, 

## Model - 4

In [None]:
# Model-4: Changed optimizer, increased dropout value
# Replaced ReduceLROnPlateau with DecayLR
class DecayLR(Callback):
    def __init__(self, base_lr=0.1, decay_epoch=1):
        super(DecayLR, self).__init__()
        self.base_lr = base_lr
        self.decay_epoch = decay_epoch 
        self.lr_history = []
        
    def on_train_begin(self, logs={}):
        K.set_value(self.model.optimizer.lr, self.base_lr)

    def on_epoch_end(self, epoch, logs={}):
        new_lr = self.base_lr * (0.5 ** (epoch // self.decay_epoch))
        self.lr_history.append(K.get_value(self.model.optimizer.lr))
        K.set_value(self.model.optimizer.lr, new_lr)

LR = DecayLR()
callbacks_list = [checkpoint, LR]       

In [None]:
# res50_v2 = ResNet50(include_top=False, weights='imagenet', pooling = 'avg', input_tensor = Input(shape=(30,y,z,3)))
rnn_4 = Sequential()    
rnn_4.add(TimeDistributed(res50_v2, input_shape=(30, y, z, 3))) # Reusing the resnet object
rnn_4.add(Dropout(0.5))
rnn_4.add(Reshape((30,2048)))
rnn_4.add(LSTM(256))
rnn_4.add(Dense(5, activation='softmax'))

optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta")

rnn_4.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_4.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=10, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)

Epoch 1/10
 1/34 [..............................] - ETA: 4:24 - loss: 0.5493 - categorical_accuracy: 0.0000e+00
Number of batches:  33

Batch Number:  0 

 2/34 [>.............................] - ETA: 9s - loss: 0.6324 - categorical_accuracy: 0.0833      
Batch Number:  1 

 3/34 [=>............................] - ETA: 2:09 - loss: 0.8885 - categorical_accuracy: 0.1453
Batch Number:  2 

 4/34 [==>...........................] - ETA: 2:26 - loss: 1.0227 - categorical_accuracy: 0.2014
Batch Number:  3 

 5/34 [===>..........................] - ETA: 2:30 - loss: 1.0992 - categorical_accuracy: 0.2429
Batch Number:  4 

 6/34 [====>.........................] - ETA: 2:34 - loss: 1.1435 - categorical_accuracy: 0.2799
Batch Number:  5 

 7/34 [=====>........................] - ETA: 2:41 - loss: 1.1688 - categorical_accuracy: 0.3100
Batch Number:  6 

Batch Number:  7 

Batch Number:  8 

Batch Number:  9 

Batch Number:  10 

Batch Number:  11 

Batch Number:  12 

Batch Number:  13 

Batch Nu

<tensorflow.python.keras.callbacks.History at 0x7fda58d5b1d0>

## Model - 5

In [None]:
# Model-5: Added Dropout Layer
rnn_5 = Sequential()    
rnn_5.add(TimeDistributed(res50_v2, input_shape=(30, y, z, 3)))
rnn_5.add(Dropout(0.5))
rnn_5.add(Reshape((30,2048)))
rnn_5.add(LSTM(256))
rnn_5.add(Dropout(0.5))
rnn_5.add(Dense(5, activation='softmax'))

optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta")

rnn_5.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_5.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=10, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Batch Number:  24 

Epoch 1/10

Batch Number:  25 

 1/34 [..............................] - ETA: 5:07 - loss: 1.7873 - categorical_accuracy: 0.2000
Batch Number:  26 

 2/34 [>.............................] - ETA: 3:47 - loss: 1.6856 - categorical_accuracy: 0.2500
Batch Number:  27 

 3/34 [=>............................] - ETA: 3:35 - loss: 1.6237 - categorical_accuracy: 0.2833
Batch Number:  28 

 4/34 [==>...........................] - ETA: 3:20 - loss: 1.5791 - categorical_accuracy: 0.3094
Batch Number:  29 

 5/34 [===>..........................] - ETA: 3:12 - loss: 1.5448 - categorical_accuracy: 0.3355
Batch Number:  30 

 6/34 [====>.........................] - ETA: 3:08 - loss: 1.5146 - categorical_accuracy: 0.3546
Batch Number:  31 

 7/34 [=====>........................] - ETA: 3:02 - loss: 1.4815 - categorical_accuracy: 0.3754
Batch Number:  32 

Number of batches:  33

Batch Number:  0 

Batch Number:  1 

Some issue with the image: 	 /content/drive/MyDrive/Dataset/Gestur

<tensorflow.python.keras.callbacks.History at 0x7fda540748d0>

Accuracy has crossed 86% but there are some issues with validation accuracy. Let's fine-tune further to reduce model overfitting. 

## Model - 6

In [None]:
# Model-6: Added Batch-Norm
# An update over model-5
rnn_5b = Sequential()    # add the convnet with (30, 100, 100, 3) shape
rnn_5b.add(TimeDistributed(res50_v2, input_shape=(30, y, z, 3)))
rnn_5b.add(Dropout(0.5))
rnn_5b.add(BatchNormalization())
rnn_5b.add(Reshape((30,2048)))
rnn_5b.add(LSTM(256))
rnn_5b.add(Dropout(0.5))
rnn_5b.add(BatchNormalization())
rnn_5b.add(Dense(5, activation='softmax'))

optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta")

rnn_5b.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_5b.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=5, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Batch Number:  15 

Epoch 1/5

Batch Number:  16 

 1/34 [..............................] - ETA: 5:14 - loss: 2.3615 - categorical_accuracy: 0.2500
Batch Number:  17 

 2/34 [>.............................] - ETA: 3:24 - loss: 2.1764 - categorical_accuracy: 0.2500
Batch Number:  18 

 3/34 [=>............................] - ETA: 2:58 - loss: 2.0355 - categorical_accuracy: 0.2722
Batch Number:  19 

 4/34 [==>...........................] - ETA: 3:10 - loss: 1.9360 - categorical_accuracy: 0.2917
Batch Number:  20 

 5/34 [===>..........................] - ETA: 2:58 - loss: 1.8643 - categorical_accuracy: 0.3093
Batch Number:  21 

 6/34 [====>.........................] - ETA: 2:50 - loss: 1.7884 - categorical_accuracy: 0.3314
Batch Number:  22 

 7/34 [=====>........................] - ETA: 2:42 - loss: 1.7279 - categorical_accuracy: 0.3514
Batch Number:  23 

Batch Number:  24 

Batch Number:  25 

Batch Number:  26 

Batch Number:  27 

Batch Number:  28 

Batch Number:  29 

Batch Num

<tensorflow.python.keras.callbacks.History at 0x7fda51a4afd0>

## Model - 7

Let's experiment with Conv-3D architecture

In [12]:
# Model-7: Using Conv-3D
nb_filters = [8,16,32,64]
nb_dense = [256, 128, 5]

# Input
input_shape=(30,100,100,3)

# Define model
model_6 = Sequential()

model_6.add(Conv3D(nb_filters[0], 
                 kernel_size=(3,3,3), 
                 input_shape=input_shape,
                 padding='same'))
model_6.add(BatchNormalization())
model_6.add(Activation('relu'))

model_6.add(MaxPooling3D(pool_size=(2,2,2)))

model_6.add(Conv3D(nb_filters[1], 
                 kernel_size=(3,3,3), 
                 padding='same'))
model_6.add(BatchNormalization())
model_6.add(Activation('relu'))

model_6.add(MaxPooling3D(pool_size=(2,2,2)))

model_6.add(Conv3D(nb_filters[2], 
                 kernel_size=(1,3,3), 
                 padding='same'))
model_6.add(BatchNormalization())
model_6.add(Activation('relu'))

model_6.add(MaxPooling3D(pool_size=(2,2,2)))

model_6.add(Conv3D(nb_filters[3], 
                 kernel_size=(1,3,3), 
                 padding='same'))
model_6.add(Activation('relu'))
model_6.add(Dropout(0.25))

model_6.add(MaxPooling3D(pool_size=(2,2,2)))

#Flatten Layers
model_6.add(Flatten())

model_6.add(Dense(nb_dense[0], activation='relu'))
model_6.add(Dropout(0.5))

model_6.add(Dense(nb_dense[1], activation='relu'))
model_6.add(Dropout(0.5))

#softmax layer
model_6.add(Dense(nb_dense[2], activation='softmax'))


optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta") #write your optimizer
model_6.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print (model_6.summary())


#batch_size = 20
num_epochs = 10
model_6.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, class_weight=None, workers=-1, initial_epoch=0)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d (Conv3D)              (None, 30, 100, 100, 8)   656       
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 100, 100, 8)   32        
_________________________________________________________________
activation (Activation)      (None, 30, 100, 100, 8)   0         
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 15, 50, 50, 8)     0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 15, 50, 50, 16)    3472      
_________________________________________________________________
batch_normalization_1 (Batch (None, 15, 50, 50, 16)    64        
_________________________________________________________________
activation_1 (Activation)    (None, 15, 50, 50, 16)    0

<tensorflow.python.keras.callbacks.History at 0x7f382bfebf98>

## Final Model

Conv-3D model would have required a lot of fine-tuning, hence let's stick to our original plan

In [None]:
# Model-8: Added Batch-Norm, Architecture: Conv2D (Resnet50) + GRU 
rnn_5c = Sequential()    
rnn_5c.add(TimeDistributed(res50_v2, input_shape=(30, y, z, 3))) # add the convnet with (30, 100, 100, 3) shape
rnn_5c.add(Dropout(0.5))
rnn_5c.add(BatchNormalization())
rnn_5c.add(Reshape((30,2048)))
rnn_5c.add(GRU(128)) # Replaced LSTM Layer with GRU
rnn_5c.add(Dropout(0.5))
rnn_5c.add(BatchNormalization())
rnn_5c.add(Dense(5, activation='softmax'))

optimiser = optimizers.Adadelta(
    learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta") # Adadelta Optimizer

rnn_5c.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
rnn_5c.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=5, verbose=1, 
              callbacks=callbacks_list, validation_data=val_generator, validation_steps=validation_steps, 
              class_weight=None, workers=1, initial_epoch=0)


Batch Number:  11 

Epoch 1/5

Batch Number:  12 

 1/34 [..............................] - ETA: 5:17 - loss: 2.0985 - categorical_accuracy: 0.2000
Batch Number:  13 

 2/34 [>.............................] - ETA: 3:48 - loss: 2.0439 - categorical_accuracy: 0.2250
Batch Number:  14 

 3/34 [=>............................] - ETA: 3:42 - loss: 2.0409 - categorical_accuracy: 0.2333
Batch Number:  15 

 4/34 [==>...........................] - ETA: 3:29 - loss: 1.9963 - categorical_accuracy: 0.2438
Batch Number:  16 

 5/34 [===>..........................] - ETA: 3:23 - loss: 1.9535 - categorical_accuracy: 0.2590
Batch Number:  17 

 6/34 [====>.........................] - ETA: 3:17 - loss: 1.8977 - categorical_accuracy: 0.2825
Batch Number:  18 

 7/34 [=====>........................] - ETA: 3:07 - loss: 1.8397 - categorical_accuracy: 0.3023
Batch Number:  19 

Batch Number:  20 

Batch Number:  21 

Batch Number:  22 

Batch Number:  23 

Batch Number:  24 

Batch Number:  25 

Batch Num

<tensorflow.python.keras.callbacks.History at 0x7fda4f3a2470>

There was a slight improvement in validation accuracy and the training accuracy (~85%) looks decent enough 

In [None]:
# Moving models to drive
"""!mv model_init_2020-12-2712_16_48.966578/model-00010-0.32328-0.85139-2.18279-0.34000.h5  drive/MyDrive/Models/"""