In [None]:
# Show the GPU attached to this runtime (model, driver version, memory in use) before training.
!nvidia-smi

Sun Aug 20 16:07:49 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   64C    P8    11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
#importing necessary libraries
import abc
import datetime
import os
import warnings
from sys import getsizeof

import imageio
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage
#from scipy.misc import imread, imresize

warnings.filterwarnings("ignore")

Set the random seeds so that the results do not vary drastically between runs.

In [None]:
# Seed NumPy, Python's `random` module and TensorFlow with the same value (30).
np.random.seed(30)
import random as rn
rn.seed(30)
# NOTE(review): `K` is not referenced in the visible part of the notebook — confirm it is still needed.
from keras import backend as K
import tensorflow as tf
tf.compat.v1.set_random_seed(30)

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, Dropout
from keras.layers.convolutional import Conv3D, MaxPooling3D , Conv2D, MaxPooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D, Input
from tensorflow.keras.applications import mobilenet
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau ,EarlyStopping
from keras import optimizers

In [None]:
##mount the drive to get the dataset
# Mounts Google Drive at /content/drive; train_model() later writes checkpoints under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
## Getting the dataset to the colab runtime local memory
# Installs/upgrades PyDrive quietly (-q); the version is not pinned.
!pip install -U -q PyDrive

In [12]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): this rebinds `drive`, which an earlier cell bound to the google.colab `drive`
# module; after this cell `drive.mount(...)` no longer works without re-importing it.
drive = GoogleDrive(gauth)

In [26]:
# Download the dataset archive by its Drive file id into the working directory as Project_data.zip.
# `drive` here is the PyDrive client created in the authentication cell.
file_id = '1ehyrYBQ5rbQQe6yL4XbLWe3FMvuVUGiL' # URL id.
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('Project_data.zip')

In [None]:
# Extract the downloaded archive; presumably this yields /content/Project_data, which project_folder_path points at — verify.
!unzip /content/Project_data.zip

In [34]:
#defining the project folder path
# ModelClass.initialize_path() expects train.csv, val.csv, train/ and val/ directly under this folder.
project_folder_path = "/content/Project_data"

ModelClass:

This model class will handle all the initialisations of paths, image properties and model properties

This class will have a generator function as well for generating the data with labels

The images will be cropped to the size we want

The images with irregular dimension will be center cropped on the x-axis equally on both sides

We save the model only when the validation loss decreases

Learning rate decreases when approaching the minima

This class will have an abstract method to define our own model by inheriting this class

This class will have a generic train function as well.

The history is saved to plot the model as well

In [35]:
class ModelClass(metaclass= abc.ABCMeta):
    """Common scaffolding for the gesture-recognition experiments.

    Subclasses implement ``model_architecture``. This base class holds the
    dataset locations, the image and training settings, the batch generator,
    the training loop (checkpointing + learning-rate reduction) and the
    history plot.
    """

    # initialising the path where project data resides
    def initialize_path(self,project_folder_path):
        """Load and shuffle the train/val index files and record the data folders.

        Args:
            project_folder_path: folder containing ``train.csv``, ``val.csv`` and
                the ``train`` / ``val`` sub-folders.
        """
        self.train_doc = np.random.permutation(self._read_rows(os.path.join(project_folder_path, 'train.csv')))
        self.val_doc = np.random.permutation(self._read_rows(os.path.join(project_folder_path, 'val.csv')))
        self.train_path = os.path.join(project_folder_path, 'train')
        self.val_path = os.path.join(project_folder_path, 'val')
        self.num_train_sequences = len(self.train_doc)
        self.num_val_sequences = len(self.val_doc)

    @staticmethod
    def _read_rows(csv_path):
        """Return the non-blank lines of ``csv_path``; the file is closed on exit."""
        with open(csv_path) as handle:
            return [line for line in handle if line.strip()]

    # initialising the image properties
    def initialize_image_properties(self,image_height=120,image_width=120):
        """Set the target frame size; channels, class count and frames per video are fixed."""
        self.image_height=image_height
        self.image_width=image_width
        self.channels=3
        self.num_classes=5
        self.total_frames=30

    # initialising the batch size, frames to sample and the no. of epochs
    def initialize_hyperparams(self,frames_to_sample=30,batch_size=32,num_epochs=50):
        """Set how many frames are used per video, the batch size and the epoch count."""
        self.frames_to_sample=frames_to_sample
        self.batch_size=batch_size
        self.num_epochs=num_epochs

    def initialize_modelparams(self):
        """Reset the stored model and training history."""
        self.model = None
        self.history = None

    def _frame_indices(self, model_type):
        """Return the frame positions used for every video.

        ``"Conv3D"`` takes ``frames_to_sample`` positions spread evenly over
        ``total_frames``, in ascending order. The indices are deterministic so
        the training and validation generators use the same frames, and the
        temporal order of each clip is preserved. Any other ``model_type``
        takes the first ``frames_to_sample`` frames.
        """
        if model_type == "Conv3D":
            return np.round(np.linspace(0, self.total_frames - 1, self.frames_to_sample)).astype(int)
        return np.arange(self.frames_to_sample)

    def _read_frame(self, path):
        """Read one image file as a float32 array."""
        return imageio.imread(path).astype(np.float32)

    def _resize(self, image):
        """Resize ``image`` to (image_height, image_width) with linear interpolation."""
        factors = (self.image_height / image.shape[0], self.image_width / image.shape[1])
        factors += (1,) * (image.ndim - 2)  # leave the channel axis untouched
        return ndimage.zoom(image, factors, order=1, mode='nearest')

    def _fit_frame(self, image):
        """Centre-crop ``image`` to the target aspect ratio, then resize if needed.

        A 120x160 frame with a 120x120 target becomes columns 20..139; a 360x360
        frame is left uncropped and resized down to the target.
        """
        height, width = image.shape[:2]
        # largest centred window that has the target aspect ratio
        crop_w = min(width, int(round(height * self.image_width / self.image_height)))
        crop_h = min(height, int(round(width * self.image_height / self.image_width)))
        top = (height - crop_h) // 2
        left = (width - crop_w) // 2
        image = image[top:top + crop_h, left:left + crop_w]
        if image.shape[:2] != (self.image_height, self.image_width):
            image = self._resize(image)
        return image

    def _load_batch(self, source_path, rows, frame_ids):
        """Build ``(batch_data, batch_labels)`` for the given index rows.

        Each row is ``folder;...;label``: field 0 names the video folder under
        ``source_path`` and field 2 is the integer class index.
        """
        batch_data = np.zeros((len(rows),len(frame_ids),self.image_height,self.image_width,self.channels))
        batch_labels = np.zeros((len(rows),self.num_classes)) # one hot representation of the output
        for row_no, row in enumerate(rows):
            fields = row.strip().split(';')
            video_dir = os.path.join(source_path, fields[0])
            # os.listdir returns names in arbitrary order; sort so index i is the i-th frame
            frame_files = sorted(os.listdir(video_dir))
            for slot, frame_no in enumerate(frame_ids):
                frame = self._fit_frame(self._read_frame(os.path.join(video_dir, frame_files[frame_no])))
                batch_data[row_no, slot] = frame[:, :, :self.channels]/255.0 #normalise and feed in the image
            batch_labels[row_no, int(fields[2])] = 1
        return batch_data, batch_labels

    def generator(self,source_path, folder_list, model_type = "Conv3D"):
        """Yield ``(batch_data, batch_labels)`` forever, reshuffling on every pass.

        Args:
            source_path: folder that contains one sub-folder of frames per video.
            folder_list: index rows of the form ``folder;...;label``.
            model_type: ``"Conv3D"`` samples frames evenly across the clip; any
                other value uses the first ``frames_to_sample`` frames.

        Yields:
            ``batch_data`` of shape (batch, frames, height, width, channels) scaled
            to [0, 1] and one-hot ``batch_labels`` of shape (batch, num_classes).
            The last batch of a pass is smaller when the dataset size is not a
            multiple of ``batch_size``; no empty batch is produced when it is.

        Raises:
            ValueError: if ``folder_list`` is empty.
        """
        num_videos = len(folder_list)
        if num_videos == 0:
            raise ValueError("generator() needs at least one row in folder_list")
        frame_ids = self._frame_indices(model_type)
        while True:
            shuffled = np.random.permutation(folder_list)
            # stepping by batch_size yields the full batches first and then the remainder
            for start in range(0, num_videos, self.batch_size):
                yield self._load_batch(source_path, shuffled[start:start + self.batch_size], frame_ids)

    @abc.abstractmethod
    def model_architecture(self):
      pass

    def _steps_for(self, num_sequences):
        """Number of batches the generator yields per pass over ``num_sequences`` rows."""
        return -(-num_sequences // self.batch_size)  # ceiling division

    def train_model(self,model,model_name="model_init",model_type="Conv3D",checkpoint_root="/content/drive/MyDrive/"):
        """Train ``model`` on the generators and keep the resulting history.

        Args:
            model: compiled Keras model to train.
            model_name: prefix of the timestamped checkpoint folder.
            model_type: forwarded to ``generator`` to choose the frame sampling.
            checkpoint_root: folder under which the checkpoint folder is created.

        Returns:
            The Keras ``History`` object (also stored on ``self.history``).
        """
        train_generator = self.generator(self.train_path, self.train_doc,model_type=model_type)
        val_generator = self.generator(self.val_path, self.val_doc,model_type=model_type)

        run_name = model_name + '_' + str(datetime.datetime.now()).replace(' ','').replace(':','_')

        # create the folder the checkpoints are actually written to
        checkpoint_dir = os.path.join(checkpoint_root, run_name)
        os.makedirs(checkpoint_dir, exist_ok=True)

        filepath = os.path.join(checkpoint_dir, 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5')

        # save only when val_loss improves; checks run every epoch by default
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
        LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)

        # earlystop = EarlyStopping( monitor="val_loss", min_delta=0,patience=10,verbose=1)
        callbacks_list = [checkpoint, LR]

        steps_per_epoch = self._steps_for(self.num_train_sequences)
        validation_steps = self._steps_for(self.num_val_sequences)

        # Model.fit accepts Python generators; fit_generator is deprecated
        history=model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=self.num_epochs, verbose=1,
                          callbacks=callbacks_list, validation_data=val_generator,
                          validation_steps=validation_steps)
        self.model = model
        self.history = history
        return history

    def plot_model(self):
        """Plot training vs validation loss and categorical accuracy from ``self.history``."""
        curves = self.history.history
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15,4))
        for axis, metric in zip(axes, ('loss', 'categorical_accuracy')):
            axis.plot(curves[metric])
            axis.plot(curves['val_' + metric])
            axis.legend([metric, 'val_' + metric])
            axis.set_xlabel('epoch')
            axis.set_title(metric)

Let's check sample image cropping

In [36]:
import imageio
#from skimage.io import imread, imresize

In [None]:
##actual image
# NOTE(review): this reads from a Drive copy under /content/drive/MyDrive/Project_data, while
# project_folder_path points at /content/Project_data — confirm the Drive copy exists.
image = imageio.imread('/content/drive/MyDrive/Project_data/train/WIN_20180925_17_44_57_Pro_Thumbs_Down_new/WIN_20180925_17_44_57_Pro_00015.png')
print(image.shape)
# divide by 255 so imshow receives floats in [0, 1]
plt.imshow(image/255)

In [None]:
##centre cropping of the image looks like this for a (120,160) image
# Derive the crop window from the image itself (20:140 for a 120x160 frame) so that
# re-running this cell on the already-cropped square image is a no-op instead of cropping again.
crop_left = max((image.shape[1] - image.shape[0]) // 2, 0)
image = image[:, crop_left:crop_left + image.shape[0]]
plt.imshow(image)
plt.show()

**Model Building Approach 1: Conv3D model**

Lets write base model which has Conv3D + BatchNorm + MaxPooling3D layers lined up with a softmax at the end

In [None]:
class Conv3D1(ModelClass):
    """Conv3D baseline: four conv stages followed by two dense blocks and a softmax."""

    def model_architecture(self,dense_neurons=64,dropout=0.25):
        """Build and compile the network.

        Args:
            dense_neurons: width of each of the two dense layers.
            dropout: dropout rate applied after each dense block.

        Returns:
            The compiled ``Sequential`` model (adam, categorical cross-entropy).
        """
        clip_shape = (self.frames_to_sample,self.image_height,self.image_width,self.channels)

        # (filters, kernel size, extra MaxPooling3D arguments) per stage, in order
        conv_stages = (
            (16, (3, 3, 3), {}),
            (32, (2, 2, 2), {}),
            (64, (2, 2, 2), {}),
            (128, (2, 2, 2), {'padding': 'same'}),
        )

        model = Sequential()
        for stage_no, (filters, kernel, pool_extra) in enumerate(conv_stages):
            # only the very first layer declares the input shape
            first_layer_args = {'input_shape': clip_shape} if stage_no == 0 else {}
            model.add(Conv3D(filters, kernel, padding='same', **first_layer_args))
            model.add(Activation('relu'))
            model.add(BatchNormalization())
            model.add(MaxPooling3D(pool_size=(2, 2, 2), **pool_extra))

        model.add(Flatten())
        for _ in range(2):
            model.add(Dense(dense_neurons,activation='relu'))
            model.add(BatchNormalization())
            model.add(Dropout(dropout))

        model.add(Dense(self.num_classes,activation='softmax'))

        model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])
        return model

**Model 1: Conv3D model**

with

just 15 frames to be sampled

batch size of 32

Images cropped to 120,120

128 dense neurons in the two dense layers

In [None]:
# Model 1: Conv3D1 on 120x120 frames, 15 frames per video, batch size 32, 20 epochs,
# 128 dense units and dropout 0.25; summary() prints the layer table.
conv3D1 = Conv3D1()
conv3D1.initialize_path(project_folder_path)
conv3D1.initialize_image_properties(image_height=120,image_width=120)
conv3D1.initialize_hyperparams(frames_to_sample=15,batch_size=32,num_epochs=20)
conv3D1.initialize_modelparams()
conv3D1_model=conv3D1.model_architecture(dense_neurons=128,dropout=0.25)
conv3D1_model.summary()

In [None]:
##train the model
# train_model() returns the Keras History and also stores it on conv3D1.history for plotting.
conv3D1.train_model(conv3D1_model,model_name="conv3D1",model_type="Conv3D")

In [None]:
# plot the model
# Draws loss and categorical-accuracy curves (train vs validation) from conv3D1.history.
conv3D1.plot_model()

**Observations and Thoughts:**

Clearly overfitting with val accuracy just being around 30% and train accuracy reaching 99%+

Let's decrease the batch_size to improve generalisability, increase the number of epochs, and also increase the frames to sample

**Model 2: Conv3D model** with

20 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 30

In [None]:
# Model 2: same Conv3D1 architecture with 20 frames per video, batch size 20 and 30 epochs.
conv3D2 = Conv3D1()
conv3D2.initialize_path(project_folder_path)
conv3D2.initialize_image_properties(image_height=120,image_width=120)
conv3D2.initialize_hyperparams(frames_to_sample=20,batch_size=20,num_epochs=30)
conv3D2.initialize_modelparams()
conv3D2_model=conv3D2.model_architecture(dense_neurons=128,dropout=0.25)
conv3D2_model.summary()

In [None]:
##train the model
# The returned History is also kept on conv3D2.history.
conv3D2.train_model(conv3D2_model,model_name="conv3D2",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 2.
conv3D2.plot_model()

**Observations and Thoughts:**

we have pushed the validation accuracy by a large extent from 20% to 83%

We still see there is some gap between training and testing accuracy which is around 15% where training accuracy is 99%

We should try increasing dropout and decrease batch_size to see if it reduces the gap

Still this model is decent after 19th epoch with loss: 0.0832 - categorical_accuracy: 0.9925 - val_loss: 0.5237 - val_categorical_accuracy: 0.8300. The losses are below 1

**Model 3: Conv3D model** with

20 frames to be sampled

batch size of 16 (previously 20)

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 30

dropout = 0.5

In [None]:
# Model 3: Conv3D1 architecture with 20 frames per video, batch size 16, 30 epochs and dropout raised to 0.5.
conv3D3 = Conv3D1()
conv3D3.initialize_path(project_folder_path)
conv3D3.initialize_image_properties(image_height=120,image_width=120)
conv3D3.initialize_hyperparams(frames_to_sample=20,batch_size=16,num_epochs=30)
conv3D3.initialize_modelparams()
conv3D3_model=conv3D3.model_architecture(dense_neurons=128,dropout=0.5)
conv3D3_model.summary()

In [None]:
#train the model
# The returned History is also kept on conv3D3.history.
conv3D3.train_model(conv3D3_model,model_name="conv3D3",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 3.
conv3D3.plot_model()

**Observations and Thoughts:**

We have now reduced the gap between training and validation accuracy, thereby completely overcoming overfitting.

We see that least validation loss is at 0.4635

The post 25th epoch had loss: 0.4083 - categorical_accuracy: 0.8537 - val_loss: 0.4636 - val_categorical_accuracy: 0.8600

We see that the training accuracy is lesser than the validation accuracy.
This happens when the validation set is easier to interpret than the training set.

This is NOT a negative sign; it is more realistic, since the training and validation losses are very close to each other, as are the categorical accuracies.
Let's try a few more models like the one above, using a different kernel size, to see if we get further improvement.

**Model 4: Conv3D model** with

15 frames to be sampled

batch size of 32

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 20

dropout = 0.25

kernel size = (3,3,3)

In [None]:
class Conv3D4(ModelClass):
    """Conv3D variant that uses (3, 3, 3) kernels in all four conv stages."""

    def model_architecture(self,dense_neurons=64,dropout=0.25):
        """Build and compile the network.

        Args:
            dense_neurons: width of each of the two dense layers.
            dropout: dropout rate applied after each dense block.

        Returns:
            The compiled ``Sequential`` model (adam, categorical cross-entropy).
        """
        clip_shape = (self.frames_to_sample,self.image_height,self.image_width,self.channels)

        # (filters, extra MaxPooling3D arguments) per stage; every kernel is (3, 3, 3)
        conv_stages = (
            (16, {}),
            (32, {}),
            (64, {}),
            (128, {'padding': 'same'}),
        )

        model = Sequential()
        for stage_no, (filters, pool_extra) in enumerate(conv_stages):
            # only the very first layer declares the input shape
            first_layer_args = {'input_shape': clip_shape} if stage_no == 0 else {}
            model.add(Conv3D(filters, (3, 3, 3), padding='same', **first_layer_args))
            model.add(Activation('relu'))
            model.add(BatchNormalization())
            model.add(MaxPooling3D(pool_size=(2, 2, 2), **pool_extra))

        model.add(Flatten())
        for _ in range(2):
            model.add(Dense(dense_neurons,activation='relu'))
            model.add(BatchNormalization())
            model.add(Dropout(dropout))

        model.add(Dense(self.num_classes,activation='softmax'))

        model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])
        return model

In [None]:
# Model 4: Conv3D4 (3x3x3 kernels) with 15 frames per video, batch size 32, 20 epochs, dropout 0.25.
conv3D4 = Conv3D4()
conv3D4.initialize_path(project_folder_path)
conv3D4.initialize_image_properties(image_height=120,image_width=120)
conv3D4.initialize_hyperparams(frames_to_sample=15,batch_size=32,num_epochs=20)
conv3D4.initialize_modelparams()
conv3D4_model=conv3D4.model_architecture(dense_neurons=128,dropout=0.25)
conv3D4_model.summary()

In [None]:
# Train model 4; the returned History is also kept on conv3D4.history.
conv3D4.train_model(conv3D4_model,model_name="conv3D4",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 4.
conv3D4.plot_model()

**Observations and Thoughts:**

A case of complete overfitting is seen here.
Let's reduce the batch size.

**Model 5: Conv3D model** with

15 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 20

dropout = 0.25

kernel size = (3,3,3)

In [None]:
# Model 5: Conv3D4 architecture with the batch size reduced to 20 (15 frames, 20 epochs, dropout 0.25).
conv3D5 = Conv3D4()
conv3D5.initialize_path(project_folder_path)
conv3D5.initialize_image_properties(image_height=120,image_width=120)
conv3D5.initialize_hyperparams(frames_to_sample=15,batch_size=20,num_epochs=20)
conv3D5.initialize_modelparams()
conv3D5_model=conv3D5.model_architecture(dense_neurons=128,dropout=0.25)
conv3D5_model.summary()

In [None]:
# Train model 5; the returned History is also kept on conv3D5.history.
conv3D5.train_model(conv3D5_model,model_name="conv3D5",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 5.
conv3D5.plot_model()

**Observations and Thoughts:**

We have now reduced the gap between training and validation accuracy, thereby reduced overfitting by a large extent.

The validation loss is below 1 (~= 0.86)
The model has loss: 0.4693 - categorical_accuracy: 0.8281 - val_loss: 0.8504 - val_categorical_accuracy: 0.7100

The accuracies are a lot better than the previous model. Based on the graphs, let's try increasing the epochs; we might achieve something better.

**Model 6: Conv3D model** with

15 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 40

dropout = 0.25

kernel size = (3,3,3)

In [None]:
# Model 6: Conv3D4 architecture as in model 5 but trained for 40 epochs.
conv3D6 = Conv3D4()
conv3D6.initialize_path(project_folder_path)
conv3D6.initialize_image_properties(image_height=120,image_width=120)
conv3D6.initialize_hyperparams(frames_to_sample=15,batch_size=20,num_epochs=40)
conv3D6.initialize_modelparams()
conv3D6_model=conv3D6.model_architecture(dense_neurons=128,dropout=0.25)
conv3D6_model.summary()

In [None]:
# Train model 6; the returned History is also kept on conv3D6.history.
conv3D6.train_model(conv3D6_model,model_name="conv3D6",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 6.
conv3D6.plot_model()

**Observations and Thoughts:**

There is improvement in validation accuracy by 2% from 71% to 73%
Validation loss improved from 0.86 to 0.79

**Model 7: Conv3D model with added convolutional layers**

15 frames to be sampled

batch size of 32

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 20

dropout = 0.25

kernel size = (2,2,2)

In [None]:
class Conv3D7(ModelClass):
    """Deeper Conv3D variant: seven conv stages followed by two dense blocks and a softmax."""

    def model_architecture(self,dense_neurons=64,dropout=0.25):
        """Build and compile the network.

        Args:
            dense_neurons: width of each of the two dense layers.
            dropout: dropout rate applied after each dense block.

        Returns:
            The compiled ``Sequential`` model (adam, categorical cross-entropy).
        """
        clip_shape = (self.frames_to_sample,self.image_height,self.image_width,self.channels)
        same_pool = {'padding': 'same'}

        # (filters, kernel size, extra MaxPooling3D arguments) per stage, in order
        conv_stages = (
            (16, (3, 3, 3), {}),
            (32, (2, 2, 2), {}),
            (64, (2, 2, 2), {}),
            (128, (2, 2, 2), same_pool),
            (128, (2, 2, 2), same_pool),
            (256, (2, 2, 2), same_pool),
            (512, (2, 2, 2), same_pool),
        )

        model = Sequential()
        for stage_no, (filters, kernel, pool_extra) in enumerate(conv_stages):
            # only the very first layer declares the input shape
            first_layer_args = {'input_shape': clip_shape} if stage_no == 0 else {}
            model.add(Conv3D(filters, kernel, padding='same', **first_layer_args))
            model.add(Activation('relu'))
            model.add(BatchNormalization())
            model.add(MaxPooling3D(pool_size=(2, 2, 2), **pool_extra))

        model.add(Flatten())
        for _ in range(2):
            model.add(Dense(dense_neurons,activation='relu'))
            model.add(BatchNormalization())
            model.add(Dropout(dropout))

        model.add(Dense(self.num_classes,activation='softmax'))

        model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])
        return model

In [None]:
# Model 7: deeper Conv3D7 network with 15 frames per video, batch size 32, 20 epochs, dropout 0.25.
conv3D7 = Conv3D7()
conv3D7.initialize_path(project_folder_path)
conv3D7.initialize_image_properties(image_height=120,image_width=120)
conv3D7.initialize_hyperparams(frames_to_sample=15,batch_size=32,num_epochs=20)
conv3D7.initialize_modelparams()
conv3D7_model=conv3D7.model_architecture(dense_neurons=128,dropout=0.25)
conv3D7_model.summary()

In [None]:
##train the model
# The returned History is also kept on conv3D7.history.
conv3D7.train_model(conv3D7_model,model_name="conv3D7",model_type="Conv3D")

In [None]:
# plot the model
# Draws loss and categorical-accuracy curves (train vs validation) from conv3D7.history.
conv3D7.plot_model()

**Observations and Thoughts:**

Clearly overfitting with val accuracy just being around 24% and train accuracy reaching 97%+

Let's decrease the batch_size to increase generalisability

**Model 8: Conv3D model with added convolutional layers**

15 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 20

dropout = 0.25

kernel size = (2,2,2)

In [None]:
# Model 8: Conv3D7 architecture with the batch size reduced to 20 (15 frames, 20 epochs, dropout 0.25).
conv3D8 = Conv3D7()
conv3D8.initialize_path(project_folder_path)
conv3D8.initialize_image_properties(image_height=120,image_width=120)
conv3D8.initialize_hyperparams(frames_to_sample=15,batch_size=20,num_epochs=20)
conv3D8.initialize_modelparams()
conv3D8_model=conv3D8.model_architecture(dense_neurons=128,dropout=0.25)
conv3D8_model.summary()

In [None]:
##train the model
# The returned History is also kept on conv3D8.history.
conv3D8.train_model(conv3D8_model,model_name="conv3D8",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 8.
conv3D8.plot_model()

**Observations and Thoughts:**

Validation accuracy improved from 24% to 52%. Training accuracy decreased from 97% to 79%

Let's increase the epochs to see if validation increases further

**Model 9: Conv3D model with added convolutional layers and 40 epochs**

15 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 40

dropout = 0.25

kernel size = (2,2,2)

In [None]:
# Model 9: Conv3D7 architecture as in model 8 but trained for 40 epochs.
conv3D9 = Conv3D7()
conv3D9.initialize_path(project_folder_path)
conv3D9.initialize_image_properties(image_height=120,image_width=120)
conv3D9.initialize_hyperparams(frames_to_sample=15,batch_size=20,num_epochs=40)
conv3D9.initialize_modelparams()
conv3D9_model=conv3D9.model_architecture(dense_neurons=128,dropout=0.25)
conv3D9_model.summary()

In [None]:
##train the model
# The returned History is also kept on conv3D9.history.
conv3D9.train_model(conv3D9_model,model_name="conv3D9",model_type="Conv3D")

In [None]:
# Loss and categorical-accuracy curves (train vs validation) for model 9.
conv3D9.plot_model()

**Observations and Thoughts:**

Validation accuracy improved from 52% to 67%. Training accuracy increased from 79% to 84%

We have better model with us compared to this one. Let's finally use the weights of model3 with training accuracy 85% and validation accuracy 86%

**Approach 2: CNN+GRU Models**

CNN will act as the feature extractor

GRU will help us with the TimeSeries prediction of the images

ConvGRU class is extension of ModelClass with a different architecture CNN+GRU

Base Model with few conv2D- BatchNorm - maxPool ordered layers

Using GRU instead of LSTM to avoid more parameters, as we want to deploy on mobile applications and smart TV.

**Model 10: Conv2D+GRU**

18 frames to be sampled

batch size of 20

Images cropped to 120,120

128 dense neurons in the two dense layers

Epochs = 20

dropout = 0.25

gru cells = 128

In [None]:
class ConvGRU(ModelClass):
    """Per-frame Conv2D feature extractor followed by a GRU over the frame sequence."""

    def model_architecture(self,gru_cells=64,dense_neurons=64,dropout=0.25):
        """Build and compile the network, storing it on ``self.model``.

        Args:
            gru_cells: number of units in the GRU layer.
            dense_neurons: width of the dense layer after the GRU.
            dropout: dropout rate applied after the GRU and after the dense layer.

        Returns:
            The compiled ``Sequential`` model (adam, categorical cross-entropy).
        """
        clip_shape = (self.frames_to_sample,self.image_height,self.image_width,self.channels)

        net = Sequential()

        # five Conv2D -> BatchNorm -> MaxPool stages, each applied frame by frame
        for stage_no, filters in enumerate((16, 32, 64, 128, 256)):
            conv = Conv2D(filters, (3, 3) , padding='same', activation='relu')
            if stage_no == 0:
                # only the very first layer declares the input shape
                net.add(TimeDistributed(conv, input_shape=clip_shape))
            else:
                net.add(TimeDistributed(conv))
            net.add(TimeDistributed(BatchNormalization()))
            net.add(TimeDistributed(MaxPooling2D((2, 2))))

        net.add(TimeDistributed(Flatten()))

        net.add(GRU(gru_cells))
        net.add(Dropout(dropout))

        net.add(Dense(dense_neurons,activation='relu'))
        net.add(Dropout(dropout))

        net.add(Dense(self.num_classes, activation='softmax'))
        net.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])
        self.model = net
        return net

In [None]:
# Model 10: Conv2D+GRU with 18 frames per video, batch size 20, 20 epochs,
# 128 GRU cells, 128 dense units and dropout 0.25.
conv_gru = ConvGRU()
conv_gru.initialize_path(project_folder_path)
conv_gru.initialize_image_properties(image_height=120,image_width=120)
conv_gru.initialize_hyperparams(frames_to_sample=18,batch_size=20,num_epochs=20)
conv_gru.initialize_modelparams()
conv_gru_model=conv_gru.model_architecture(gru_cells=128,dense_neurons=128,dropout=0.25)
conv_gru_model.summary()

In [None]:
# Train model 10 with the non-Conv3D frame selection (model_type other than "Conv3D"), then plot its history.
conv_gru.train_model(conv_gru_model,model_name="conv_gru",model_type="CNN_GRU")
conv_gru.plot_model()

**Observations and Thoughts:**

We see loss: 0.1611 - categorical_accuracy: 0.9593 - val_loss: 0.7751 - val_categorical_accuracy: 0.7100

We see a training loss and validation loss less than 1

Accuracies are 95% and 71% which is a sign of overfitting

Let's increase dropout and increase the batch size to see if it reduces overfitting.

**Model 11: Conv2D+GRU**

20 frames to be sampled

batch size of 32 from 20

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.5

gru cells = 128

In [None]:
# Model 11: Conv2D+GRU with 20 frames per video, batch size 32, 25 epochs and dropout raised to 0.5.
conv_gru2 = ConvGRU()
conv_gru2.initialize_path(project_folder_path)
conv_gru2.initialize_image_properties(image_height=120,image_width=120)
conv_gru2.initialize_hyperparams(frames_to_sample=20,batch_size=32,num_epochs=25)
conv_gru2.initialize_modelparams()
conv_gru_model2=conv_gru2.model_architecture(gru_cells=128,dense_neurons=128,dropout=0.5)
conv_gru_model2.summary()

In [None]:
# Train model 11 with the non-Conv3D frame selection, then plot its history.
conv_gru2.train_model(conv_gru_model2,model_name="conv_gru2",model_type="CNN_GRU")
conv_gru2.plot_model()

**Observations and Thoughts:**

Increasing the batch size did not help; it also increased the overfitting by a large extent

The models accuracy on validation decreased from 71 to 43%.

Instead of working on scratch models here, lets kick in with transfer learning for better image representation.

Lets now try to use transfer learning with MobileNet which is known to be the most prominent set of weights for light weight applications

**Model 12: MobileNet+GRU**

20 frames to be sampled

batch size of 32

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.5

gru cells = 128

In [None]:
# MobileNet with ImageNet weights and no classification head; this single module-level
# instance is the one ConvGRU_Transfer.model_architecture() wraps in TimeDistributed.
mobilenet_transfer = mobilenet.MobileNet(weights='imagenet', include_top=False)

class ConvGRU_Transfer(ModelClass):
    """Frozen MobileNet backbone applied per frame, followed by a GRU classifier head."""

    def model_architecture(self, gru_cells=64, dense_neurons=64, dropout=0.25):
        """Build and compile the MobileNet + GRU network.

        Args:
            gru_cells: number of units in the GRU layer.
            dense_neurons: number of units in the hidden Dense layer.
            dropout: rate shared by the two Dropout layers.

        Returns:
            The compiled Sequential model.
        """
        # One input sample is a clip: (frames, height, width, channels).
        clip_shape = (
            self.frames_to_sample,
            self.image_height,
            self.image_width,
            self.channels,
        )

        network = Sequential()
        # Run the module-level MobileNet backbone on every frame of the clip.
        network.add(TimeDistributed(mobilenet_transfer, input_shape=clip_shape))

        # Only the backbone wrapper has been added at this point, so this
        # loop freezes the backbone and none of the layers added afterwards.
        for added_layer in network.layers:
            added_layer.trainable = False

        # Per-frame post-processing of the backbone output.
        network.add(TimeDistributed(BatchNormalization()))
        network.add(TimeDistributed(MaxPooling2D((2, 2))))
        network.add(TimeDistributed(Flatten()))

        # Temporal aggregation over the frame sequence.
        network.add(GRU(gru_cells))
        network.add(Dropout(dropout))

        # Classifier head.
        network.add(Dense(dense_neurons, activation='relu'))
        network.add(Dropout(dropout))
        network.add(Dense(self.num_classes, activation='softmax'))

        network.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='categorical_crossentropy',
            metrics=['categorical_accuracy'],
        )
        return network


# Model 12: MobileNet+GRU with 20 frames, batch size 32, 25 epochs and dropout 0.5.
conv_gru_tl = ConvGRU_Transfer()
conv_gru_tl.initialize_path(project_folder_path)
conv_gru_tl.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl.initialize_hyperparams(
    frames_to_sample=20,
    batch_size=32,
    num_epochs=25,
)
conv_gru_tl.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl = conv_gru_tl.model_architecture(
    gru_cells=128,
    dense_neurons=128,
    dropout=0.5,
)
conv_gru_model_tl.summary()

In [None]:
# Train Model 12, then call plot_model() for this run.
conv_gru_tl.train_model(
    conv_gru_model_tl,
    model_name="conv_gru_tl",
    model_type="CNN_GRU",
)
conv_gru_tl.plot_model()

**Observations and Thoughts:**

The best epoch was the 7th, with a validation accuracy of 81%, a training categorical accuracy of 88.39%, a validation loss of 0.47 and a training loss of 0.31.

Let's push the accuracy further by decreasing the batch size and dropout and increasing the number of frames to sample.

**Model 13: MobileNet+GRU**

25 frames to be sampled

batch size of 16

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.25

gru cells = 128

In [None]:
# Model 13: MobileNet+GRU with 25 frames, batch size 16, 25 epochs and dropout 0.25.
conv_gru_tl2 = ConvGRU_Transfer()
conv_gru_tl2.initialize_path(project_folder_path)
conv_gru_tl2.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl2.initialize_hyperparams(
    frames_to_sample=25,
    batch_size=16,
    num_epochs=25,
)
conv_gru_tl2.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl2 = conv_gru_tl2.model_architecture(
    gru_cells=128,
    dense_neurons=128,
    dropout=0.25,
)
conv_gru_model_tl2.summary()

In [None]:
# Train Model 13, then call plot_model() for this run.
conv_gru_tl2.train_model(
    conv_gru_model_tl2,
    model_name="conv_gru_tl2",
    model_type="CNN_GRU",
)
conv_gru_tl2.plot_model()

**Observations and Thoughts:**

This only increased the overfitting, with no improvement in validation loss or accuracy.

We see loss: 0.1710 - categorical_accuracy: 0.9532 - val_loss: 0.6208 - val_categorical_accuracy: 0.7600

Let's increase the batch size to 64 and decrease the frames to sample from 25 to 20 and see if it helps.

**Model 14: MobileNet+GRU**

20 frames to be sampled

batch size of 64

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.25

gru cells = 128

In [None]:
# Model 14: MobileNet+GRU with 20 frames, batch size 64, 25 epochs and dropout 0.25.
conv_gru_tl3 = ConvGRU_Transfer()
conv_gru_tl3.initialize_path(project_folder_path)
conv_gru_tl3.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl3.initialize_hyperparams(
    frames_to_sample=20,
    batch_size=64,
    num_epochs=25,
)
conv_gru_tl3.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl3 = conv_gru_tl3.model_architecture(
    gru_cells=128,
    dense_neurons=128,
    dropout=0.25,
)
conv_gru_model_tl3.summary()

In [None]:
# Train Model 14, then call plot_model() for this run.
conv_gru_tl3.train_model(
    conv_gru_model_tl3,
    model_name="conv_gru_tl3",
    model_type="CNN_GRU",
)
conv_gru_tl3.plot_model()

**Observations and Thoughts:**

We see loss: 0.0209 - categorical_accuracy: 1.0000 - val_loss: 0.5939 - val_categorical_accuracy: 0.8100

There is a slight decrease in the validation loss from 0.62 to 0.59 and increase in validation accuracy to 81% from 76%

Let's reduce the number of parameters by halving the GRU cells and the dense neurons from 128 to 64 each.

**Model 15: MobileNet+GRU**

20 frames to be sampled

batch size of 64

Images cropped to 120,120

64 dense neurons

Epochs = 25

dropout = 0.25

gru cells = 64

Reducing the number of neurons of gru and dense layer

In [None]:
# Model 15: MobileNet+GRU with 20 frames, batch size 64, 25 epochs; GRU and dense widths reduced to 64.
conv_gru_tl4 = ConvGRU_Transfer()
conv_gru_tl4.initialize_path(project_folder_path)
conv_gru_tl4.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl4.initialize_hyperparams(
    frames_to_sample=20,
    batch_size=64,
    num_epochs=25,
)
conv_gru_tl4.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl4 = conv_gru_tl4.model_architecture(
    gru_cells=64,
    dense_neurons=64,
    dropout=0.25,
)
conv_gru_model_tl4.summary()

In [None]:
# Train Model 15, then call plot_model() for this run.
conv_gru_tl4.train_model(
    conv_gru_model_tl4,
    model_name="conv_gru_tl4",
    model_type="CNN_GRU",
)
conv_gru_tl4.plot_model()

**Observations and Thoughts:**

We see loss: 0.0319 - categorical_accuracy: 0.9940 - val_loss: 0.5272 - val_categorical_accuracy: 0.8000

There is a slight decrease in the validation loss from 0.59 to 0.52

Let's reduce the batch size to 8 and the frames to 15, and go back to the earlier, larger setting of 128 dense neurons and 128 GRU cells (configuration below).

**Model 16: MobileNet+GRU**

15 frames to be sampled

batch size of 8

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.25

gru cells = 128

In [None]:
# Model 16: MobileNet+GRU with 15 frames, batch size 8, 25 epochs and dropout 0.25.
conv_gru_tl4_copy = ConvGRU_Transfer()
conv_gru_tl4_copy.initialize_path(project_folder_path)
conv_gru_tl4_copy.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl4_copy.initialize_hyperparams(
    frames_to_sample=15,
    batch_size=8,
    num_epochs=25,
)
conv_gru_tl4_copy.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl4_copy = conv_gru_tl4_copy.model_architecture(
    gru_cells=128,
    dense_neurons=128,
    dropout=0.25,
)
conv_gru_model_tl4_copy.summary()

In [None]:
# Train Model 16 using the same model_type value as the other MobileNet+GRU runs.
conv_gru_tl4_copy.train_model(
    conv_gru_model_tl4_copy,
    model_name="conv_gru_tl4_copy",
    model_type="CNN_GRU",
)

In [None]:
# Call plot_model() for the Model 16 run trained in the previous cell.
conv_gru_tl4_copy.plot_model()

**Observations and Thoughts:**

We see loss: 0.1109 - categorical_accuracy: 0.9698 - val_loss: 0.5595 - val_categorical_accuracy: 0.8100

There is a slight increase in the validation loss, from 0.52 to 0.56.

Overfitting is reduced: train:val accuracy went from 99:80 to 96:81.

Let's try training the whole MobileNet model (all layers trainable) with the same parameters.

**Model 17: MobileNet(on all the layers)+GRU**

15 frames to be sampled

batch size of 8

Images cropped to 120,120

128 dense neurons

Epochs = 25

dropout = 0.25

gru cells = 128

In [None]:
# Fresh ImageNet-pretrained MobileNet (no classification top). Rebinding the name gives
# ConvGRU_Transfer_NoWeights below a new backbone object instead of the one used by earlier cells.
mobilenet_transfer = mobilenet.MobileNet(
    include_top=False,
    weights='imagenet',
)

class ConvGRU_Transfer_NoWeights(ModelClass):
    """MobileNet + GRU classifier in which the MobileNet backbone is left trainable.

    The backbone is the module-level ``mobilenet_transfer`` object; this class only
    marks it trainable, it does not change which weights that object was created with.
    """

    def model_architecture(self, gru_cells=64, dense_neurons=64, dropout=0.25):
        """Build and compile the fully trainable MobileNet + GRU network.

        Args:
            gru_cells: number of units in the GRU layer.
            dense_neurons: number of units in the hidden Dense layer.
            dropout: rate shared by the two Dropout layers.

        Returns:
            The compiled Sequential model.
        """
        # One input sample is a clip: (frames, height, width, channels).
        clip_shape = (
            self.frames_to_sample,
            self.image_height,
            self.image_width,
            self.channels,
        )

        network = Sequential()
        # Run the module-level MobileNet backbone on every frame of the clip.
        network.add(TimeDistributed(mobilenet_transfer, input_shape=clip_shape))

        # Only the backbone wrapper exists at this point; mark it trainable so
        # the backbone is updated together with the layers added afterwards.
        for added_layer in network.layers:
            added_layer.trainable = True

        # Per-frame post-processing of the backbone output.
        network.add(TimeDistributed(BatchNormalization()))
        network.add(TimeDistributed(MaxPooling2D((2, 2))))
        network.add(TimeDistributed(Flatten()))

        # Temporal aggregation over the frame sequence.
        network.add(GRU(gru_cells))
        network.add(Dropout(dropout))

        # Classifier head.
        network.add(Dense(dense_neurons, activation='relu'))
        network.add(Dropout(dropout))
        network.add(Dense(self.num_classes, activation='softmax'))

        network.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='categorical_crossentropy',
            metrics=['categorical_accuracy'],
        )
        return network

# Model 17: trainable MobileNet + GRU with 15 frames, batch size 8, 25 epochs and dropout 0.25.
conv_gru_tl4_now = ConvGRU_Transfer_NoWeights()
conv_gru_tl4_now.initialize_path(project_folder_path)
conv_gru_tl4_now.initialize_image_properties(image_height=120, image_width=120)
conv_gru_tl4_now.initialize_hyperparams(
    frames_to_sample=15,
    batch_size=8,
    num_epochs=25,
)
conv_gru_tl4_now.initialize_modelparams()

# Build the network and print its layer summary.
conv_gru_model_tl4_now = conv_gru_tl4_now.model_architecture(
    gru_cells=128,
    dense_neurons=128,
    dropout=0.25,
)
conv_gru_model_tl4_now.summary()

In [None]:
# Train Model 17.
conv_gru_tl4_now.train_model(
    conv_gru_model_tl4_now,
    model_name="conv_gru_tl4_now",
    model_type="CNN_GRU",
)

In [None]:
# Call plot_model() for the Model 17 run trained in the previous cell.
conv_gru_tl4_now.plot_model()

**Observations and Thoughts:**

loss: 0.0216 - categorical_accuracy: 0.9940 - val_loss: 0.1122 - val_categorical_accuracy: 0.9400

This is the best model so far, with the highest validation accuracy (94%) and the lowest validation loss (0.1122).

**Which Model to Choose?? We have got a tradeoff between two models:**

Model3 -> Conv3D -> Epoch 25 model -> Validation accuracy 86% -> Validation loss 0.46 -> Model size 14MB -> Trainable params 1,155,397

Model17 -> MobileNet (with complete training over all the layers) + GRU -> Epoch 22 model -> Validation accuracy 94% -> Validation loss 0.1122 -> Model size 44MB -> Trainable params 3,693,253

**We select the second one which is the MobileNet+GRU for the following reasons:**

Its validation loss (~0.1122) is far lower than that of the Conv3D model (0.46).

Its validation accuracy is 94%, markedly higher than the Conv3D model's 86%.

It has roughly three times as many trainable parameters, but a 44MB model is still small compared with large CNNs whose weights can run into gigabytes.

Models in MBs are easily deployable on smart devices.

# **Testing the model on a random batch in validation set**

In [None]:
# loading the model
# NOTE(review): the path literal in the load_model call below is cut off in this copy of the
# notebook (no closing quote or parenthesis) - restore the full checkpoint path before running.
import time
from keras.models import load_model
model = load_model('/content/drive/MyDrive/conv_gru_tl4_now_2021-10-2617_37_49.157949/Mobilenet_NoW_mode

In [None]:
# Helper object configured with the same image size and sampling settings as Model 17.
# NOTE(review): a later cell rebinds the name `test_generator` to a function.
test_generator = ConvGRU_Transfer_NoWeights()
test_generator.initialize_path(project_folder_path)
test_generator.initialize_image_properties(image_height=120, image_width=120)
test_generator.initialize_hyperparams(
    frames_to_sample=15,
    batch_size=8,
    num_epochs=25,
)

# Pull one batch from the generator built on val_path / val_doc.
g = test_generator.generator(
    test_generator.val_path,
    test_generator.val_doc,
    model_type="Conv_GRU",
)
batch_data, batch_labels = next(g)

In [None]:
# Display the true labels of the batch drawn in the previous cell
# (bare expression so the notebook renders the array).
batch_labels

In [None]:
# Predicted labels: index of the highest predicted score for each clip in the batch.
print(np.argmax(model.predict(batch_data), axis=1))

In [None]:
import cv2
import os

# Folder holding the recorded videos, and the root folder that receives one
# sub-folder of extracted frames per video.
VIDEO_DIR = '/content/drive/MyDrive/Videosforgesturerecognition'
FRAMES_DIR = '/content/drive/MyDrive/GR_FramedVideos'

for video in os.listdir(VIDEO_DIR):
    # The output folder is named after the file name up to its first dot.
    video_name = video.split(".")[0]
    video_frames_dir = os.path.join(FRAMES_DIR, video_name)
    # exist_ok makes re-runs safe and also creates FRAMES_DIR itself on the first run.
    os.makedirs(video_frames_dir, exist_ok=True)

    capture = cv2.VideoCapture(os.path.join(VIDEO_DIR, video))
    try:
        frameNr = 0
        while True:
            success, frame = capture.read()
            if not success:
                # No more frames could be read from this video.
                break

            frame = cv2.resize(frame, (120, 120))
            # Use the flag from the top-level cv2 module; the nested cv2.cv2
            # alias is not available in every opencv-python build.
            frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
            cv2.imwrite(os.path.join(video_frames_dir, 'frame_' + str(frameNr) + '.jpg'), frame)

            frameNr = frameNr + 1
    finally:
        # Release the capture handle even if reading or writing a frame fails.
        capture.release()

In [None]:
## Generator adapted to accommodate our own videos; batch_size defaults to 1 because we test one video at a time

def _frame_sort_key(file_name):
    """Sort key that orders frame files by the last number in their name (frame_2 before frame_10)."""
    import re  # local import so this cell does not depend on a separate import cell

    numbers = re.findall(r"\d+", os.path.splitext(file_name)[0])
    return (int(numbers[-1]) if numbers else -1, file_name)


def test_generator(source_path, folder_list, model_type="Conv3D", frames_to_sample=15,
                   image_height=120, image_width=120, batch_size=1, channels=3,
                   num_classes=5, total_frames=None):
    """Yield (batch_data, batch_labels) batches forever from folders of frame images.

    Args:
        source_path: directory containing one sub-folder of frame images per video.
        folder_list: sequence of comma-separated lines; field 0 is the folder
            name and field 2 is the integer class label.
        model_type: "Conv3D" draws frame positions at random; any other value
            takes the first `frames_to_sample` frames in order.
        frames_to_sample: number of frames taken from each video.
        image_height, image_width, channels: per-frame size of the output array.
        batch_size: number of videos per yielded batch; a trailing partial
            batch is dropped.
        num_classes: width of the one-hot label vectors.
        total_frames: only used when model_type == "Conv3D". When given, one set
            of random positions in [0, total_frames - 1] is drawn once and
            reused for every video; when None, positions are drawn per video
            from the frames actually present in its folder.

    Yields:
        batch_data of shape (batch_size, frames_to_sample, image_height,
        image_width, channels) with pixel values divided by 255, and
        batch_labels of shape (batch_size, num_classes), one-hot encoded.

    Raises:
        ValueError: on the first next() call, if folder_list holds fewer than
            batch_size entries (no batch could ever be produced).
    """
    num_batches = len(folder_list) // batch_size
    if num_batches == 0:
        # Without this guard the endless loop below would spin without yielding.
        raise ValueError(
            "folder_list has %d entries, fewer than batch_size=%d" % (len(folder_list), batch_size)
        )

    # Frame positions shared by every video; None means "draw them per video".
    img_idx = None
    if model_type != "Conv3D":
        img_idx = list(range(frames_to_sample))  # CNN+GRU: leading frames, in order
    elif total_frames is not None:
        img_idx = [rn.randint(0, total_frames - 1) for _ in range(frames_to_sample)]

    while True:
        t = np.random.permutation(folder_list)
        for batch_no in range(num_batches):
            batch_data = np.zeros((batch_size, frames_to_sample, image_height, image_width, channels))
            batch_labels = np.zeros((batch_size, num_classes))  # one-hot labels
            for folder in range(batch_size):
                fields = t[folder + (batch_no * batch_size)].strip().split(',')
                video_dir = os.path.join(source_path, fields[0])
                # os.listdir returns names in arbitrary order; sort them so that
                # position i really is the i-th frame of the video.
                imgs = sorted(os.listdir(video_dir), key=_frame_sort_key)

                if img_idx is not None:
                    frame_positions = img_idx
                else:
                    frame_positions = [rn.randint(0, len(imgs) - 1) for _ in range(frames_to_sample)]

                for idx, item in enumerate(frame_positions):
                    image = imread(os.path.join(video_dir, imgs[item])).astype(np.float32)
                    if image.shape == (360, 360, 3):
                        # 360 x 360 frames are resized to the target size.
                        image = imresize(image, (image_height, image_width, channels))
                    else:
                        # NOTE(review): the offsets are derived from the image height but applied
                        # to the column axis, so this is only a centre crop when the surplus is
                        # zero. Left unchanged so preprocessing stays identical to what the loaded
                        # model was trained with - confirm against the training generator.
                        image = image[:, (image.shape[0] - image_height) // 2 : image.shape[0] - (image.shape[0] - image_height) // 2]

                    # Scale the first three channels to [0, 1]; every frame must already have
                    # the target height and width here or the assignment fails.
                    batch_data[folder, idx, :, :, :3] = image[:, :, :3] / 255.0

                batch_labels[folder, int(fields[2])] = 1
            yield batch_data, batch_labels



In [None]:
# Read the rows describing our own test videos; the context manager closes the
# file as soon as the rows are in memory.
with open("/content/drive/MyDrive/test.csv") as test_csv:
    test_rows = test_csv.readlines()

# Generator over the extracted frame folders, taking the first 15 frames of each video.
test_gen = test_generator(
    source_path="/content/drive/MyDrive/GR_FramedVideos",
    folder_list=np.random.permutation(test_rows),
    model_type="CNN_GRU",
    frames_to_sample=15,
)

In [None]:
## Draw one batch (array of frames plus one-hot label) from the test generator
batch_data, batch_labels = next(test_gen)

In [None]:
batch_labels ## true label of the drawn video (per the recorded run: one-hot at index 3)

In [None]:
# Predicted label: index of the highest predicted score for the video in this batch.
print(np.argmax(model.predict(batch_data), axis=1))

In [None]:
## Show one frame of this video: frame position 13 of the first clip in the batch
plt.imshow(batch_data[0, 13])
