In [23]:
import numpy as np
import os
from imageio import imread
from skimage.transform import resize
import datetime
from glob import glob
import tensorflow as tf

from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers

In [24]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(30)

In [25]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 100 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5

In [26]:
def normalize_channel(channel):
    mean = np.mean(channel)
    std = np.std(channel)
    normalized_channel = (channel - mean) / std
    return normalized_channel

In [27]:
def generator(source_path, folder_list, batch_size, width, height, crop_dims):
    print( 'Source path = ', source_path, '; batch size =', batch_size)
    while True:
        t = np.random.permutation(folder_list)
        num_batches = len(t)//batch_size # calculate the number of batches
        for batch in range(num_batches): # we iterate over the number of batches
            batch_data = np.zeros((batch_size,num_frames,width,height,3)) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
            batch_labels = np.zeros((batch_size,num_classes)) # batch_labels is the one hot representation of the output
            #print(batch_data.shape)
            #print(batch_labels.shape)
            for folder in range(batch_size): # iterate over the batch_size
                imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) # read all the images in the folder
                for idx,item in enumerate(img_idx): #  Iterate iver the frames/images of a folder to read them in
                    image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)

                    #crop the images and resize them. Note that the images are of 2 different shape
                    #and the conv3D will throw error if the inputs in a batch have different shapes
                    image = crop_and_resize(image, width, height, crop_dims)

                    batch_data[folder,idx,:,:,0] = normalize_channel(image[:,:,0]) #normalise and feed in the image
                    batch_data[folder,idx,:,:,1] = normalize_channel(image[:,:,1]) #normalise and feed in the image
                    batch_data[folder,idx,:,:,2] = normalize_channel(image[:,:,2]) #normalise and feed in the image

                batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            yield batch_data, batch_labels #you yield the batch_data and the batch_labels, remember what does yield do


        # write the code for the remaining data points which are left after full batches
        remaining_batches = len(t)%batch_size
        print(remaining_batches)
        if remaining_batches > 0:
            batch_data = np.zeros((remaining_batches,num_frames,width,height,3)) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
            batch_labels = np.zeros((remaining_batches,num_classes)) # batch_labels is the one hot representation of the output
            for folder in range(remaining_batches): # iterate over the batch_size
                imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) # read all the images in the folder
                for idx,item in enumerate(img_idx): #  Iterate iver the frames/images of a folder to read them in
                    image = imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+imgs[item]).astype(np.float32)

                    #crop the images and resize them. Note that the images are of 2 different shape
                    #and the conv3D will throw error if the inputs in a batch have different shapes
                    image = crop_and_resize(image, width, height, crop_dims)

                    batch_data[folder,idx,:,:,0] = normalize_channel(image[:,:,0]) #normalise and feed in the image
                    batch_data[folder,idx,:,:,1] = normalize_channel(image[:,:,1]) #normalise and feed in the image
                    batch_data[folder,idx,:,:,2] = normalize_channel(image[:,:,2]) #normalise and feed in the image

                batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
            yield batch_data, batch_labels #you yield the batch_data and the batch_labels, remember what does yield do

In [28]:
def crop_and_resize(image, width, height,crop_dims):
    curr_width, curr_height, channel = image.shape
    curr_aspect_ratio = curr_width/curr_height
    target_aspect_ratio = width/height
    crop_dimensions = (crop_dims)
   # print("curr_aspect_ratio", curr_aspect_ratio)
   # print("curr_width", curr_width)
   # print("curr_height", curr_height)

    # Calculate the cropping dimensions
    if curr_aspect_ratio > target_aspect_ratio:
        new_width = int(height * target_aspect_ratio)
        offset = (width - new_width) // 2
        cropped_image = image[:, offset:offset + new_width:]
    elif curr_aspect_ratio < target_aspect_ratio:
        new_height = int(width / target_aspect_ratio)
        offset = (height - new_height) // 2
        cropped_image = image[offset:offset + new_height, :,:]
    else:
        cropped_image = image

    # Resize the cropped image
    resized_image = resize(cropped_image, (width, height))

   # print("Original shape:", image.shape)
   # print("Cropped shape:", cropped_image.shape)
   # print("Resized shape:", resized_image.shape)
    return resized_image

### As part of our first experiment we are going to run Ablation experiment with batch size 100 and 3 epochs. Here are the details:
#### Experiment 1(Ablation)
1. Batch_Size = 100
2. num_epochs = 3
3. img_width, img_height = 100, 100
4. Resize, Crop logic - Crop on corners - 10 pixels
5. Train on 15 frames, alternate frames chosen
6. Model Details: 1 Conv2D(Time Distributed, 16 filters, 3,3(kernel)), Flatten, 1 GRU(16 filter), Padding = Same, Optimizer = adam, categorical_accuracy evaluation

In [29]:
curr_dt_time = datetime.datetime.now()
train_path = '/datasets/Project_data/train'
val_path = '/datasets/Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
num_epochs = 3 # choose the number of epochs
print ('# epochs =', num_epochs)
img_width = 100
img_height = 100

# training sequences = 663
# validation sequences = 100
# epochs = 3


In [8]:
rnn_model = Sequential()
rnn_model.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model.add(GRU(16, return_sequences=True))
# rnn_model.add(TimeDistributed(Dense(64, activation='relu')))
rnn_model.add((Flatten()))
rnn_model.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model.summary()

2024-08-31 10:48:36.026996: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2024-08-31 10:48:36.027057: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14800 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:41:00.0, compute capability: 7.5


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 15, 100, 100, 16)  448      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 15, 160000)       0         
 tributed)                                                       
                                                                 
 gru (GRU)                   (None, 15, 16)            7680864   
                                                                 
 flatten_1 (Flatten)         (None, 240)               0         
                                                                 
 dense (Dense)               (None, 5)                 1205      
                                                                 
Total params: 7,682,517
Trainable params: 7,682,517
Non-

In [58]:
def initializeModel():
    model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

    if not os.path.exists(model_name):
        os.mkdir(model_name)

    filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')

    LR = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, mode='auto', min_lr=0.01)
    callbacks_list = [checkpoint, LR]
    return callbacks_list

In [31]:
def set_epoch_steps_val_steps():
    if (num_train_sequences%batch_size) == 0:
        steps_per_epoch = int(num_train_sequences/batch_size)
    else:
        steps_per_epoch = (num_train_sequences//batch_size) + 1

    if (num_val_sequences%batch_size) == 0:
        validation_steps = int(num_val_sequences/batch_size)
    else:
        validation_steps = (num_val_sequences//batch_size) + 1
    return steps_per_epoch, validation_steps

In [11]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (10,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height, (10,10))

In [12]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [13]:

rnn_model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 100
(100, 15, 100, 100, 3)
(100, 5)
Epoch 1/3
(100, 15, 100, 100, 3)
(100, 5)


2024-08-31 10:48:53.315954: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


1/7 [===>..........................] - ETA: 29s - loss: 1.7257 - categorical_accuracy: 0.1700(100, 15, 100, 100, 3)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 15, 100, 100, 3)
(100, 5)
0
(100, 15, 100, 100, 3)
(100, 5)

Epoch 00001: val_loss improved from inf to 1.56691, saving model to model_init_2024-08-3110_48_34.353482/model-00001-1.61282-0.24133-1.56691-0.31000.keras
Epoch 2/3
(100, 15, 100, 100, 3)
(100, 5)
1/7 [===>..........................] - ETA: 0s - loss: 1.4971 - categorical_accuracy: 0.3400(100, 15, 100, 100, 3)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 15, 100, 100, 3)
(100, 5)
0
(100, 15, 100, 100, 3)
(100, 5)

Epoch 00002: val_loss improved from 1.56691 to 1.56332, saving model to model_init_2024-08-3110_48_34.353482/model-00002-1.51562-0.29563-1.56332-0.27000.keras
Epoch 3/3
(100, 15, 100, 100, 3)
(100, 5)
1/7 [===>..........................] - ETA: 0s - loss: 1.4936 - categorical_accuracy: 0.3800(100, 15, 100, 100, 3)
(100, 5)
(100, 5)
(100, 5)
(100,

<keras.callbacks.History at 0x7f9f34b973d0>

##### We see that we were able to acheive val categorical accuracy of 30% on validation data and val_loss of 1.49 in the ablation experiment. The model did learn, we are in the right direction. Let's try now with more epochs and see if we can improve on our previous outcome
We also see that val_categorical_accuracy and categorical_accuracy are very close, that means the model is not overfitting

In [14]:
rm -rf /home/.local/share/Trash/

Experiment 2:
1. batch_size = 32
2. num_epochs = 5
3. img_width, img_height = 100, 100
4. Resize, Crop logic - Crop on corners - 10 pixels
5. Train on 15 frames, alternate frames chosen
6. Model Details: 1 Conv2D(Time Distributed, 16 filters, 3,3(kernel)), 1 Conv2D(Time Distributed, 16 filters, 3,3(kernel)), Flatten, 1 GRU(16 filter), Padding = Same, Optimizer = adam, categorical_accuracy evaluation

In [15]:
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [16]:
rnn_model2 = Sequential()
rnn_model2.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model2.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model2.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model2.add(GRU(32, return_sequences=True))
# rnn_model.add(TimeDistributed(Dense(64, activation='relu')))
rnn_model2.add((Flatten()))
rnn_model2.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model2.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_2 (TimeDis  (None, 15, 100, 100, 16)  448      
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 15, 100, 100, 32)  4640     
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, 15, 320000)       0         
 tributed)                                                       
                                                                 
 gru_1 (GRU)                 (None, 15, 32)            30723264  
                                                                 
 flatten_3 (Flatten)         (None, 480)               0         
                                                      

In [17]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (10,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(10,10))

In [18]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [19]:
rnn_model2.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
(32, 15, 100, 100, 3)
(32, 5)
Epoch 1/5
(32, 15, 100, 100, 3)
(32, 5)
 1/21 [>.............................] - ETA: 42s - loss: 1.7071 - categorical_accuracy: 0.1250(32, 15, 100, 100, 3)
(32, 5)
 2/21 [=>............................] - ETA: 1:10 - loss: 1.8492 - categorical_accuracy: 0.1562(32, 15, 100, 100, 3)
(32, 5)
 3/21 [===>..........................] - ETA: 1:01 - loss: 1.8541 - categorical_accuracy: 0.1562(32, 15, 100, 100, 3)
(32, 5)
 4/21 [====>.........................] - ETA: 54s - loss: 1.7992 - categorical_accuracy: 0.2031 (32, 15, 100, 100, 3)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 5)
(32, 15, 100, 100, 3)
(32, 5)
(32, 15, 100, 100, 3)
(32, 5)
(32, 15, 100, 100, 3)
(32, 5)
4
(32, 15, 100, 100, 3)
(32, 5)

Epoch 00001: val_loss improved from inf to 1.61919, saving model to model_init_2024-08-3110_48_34.353482/model-00001-1.693

<keras.callbacks.History at 0x7f9f4ece1e80>

It is interesting to note that reducing the batch_size and increasing the number of epochs + adding of a conv2D layer did not help. Our categorical accuracy went down and loss also did not improve

Experiment 3:
Let's try changing out crop logic. As we can see after observing random images, top of the image does not contain any info. So we will crop 20 pixels from top only. Also we will increase our batch_size to 32, num_epochs = 5. We will further train at 10 epochs if the results are better than before.

In [20]:
rm -rf /home/.local/share/Trash/

In [38]:
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [44]:
def crop_and_resize(image, width, height,crop_dims):
    curr_width, curr_height, channel = image.shape
    
    crop_x = crop_dims[0]
    crop_y = crop_dims[1]
        
    cropped_image = image[0:curr_height-crop_y,0:curr_width-crop_x]

    # Resize the cropped image
    resized_image = resize(cropped_image, (width, height))

    #print("Original shape:", image.shape)
    #print("Cropped shape:", cropped_image.shape)
    #print("Resized shape:", resized_image.shape)
    return resized_image

In [68]:
rnn_model2 = Sequential()
rnn_model2.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model2.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model2.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model2.add(GRU(32, return_sequences=True))
# rnn_model.add(TimeDistributed(Dense(64, activation='relu')))
rnn_model2.add((Flatten()))
rnn_model2.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model2.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model2.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_17 (TimeDi  (None, 22, 100, 100, 16)  448      
 stributed)                                                      
                                                                 
 time_distributed_18 (TimeDi  (None, 22, 100, 100, 32)  4640     
 stributed)                                                      
                                                                 
 time_distributed_19 (TimeDi  (None, 22, 320000)       0         
 stributed)                                                      
                                                                 
 gru_6 (GRU)                 (None, 22, 32)            30723264  
                                                                 
 flatten_13 (Flatten)        (None, 704)               0         
                                                      

In [45]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [46]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [47]:
rnn_model2.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.64746, saving model to model_init_2024-08-3110_48_34.353482/model-00001-1.66816-0.24434-1.64746-0.19000.keras
Epoch 2/5

Epoch 00002: val_loss improved from 1.64746 to 1.57242, saving model to model_init_2024-08-3110_48_34.353482/model-00002-1.61837-0.20814-1.57242-0.29000.keras
Epoch 3/5

Epoch 00003: val_loss did not improve from 1.57242
Epoch 4/5
4

Epoch 00004: val_loss did not improve from 1.57242
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.57242


<keras.callbacks.History at 0x7f9f348bb6a0>

Looks like our resizing did not affect the model positively. We will go back to previous resizing logic but keep cropping from top only. Also we will change the frames we will train on. We will get rid of beginning and end 4 frames. 

Experiment 4:
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100
crop logic = top 40 pixels, crop on basis of aspect ratio
img_idx = drop first 4 and last 4 frames, total 22 frames

In [74]:
rm -rf /home/.local/share/Trash/

In [75]:
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100
img_idx = [4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]
num_frames = len(img_idx)

In [76]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [77]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [78]:
rnn_model3 = Sequential()
rnn_model3.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model3.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu'), input_shape=(num_frames,img_height, img_width, 3)))
rnn_model3.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model3.add(GRU(32, return_sequences=True))
# rnn_model.add(TimeDistributed(Dense(64, activation='relu')))
rnn_model3.add((Flatten()))
rnn_model3.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model3.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model3.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_20 (TimeDi  (None, 22, 100, 100, 16)  448      
 stributed)                                                      
                                                                 
 time_distributed_21 (TimeDi  (None, 22, 100, 100, 32)  4640     
 stributed)                                                      
                                                                 
 time_distributed_22 (TimeDi  (None, 22, 320000)       0         
 stributed)                                                      
                                                                 
 gru_7 (GRU)                 (None, 22, 32)            30723264  
                                                                 
 flatten_15 (Flatten)        (None, 704)               0         
                                                      

In [79]:
rnn_model3.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.66071, saving model to model_init_2024-08-3110_48_34.353482/model-00001-1.83490-0.20060-1.66071-0.17000.keras
Epoch 2/5

Epoch 00002: val_loss improved from 1.66071 to 1.60603, saving model to model_init_2024-08-3110_48_34.353482/model-00002-1.63799-0.16893-1.60603-0.24000.keras
Epoch 3/5

Epoch 00003: val_loss did not improve from 1.60603
Epoch 4/5
4

Epoch 00004: val_loss did not improve from 1.60603
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.60603


<keras.callbacks.History at 0x7f9f91f29790>

There was no improvement in the model, we will go back to using alternate frames in img_idx, keep the batch_size at 32 

Experiment 4
1. batch_size = 32
2. img_idx = alternate frames
3. img_width, img_height = 100, 100
4. num_epochs = 5
5. Add batch_normalization

In [80]:
rm -rf /home/.local/share/Trash/

In [81]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [82]:
from keras.models import Sequential
from keras.layers import TimeDistributed, Conv2D, Flatten, GRU, Dense, BatchNormalization

rnn_model4 = Sequential()
rnn_model4.add(TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'), input_shape=(num_frames, img_height, img_width, 3)))
rnn_model4.add(TimeDistributed(BatchNormalization()))
rnn_model4.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu')))
rnn_model4.add(TimeDistributed(BatchNormalization()))
rnn_model4.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model4.add(GRU(64, return_sequences=True))
rnn_model4.add(Flatten())
rnn_model4.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model4.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model4.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_23 (TimeDi  (None, 15, 100, 100, 16)  448      
 stributed)                                                      
                                                                 
 time_distributed_24 (TimeDi  (None, 15, 100, 100, 16)  64       
 stributed)                                                      
                                                                 
 time_distributed_25 (TimeDi  (None, 15, 100, 100, 32)  4640     
 stributed)                                                      
                                                                 
 time_distributed_26 (TimeDi  (None, 15, 100, 100, 32)  128      
 stributed)                                                      
                                                                 
 time_distributed_27 (TimeDi  (None, 15, 320000)      

In [83]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [84]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [85]:
rnn_model4.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.65546, saving model to model_init_2024-08-3110_48_34.353482/model-00001-1.80952-0.25641-1.65546-0.29000.keras
Epoch 2/5

Epoch 00002: val_loss improved from 1.65546 to 1.55797, saving model to model_init_2024-08-3110_48_34.353482/model-00002-1.46968-0.36048-1.55797-0.35000.keras
Epoch 3/5

Epoch 00003: val_loss did not improve from 1.55797
Epoch 4/5
4

Epoch 00004: val_loss did not improve from 1.55797
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.55797


<keras.callbacks.History at 0x7f9f9252f280>

We can see that there in definite improvement in the categorical accuracy in just 5 epochs. However val_categorical_accuracy is lower. We will now try Conv3D layer instead of Conv2D

Experiment 5

batch_size = 32
img_idx = alternate frames
img_width, img_height = 100, 100
num_epochs = 5
Add batch_normalization
Time distributed Conv 3D layer

In [10]:
rm -rf /home/.local/share/Trash/

In [11]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [12]:
from keras.models import Sequential
from keras.layers import TimeDistributed, Conv3D, Flatten, GRU, Dense, BatchNormalization

# Conv3D: Define the input shape (time_steps, depth, height, width, channels)

rnn_model5 = Sequential()
rnn_model5.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', input_shape=(num_frames,img_height, img_width, 3)))
rnn_model5.add(BatchNormalization())
rnn_model5.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu'))
rnn_model5.add(BatchNormalization())
rnn_model5.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model5.add(GRU(64, return_sequences=False))
rnn_model5.add(Flatten())
rnn_model5.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model5.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model5.summary()

2024-09-01 05:42:38.229375: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2024-09-01 05:42:38.229449: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14800 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:1b:00.0, compute capability: 7.5


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization (BatchN  (None, 15, 100, 100, 16)  64       
 ormalization)                                                   
                                                                 
 conv3d_1 (Conv3D)           (None, 15, 100, 100, 32)  13856     
                                                                 
 batch_normalization_1 (Batc  (None, 15, 100, 100, 32)  128      
 hNormalization)                                                 
                                                                 
 time_distributed (TimeDistr  (None, 15, 320000)       0         
 ibuted)                                                         
                                                        

In [13]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [14]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [15]:
rnn_model5.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5


2024-09-01 05:42:56.499371: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


4

Epoch 00001: val_loss improved from inf to 2.62865, saving model to model_init_2024-09-0105_41_48.686207/model-00001-1.82578-0.25943-2.62865-0.27000.keras
Epoch 2/5

Epoch 00002: val_loss improved from 2.62865 to 1.90378, saving model to model_init_2024-09-0105_41_48.686207/model-00002-1.45340-0.38009-1.90378-0.20000.keras
Epoch 3/5

Epoch 00003: val_loss improved from 1.90378 to 1.77002, saving model to model_init_2024-09-0105_41_48.686207/model-00003-1.46185-0.34540-1.77002-0.30000.keras
Epoch 4/5
4

Epoch 00004: val_loss improved from 1.77002 to 1.68437, saving model to model_init_2024-09-0105_41_48.686207/model-00004-1.45754-0.36802-1.68437-0.29000.keras
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.68437


<keras.callbacks.History at 0x7f21d894f490>

We can see that the gap between the train and val accuracy has reduced.

Experiment 6:
batch_size = 32
num_epochs = 5
img_width, img_height = 100
frames = alternate, 15
layers - 2 - Conv 3D layer - Time distributed flatten - 2-GRU layer - Dense layer
optimizer = sgd

In [54]:
rm -rf /home/.local/share/Trash/

In [16]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [17]:
from keras.models import Sequential
from keras.layers import TimeDistributed, Conv3D, Flatten, GRU, Dense, BatchNormalization

# Conv3D: Define the input shape (time_steps, depth, height, width, channels)

rnn_model6 = Sequential()
rnn_model6.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', input_shape=(num_frames,img_height, img_width, 3)))
rnn_model6.add(BatchNormalization())
rnn_model6.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu'))
rnn_model6.add(BatchNormalization())
rnn_model6.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model6.add(GRU(64, return_sequences=True))
rnn_model6.add(GRU(128, return_sequences=True))
rnn_model6.add(Flatten())
rnn_model6.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model6.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary
rnn_model6.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_2 (Conv3D)           (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_2 (Batc  (None, 15, 100, 100, 16)  64       
 hNormalization)                                                 
                                                                 
 conv3d_3 (Conv3D)           (None, 15, 100, 100, 32)  13856     
                                                                 
 batch_normalization_3 (Batc  (None, 15, 100, 100, 32)  128      
 hNormalization)                                                 
                                                                 
 time_distributed_1 (TimeDis  (None, 15, 320000)       0         
 tributed)                                                       
                                                      

In [18]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [19]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [20]:
rnn_model6.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.73248, saving model to model_init_2024-09-0105_41_48.686207/model-00001-1.34315-0.44042-1.73248-0.31000.keras
Epoch 2/5

Epoch 00002: val_loss improved from 1.73248 to 1.56720, saving model to model_init_2024-09-0105_41_48.686207/model-00002-0.53883-0.86124-1.56720-0.37000.keras
Epoch 3/5

Epoch 00003: val_loss improved from 1.56720 to 1.18998, saving model to model_init_2024-09-0105_41_48.686207/model-00003-0.15947-0.98643-1.18998-0.50000.keras
Epoch 4/5
4

Epoch 00004: val_loss did not improve from 1.18998
Epoch 5/5

Epoch 00005: val_loss improved from 1.18998 to 1.09934, saving model to model_init_2024-09-0105_41_48.686207/model-00005-0.02896-1.00000-1.09934-0.52000.keras


<keras.callbacks.History at 0x7f217b096d30>

Our categorical accuracy took a huge jump by adding a GRU unit but there is a huge gap between val accuracy and train accuracy. Let's introduce maxpooling and dropout for better generalization and overfitting prevention

Experiment 7:
1. batch_size = 32
2. num_epochs = 5
3. img_width, img_height = 100, 100
4. Add batch_normalization
5. Conv3d, Conv3d + Maxpooling + Dropout(0.10), TimeDistributed Flatten + GRU + GRU + Dropout(0.10) + Flatten + Dense


In [32]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [34]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, BatchNormalization, TimeDistributed, Flatten, GRU, Dense, Dropout, MaxPooling3D

rnn_model7 = Sequential()
rnn_model7.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', input_shape=(num_frames, img_height, img_width, 3)))
rnn_model7.add(BatchNormalization())

rnn_model7.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu'))
rnn_model7.add(BatchNormalization())
rnn_model7.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model7.add(Dropout(0.10))

rnn_model7.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model7.add(GRU(64, return_sequences=True))
rnn_model7.add(GRU(128, return_sequences=True))
rnn_model7.add(Dropout(0.10))
rnn_model7.add(Flatten())
rnn_model7.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model7.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Now print the model summary
rnn_model7.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_8 (Conv3D)           (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_8 (Batc  (None, 15, 100, 100, 16)  64       
 hNormalization)                                                 
                                                                 
 conv3d_9 (Conv3D)           (None, 15, 100, 100, 32)  13856     
                                                                 
 batch_normalization_9 (Batc  (None, 15, 100, 100, 32)  128      
 hNormalization)                                                 
                                                                 
 max_pooling3d_4 (MaxPooling  (None, 7, 50, 50, 32)    0         
 3D)                                                             
                                                      

In [35]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [36]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [37]:
rnn_model7.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.47625, saving model to model_init_2024-09-0106_15_55.769024/model-00001-1.45604-0.37255-1.47625-0.30000.h5
Epoch 2/5

Epoch 00002: val_loss improved from 1.47625 to 1.46415, saving model to model_init_2024-09-0106_15_55.769024/model-00002-0.94587-0.67270-1.46415-0.31000.h5
Epoch 3/5

Epoch 00003: val_loss did not improve from 1.46415
Epoch 4/5
4

Epoch 00004: val_loss did not improve from 1.46415
Epoch 5/5

Epoch 00005: val_loss did not improve from 1.46415


<keras.callbacks.History at 0x7f21d890d250>

Our train accuracy is holding good at 98.9% but val accuracy is still very low. Let's add more dropout and max pooling layer

Experiment 8:
1. batch_size = 32
2. num_epochs = 5
3. img_width, img_height = 100, 100
4. Add batch_normalization
5. Conv3d + Maxpooling + Dropout(0.25), Conv3d + Maxpooling + Dropout(0.25), TimeDistributed Flatten + GRU + Dropout(0.25) + GRU + Dropout(0.25) + Flatten + Dense

In [38]:
rm -rf /home/.local/share/Trash/

In [39]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [41]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, BatchNormalization, TimeDistributed, Flatten, GRU, Dense, Dropout, MaxPooling3D

rnn_model8 = Sequential()
rnn_model8.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', input_shape=(num_frames, img_height, img_width, 3)))
rnn_model8.add(BatchNormalization())
rnn_model8.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model8.add(Dropout(0.25))

rnn_model8.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu'))
rnn_model8.add(BatchNormalization())
rnn_model8.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model8.add(Dropout(0.25))

rnn_model8.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model8.add(GRU(64, return_sequences=True))
rnn_model8.add(Dropout(0.25))
rnn_model8.add(GRU(128, return_sequences=True))
rnn_model8.add(Dropout(0.25))
rnn_model8.add(Flatten())
rnn_model8.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model8.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

rnn_model8.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_12 (Conv3D)          (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_12 (Bat  (None, 15, 100, 100, 16)  64       
 chNormalization)                                                
                                                                 
 max_pooling3d_7 (MaxPooling  (None, 7, 50, 50, 16)    0         
 3D)                                                             
                                                                 
 dropout_12 (Dropout)        (None, 7, 50, 50, 16)     0         
                                                                 
 conv3d_13 (Conv3D)          (None, 7, 50, 50, 32)     13856     
                                                                 
 batch_normalization_13 (Bat  (None, 7, 50, 50, 32)   

In [42]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [43]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [44]:
rnn_model8.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.55213, saving model to model_init_2024-09-0106_15_55.769024/model-00001-1.60562-0.23680-1.55213-0.32000.h5
Epoch 2/5

Epoch 00002: val_loss improved from 1.55213 to 1.49338, saving model to model_init_2024-09-0106_15_55.769024/model-00002-1.47038-0.37858-1.49338-0.38000.h5
Epoch 3/5

Epoch 00003: val_loss improved from 1.49338 to 1.38947, saving model to model_init_2024-09-0106_15_55.769024/model-00003-1.33450-0.48718-1.38947-0.50000.h5
Epoch 4/5
4

Epoch 00004: val_loss improved from 1.38947 to 1.31422, saving model to model_init_2024-09-0106_15_55.769024/model-00004-1.22442-0.51584-1.31422-0.49000.h5
Epoch 5/5

Epoch 00005: val_loss improved from 1.31422 to 1.22595, saving model to model_init_2024-09-0106_15_55.769024/model-00005-1.09241-0.60332-1.22595-0.55000.h5


<keras.callbacks.History at 0x7f21c0f62280>

The model is much more balanced now. Training loss and Validation loss are very close to each other. categorical accuracy has dropped but that is to be expected after 5 epochs. Best thing is that validation accuracy and train accuracy are close to each other.Let's reduce the dropout rate to 10% in Conv3D and keep it at 25% in GRU and see if it helps improve the value of accuracy overall while keep at two accuracies close to each other.

Experiment 9:
1. batch_size = 32
2. num_epochs = 5
3. img_width, img_height = 100, 100
4. Add batch_normalization
5. Conv3d + Maxpooling + Dropout(0.10), Conv3d + Maxpooling + Dropout(0.10), TimeDistributed Flatten + GRU + Dropout(0.25) + GRU + Dropout(0.25) + Flatten + Dense

In [52]:
rm -rf /home/.local/share/Trash/

In [53]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 5
img_width = 100
img_height = 100

In [54]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, BatchNormalization, TimeDistributed, Flatten, GRU, Dense, Dropout, MaxPooling3D

rnn_model9 = Sequential()
rnn_model9.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', input_shape=(num_frames, img_height, img_width, 3)))
rnn_model9.add(BatchNormalization())
rnn_model9.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model9.add(Dropout(0.10))

rnn_model9.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu'))
rnn_model9.add(BatchNormalization())
rnn_model9.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model9.add(Dropout(0.10))

rnn_model9.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model9.add(GRU(64, return_sequences=True))
rnn_model9.add(Dropout(0.25))
rnn_model9.add(GRU(128, return_sequences=True))
rnn_model9.add(Dropout(0.25))
rnn_model9.add(Flatten())
rnn_model9.add(Dense(5, activation='softmax'))

# Compile the model
rnn_model9.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

rnn_model9.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_18 (Conv3D)          (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_18 (Bat  (None, 15, 100, 100, 16)  64       
 chNormalization)                                                
                                                                 
 max_pooling3d_13 (MaxPoolin  (None, 7, 50, 50, 16)    0         
 g3D)                                                            
                                                                 
 dropout_24 (Dropout)        (None, 7, 50, 50, 16)     0         
                                                                 
 conv3d_19 (Conv3D)          (None, 7, 50, 50, 32)     13856     
                                                                 
 batch_normalization_19 (Bat  (None, 7, 50, 50, 32)   

In [55]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [56]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [57]:
rnn_model9.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/5
4

Epoch 00001: val_loss improved from inf to 1.53994, saving model to model_init_2024-09-0106_15_55.769024/model-00001-1.54805-0.29261-1.53994-0.34000.h5
Epoch 2/5

Epoch 00002: val_loss improved from 1.53994 to 1.51097, saving model to model_init_2024-09-0106_15_55.769024/model-00002-1.34116-0.46757-1.51097-0.34000.h5
Epoch 3/5

Epoch 00003: val_loss improved from 1.51097 to 1.32868, saving model to model_init_2024-09-0106_15_55.769024/model-00003-1.15791-0.59125-1.32868-0.48000.h5
Epoch 4/5
4

Epoch 00004: val_loss improved from 1.32868 to 1.26473, saving model to model_init_2024-09-0106_15_55.769024/model-00004-0.98617-0.68477-1.26473-0.51000.h5
Epoch 5/5

Epoch 00005: val_loss improved from 1.26473 to 1.21662, saving model to model_init_2024-09-0106_15_55.769024/model-00005-0.83281-0.75566-1.21662-0.51000.h5


<keras.callbacks.History at 0x7f21b7fac790>

There is a definite improvement in train accuracy by reducing dropout in the initial layer but validation accuracy is lower than train accuracy by a lot. Let's introduce regularization and see if it helps close the gap between the training and validation accuracy

Experiment 10:
1. batch_size = 32
2. num_epochs = 15
3. img_width, img_height = 100, 100
4. Add batch_normalization
5. Conv3d + Maxpooling + Dropout(0.10) + L2(0.01), Conv3d + Maxpooling + Dropout(0.10) + L2(0.01), TimeDistributed Flatten + GRU + Dropout(0.25)+ L2(0.01) + GRU + Dropout(0.25) + L2(0.01)+ Flatten + Dense + L2(0.01)

In [65]:
rm -rf /home/.local/share/Trash/

In [66]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 15
img_width = 100
img_height = 100

In [67]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, BatchNormalization, TimeDistributed, Flatten, GRU, Dense, Dropout, MaxPooling3D
from keras.regularizers import l2

rnn_model10 = Sequential()
rnn_model10.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.01), input_shape=(num_frames, img_height, img_width, 3)))
rnn_model10.add(BatchNormalization())
rnn_model10.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model10.add(Dropout(0.10))

rnn_model10.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.01)))
rnn_model10.add(BatchNormalization())
rnn_model10.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model10.add(Dropout(0.10))

rnn_model10.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model10.add(GRU(64, return_sequences=True, kernel_regularizer=l2(0.01)))
rnn_model10.add(Dropout(0.25))
rnn_model10.add(GRU(128, return_sequences=True, kernel_regularizer=l2(0.01)))
rnn_model10.add(Dropout(0.25))
rnn_model10.add(Flatten())
rnn_model10.add(Dense(5, activation='softmax', kernel_regularizer=l2(0.01)))

# Compile the model
rnn_model10.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Now print the model summary
rnn_model10.summary()


Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_22 (Conv3D)          (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_22 (Bat  (None, 15, 100, 100, 16)  64       
 chNormalization)                                                
                                                                 
 max_pooling3d_17 (MaxPoolin  (None, 7, 50, 50, 16)    0         
 g3D)                                                            
                                                                 
 dropout_32 (Dropout)        (None, 7, 50, 50, 16)     0         
                                                                 
 conv3d_23 (Conv3D)          (None, 7, 50, 50, 32)     13856     
                                                                 
 batch_normalization_23 (Bat  (None, 7, 50, 50, 32)  

In [68]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [69]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [70]:
rnn_model10.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/15
4

Epoch 00001: val_loss improved from inf to 6.73724, saving model to model_init_2024-09-0106_15_55.769024/model-00001-6.81278-0.30317-6.73724-0.45000.h5
Epoch 2/15

Epoch 00002: val_loss improved from 6.73724 to 6.55454, saving model to model_init_2024-09-0106_15_55.769024/model-00002-6.54584-0.48115-6.55454-0.53000.h5
Epoch 3/15

Epoch 00003: val_loss improved from 6.55454 to 6.46800, saving model to model_init_2024-09-0106_15_55.769024/model-00003-6.30435-0.61840-6.46800-0.49000.h5
Epoch 4/15
4

Epoch 00004: val_loss improved from 6.46800 to 6.29401, saving model to model_init_2024-09-0106_15_55.769024/model-00004-6.12703-0.66365-6.29401-0.56000.h5
Epoch 5/15

Epoch 00005: val_loss improved from 6.29401 to 6.18236, saving model to model_init_2024-09-0106_15_55.769024/model-00005-5.92626-0.73454-6.18236-0.53000.h5
Epoch 6/15

Epoch 00006: val_loss improved from 6.18236 to 6.06209, saving model to model_init_202

<keras.callbacks.History at 0x7f21c13d58e0>

The training accuracy is really good at 99.4% but validation accuracy is 70%. Let's increase dropout to 25% along with regularization and see if it closes the gap

Experiment 11:

batch_size = 32
num_epochs = 15
img_width, img_height = 100, 100
Add batch_normalization
Conv3d + Maxpooling + Dropout(0.25) + L2(0.01), Conv3d + Maxpooling + Dropout(0.25) + L2(0.01), TimeDistributed Flatten + GRU + Dropout(0.25)+ L2(0.01) + GRU + Dropout(0.25) + L2(0.01)+ Flatten + Dense + L2(0.01)

In [78]:
rm -rf /home/.local/share/Trash/

In [79]:
train_doc = np.random.permutation(open('/datasets/Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('/datasets/Project_data/val.csv').readlines())
batch_size = 32 #experiment with the batch size
img_idx = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28]#create a list of image numbers you want to use
num_frames = len(img_idx)
num_classes = 5
batch_size = 32
num_epochs = 15
img_width = 100
img_height = 100

In [80]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, BatchNormalization, TimeDistributed, Flatten, GRU, Dense, Dropout, MaxPooling3D
from keras.regularizers import l2

rnn_model11 = Sequential()
rnn_model11.add(Conv3D(16, (3, 3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.01), input_shape=(num_frames, img_height, img_width, 3)))
rnn_model11.add(BatchNormalization())
rnn_model11.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model11.add(Dropout(0.25))

rnn_model11.add(Conv3D(32, (3, 3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.01)))
rnn_model11.add(BatchNormalization())
rnn_model11.add(MaxPooling3D(pool_size=(2, 2, 2)))
rnn_model11.add(Dropout(0.25))

rnn_model11.add(TimeDistributed(Flatten()))  # Flatten the output before passing to GRU
rnn_model11.add(GRU(64, return_sequences=True, kernel_regularizer=l2(0.01)))
rnn_model11.add(Dropout(0.25))
rnn_model11.add(GRU(128, return_sequences=True, kernel_regularizer=l2(0.01)))
rnn_model11.add(Dropout(0.25))
rnn_model11.add(Flatten())
rnn_model11.add(Dense(5, activation='softmax', kernel_regularizer=l2(0.01)))

# Compile the model
rnn_model11.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Now print the model summary
rnn_model11.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_26 (Conv3D)          (None, 15, 100, 100, 16)  1312      
                                                                 
 batch_normalization_26 (Bat  (None, 15, 100, 100, 16)  64       
 chNormalization)                                                
                                                                 
 max_pooling3d_21 (MaxPoolin  (None, 7, 50, 50, 16)    0         
 g3D)                                                            
                                                                 
 dropout_40 (Dropout)        (None, 7, 50, 50, 16)     0         
                                                                 
 conv3d_27 (Conv3D)          (None, 7, 50, 50, 32)     13856     
                                                                 
 batch_normalization_27 (Bat  (None, 7, 50, 50, 32)  

In [81]:
train_generator = generator(train_path, train_doc, batch_size, img_width, img_height, (40,10))
val_generator = generator(val_path, val_doc, batch_size, img_width, img_height,(40,10))

In [82]:
callbacks_list = initializeModel()
steps_per_epoch, validation_steps = set_epoch_steps_val_steps()

In [83]:
rnn_model11.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                    callbacks=callbacks_list, validation_data=val_generator,
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  /datasets/Project_data/train ; batch size = 32
Epoch 1/15
4

Epoch 00001: val_loss improved from inf to 6.76166, saving model to model_init_2024-09-0106_15_55.769024/model-00001-6.84000-0.25038-6.76166-0.33000.h5
Epoch 2/15

Epoch 00002: val_loss improved from 6.76166 to 6.64800, saving model to model_init_2024-09-0106_15_55.769024/model-00002-6.67419-0.38763-6.64800-0.40000.h5
Epoch 3/15

Epoch 00003: val_loss improved from 6.64800 to 6.54471, saving model to model_init_2024-09-0106_15_55.769024/model-00003-6.52562-0.46757-6.54471-0.47000.h5
Epoch 4/15
4

Epoch 00004: val_loss improved from 6.54471 to 6.43283, saving model to model_init_2024-09-0106_15_55.769024/model-00004-6.37972-0.52338-6.43283-0.50000.h5
Epoch 5/15

Epoch 00005: val_loss improved from 6.43283 to 6.31659, saving model to model_init_2024-09-0106_15_55.769024/model-00005-6.20689-0.60633-6.31659-0.53000.h5
Epoch 6/15

Epoch 00006: val_loss improved from 6.31659 to 6.16962, saving model to model_init_202

<keras.callbacks.History at 0x7f21c1363f70>

FINAL CONCLUSION:
We will go with model 10, it had better train accuracy anf 