In [1]:
import os
import math
import random
import datetime as dt
from collections import deque

import cv2
#import pafy
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from sklearn.metrics import precision_recall_fscore_support

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam

In [2]:
# Frame height and width (in pixels) as declared in the model's input shape.
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 128

# Number of frames that make up one sequence; it is both the number of frame files
# read per sequence and the time dimension of the model input.
SEQUENCE_LENGTH = 30

# Root directory of the stored frame arrays, laid out as
# <DATASET_DIR>/<class name>/<sequence index>/<frame index>.npy
DATASET_DIR = "Frame_data"

# Number of sequences loaded for every class (sequence folders 0 .. no_sequences - 1).
no_sequences = 50

# Names of the classes used for training; the position in this list is the class label.
CLASSES_LIST = [
    'Hi',
    'I am',
    'From',
    'Singapore',
    'Paiseh',
    'I',
    'Cannot',
    'Speak',
    'But',
    'Machine',
    'Learning',
    'Can',
    'Help',
    'Translate',
]

In [3]:
# Map every class name to its position in CLASSES_LIST (the integer label used for training).
label_map = dict(zip(CLASSES_LIST, range(len(CLASSES_LIST))))

In [4]:
def create_dataset(dataset_dir=None, classes_list=None, sequences_per_class=None, sequence_length=None):
    '''
    Load the stored frame arrays of every class and assemble them into a dataset.

    Frames are read from ``<dataset_dir>/<class name>/<sequence index>/<frame index>.npy``
    for sequence indexes ``0 .. sequences_per_class - 1`` and frame indexes
    ``0 .. sequence_length - 1``. Every frame is divided by 255
    (presumably the frames hold 0-255 pixel values -- confirm against the extraction step).

    Args:
        dataset_dir:         Root directory of the frame data. Defaults to DATASET_DIR.
        classes_list:        Class names (sub-directory names). Defaults to CLASSES_LIST.
        sequences_per_class: Number of sequences to load per class. Defaults to no_sequences.
        sequence_length:     Number of frames to load per sequence. Defaults to SEQUENCE_LENGTH.
    Returns:
        features: A float32 numpy array whose first two axes are (sequence, frame); the
                  remaining axes are those of the stored frame arrays.
        labels:   A numpy array holding, for every sequence, the index of its class
                  in classes_list.
    Raises:
        FileNotFoundError: If an expected frame file does not exist (raised by np.load).
    '''

    # Fall back to the notebook-level configuration for anything the caller did not pass.
    if dataset_dir is None:
        dataset_dir = DATASET_DIR
    if classes_list is None:
        classes_list = CLASSES_LIST
    if sequences_per_class is None:
        sequences_per_class = no_sequences
    if sequence_length is None:
        sequence_length = SEQUENCE_LENGTH

    # One entry per sequence: the list of its frames and the index of its class.
    features = []
    labels = []

    # The enumerate index is the class label; for the default CLASSES_LIST this is the
    # same value label_map holds, because label_map is built by enumerating that list.
    for class_index, class_name in enumerate(classes_list):

        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')

        for sequence_index in range(sequences_per_class):
            sequence_dir = os.path.join(dataset_dir, class_name, str(sequence_index))

            # Cast to float32 before scaling: dividing an integer array by 255 would yield
            # float64 and double the memory needed for the whole dataset.
            window = [
                np.load(os.path.join(sequence_dir, str(frame_num) + ".npy")).astype(np.float32) / 255.0
                for frame_num in range(sequence_length)
            ]

            features.append(window)
            labels.append(class_index)

    # Converting the lists to numpy arrays.
    features = np.asarray(features, dtype=np.float32)
    labels = np.array(labels)

    # Return the frames and the class indexes.
    return features, labels

In [5]:
# Load every sequence into memory once; the cross-validation cell indexes into these arrays.
features, labels = create_dataset()

# Fail fast if the stored frames do not match the input shape the model is built with.
assert features.shape[1:] == (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3), features.shape
assert len(features) == len(labels)

# Pin the one-hot width to the number of classes so it always matches the softmax layer,
# instead of letting to_categorical infer it from the largest label present.
one_hot_encoded_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))

Extracting Data of Class: Hi
Extracting Data of Class: I am
Extracting Data of Class: From
Extracting Data of Class: Singapore
Extracting Data of Class: Paiseh
Extracting Data of Class: I
Extracting Data of Class: Cannot
Extracting Data of Class: Speak
Extracting Data of Class: But
Extracting Data of Class: Machine
Extracting Data of Class: Learning
Extracting Data of Class: Can
Extracting Data of Class: Help
Extracting Data of Class: Translate


In [6]:
def create_convlstm_model():
    '''
    Build the ConvLSTM classifier used in this notebook.

    The network is four ConvLSTM2D stages (4, 8, 14 and 16 filters), each followed by a
    spatial 2x2 max-pooling; every stage except the last is also followed by a
    time-distributed dropout. The result is flattened and fed to a softmax layer with
    one unit per entry of CLASSES_LIST. The model summary is printed before returning.

    Returns:
        model: The constructed (uncompiled) Sequential model.
    '''

    # Number of ConvLSTM filters for each stage, in order.
    stage_filters = (4, 8, 14, 16)
    last_stage = len(stage_filters) - 1

    network = Sequential()

    for stage, n_filters in enumerate(stage_filters):

        # Only the very first layer declares the input shape of the model.
        first_layer_args = {}
        if stage == 0:
            first_layer_args['input_shape'] = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

        network.add(ConvLSTM2D(filters = n_filters,
                               kernel_size = (3, 3),
                               activation = 'tanh',
                               data_format = "channels_last",
                               recurrent_dropout = 0.2,
                               return_sequences = True,
                               **first_layer_args))

        # Pool over height and width only; the time axis keeps its length.
        network.add(MaxPooling3D(pool_size = (1, 2, 2), padding = 'same', data_format = 'channels_last'))

        # No dropout after the final stage.
        if stage != last_stage:
            network.add(TimeDistributed(Dropout(0.2)))

    network.add(Flatten())
    network.add(Dense(len(CLASSES_LIST), activation = "softmax"))

    # Display the model's summary.
    network.summary()

    return network

In [None]:
# Per-fold scores, appended in fold order.
accuracies = []
f1_scores = []
precisions = []
recalls = []

# Fixed seed so fold membership and the training/validation ordering repeat across runs.
# (Best effort only: other sources of randomness in training are not controlled here.)
SEED = 42
rng = np.random.default_rng(SEED)

kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
for fold_number, (train_index, test_index) in enumerate(kf.split(features), start=1):

    # Drop the graph state of the previous fold's model before building a new one,
    # otherwise it accumulates over the folds.
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED + fold_number)

    # create_dataset() stores the samples class by class and fit() takes the validation
    # split from the END of the training arrays before shuffling, so the training indexes
    # are permuted here to keep the validation samples from coming from the last classes only.
    train_index = rng.permutation(train_index)

    features_train, features_test = features[train_index], features[test_index]
    labels_train, labels_test = one_hot_encoded_labels[train_index], one_hot_encoded_labels[test_index]

    # Construct the required ConvLSTM model.
    convlstm_model = create_convlstm_model()

    # Display the success message.
    print("Model Created Successfully!")

    # Stop when the validation loss has not improved for 10 epochs and keep the best weights.
    early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 10, mode = 'min', restore_best_weights = True)

    # Compile the model and specify loss function, optimizer and metrics values to the model
    convlstm_model.compile(loss = 'categorical_crossentropy', optimizer = "Adam", metrics = ["accuracy"])

    # Start training the model.
    convlstm_model_training_history = convlstm_model.fit(x = features_train, y = labels_train, epochs = 50, batch_size = 4,
                                                         shuffle = True, validation_split = 0.2,
                                                         callbacks = [early_stopping_callback])

    # Evaluate the trained model on the held-out fold.
    predictions = convlstm_model.predict(features_test)
    y_true = np.argmax(labels_test, axis=1).tolist()
    y_hat = np.argmax(predictions, axis=1).tolist()

    # Per-class confusion matrices; not needed for the averaged scores below,
    # kept so the last fold can be inspected after the loop.
    conf = multilabel_confusion_matrix(y_true, y_hat)
    acc = accuracy_score(y_true, y_hat)

    # Macro average: unweighted mean of the per-class scores. A class with an undefined
    # score (no predicted or no true samples) contributes 0 instead of NaN.
    pre, re, f1, _ = precision_recall_fscore_support(y_true, y_hat, average = "macro", zero_division = 0)

    accuracies.append(acc)
    f1_scores.append(f1)
    precisions.append(pre)
    recalls.append(re)

    print(acc)
    print(f1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 30, 128, 128, 16)  448      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 30, 32, 32, 16)   0         
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, 30, 32, 32, 16)   0         
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 30, 32, 32, 32)   4640      
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, 30, 8, 8, 32)     0

Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120


Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120

In [8]:
# Show the score of every fold, one line per metric.
for heading, per_fold_values in (
    ("Accuracies:", accuracies),
    ("F1 Scores:", f1_scores),
    ("Precisions:", precisions),
    ("Recalls:", recalls),
):
    print(heading, per_fold_values)

In [None]:
# Average every metric over the folds; a NaN entry counts as 0 in the sum
# while still being counted in the number of folds.
for metric_name, per_fold_values in (
    ("Accuracy", accuracies),
    ("F1 Score", f1_scores),
    ("Precision", precisions),
    ("Recall", recalls),
):
    fold_total = np.nansum(per_fold_values)
    print("Mean " + metric_name + ":", fold_total / len(per_fold_values))