In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model

In [None]:
%%capture
# Download the UCF101 archive and unpack it into the working directory.
# %%capture silences the (very long) wget/unrar output for this cell.
# NOTE(review): --no-check-certificate turns off TLS certificate verification,
# so the download is not authenticated — confirm this is still required for this host.
# NOTE(review): the archive is fetched again on every run of this cell.
!wget --no-check-certificate https://www.crcv.ucf.edu/datasets/human-actions/ucf101/UCF101.rar
# Presumably extracts into the "UCF-101" folder that DATASET_DIR points at — verify after extraction.
!unrar x UCF101.rar


In [None]:
# Spatial size (in pixels) every extracted frame is resized to.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Number of frames sampled from each video.
SEQUENCE_LENGTH = 20

# Root folder that holds one sub-directory per action class.
DATASET_DIR = "UCF-101"

# Action classes used for training; a class's position in this list is its label index.
CLASSES_LIST = [
    "BabyCrawling",
    "CuttingInKitchen",
    "PizzaTossing",
    "Punch",
    "Mixing",
    "MoppingFloor",
]

In [None]:
def frames_extraction(video_path):
    """Sample up to SEQUENCE_LENGTH evenly spaced frames from a video file.

    Each sampled frame is resized to IMAGE_HEIGHT x IMAGE_WIDTH pixels and
    divided by 255. No colour-space conversion is performed, so channels stay
    in whatever order OpenCV returns them.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A list of resized, scaled frames. The list holds fewer than
        SEQUENCE_LENGTH entries (possibly none) if a frame could not be read.
    """
    frames_list = []

    video_reader = cv2.VideoCapture(video_path)

    try:
        total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Distance between sampled frames; at least 1 so short videos still advance.
        skip_frames = max(total_frames // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):

            # Jump straight to the frame we want to sample.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames)

            success, frame = video_reader.read()

            # Stop at the first unreadable frame (e.g. the video is shorter than expected).
            if not success:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            normalized_frame = resized_frame / 255

            frames_list.append(normalized_frame)
    finally:
        # Always free the capture handle, even if decoding or resizing raised.
        video_reader.release()

    return frames_list

In [None]:
def create_dataset():
    """Extract frames and labels for every video of the classes in CLASSES_LIST.

    For each class, every file in DATASET_DIR/<class_name> is passed to
    frames_extraction; only videos that yielded exactly SEQUENCE_LENGTH frames
    are kept.

    Returns:
        features: NumPy array built from the kept videos' frame lists.
        labels: NumPy array holding, per kept video, the index of its class
            in CLASSES_LIST.
        video_paths: List of the kept videos' paths, in the same order.
    """
    features = []     # Extracted frames of each kept video
    labels = []       # Class index of each kept video
    video_paths = []  # Path of each kept video

    for class_index, class_name in enumerate(CLASSES_LIST):

        print(f'Extracting Data of Class: {class_name}')
        class_dir = os.path.join(DATASET_DIR, class_name)

        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded split made later) the same on every machine.
        for file_name in sorted(os.listdir(class_dir)):

            video_path = os.path.join(class_dir, file_name)
            frames = frames_extraction(video_path)

            # Skip videos that did not yield a full sequence of frames.
            if len(frames) == SEQUENCE_LENGTH:

                # Append the data to their respective lists.
                features.append(frames)
                labels.append(class_index)
                video_paths.append(video_path)

    # Convert the collected lists to NumPy arrays.
    features = np.asarray(features)
    labels = np.array(labels)

    return features, labels, video_paths

In [None]:
# Create the dataset.
# Runs frame extraction over every video of the selected classes; one progress
# line is printed per class.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: BabyCrawling
Extracting Data of Class: CuttingInKitchen
Extracting Data of Class: PizzaTossing
Extracting Data of Class: Punch
Extracting Data of Class: Mixing
Extracting Data of Class: MoppingFloor


In [None]:
# One-hot encode the class indices. num_classes is pinned to the class list so the
# encoding width always matches the model's output layer, even if the highest-indexed
# class ended up with no usable videos.
one_hot_encoded_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))

In [None]:
# Hold out 25% of the videos for testing; the fixed random_state makes the
# shuffled split repeatable for a given sample order.
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size=0.25,
    shuffle=True,
    random_state=20,
)

In [None]:
def create_LRCN_model():
    """Build the LRCN classifier: per-frame convolutional features fed to an LSTM.

    Every convolution, pooling, dropout and flatten layer is wrapped in
    TimeDistributed so it is applied to each of the SEQUENCE_LENGTH frames.
    The model summary is printed before the model is returned.

    Returns:
        The assembled (not yet compiled) Sequential model.
    """
    clip_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

    layer_stack = [
        # Convolutional block 1
        TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'),
                        input_shape=clip_shape),
        TimeDistributed(MaxPooling2D((4, 4))),
        TimeDistributed(Dropout(0.25)),

        # Convolutional block 2
        TimeDistributed(Conv2D(32, (3, 3), padding='same', activation='relu')),
        TimeDistributed(MaxPooling2D((4, 4))),
        TimeDistributed(Dropout(0.25)),

        # Convolutional block 3
        TimeDistributed(Conv2D(64, (3, 3), padding='same', activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Dropout(0.25)),

        # Convolutional block 4 (no dropout)
        TimeDistributed(Conv2D(64, (3, 3), padding='same', activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),

        # Flatten each frame's feature maps, then summarise the sequence.
        TimeDistributed(Flatten()),
        LSTM(32),

        # One softmax output per class.
        Dense(len(CLASSES_LIST), activation='softmax'),
    ]

    model = Sequential(layer_stack)

    # Display the model's summary.
    model.summary()

    return model

In [None]:
# Construct the required LRCN model.
# create_LRCN_model() also prints the layer summary as a side effect.
LRCN_model = create_LRCN_model()

print("Model Created Successfully!")

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDist  (None, 20, 64, 64, 16)    448       
 ributed)                                                        
                                                                 
 time_distributed_1 (TimeDi  (None, 20, 16, 16, 16)    0         
 stributed)                                                      
                                                                 
 time_distributed_2 (TimeDi  (None, 20, 16, 16, 16)    0         
 stributed)                                                      
                                                                 
 time_distributed_3 (TimeDi  (None, 20, 16, 16, 32)    4640      
 stributed)                                                      
                                                                 
 time_distributed_4 (TimeDi  (None, 20, 4, 4, 32)      0

In [None]:
# Configure the loss, optimizer and reported metric.
LRCN_model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=["accuracy"],
)

# Train the model; validation_split=0.2 asks Keras to set aside a fraction of
# the training data for validation.
LRCN_model_training_history = LRCN_model.fit(
    x=features_train,
    y=labels_train,
    epochs=50,
    batch_size=8,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Score the trained model on the held-out test split.
evaluation_history = LRCN_model.evaluate(features_test, labels_test)

# evaluate() returns the loss first, followed by the compiled metrics.
test_loss = evaluation_history[0]
test_accuracy = evaluation_history[1]

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 0.6356045603752136
Test Accuracy: 0.8324607610702515


In [None]:
# Turn the predictions and the one-hot test labels into class indices.
y_pred = LRCN_model.predict(features_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(labels_test, axis=1)

# Pass the full label set explicitly so both metrics always cover every class in
# CLASSES_LIST; otherwise classification_report raises when a class is absent from
# both the true and predicted labels (its count would not match target_names).
class_indices = list(range(len(CLASSES_LIST)))

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_true_labels, y_pred_labels, labels=class_indices)
print("Confusion Matrix:")
print(confusion_mat)

# Precision = True Positives / (True Positives + False Positives)
# Recall = True Positives / (True Positives + False Negatives)
# F1-score = 2 * (Precision * Recall) / (Precision + Recall)
class_report = classification_report(
    y_true_labels,
    y_pred_labels,
    labels=class_indices,
    target_names=CLASSES_LIST,
)
print("\nClassification Report:")
print(class_report)

Confusion Matrix:
[[28  0  4  0  0  1]
 [ 5 18  0  0  2  1]
 [ 2  0 24  3  0  1]
 [ 0  0  1 45  0  0]
 [ 0  2  1  1 21  0]
 [ 3  1  1  0  3 23]]

Classification Report:
                  precision    recall  f1-score   support

    BabyCrawling       0.74      0.85      0.79        33
CuttingInKitchen       0.86      0.69      0.77        26
    PizzaTossing       0.77      0.80      0.79        30
           Punch       0.92      0.98      0.95        46
          Mixing       0.81      0.84      0.82        25
    MoppingFloor       0.88      0.74      0.81        31

        accuracy                           0.83       191
       macro avg       0.83      0.82      0.82       191
    weighted avg       0.84      0.83      0.83       191

