## References
- https://youtu.be/QmtSkq3DYko?si=6VzZc_NH5glCPi0m
- https://learnopencv.com/introduction-to-video-classification-and-human-activity-recognition/

In [None]:
%%capture

# for downloading youtube vod for training/testing
%pip install pafy youtube-dl moviepy
%pip install pydot graphviz

In [None]:
import os
import cv2
import pafy
import math
import random
import numpy as np
import datetime as dt
import tensorflow as tf
from tensorflow import keras
from collections import deque
import matplotlib.pyplot as plt
from moviepy.editor import *
from sklearn.model_selection import train_test_split

%matplotlib inline

from keras.layers import *
from keras.models import Sequential
from keras.utils import to_categorical, plot_model
from keras.callbacks import EarlyStopping


from os import listdir
from os.path import join
from cv2 import (
    VideoCapture, 
    cvtColor, 
    COLOR_BGR2RGB,
    putText,
    FONT_HERSHEY_SIMPLEX,
    CAP_PROP_FRAME_COUNT,
    CAP_PROP_POS_FRAMES,
    resize,
)
from numpy import asarray, array

Fix the random seeds so that results are consistent across executions.

In [None]:
# One seed shared by every random number generator used in this notebook.
SEED = 27
for seedGenerator in (np.random.seed, random.seed, tf.random.set_seed):
    seedGenerator(SEED)

Download and extract the UCF50 dataset.<br>
Source: https://www.crcv.ucf.edu/data/UCF50.rar

In [None]:
%%capture

# !wget --no-check-certificate https://www.crcv.ucf.edu/data/UCF50.rar

# Uncomment the command below to extract the RAR archive,
# or unpack it manually with a tool such as 7-Zip.
# !unrar x UCF50.rar

Visualize the dataset by showing one random frame per class.<br>
This step is optional and is not required for training.

In [None]:
# Preview one random first frame per class, labelled with the class name.
GRID_COLUMNS = 4

# Only sub-directories are classes; stray files in the dataset root would
# make the nested listdir below fail.
allClassNames = [
    name for name in listdir("UCF50") if os.path.isdir(join("UCF50", name))
]
print(allClassNames)
samplesInEachClass = [
    len(listdir(join("UCF50", name))) for name in allClassNames
]
print(samplesInEachClass)

# Size the grid from the number of classes: a fixed 5x4 grid only has room
# for 20 subplots and raises ValueError on the 21st class.
gridRows = max(math.ceil(len(allClassNames) / GRID_COLUMNS), 1)
plt.figure(figsize=(20, 4 * gridRows))

for classIndex, className in enumerate(allClassNames):
    allVideosInClass = listdir(join("UCF50", className))
    if not allVideosInClass:
        # Nothing to sample from an empty class folder.
        continue
    randomSelectedVideo = random.choice(allVideosInClass)
    videoReader = VideoCapture(join("UCF50", className, randomSelectedVideo))
    success, bgrFrame = videoReader.read()
    videoReader.release()
    if not success:
        # Unreadable file: leave this grid cell empty instead of crashing
        # in cvtColor on a missing frame.
        continue
    # OpenCV decodes frames as BGR, while matplotlib expects RGB.
    rgbFrame = cvtColor(bgrFrame, COLOR_BGR2RGB)
    putText(
        rgbFrame,
        className,
        (10, 30),
        FONT_HERSHEY_SIMPLEX,
        1,
        (255, 255, 255),
        2,
    )
    plt.subplot(gridRows, GRID_COLUMNS, classIndex + 1)
    plt.imshow(rgbFrame)
    plt.axis("off")

In [None]:
# Size every extracted frame is resized to; frameExtraction passes
# IMAGE_DIMENSION (width, height) to resize.
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
IMAGE_DIMENSION = (IMAGE_WIDTH, IMAGE_HEIGHT)
# Number of frames sampled from each video.
SEQUENCE_LENGTH = 20
# Root folder of the dataset; each class has its own sub-folder.
DATASET_DIR = "UCF50"
# Classes used for training; a label is the index of its name in this list.
CLASSES = ["BenchPress", "CleanAndJerk", "Diving", "BreastStroke"]

In [None]:
def frameExtraction(videoPath):
    """Sample evenly spaced, resized and normalized frames from a video.

    Up to SEQUENCE_LENGTH frames are read at a fixed stride derived from the
    reported frame count. Each frame is resized to IMAGE_DIMENSION and divided
    by 255. Reading stops at the first frame that cannot be decoded, so the
    result may hold fewer than SEQUENCE_LENGTH frames.

    Args:
        videoPath: Path of the video file to read.

    Returns:
        A list of the processed frames, in the order they were read.
    """
    capture = VideoCapture(videoPath)
    totalFrames = int(capture.get(CAP_PROP_FRAME_COUNT))
    # A stride of at least 1 keeps the positions advancing for short videos.
    stride = max(int(totalFrames / SEQUENCE_LENGTH), 1)
    sampledFrames = []
    for position in range(0, SEQUENCE_LENGTH * stride, stride):
        capture.set(CAP_PROP_POS_FRAMES, position)
        wasRead, image = capture.read()
        if not wasRead:
            break
        sampledFrames.append(resize(image, IMAGE_DIMENSION) / 255)
    capture.release()
    return sampledFrames

In [None]:
def datasetCreation():
    """Extract frame sequences and labels for every class in CLASSES.

    Each file in a class folder under DATASET_DIR is passed to
    frameExtraction. Videos that do not yield exactly SEQUENCE_LENGTH frames
    are skipped.

    Returns:
        A tuple (features, labels, videoFilePaths): the frame sequences as an
        array, the class indices as an array, and the list of paths of the
        videos that were kept, all in matching order.
    """
    sequences, classIds, keptPaths = [], [], []
    for classId, className in enumerate(CLASSES):
        print(f"Extracting Data of Class: {className}")
        classDirectory = join(DATASET_DIR, className)
        for fileName in listdir(classDirectory):
            path = join(classDirectory, fileName)
            clip = frameExtraction(path)
            if len(clip) != SEQUENCE_LENGTH:
                # Too few readable frames to form a full sequence.
                continue
            sequences.append(clip)
            classIds.append(classId)
            keptPaths.append(path)
    return asarray(sequences), array(classIds), keptPaths

In [None]:
# Build the dataset: frame sequences, integer class labels and source paths.
features, labels, videoFilePaths = datasetCreation()

In [None]:
# One-hot encode the integer labels to match the categorical_crossentropy loss.
oneHotEncodedLabels = to_categorical(labels)

In [None]:
# Hold out 20% of the videos for testing; SEED keeps the shuffle reproducible.
featuresTrain, featuresTest, labelsTrain, labelsTest = train_test_split(
    features, 
    oneHotEncodedLabels, 
    test_size=0.2, 
    shuffle=True,
    random_state=SEED
)

In [None]:
def createModelArchitecture():
    """Build the ConvLSTM video classifier and print its summary.

    The network stacks four ConvLSTM2D layers (4, 8, 14 and 16 filters), each
    followed by a MaxPooling3D layer with pool size (1, 2, 2). Every block
    except the last is followed by a time-distributed Dropout(0.2). The
    result is flattened and fed to a softmax Dense layer with one unit per
    entry of CLASSES.

    Returns:
        The uncompiled Sequential model.
    """
    filterCounts = (4, 8, 14, 16)
    lastBlockIndex = len(filterCounts) - 1
    layers = []
    for blockIndex, filterCount in enumerate(filterCounts):
        # Only the first layer declares the input shape:
        # (frames, height, width, colour channels).
        extraArguments = {}
        if blockIndex == 0:
            extraArguments["input_shape"] = (
                SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3
            )
        layers.append(ConvLSTM2D(
            filters=filterCount,
            kernel_size=(3, 3),
            activation="tanh",
            data_format="channels_last",
            recurrent_dropout=0.2,
            return_sequences=True,
            **extraArguments,
        ))
        layers.append(MaxPooling3D(
            pool_size=(1, 2, 2),
            padding='same',
            data_format="channels_last",
        ))
        # The final block feeds Flatten directly, without dropout.
        if blockIndex != lastBlockIndex:
            layers.append(TimeDistributed(Dropout(0.2)))
    layers.append(Flatten())
    layers.append(Dense(len(CLASSES), activation="softmax"))
    model = Sequential(layers)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()
    return model

In [None]:
# Instantiate the network; the model summary is printed as a side effect.
model = createModelArchitecture()

In [None]:
# Compile the model, then train it while monitoring the validation loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=["accuracy"],
)
# NOTE(review): patience=10 is larger than epochs=7 below, so this callback
# can never stop training early with the current settings - confirm whether
# more epochs or a smaller patience was intended.
earlyStoppingCallback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
    restore_best_weights=True,
)
# 20% of the training data is set aside for validation.
modelTrainingHistory = model.fit(
    x=featuresTrain,
    y=labelsTrain,
    epochs=7,
    batch_size=4,
    shuffle=True,
    validation_split=0.2,
    callbacks=[earlyStoppingCallback],
)

In [None]:
def plot_metric(modelTrainingHistory, metricName1, metricName2, plotName):
    """Plot two recorded training metrics against the epoch index.

    Args:
        modelTrainingHistory: Object whose ``history`` mapping holds the
            per-epoch values of each metric.
        metricName1: Key of the first metric, drawn in blue.
        metricName2: Key of the second metric, drawn in red.
        plotName: Title of the plot.
    """
    recorded = modelTrainingHistory.history
    firstSeries = recorded[metricName1]
    secondSeries = recorded[metricName2]
    # Both curves share the x axis of the first metric.
    epochAxis = range(len(firstSeries))
    curves = (
        (firstSeries, 'blue', metricName1),
        (secondSeries, 'red', metricName2),
    )
    for series, colour, legendLabel in curves:
        plt.plot(epochAxis, series, colour, label=legendLabel)
    plt.title(str(plotName))
    plt.legend()

In [None]:
# Compare the training and validation loss curves.
plot_metric(modelTrainingHistory, 'loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
# Save the trained model to a file named after the dataset folder ("UCF50.h5").
model.save(f"{DATASET_DIR}.h5")

In [None]:
# Score the model on the held-out test split and print the loss, then accuracy.
evaluationResult = model.evaluate(featuresTest, labelsTest)
loss, accuracy = evaluationResult
print(loss, accuracy, sep="\n")

In [None]:
def predictVideo(videoPath):
    """Predict the action class of a single video.

    Args:
        videoPath: Path of the video file to classify.

    Returns:
        The name in CLASSES with the highest predicted confidence.

    Raises:
        ValueError: If the video does not yield exactly SEQUENCE_LENGTH
            frames, which is the sequence length the model was built for.
    """
    frames = frameExtraction(videoPath)
    if len(frames) != SEQUENCE_LENGTH:
        raise ValueError(
            f"Expected {SEQUENCE_LENGTH} frames from {videoPath!r}, "
            f"got {len(frames)}"
        )
    # predict() returns one row of class confidences per input video. Only
    # one video is passed, so take the first row rather than looping over
    # the batch axis.
    confidences = model.predict(asarray([frames]))[0]
    return CLASSES[int(confidences.argmax())]

In [None]:
# Quick check on one dataset video. It lives in the folder the dataset was
# built from, so it may have been seen during training.
prediction = predictVideo(r"UCF50/BenchPress/v_BenchPress_g01_c01.avi")
print(prediction)