In [1]:
# Import the required libraries.
import os
import cv2
import math
import numpy as np
import datetime as dt
import tensorflow as tf

In [2]:
# Height and width (in pixels) to which every extracted video frame is resized.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

# Number of frames sampled from a video and fed to the model as one sequence.
SEQUENCE_LENGTH = 20

# Directory containing the SPHAR dataset videos, one sub-directory per class.
DATASET_DIR = "SPHAR-Dataset-1.0/videos"

# Class names are the sub-directory names found under DATASET_DIR. The path is
# taken from DATASET_DIR (not repeated as a literal) so the two cannot drift apart.
# NOTE(review): os.listdir returns entries in arbitrary order, and the position of
# a name in this list is used as the class index when decoding predictions --
# confirm this order matches the one used when the model was trained.
CLASSES_LIST = [
    name
    for name in os.listdir(DATASET_DIR)
    if os.path.isdir(os.path.join(DATASET_DIR, name))
]

In [3]:
def frames_extraction(video_path):
    """Sample up to SEQUENCE_LENGTH evenly spaced frames from a video file.

    The video is sampled at a fixed stride of
    ``max(frame_count // SEQUENCE_LENGTH, 1)`` frames. Each sampled frame is
    resized to IMAGE_WIDTH x IMAGE_HEIGHT pixels and divided by 255. No colour
    conversion is applied, so channels stay in whatever order OpenCV decoded them.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        A list of resized, scaled frames. The list holds fewer than
        SEQUENCE_LENGTH entries (possibly none) when a frame cannot be read,
        e.g. because the file could not be opened or the video is too short;
        callers that need a full sequence must check the length.
    """
    frames_list = []

    # Open the video file for reading.
    video_reader = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture handle is released even if decoding
    # or resizing raises part-way through.
    try:
        # Total number of frames reported by the container.
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Stride between sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Seek to the next sampling position, then decode that frame.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
            success, frame = video_reader.read()

            # Stop at the first unreadable frame and return what was collected.
            if not success:
                break

            # cv2.resize takes dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

            # Scale pixel values by 1/255 (maps 8-bit values into [0, 1]).
            frames_list.append(resized_frame / 255)
    finally:
        video_reader.release()

    return frames_list

In [4]:
# Run frame extraction on the sample clip and show the resulting list of
# resized, scaled frames.
sample_video_path = 'tt2.mp4'
trainX = frames_extraction(sample_video_path)
print(trainX)

[array([[[1.        , 0.99215686, 1.        ],
        [1.        , 0.99215686, 1.        ],
        [1.        , 0.99215686, 1.        ],
        ...,
        [0.56078431, 0.55294118, 0.51764706],
        [0.38823529, 0.36862745, 0.36470588],
        [0.63921569, 0.63529412, 0.58431373]],

       [[1.        , 1.        , 1.        ],
        [1.        , 1.        , 1.        ],
        [1.        , 0.99607843, 1.        ],
        ...,
        [0.57647059, 0.56862745, 0.53333333],
        [0.37254902, 0.35294118, 0.34901961],
        [0.6627451 , 0.65882353, 0.60784314]],

       [[1.        , 0.97254902, 1.        ],
        [1.        , 0.99607843, 1.        ],
        [1.        , 0.99607843, 1.        ],
        ...,
        [0.58039216, 0.57254902, 0.5372549 ],
        [0.36078431, 0.34901961, 0.3254902 ],
        [0.69803922, 0.69411765, 0.64313725]],

       ...,

       [[0.74117647, 0.74901961, 0.81176471],
        [0.76078431, 0.76862745, 0.83137255],
        [0.76078431, 

In [5]:
from tensorflow.keras.models import load_model

# File name of the saved model checkpoint (.h5) that is loaded for inference.
MODEL_PATH = 'convlstm_model___Date_Time_2023_09_05__13_49_34___Loss_3.353947401046753___Accuracy_0.4680851101875305.h5'
model = load_model(MODEL_PATH)

In [6]:
# Stack the extracted frames into a single array, then add a leading axis so
# the model receives a batch that contains just this one clip.
trainX = np.array(trainX)
b = np.expand_dims(trainX, axis=0)
result = model.predict(b)

# Look up the name of the class with the highest predicted score.
predicted_class_index = np.argmax(result[0])
print(CLASSES_LIST[predicted_class_index])

hitting


In [7]:
# Class indices ordered from highest to lowest predicted score, best five only.
top_class_indices = np.argsort(result[0])[::-1][:5]

print("Top 5 actions:")
for i in top_class_indices:
    # Name padded to 22 characters, score shown as a percentage with 2 decimals.
    print(f"  {CLASSES_LIST[i]:22}: {result[0][i] * 100:5.2f}%")

Top 5 actions:
  hitting               : 91.26%
  kicking               :  4.90%
  carcrash              :  3.67%
  stealing              :  0.13%
  murdering             :  0.03%
