## Import Dependencies and Set Parameters

In [3]:
# Import Dependencies
import cv2
import numpy as np
from collections import deque
from tensorflow.keras.models import load_model

In [4]:
# Define dataset parameters
VIS_FRAME_WIDTH = 224
VIS_FRAME_HEIGHT = 224
FRAME_WIDTH = 112              # Width of each frame
FRAME_HEIGHT = 112             # Height of each frame
SEQUENCE_LENGTH = 16            # Number of frames in each sequence
BATCH_SIZE = 8                 # Batch size for training
TRAIN_RATIO = 0.7               # Training data ratio
VAL_RATIO = 0.15               # Validation data ratio
TEST_RATIO = 0.15               # Testing data ratio

# Define classes list
CLASSES_LIST = ['RoadAccidents', 'Assault', 'Vandalism', 'Arrest', 'Shooting', 'Arson', 'Explosion', 'Shoplifting', 'Robbery', 'Stealing', 'Burglary', 'Abuse', 'Fighting']
class_to_idx = {cls_name: idx for idx, cls_name in enumerate(CLASSES_LIST)}

## Load Model

In [5]:
# Load model
model = load_model('my_model.h5')

2024-11-15 11:38:55.308424: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


## Predict Single Action

In [30]:
def predict_on_video(video_file_path, model, sequence_length, frame_width, frame_height, classes_list):
    """
    Perform action recognition on a video using the trained LRCN model and display
    the predicted action label with its confidence score for the whole video.

    Args:
    video_file_path (str): Path of the video on which action recognition is to be performed.
    model (tf.keras.Model): Trained LRCN model for action recognition.
    sequence_length (int): Fixed number of frames for each input sequence.
    frame_width (int): Width to which each frame is resized.
    frame_height (int): Height to which each frame is resized.
    classes_list (list): List of class names indexed according to the model's output.
    """

    # Initialize the VideoCapture object to read from the video file.
    video_reader = cv2.VideoCapture(video_file_path)
    frames_queue = deque(maxlen=sequence_length)
    predictions = []

    # Process each frame in the video.
    while video_reader.isOpened():
        ok, frame = video_reader.read()
        if not ok:
            break  # Stop if no frame is returned

        # Resize the frame and apply CLAHE preprocessing for low-light enhancement.
        resized_frame = cv2.resize(frame, (frame_width, frame_height))
        gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced_frame = clahe.apply(gray_frame)

        # Normalize the frame and append it to the frame queue
        normalized_frame = enhanced_frame / 255.0  # Normalize to [0, 1]
        frames_queue.append(normalized_frame)

        # When enough frames are gathered, predict the action
        if len(frames_queue) == sequence_length:
            input_sequence = np.expand_dims(frames_queue, axis=(0, -1))  # Shape: (1, sequence_length, frame_height, frame_width, 1)
            predicted_probabilities = model.predict(input_sequence)[0]
            predictions.append(predicted_probabilities)

    # Release resources
    video_reader.release()
    print('predictions_size: ', len(predictions))

    # Compute average probabilities across all predictions and get the highest confidence label
    if predictions:
        average_probabilities = np.mean(predictions, axis=0)
        predicted_label = np.argmax(average_probabilities)
        confidence_score = average_probabilities[predicted_label] * 100
        predicted_class_name = classes_list[predicted_label]

        print(f"Predicted Action: {predicted_class_name} ({confidence_score:.2f}% confidence)")
    else:
        print("No frames were processed, prediction could not be made.")


## Predict all actions in a single video

In [41]:
from collections import deque

def predict_on_video_full(video_file_path, output_file_path, model, sequence_length, frame_width, frame_height, classes_list):
    """
    Perform action recognition on a video using the trained LRCN model.
    
    Args:
    video_file_path (str): Path of the video on which action recognition is to be performed.
    output_file_path (str): Path where the output video with the predicted action will be saved.
    model (tf.keras.Model): Trained LRCN model for action recognition.
    sequence_length (int): Fixed number of frames for each input sequence.
    frame_width (int): Width to which each frame is resized.
    frame_height (int): Height to which each frame is resized.
    classes_list (list): List of class names indexed according to the model's output.
    """

    # Initialize the VideoCapture object to read from the video file.
    video_reader = cv2.VideoCapture(video_file_path)
    original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize the VideoWriter Object to save the output video.
    video_writer = cv2.VideoWriter(
        output_file_path, cv2.VideoWriter_fourcc(*'DIVX'),
        video_reader.get(cv2.CAP_PROP_FPS), (original_video_width, original_video_height)
    )

    # Initialize a deque to store video frames and set a placeholder for the predicted action.
    frames_queue = deque(maxlen=sequence_length)
    predicted_class_name = ''

    # Process each frame in the video.
    while video_reader.isOpened():
        ok, frame = video_reader.read()
        if not ok:
            break  # Stop if no frame is returned

        # Resize the frame and apply CLAHE preprocessing for low-light enhancement.
        resized_frame = cv2.resize(frame, (frame_width, frame_height))
        gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced_frame = clahe.apply(gray_frame)

        # Normalize the frame and append it to the frame queue
        normalized_frame = enhanced_frame / 255.0  # Normalize to [0, 1]
        frames_queue.append(normalized_frame)

        # When enough frames are gathered, predict the action
        if len(frames_queue) == sequence_length:
            input_sequence = np.expand_dims(frames_queue, axis=(0, -1))  # Shape: (1, sequence_length, frame_height, frame_width, 1)
            predicted_probabilities = model.predict(input_sequence)[0]
            predicted_label = np.argmax(predicted_probabilities)
            predicted_class_name = classes_list[predicted_label]

        # Display the predicted class name on the frame
        cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        
        # Save the frame with the overlayed prediction to the output video
        video_writer.write(frame)

    # Release resources
    video_reader.release()
    video_writer.release()


## Test Configuration

In [38]:
def get_video_paths(test_video_name):
    extension = '.mp4'
    test_folder_path = 'test'
    output_folder_path = 'output'
    
    video_file_path = f"{test_folder_path}/{test_video_name}{extension}"
    output_file_path = f"{output_folder_path}/{test_video_name}{extension}"
    
    return video_file_path, output_file_path

# Example 1
ip1, op1 = get_video_paths('fighting')
ip2, op2 = get_video_paths('accident')
ip3, op3 = get_video_paths('arson')

print(ip1)



test/fighting.mp4


## Example 1: Fighting

In [39]:
# Example 1
out1 = predict_on_video(video_file_path=ip1, 
                 model=model, 
                 sequence_length=SEQUENCE_LENGTH, 
                 frame_width=FRAME_WIDTH, 
                 frame_height=FRAME_HEIGHT, 
                 classes_list=CLASSES_LIST)
print(out1)

from IPython.display import HTML

HTML(f"""
<video width="600" height="400" controls>
  <source src="{ip1}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms

## Example 2: Road Accident

In [1]:
# Example 2
out2 = predict_on_video(video_file_path=ip2, 
                 model=model, 
                 sequence_length=SEQUENCE_LENGTH, 
                 frame_width=FRAME_WIDTH, 
                 frame_height=FRAME_HEIGHT, 
                 classes_list=CLASSES_LIST)
print(out2)

from IPython.display import HTML

HTML(f"""
<video width="600" height="400" controls>
  <source src="{ip2}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")

NameError: name 'predict_on_video' is not defined

## Example 3: Arson

In [22]:
# Example 2
out3 = predict_on_video(video_file_path=ip3, 
                 model=model, 
                 sequence_length=SEQUENCE_LENGTH, 
                 frame_width=FRAME_WIDTH, 
                 frame_height=FRAME_HEIGHT, 
                 classes_list=CLASSES_LIST)
print(out3)

from IPython.display import HTML

HTML(f"""
<video width="600" height="400" controls>
  <source src="{ip3}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9

## Show frame by frame predictions on whole video

In [42]:
print(ip1, op1)
predict_on_video_full(video_file_path=ip1, 
                      output_file_path=op1, 
                      model=model, 
                      sequence_length=SEQUENCE_LENGTH, 
                      frame_width=FRAME_WIDTH, 
                      frame_height=FRAME_HEIGHT, 
                      classes_list=CLASSES_LIST)

test/fighting.mp4 output/fighting.mp4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step


OpenCV: FFMPEG: tag 0x58564944/'DIVX' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0