# All models tested on the video dataset

### VGG16 Model test on RWF-2000 video dataset

In [9]:
import tensorflow as tf
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET

# Define the custom loss function for smooth L1 loss
def smooth_l1_loss(y_true, y_pred):
    """Smooth L1 loss with a threshold of 1.0, averaged over all elements.

    Absolute errors below 1.0 contribute ``0.5 * err ** 2``; errors of 1.0 or
    more contribute ``err - 0.5``.
    """
    abs_err = tf.abs(y_true - y_pred)
    # 1.0 where the quadratic branch applies, 0.0 where the linear one does.
    quad_mask = tf.cast(tf.less(abs_err, 1.0), dtype=tf.float32)
    quadratic_term = quad_mask * 0.5 * abs_err ** 2
    linear_term = (1 - quad_mask) * (abs_err - 0.5)
    return tf.reduce_mean(quadratic_term + linear_term)

# Define the custom metric function for mean IoU
def mean_iou(y_true, y_pred):
    """Return sum(element-wise min) / sum(element-wise max) of the two tensors.

    NOTE(review): this is an overlap ratio over the raw tensor values, not a
    geometric box IoU -- confirm this is the metric the model was trained with.
    """
    eps = tf.keras.backend.epsilon()  # guards against division by zero
    overlap = tf.reduce_sum(tf.minimum(y_true, y_pred))
    total = tf.reduce_sum(tf.maximum(y_true, y_pred))
    return overlap / (total + eps)

# Restore the saved network, passing the custom loss and metric via custom_objects
model_path = '/content/drive/MyDrive/thesis/cnn_models/VGG16Modelv2.h5'
Model = tf.keras.models.load_model(
    model_path,
    custom_objects={
        'smooth_l1_loss': smooth_l1_loss,
        'mean_iou': mean_iou,
    },
)

# Inference settings: frames per prediction batch and model input shape
# (height, width, channels)
batch_size = 32
input_shape = (224, 224, 3)

# Tag appended to every output file name
model_name = 'VGG16Modelv2'

# Folder holding the test clips; only .avi files are picked up
video_dir = '/content/drive/MyDrive/thesis/dataset/mixed_videos2'
video_files = [entry for entry in os.listdir(video_dir) if entry.endswith('.avi')]

# Define a data generator that yields batches of resized frames read from a video
def video_data_generator(video_path, batch_size, input_shape):
    """Yield batches of resized frames read sequentially from a video file.

    Args:
        video_path: Path of the video to decode with OpenCV.
        batch_size: Maximum number of frames per yielded batch.
        input_shape: Model input shape as (height, width, channels); every
            frame is resized to that height and width.

    Yields:
        np.ndarray stacking up to ``batch_size`` resized frames. The last
        batch of a video holds the leftover frames and may be shorter, so no
        trailing frames are dropped.
    """
    # cv2.resize expects (width, height), the reverse of array-shape order.
    target_size = (input_shape[1], input_shape[0])
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            batch = []
            while len(batch) < batch_size:
                ret, frame = cap.read()
                if not ret:
                    break  # No more frames could be read
                batch.append(cv2.resize(frame, target_size))

            if not batch:
                break

            yield np.array(batch)
    finally:
        # Also runs when the generator is closed early or an error occurs.
        cap.release()

# Writer settings shared by every output clip
output_shape = (input_shape[1], input_shape[0])  # Width, Height
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# All annotated clips of this model go into one folder; create it once
output_video_dir = '/content/drive/MyDrive/thesis/cnn_models_output/RWF_VGG16Modelv2_output'
os.makedirs(output_video_dir, exist_ok=True)

# Pixel extents used to scale the normalized box coordinates
frame_height, frame_width = input_shape[:2]

# Loop through each video file
for video_file in video_files:
    video_path = os.path.join(video_dir, video_file)

    # Initialize the data generator for the current video
    video_generator = video_data_generator(video_path, batch_size, input_shape)

    # Output file: <original name>_<model name>.mp4
    output_video_filename = f'{os.path.splitext(video_file)[0]}_{model_name}.mp4'
    output_video_path = os.path.join(output_video_dir, output_video_filename)

    # Initialize the VideoWriter for the current video
    output_video = cv2.VideoWriter(output_video_path, fourcc, 30.0, output_shape)

    try:
        # Predict batch by batch; the generator stops when the video ends
        for frames in video_generator:
            predictions = Model.predict(frames)

            for frame, pred in zip(frames, predictions):
                # Each prediction is unpacked as (xmin, ymin, xmax, ymax) in
                # normalized coordinates: x scales with width, y with height
                xmin, ymin, xmax, ymax = pred
                xmin, xmax = int(xmin * frame_width), int(xmax * frame_width)
                ymin, ymax = int(ymin * frame_height), int(ymax * frame_height)

                # NOTE(review): pred[0] is the same value unpacked as xmin
                # above, so the label depends on the box's left edge rather
                # than on a separate class score -- confirm the model output
                # layout before trusting these labels.
                class_label = "Violence" if pred[0] > 0.5 else "NonViolence"

                # Red (BGR) for Violence, green for NonViolence
                box_color = (0, 0, 255) if class_label == "Violence" else (0, 255, 0)

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
                # Keep the label inside the frame when the box touches the top edge
                label_origin = (xmin, max(ymin - 10, 10))
                cv2.putText(frame, class_label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

                # Write the frame with annotations to the output video
                output_video.write(frame)
    finally:
        # Close the generator and finalize the output file even if a step failed
        video_generator.close()
        output_video.release()




### VGG19 Model test on RWF-2000 video dataset

In [8]:
import tensorflow as tf
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET

# Define the custom loss function for smooth L1 loss
def smooth_l1_loss(y_true, y_pred):
    """Smooth L1 loss with a threshold of 1.0, averaged over all elements.

    Absolute errors below 1.0 contribute ``0.5 * err ** 2``; errors of 1.0 or
    more contribute ``err - 0.5``.
    """
    abs_err = tf.abs(y_true - y_pred)
    # 1.0 where the quadratic branch applies, 0.0 where the linear one does.
    quad_mask = tf.cast(tf.less(abs_err, 1.0), dtype=tf.float32)
    quadratic_term = quad_mask * 0.5 * abs_err ** 2
    linear_term = (1 - quad_mask) * (abs_err - 0.5)
    return tf.reduce_mean(quadratic_term + linear_term)

# Define the custom metric function for mean IoU
def mean_iou(y_true, y_pred):
    """Return sum(element-wise min) / sum(element-wise max) of the two tensors.

    NOTE(review): this is an overlap ratio over the raw tensor values, not a
    geometric box IoU -- confirm this is the metric the model was trained with.
    """
    eps = tf.keras.backend.epsilon()  # guards against division by zero
    overlap = tf.reduce_sum(tf.minimum(y_true, y_pred))
    total = tf.reduce_sum(tf.maximum(y_true, y_pred))
    return overlap / (total + eps)

# Restore the saved network, passing the custom loss and metric via custom_objects
model_path = '/content/drive/MyDrive/thesis/cnn_models/VGG19Modelv1.h5'
Model = tf.keras.models.load_model(
    model_path,
    custom_objects={
        'smooth_l1_loss': smooth_l1_loss,
        'mean_iou': mean_iou,
    },
)

# Inference settings: frames per prediction batch and model input shape
# (height, width, channels)
batch_size = 32
input_shape = (224, 224, 3)

# Tag appended to every output file name
model_name = 'VGG19Modelv1'

# Folder holding the test clips; only .avi files are picked up
video_dir = '/content/drive/MyDrive/thesis/dataset/mixed_videos2'
video_files = [entry for entry in os.listdir(video_dir) if entry.endswith('.avi')]

# Define a data generator that yields batches of resized frames read from a video
def video_data_generator(video_path, batch_size, input_shape):
    """Yield batches of resized frames read sequentially from a video file.

    Args:
        video_path: Path of the video to decode with OpenCV.
        batch_size: Maximum number of frames per yielded batch.
        input_shape: Model input shape as (height, width, channels); every
            frame is resized to that height and width.

    Yields:
        np.ndarray stacking up to ``batch_size`` resized frames. The last
        batch of a video holds the leftover frames and may be shorter, so no
        trailing frames are dropped.
    """
    # cv2.resize expects (width, height), the reverse of array-shape order.
    target_size = (input_shape[1], input_shape[0])
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            batch = []
            while len(batch) < batch_size:
                ret, frame = cap.read()
                if not ret:
                    break  # No more frames could be read
                batch.append(cv2.resize(frame, target_size))

            if not batch:
                break

            yield np.array(batch)
    finally:
        # Also runs when the generator is closed early or an error occurs.
        cap.release()

# Writer settings shared by every output clip
output_shape = (input_shape[1], input_shape[0])  # Width, Height
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# All annotated clips of this model go into one folder; create it once
output_video_dir = '/content/drive/MyDrive/thesis/cnn_models_output/RWF_VGG19Modelv1_output'
os.makedirs(output_video_dir, exist_ok=True)

# Pixel extents used to scale the normalized box coordinates
frame_height, frame_width = input_shape[:2]

# Loop through each video file
for video_file in video_files:
    video_path = os.path.join(video_dir, video_file)

    # Initialize the data generator for the current video
    video_generator = video_data_generator(video_path, batch_size, input_shape)

    # Output file: <original name>_<model name>.mp4
    output_video_filename = f'{os.path.splitext(video_file)[0]}_{model_name}.mp4'
    output_video_path = os.path.join(output_video_dir, output_video_filename)

    # Initialize the VideoWriter for the current video
    output_video = cv2.VideoWriter(output_video_path, fourcc, 30.0, output_shape)

    try:
        # Predict batch by batch; the generator stops when the video ends
        for frames in video_generator:
            predictions = Model.predict(frames)

            for frame, pred in zip(frames, predictions):
                # Each prediction is unpacked as (xmin, ymin, xmax, ymax) in
                # normalized coordinates: x scales with width, y with height
                xmin, ymin, xmax, ymax = pred
                xmin, xmax = int(xmin * frame_width), int(xmax * frame_width)
                ymin, ymax = int(ymin * frame_height), int(ymax * frame_height)

                # NOTE(review): pred[0] is the same value unpacked as xmin
                # above, so the label depends on the box's left edge rather
                # than on a separate class score -- confirm the model output
                # layout before trusting these labels.
                class_label = "Violence" if pred[0] > 0.5 else "NonViolence"

                # Red (BGR) for Violence, green for NonViolence
                box_color = (0, 0, 255) if class_label == "Violence" else (0, 255, 0)

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
                # Keep the label inside the frame when the box touches the top edge
                label_origin = (xmin, max(ymin - 10, 10))
                cv2.putText(frame, class_label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

                # Write the frame with annotations to the output video
                output_video.write(frame)
    finally:
        # Close the generator and finalize the output file even if a step failed
        video_generator.close()
        output_video.release()




### MobileNetV2 Model test on RWF-2000 video dataset

In [10]:
import tensorflow as tf
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET

# Define the custom loss function for smooth L1 loss
def smooth_l1_loss(y_true, y_pred):
    """Smooth L1 loss with a threshold of 1.0, averaged over all elements.

    Absolute errors below 1.0 contribute ``0.5 * err ** 2``; errors of 1.0 or
    more contribute ``err - 0.5``.
    """
    abs_err = tf.abs(y_true - y_pred)
    # 1.0 where the quadratic branch applies, 0.0 where the linear one does.
    quad_mask = tf.cast(tf.less(abs_err, 1.0), dtype=tf.float32)
    quadratic_term = quad_mask * 0.5 * abs_err ** 2
    linear_term = (1 - quad_mask) * (abs_err - 0.5)
    return tf.reduce_mean(quadratic_term + linear_term)

# Define the custom metric function for mean IoU
def mean_iou(y_true, y_pred):
    """Return sum(element-wise min) / sum(element-wise max) of the two tensors.

    NOTE(review): this is an overlap ratio over the raw tensor values, not a
    geometric box IoU -- confirm this is the metric the model was trained with.
    """
    eps = tf.keras.backend.epsilon()  # guards against division by zero
    overlap = tf.reduce_sum(tf.minimum(y_true, y_pred))
    total = tf.reduce_sum(tf.maximum(y_true, y_pred))
    return overlap / (total + eps)

# Restore the saved network, passing the custom loss and metric via custom_objects
model_path = '/content/drive/MyDrive/thesis/cnn_models/MobileNetV2Modelv2.h5'
Model = tf.keras.models.load_model(
    model_path,
    custom_objects={
        'smooth_l1_loss': smooth_l1_loss,
        'mean_iou': mean_iou,
    },
)

# Inference settings: frames per prediction batch and model input shape
# (height, width, channels)
batch_size = 32
input_shape = (224, 224, 3)

# Tag appended to every output file name
model_name = 'MobileNetV2Modelv2'

# Folder holding the test clips; only .avi files are picked up
video_dir = '/content/drive/MyDrive/thesis/dataset/mixed_videos2'
video_files = [entry for entry in os.listdir(video_dir) if entry.endswith('.avi')]

# Define a data generator that yields batches of resized frames read from a video
def video_data_generator(video_path, batch_size, input_shape):
    """Yield batches of resized frames read sequentially from a video file.

    Args:
        video_path: Path of the video to decode with OpenCV.
        batch_size: Maximum number of frames per yielded batch.
        input_shape: Model input shape as (height, width, channels); every
            frame is resized to that height and width.

    Yields:
        np.ndarray stacking up to ``batch_size`` resized frames. The last
        batch of a video holds the leftover frames and may be shorter, so no
        trailing frames are dropped.
    """
    # cv2.resize expects (width, height), the reverse of array-shape order.
    target_size = (input_shape[1], input_shape[0])
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            batch = []
            while len(batch) < batch_size:
                ret, frame = cap.read()
                if not ret:
                    break  # No more frames could be read
                batch.append(cv2.resize(frame, target_size))

            if not batch:
                break

            yield np.array(batch)
    finally:
        # Also runs when the generator is closed early or an error occurs.
        cap.release()

# Writer settings shared by every output clip
output_shape = (input_shape[1], input_shape[0])  # Width, Height
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# All annotated clips of this model go into one folder; create it once
output_video_dir = '/content/drive/MyDrive/thesis/cnn_models_output/RWF_MobileNetV2Modelv2_output'
os.makedirs(output_video_dir, exist_ok=True)

# Pixel extents used to scale the normalized box coordinates
frame_height, frame_width = input_shape[:2]

# Loop through each video file
for video_file in video_files:
    video_path = os.path.join(video_dir, video_file)

    # Initialize the data generator for the current video
    video_generator = video_data_generator(video_path, batch_size, input_shape)

    # Output file: <original name>_<model name>.mp4
    output_video_filename = f'{os.path.splitext(video_file)[0]}_{model_name}.mp4'
    output_video_path = os.path.join(output_video_dir, output_video_filename)

    # Initialize the VideoWriter for the current video
    output_video = cv2.VideoWriter(output_video_path, fourcc, 30.0, output_shape)

    try:
        # Predict batch by batch; the generator stops when the video ends
        for frames in video_generator:
            predictions = Model.predict(frames)

            for frame, pred in zip(frames, predictions):
                # Each prediction is unpacked as (xmin, ymin, xmax, ymax) in
                # normalized coordinates: x scales with width, y with height
                xmin, ymin, xmax, ymax = pred
                xmin, xmax = int(xmin * frame_width), int(xmax * frame_width)
                ymin, ymax = int(ymin * frame_height), int(ymax * frame_height)

                # NOTE(review): pred[0] is the same value unpacked as xmin
                # above, so the label depends on the box's left edge rather
                # than on a separate class score -- confirm the model output
                # layout before trusting these labels.
                class_label = "Violence" if pred[0] > 0.5 else "NonViolence"

                # Red (BGR) for Violence, green for NonViolence
                box_color = (0, 0, 255) if class_label == "Violence" else (0, 255, 0)

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
                # Keep the label inside the frame when the box touches the top edge
                label_origin = (xmin, max(ymin - 10, 10))
                cv2.putText(frame, class_label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

                # Write the frame with annotations to the output video
                output_video.write(frame)
    finally:
        # Close the generator and finalize the output file even if a step failed
        video_generator.close()
        output_video.release()




## Installing YOLO models (YOLOv8 & YOLO-NAS)

In [1]:
!pip  install ultralytics==8.0.138 super-gradients==3.1.3

Collecting ultralytics==8.0.138
  Downloading ultralytics-8.0.138-py3-none-any.whl (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.5/605.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting super-gradients==3.1.3
  Downloading super_gradients-3.1.3-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Collecting boto3>=1.17.15 (from super-gradients==3.1.3)
  Downloading boto3-1.34.3-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Collecting Deprecated>=1.2.11 (from super-gradients==3.1.3)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting coverage~=5.3.1 (from super-gradients==3.1.3)
  Downloading coverage-5.3.1.tar.gz (684 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m684.5/684.5 kB[0m [31m5.7 MB/s[0m eta [36m

### YOLOv8 Model test on RWF-2000 video dataset

In [1]:
!yolo task=detect mode=predict model=/content/drive/MyDrive/thesis/yolo_models/best.pt conf=0.25 source=/content/drive/MyDrive/thesis/dataset/mixed_videos2

2023-12-19 18:33:08.205697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-19 18:33:08.205749: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-19 18:33:08.207559: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Ultralytics YOLOv8.0.138 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 11126358 parameters, 0 gradients

video 1/10 (1/150) /content/drive/MyDrive/thesis/dataset/mixed_videos2/NV1_F6kgeh23_0.avi: 320x416 1 NonViolence, 96.4ms
video 1/10 (2/150) /content/drive/MyDrive/thesis/dataset/mixed_videos2/NV1_F6kgeh23_0.a

In [2]:
!zip -r '/content/runs.zip' '/content/runs'

  adding: content/runs/ (stored 0%)
  adding: content/runs/detect/ (stored 0%)
  adding: content/runs/detect/predict/ (stored 0%)
  adding: content/runs/detect/predict/V5_83d-16REC40_1.avi (deflated 4%)
  adding: content/runs/detect/predict/NV3_wMcODEtzJIA_1.avi (deflated 11%)
  adding: content/runs/detect/predict/V4_qaAclucigpY_3.avi (deflated 3%)
  adding: content/runs/detect/predict/V1_v4dhdnsxiX4_1.avi (deflated 0%)
  adding: content/runs/detect/predict/V2_Ry5c1PbcIa0_0.avi (deflated 4%)
  adding: content/runs/detect/predict/NV2_v4dhdnsxiX4_0.avi (deflated 0%)
  adding: content/runs/detect/predict/V3_OvDMIkTb4XU_0.avi (deflated 0%)
  adding: content/runs/detect/predict/NV5_xRbCHA5YTi8_0.avi (deflated 1%)
  adding: content/runs/detect/predict/NV1_F6kgeh23_0.avi (deflated 4%)
  adding: content/runs/detect/predict/NV4_wyLoweAn_9Q_0.avi (deflated 4%)


### YOLO-NAS Model test on RWF-2000 video dataset

In [12]:
from super_gradients.training import models
import os
import torch

# loading the model

# Dataset layout description; in the visible cells only 'classes' is read,
# to set the number of classes when the model is built.
dataset_params = dict(
    data_dir='/content/drive/MyDrive/thesis/dataset/roboflow',
    train_images_dir='train/images',
    train_labels_dir='train/labels',
    val_images_dir='valid/images',
    val_labels_dir='valid/labels',
    test_images_dir='test/images',
    test_labels_dir='test/labels',
    classes=['NonViolence', 'Violence'],
)

# Build the yolo_nas_s architecture and load the weights from the checkpoint
checkpoint_file = "/content/drive/MyDrive/thesis/yolo_models/ckpt_latest.pth"
best_model = models.get(
    'yolo_nas_s',
    num_classes=len(dataset_params['classes']),
    checkpoint_path=checkpoint_file,
)




# Use CUDA device 0 when available, otherwise fall back to the CPU
device = 0 if torch.cuda.is_available() else "cpu"

input_video_dir = "/content/drive/MyDrive/thesis/dataset/mixed_videos2/"
output_video_dir = "/content/drive/MyDrive/thesis/RWF_YoloNas_outputs"
modelname = "yolo_nas_s"

# Make sure the output folder exists before any video is saved into it
os.makedirs(output_video_dir, exist_ok=True)

# Move the model to the target device once instead of once per video
model_on_device = best_model.to(device)

# Process every .avi file found in the input video directory
for input_file in os.listdir(input_video_dir):
    if not input_file.endswith(".avi"):
        continue

    input_video_path = os.path.join(input_video_dir, input_file)

    # Output name format: original_video_name + "_" + modelname + ".avi"
    output_video_name = os.path.splitext(input_file)[0] + "_" + modelname + ".avi"
    output_video_path = os.path.join(output_video_dir, output_video_name)

    # Perform prediction and save the output video
    model_on_device.predict(input_video_path).save(output_video_path)

[2023-12-19 18:59:41] INFO - checkpoint_utils.py - Successfully loaded model weights from /content/drive/MyDrive/thesis/yolo_models/ckpt_latest.pth EMA checkpoint.
Predicting Video:   0%|          | 0/150 [00:00<?, ?it/s][2023-12-19 18:59:42] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
Predicting Video: 100%|██████████| 150/150 [00:03<00:00, 45.14it/s]
Predicting Video:   0%|          | 0/150 [00:00<?, ?it/s][2023-12-19 18:59:46] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
Predicting Video: 100%|██████████| 150/150 [00:01<00:00, 96.82it/s] 
Predicting Video:   0%|          | 0/150 [00:00<?, ?it/s][2023-12-19 18:59:48] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
Predicting Video: 100%|██████████| 150/150

In [25]:
# Run the detector on one specific clip and save the annotated result under /content/runs
input_video_path = "/content/drive/MyDrive/thesis/dataset/mixed_videos2/V5_83d-16REC40_1.avi"
output_video_path = "/content/runs/yolo_nas_ourtput/V5_83d-16REC40_1.avi"

single_clip_prediction = best_model.to(device).predict(input_video_path)
single_clip_prediction.save(output_video_path)

Predicting Video:   0%|          | 0/150 [00:00<?, ?it/s][2023-12-19 19:07:14] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
Predicting Video: 100%|██████████| 150/150 [00:02<00:00, 53.90it/s]


In [26]:
!zip -r '/content/runs/yolo_nas_ourtput.zip' '/content/runs/yolo_nas_ourtput'

  adding: content/runs/yolo_nas_ourtput/ (stored 0%)
  adding: content/runs/yolo_nas_ourtput/V5_83d-16REC40_1.avi (deflated 13%)
  adding: content/runs/yolo_nas_ourtput/NV3_wMcODEtzJIA_1.avi (deflated 3%)
  adding: content/runs/yolo_nas_ourtput/V4_qaAclucigpY_3.avi (deflated 2%)
  adding: content/runs/yolo_nas_ourtput/V1_v4dhdnsxiX4_1.avi (deflated 2%)
  adding: content/runs/yolo_nas_ourtput/V2_Ry5c1PbcIa0_0.avi (deflated 9%)
  adding: content/runs/yolo_nas_ourtput/NV2_v4dhdnsxiX4_0.avi (deflated 2%)
  adding: content/runs/yolo_nas_ourtput/V3_OvDMIkTb4XU_0.avi (deflated 1%)
  adding: content/runs/yolo_nas_ourtput/NV5_xRbCHA5YTi8_0.avi (deflated 7%)
  adding: content/runs/yolo_nas_ourtput/NV1_F6kgeh23_0.avi (deflated 3%)
  adding: content/runs/yolo_nas_ourtput/NV4_wyLoweAn_9Q_0.avi (deflated 7%)


In [27]:
!zip -r '/content/drive/MyDrive/thesis/cnn_models_output.zip' '/content/drive/MyDrive/thesis/cnn_models_output'

  adding: content/drive/MyDrive/thesis/cnn_models_output/ (stored 0%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/ (stored 0%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/NV_3_VGG19Modelv1.mp4 (deflated 2%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/NV_42_VGG19Modelv1.mp4 (deflated 1%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/NV_1_VGG19Modelv1.mp4 (deflated 1%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/NV_2_VGG19Modelv1.mp4 (deflated 4%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/V_1_VGG19Modelv1.mp4 (deflated 1%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/NV_21_VGG19Modelv1.mp4 (deflated 1%)
  adding: content/drive/MyDrive/thesis/cnn_models_output/VGG19Modelv1_output/V_2_VGG19Modelv1.mp4 (deflated 1%)
  adding: content/drive/MyDrive/thesis/cnn_models