In [1]:
import time
import numpy as np
import cv2
import tensorflow as tf

import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/content/drive')

!unzip /content/drive/MyDrive/Thesis/RWF-2000.zip -d /content

DIR_PATH = "./RWF-2000"
TRAIN_DIR = os.path.join(DIR_PATH, "train")
TEST_DIR = os.path.join(DIR_PATH, "val")

Mounted at /content/drive
Archive:  /content/drive/MyDrive/Thesis/RWF-2000.zip
   creating: /content/RWF-2000/
   creating: /content/RWF-2000/train/
   creating: /content/RWF-2000/train/Fight/
  inflating: /content/RWF-2000/train/Fight/train_Fight (1).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (10).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (100).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (101).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (102).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (103).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (104).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (105).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (106).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (107).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (108).avi  
  inflating: /content/RWF-2000/train/Fight/train_Fight (109).avi  
  infl

In [3]:
CLASS_NAMES = os.listdir(TRAIN_DIR)
SEED = 1337
BATCH_SIZE = 10

ORIGINAL_FRAMES_PER_VIDEO = 150
FRAMES_PER_VIDEO = 50 + 1
VIDEO_WIDTH, VIDEO_HEIGHT = 100, 100
N_CHANNELS = 3

In [4]:

from tensorflow.keras import layers

@tf.keras.utils.register_keras_serializable(package="MotionBlur")
class MotionBlur(layers.Layer):
    def __init__(self, kernel_size=3, **kwargs):
        super(MotionBlur, self).__init__(**kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        self.kernel = tf.constant([[1.0 / self.kernel_size] * self.kernel_size], dtype=tf.float32)
        self.kernel = tf.expand_dims(self.kernel, axis=-1)
        self.kernel = tf.expand_dims(self.kernel, axis=-1)
        self.kernel = tf.tile(self.kernel, [1, 1, input_shape[-1], 1])

    def call(self, inputs):
        return tf.nn.depthwise_conv2d(inputs, self.kernel, strides=[1, 1, 1, 1], padding='SAME')

    def compute_output_shape(self, input_shape):
        return input_shape

@tf.keras.utils.register_keras_serializable(package="FrameDifference")
class FrameDifference(layers.Layer):
    def __init__(self, **kwargs):
        super(FrameDifference, self).__init__(**kwargs)

    def call(self, video):
        return video[:, 1:] - video[:, :-1]

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] - 1) + input_shape[2:]

In [5]:
def load_videos(video_IDs: list, video_labels: dict, video_frames: int = FRAMES_PER_VIDEO, video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False) -> tuple:
    videos = np.empty((len(video_IDs), video_frames, video_height, video_width, video_channels), dtype=dtype)
    labels = np.empty((len(video_IDs),), dtype=np.int8)

    # Indexes of frames to be kept to comply with video_frames
    frames_idx = set(np.round(np.linspace(0, ORIGINAL_FRAMES_PER_VIDEO - 1, video_frames)).astype(int))

    for i, video_ID in enumerate(video_IDs):
        cap = cv2.VideoCapture(video_ID)
        frames = []
        index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if index in frames_idx:
                frame = cv2.resize(frame, (video_width, video_height)).astype(dtype)
                if normalize:
                    frame /= 255.0
                frames.append(frame)
            index += 1
        cap.release()

        videos[i,] = np.array(frames)
        labels[i] = video_labels[video_ID]

    return videos, labels

In [6]:
# load test data
import glob

train_video_IDs = glob.glob(f"{DIR_PATH}/train/*/*")
train_video_labels = {video: 0 if 'NonFight' in video else 1 for video in train_video_IDs}

train_videos = load_videos(train_video_IDs, train_video_labels)

test_video_IDs = glob.glob(f"{DIR_PATH}/val/*/*")
test_video_labels = {video: 0 if 'NonFight' in video else 1 for video in test_video_IDs}

test_videos = load_videos(test_video_IDs, test_video_labels)

In [7]:
# get 100 random test videos
num_test_videos = 100

random_test_video_indexes = np.random.choice(range(len(test_videos[0])), size=num_test_videos, replace=False)

random_videos = test_videos[0][random_test_video_indexes]
print(random_test_video_indexes)

[ 77 359 127 109 235 162 197 118   1 183 126 263 174 140  25  84 131 285
 167 177 306 221 269 255 390 252 283   4 334  97  59 303 147 333 212 291
 293 372 198  15 207  62  68 332  41 284  20 145 384 369 186 153  33 233
 230 342 378  22 312 351   0  92 161 110 307 320  16 214 261 290  71 128
 205 100 399 279  35 280 170  13 251 297 184 213  87 270 151 336 364 116
 148 341 193 292 323 322 157  72 315 272]


In [8]:
# load model
model_vanilla = tf.keras.models.load_model('/content/drive/MyDrive/Thesis/saved models/vanilla_best_model.keras')
model_frame_diff = tf.keras.models.load_model('/content/drive/MyDrive/Thesis/saved models/frame_difference_best_model.keras')
model_frame_diff_motion_blur = tf.keras.models.load_model('/content/drive/MyDrive/Thesis/saved models/best_model_frame_diff_and_motion_blur.keras')
model_frame_diff_motion_blur_fine_tuned = tf.keras.models.load_model('/content/drive/MyDrive/Thesis/saved models/best_model_frame_diff_and_motion_blur_fine_tuning.keras')
model_conv3d = tf.keras.models.load_model('/content/drive/MyDrive/Thesis/saved models/conv3d_best_model.keras')


In [16]:
# predict videos
start_time = time.time()
predictions = model_vanilla.predict(random_videos)
end_time = time.time()

time_taken = end_time - start_time
average_inference_time = time_taken / num_test_videos
print(f"Vanilla Average inference time: {average_inference_time} seconds")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step 
Vanilla Average inference time: 0.01064490556716919 seconds


In [17]:
# predict videos
start_time = time.time()
predictions = model_frame_diff.predict(random_videos)
end_time = time.time()

time_taken = end_time - start_time
average_inference_time = time_taken / num_test_videos
print(f"Frame Diff Average inference time: {average_inference_time} seconds")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step 
Frame Diff Average inference time: 0.010627238750457764 seconds


In [18]:
# predict videos
start_time = time.time()
predictions = model_frame_diff_motion_blur.predict(random_videos)
end_time = time.time()

time_taken = end_time - start_time
average_inference_time = time_taken / num_test_videos
print(f"Frame Diff Motion Blur inference time: {average_inference_time} seconds")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step 
Frame Diff Motion Blur inference time: 0.010838420391082763 seconds


In [19]:
# predict videos
start_time = time.time()
predictions = model_frame_diff_motion_blur_fine_tuned.predict(random_videos)
end_time = time.time()

time_taken = end_time - start_time
average_inference_time = time_taken / num_test_videos
print(f"Frame Diff Motion Blur Fine Tuned inference time: {average_inference_time} seconds")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step 
Frame Diff Motion Blur Fine Tuned inference time: 0.010799310207366943 seconds


In [20]:
# predict videos
start_time = time.time()
predictions = model_conv3d.predict(random_videos)
end_time = time.time()

time_taken = end_time - start_time
average_inference_time = time_taken / num_test_videos
print(f"Conv3D Average inference time: {average_inference_time} seconds")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 584ms/step
Conv3D Average inference time: 0.02504102945327759 seconds
