In [None]:
import cv2
import math
import numpy as np
import pathlib
import pandas as pd
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import einops
import video_tools

# Notebook-wide debug switch. No cell visible here reads it —
# presumably consumed elsewhere; TODO confirm or remove.
DEBUG = False

Below I show a GIF of the filtered frames. In the test video that I'm using, I simulate a "collision" of my hands; they collide at around the 1500 ms timestamp. The code then extracts a set of frames that includes the frame at the 1500 ms timestamp, so the sequence contains the exact moment when my hands "collided".

In [None]:
# Example frames in GIF: extract frame sequences around one event timestamp
# of a single video and preview one of them as an animated GIF.

import imageio
from IPython.display import Image

# Scratch directory for the generated GIF; created if it does not exist yet.
tmp_dir = './tmp'
pathlib.Path(tmp_dir).mkdir(parents=True, exist_ok=True)

video_filepath = 'data/traffic_accident_videos/videos/traffic_accident_1.mp4'
gif_out_file_path = f'{tmp_dir}/test.gif'

# NOTE(review): presumably returns several candidate index sequences (each
# `sequence_length` frames, `frame_step` apart) positioned around the event,
# plus one label per sequence — confirm against video_tools.
frame_index_sequences, labels = video_tools.get_frame_indexes_surrounding_event(video_filepath, event_timestamp_millis=5790, sequence_length=11, frame_step=10)
# NOTE(review): assumed to return one list/array of image frames per index
# sequence — confirm against video_tools.
vid_frames = video_tools.get_image_frames(video_filepath, frame_index_sequences)

# Index 5 is hard-coded: only the entry at position 5 of vid_frames is
# rendered. This raises IndexError if fewer than six entries were returned.
imageio.mimsave(gif_out_file_path, vid_frames[5], fps=10)
# Last expression of the cell, so the notebook displays the GIF inline.
Image(filename=gif_out_file_path)

In [None]:
# Spatial size (in pixels) that every frame is brought to before it is fed
# to the model.
HEIGHT = 224
WIDTH = 224

def format_image_frame(frame):
    """Prepare a single video frame for the model.

    The frame is converted to float32 with `tf.image.convert_image_dtype`
    (which rescales integer inputs into [0, 1]) and then resized to
    HEIGHT x WIDTH with `tf.image.resize_with_pad`, i.e. the aspect ratio is
    kept and the remainder is padded.

    Args:
        frame: image array/tensor accepted by `tf.image.convert_image_dtype`.

    Returns:
        The formatted frame as a NumPy array.
    """
    as_float = tf.image.convert_image_dtype(frame, tf.float32)
    letterboxed = tf.image.resize_with_pad(as_float, HEIGHT, WIDTH)
    return letterboxed.numpy()

In [None]:
# create dataset
labels_file_path = './data/traffic_accident_videos/labels.csv'
videos_dir_path = './data/traffic_accident_videos/videos/'
batch_size = 20
sequence_length = 15
frame_step_size = 5
min_proportion_of_after_event_frames = 0.3
max_proportion_of_after_event_frames = 0.8
num_sequences_for_no_event_videos = 30

# Callable that yields (frames, label) pairs; every frame is passed through
# format_image_frame before being yielded.
frame_generator = video_tools.FrameGenerator(
    videos_dir_path=videos_dir_path,
    labels_file_path=labels_file_path,
    sequence_length=sequence_length,
    frame_step_size=frame_step_size,
    min_proportion_of_after_event_frames=min_proportion_of_after_event_frames,
    max_proportion_of_after_event_frames=max_proportion_of_after_event_frames,
    num_sequences_for_no_event_videos=num_sequences_for_no_event_videos,
    format_frame_fn=format_image_frame,
)

# One element = (float32 frames with 3 channels and unspecified leading dims,
# scalar uint8 label).
output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.uint8),
)

train_ds = tf.data.Dataset.from_generator(frame_generator, output_signature=output_signature)

# Pipeline order matters:
#   cache    -> decode/format each video only once (first full pass), then
#               serve from memory.
#               NOTE(review): any randomness inside FrameGenerator is frozen
#               after that first pass — confirm this is intended.
#   shuffle  -> reshuffled every epoch, on individual sequences.
#   repeat   -> infinite stream; epoch length is set by steps_per_epoch in fit().
#   batch    -> fixed-size batches.
#   prefetch -> LAST, so ready-made batches are prepared in the background
#               while the model trains. (It was previously the first stage,
#               where it only overlapped the generator with the cache fill.)
train_ds = (
    train_ds
    .cache()
    .shuffle(buffer_size=1000, reshuffle_each_iteration=True)
    .repeat()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# test: sanity-check batch shapes and report the share of positive labels
ratio_of_ones = []

for frames, labels in train_ds.take(10):
    assert frames.shape == (batch_size, sequence_length, HEIGHT, WIDTH, 3), \
        f'Unexpected frames batch shape: {frames.shape}'
    assert labels.shape == (batch_size,), \
        f'Unexpected labels batch shape: {labels.shape}'
    ratio_of_ones.append(np.mean(labels))

print('All tests OK.')
print(f'Class balance evaluation: {np.mean(ratio_of_ones)}')

In [None]:
class Conv2Plus1D(keras.layers.Layer):
    """Factorised 3D convolution: a per-frame 2D conv followed by a 1D conv
    along the first (frame) axis, both implemented with `Conv3D`.

    Args:
        filters: number of output channels of both convolutions.
        kernel_size: 3-element sequence; element 0 is the extent along the
            first axis, elements 1 and 2 the extents along the other two.
        padding: padding mode forwarded to both `Conv3D` layers.
    """

    def __init__(self, filters, kernel_size, padding):
        super().__init__()
        # Kernel that only spans the two trailing axes (size 1 on the first).
        within_frame_kernel = (1, kernel_size[1], kernel_size[2])
        # Kernel that only spans the first axis.
        across_frames_kernel = (kernel_size[0], 1, 1)

        within_frame_conv = keras.layers.Conv3D(filters=filters,
                                                kernel_size=within_frame_kernel,
                                                padding=padding)
        across_frames_conv = keras.layers.Conv3D(filters=filters,
                                                 kernel_size=across_frames_kernel,
                                                 padding=padding)
        self.seq = keras.Sequential([within_frame_conv, across_frames_conv])

    def call(self, x):
        """Apply the two convolutions in sequence to `x`."""
        return self.seq(x)
    
class ResidualMain(keras.layers.Layer):
    """Main (non-shortcut) branch of a residual block.

    Stack: Conv2Plus1D -> LayerNormalization -> ReLU -> Conv2Plus1D ->
    LayerNormalization. Both convolutions use 'same' padding.

    Args:
        filters: number of output channels of both convolutions.
        kernel_size: kernel size forwarded to both `Conv2Plus1D` layers.
    """

    def __init__(self, filters, kernel_size):
        super().__init__()
        # Both convolutions share the same configuration.
        conv_config = dict(filters=filters, kernel_size=kernel_size, padding='same')
        branch_layers = [
            Conv2Plus1D(**conv_config),
            keras.layers.LayerNormalization(),
            keras.layers.ReLU(),
            Conv2Plus1D(**conv_config),
            keras.layers.LayerNormalization(),
        ]
        self.seq = keras.Sequential(branch_layers)

    def call(self, x):
        """Run `x` through the branch."""
        return self.seq(x)

class Project(keras.layers.Layer):
    """Dense projection followed by layer normalisation.

    Used to change the size of the last dimension of a tensor to `units`.

    Args:
        units: output size of the `Dense` layer.
    """

    def __init__(self, units):
        super().__init__()
        projection = keras.layers.Dense(units)
        normalisation = keras.layers.LayerNormalization()
        self.seq = keras.Sequential([projection, normalisation])

    def call(self, x):
        """Project and normalise `x`."""
        return self.seq(x)
    
def add_residual_block(input, filters, kernel_size):
    """Append a residual block to `input` and return the summed output.

    The main branch is a `ResidualMain`. The shortcut is `input` itself, or
    `input` passed through `Project` when the main branch changes the size of
    the last dimension.

    Args:
        input: tensor the block is applied to.
        filters: number of filters of the main branch.
        kernel_size: kernel size of the main branch convolutions.

    Returns:
        Element-wise sum of shortcut and main branch.
    """
    main_branch = ResidualMain(filters=filters, kernel_size=kernel_size)(input)
    branch_channels = main_branch.shape[-1]

    if branch_channels == input.shape[-1]:
        shortcut = input
    else:
        # Last-dimension sizes differ: project the shortcut so the sum is valid.
        shortcut = Project(branch_channels)(input)

    return keras.layers.add([shortcut, main_branch])

class ResizeVideo(keras.layers.Layer):
    """Resize every frame of a batch of videos to a fixed height and width.

    Args:
        height: target frame height.
        width: target frame width.
    """

    def __init__(self, height, width):
        super().__init__()
        self.height, self.width = height, width
        self.resizing_layer = keras.layers.Resizing(height, width)

    def call(self, video):
        """Resize `video`, laid out as (b, t, h, w, c), frame by frame.

        The b and t axes are merged so the resizing layer sees a plain batch
        of images, then split back using the original t.
        """
        num_frames = einops.parse_shape(video, 'b t h w c')['t']
        flat_frames = einops.rearrange(video, 'b t h w c -> (b t) h w c')
        resized_frames = self.resizing_layer(flat_frames)
        return einops.rearrange(resized_frames, '(b t) h w c -> b t h w c',
                                t=num_frames)

In [None]:
# build the model
input = layers.Input(shape=(sequence_length, HEIGHT, WIDTH, 3))

# Stem: (2+1)D conv -> batch norm -> ReLU, then halve the spatial resolution.
x = Conv2Plus1D(filters=16, kernel_size=(3, 7, 7), padding='same')(input)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.ReLU()(x)
x = ResizeVideo(HEIGHT//2, WIDTH//2)(x)

# Residual stages, each followed by a further spatial downscale:
# (number of filters, divisor applied to HEIGHT/WIDTH after the stage).
for stage_filters, downscale in ((16, 4), (32, 8), (64, 16)):
    x = add_residual_block(x, filters=stage_filters, kernel_size=(3, 3, 3))
    x = ResizeVideo(HEIGHT//downscale, WIDTH//downscale)(x)

# Final residual stage, not followed by a resize.
x = add_residual_block(x, filters=128, kernel_size=(3, 3, 3))

# Head: pool to one vector per video and emit a single raw logit
# (the Dense layer has no activation).
x = keras.layers.GlobalAveragePooling3D()(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(1)(x)

model = keras.Model(input, x)

In [None]:
# Render the layer graph, annotated with tensor shapes, as the cell output.
# Note: keras.utils.plot_model needs pydot and Graphviz to be installed.
keras.utils.plot_model(model, show_shapes=True)

In [None]:
# The model's final Dense(1) layer has no activation, so it outputs raw
# logits; hence from_logits=True on the loss.
#
# The metric must threshold at 0.0 for the same reason: logit 0 corresponds
# to probability 0.5. The string 'accuracy' would resolve to binary accuracy
# with its default 0.5 threshold applied directly to the logits, which
# under-counts positive predictions. name='accuracy' keeps the key used in
# the training logs / History unchanged.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              loss=keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

In [None]:
# Path prefix under which the best weights are written during training.
checkpoint_path = "./training_checkpoint/checkpoint"

# Save weights only (not the full model) whenever the monitored value
# improves. 'loss' is the training loss: fit() below receives no validation
# data, so there is no validation metric to monitor.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor='loss',
                                                 mode='min',
                                                 save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)

# train_ds is repeated indefinitely, so steps_per_epoch defines how many
# batches make up one epoch.
history = model.fit(x=train_ds,
                    epochs=50,
                    steps_per_epoch=50,
                    callbacks=[cp_callback])