In [3]:
import tensorflow as tf

In [4]:
!export TF_FORCE_GPU_ALLOW_GROWTH=True
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

2022-07-25 20:06:50.246408: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 20:06:50.248016: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 20:06:50.249928: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 20:06:50.255098: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 20:06:50.256747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from S

Set seed

In [5]:
SEED = 0
import random
import numpy as np
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load data

In [6]:
import numpy as np
import cv2

In [7]:
FRAMES_PER_VIDEO = 50 + 1
VIDEO_WIDTH, VIDEO_HEIGHT = 100, 100
N_CHANNELS = 3

def load_videos(video_IDs: list, video_frames: int = FRAMES_PER_VIDEO, video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False) -> tuple:
    videos = np.empty((len(video_IDs), video_frames, video_height, video_width, video_channels), dtype=dtype)

    for i, video_ID in enumerate(video_IDs):
        cap = cv2.VideoCapture(video_ID)
        original_n_frames = int(cap. get(cv2. CAP_PROP_FRAME_COUNT))
        
        # Indexes of frames to be kept to comply with video_frames
        frames_idx = set(np.round(np.linspace(0, original_n_frames - 1, video_frames)).astype(int))

        frames = []
        index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if index in frames_idx:
                frame = cv2.resize(frame, (video_width, video_height)).astype(dtype)
                if normalize:
                    frame /= 255.0
                frames.append(frame)
            index += 1
        cap.release()

        if len(frames) != video_frames:
            # Complete with repeated frames in the begging and the end of the video
            n_repeats = (video_frames - len(frames)) / 2
            # In case n_repeats is decimal, first frames will be rounded to the nearest integer
            beggining_frames = np.tile(frames[0], [round(n_repeats + 0.001), 1, 1, 1])
            end_frames = np.tile(frames[-1], [int(n_repeats), 1, 1, 1])
            frames = np.concatenate([beggining_frames, frames, end_frames])
        
        videos[i,] = np.array(frames)
        

    return videos

DataGenerator class to load videos per batch, in case all videos do not fit in memory

In [8]:
import gc
from tensorflow.keras import backend as K

class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, video_IDs: list, video_labels: list, batch_size: int, paths: list = [''], video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_frames: int = FRAMES_PER_VIDEO, video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False, shuffle: bool = True):
        self.video_IDs = video_IDs
        self.video_labels = video_labels
        self.batch_size = batch_size
        self.paths = paths
        self.video_width = video_width
        self.video_height = video_height
        self.video_frames = video_frames
        self.video_channels = video_channels
        self.dtype = dtype
        self.normalize = normalize
        self.shuffle = shuffle

    def __len__(self):
        return len(self.video_IDs) // self.batch_size

    def __getitem__(self, idx):
        batch_IDs = self.video_IDs[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_labels = self.video_labels[idx*self.batch_size:(idx+1)*self.batch_size]
        input_videos = []
        
        for index, path in enumerate(self.paths):
            batch_IDs_full_path = [path+ID for ID in batch_IDs]

            videos = load_videos(batch_IDs_full_path, self.video_frames, self.video_width, 
                                         self.video_height, self.video_channels, self.dtype, self.normalize)
            
            input_videos.append(videos)
                    
        return input_videos, batch_labels
            
    
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle:
            # shuffle video_IDs and video_labels together
            temp_list = list(zip(self.video_IDs, self.video_labels))
            np.random.shuffle(temp_list)
            self.video_IDs, self.video_labels = zip(*temp_list)
        # Clear memory after epochs
        gc.collect()
        #K.clear_session()

## Videos to load

In [14]:
import pandas as pd

def fecth_generators(fold):
    global folds, PATHS

    train_df = folds[folds.fold != fold]
    test_df = folds[folds.fold == fold]

    train_video_IDs = train_df.video.values
    test_video_IDs = test_df.video.values

    train_video_labels = train_df.label.values
    test_video_labels = test_df.label.values

    train_generator = DataGenerator(train_video_IDs, train_video_labels, batch_size=10, paths=PATHS)
    test_generator = DataGenerator(test_video_IDs, test_video_labels, batch_size=10, paths=PATHS)

    return train_generator, test_generator

# Frame functions

In [10]:
import tensorflow.keras.backend as K

def tf_frame_diff(video):
    return video[1:] - video[:-1]

frame_func = tf_frame_diff

# Load pretrained best model without optimizer

In [16]:
model = tf.keras.models.load_model('models/rwf_best_model.h5')

for DATASET in ('movies', 'crowd', 'hockey'):

    PATHS = [f"../datasets/{DATASET}_dataset/original_data/", f"../datasets/{DATASET}_dataset/openpose_gamma/"]
    
    folds = pd.read_csv(f'../datasets/{DATASET}_dataset/folds.csv')

    data_generator, _ = fecth_generators(fold=None)
    loss, acc = model.evaluate(data_generator, verbose=2)
    print(f'Results for {DATASET} dataset')
    print(f'Loss: {loss:.4f}\tAccuracy: {acc:.4f}')


20/20 - 13s - loss: 0.7102 - accuracy: 0.7500 - 13s/epoch - 654ms/step
Results for movies dataset
Loss: 0.7102	Accuracy: 0.7500
24/24 - 11s - loss: 0.7746 - accuracy: 0.7083 - 11s/epoch - 461ms/step
Results for crowd dataset
Loss: 0.7746	Accuracy: 0.7083
100/100 - 35s - loss: 1.0856 - accuracy: 0.6310 - 35s/epoch - 348ms/step
Results for hockey dataset
Loss: 1.0856	Accuracy: 0.6310
