In [1]:
# Name of the dataset to fine-tune on; it is interpolated into the data paths,
# the folds CSV path and the checkpoint file names used later in the notebook.
DATASET = 'hockey'

In [2]:
# Use output of OpenPose without background
# Path prefixes of the videos used for training: one model input is loaded per prefix.
# DataGenerator builds each file path as `prefix + video_name`, so every prefix
# must end with a path separator.
PATHS = [f"../datasets/{DATASET}_dataset/original_data/", f"../datasets/{DATASET}_dataset/openpose_gamma/"]

# NOTE(review): FRAME_FUNC is not read anywhere in the visible notebook (the frame
# function is bound directly with `frame_func = tf_frame_diff`) - confirm it is still needed.
FRAME_FUNC = 'frame_diff'
# To use frame diff to weight t (current) or t+1
# NOTE(review): WEIGHT_CURRENT is not read anywhere in the visible notebook - confirm it is still needed.
WEIGHT_CURRENT = True

In [3]:
import tensorflow as tf

In [4]:
import os

# `!export VAR=...` runs in a throw-away subshell, so the variable never reaches
# the kernel process. Set it on the kernel's own environment instead, using the
# lowercase value documented by TensorFlow. It has to be in place before the
# GPUs are initialized (which happens lazily, on first use).
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Expose only the first GPU to TensorFlow
        tf.config.set_visible_devices(gpus[0:1], 'GPU')
    except RuntimeError as e:
        # Device configuration cannot be modified once the GPUs have been
        # initialized (e.g. when this cell is re-run without restarting the kernel).
        print(e)

2022-07-23 10:27:16.099361: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.101101: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.102130: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.107692: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.109442: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from S

Set seed

In [5]:
import random
import numpy as np

# Single seed shared by every random number generator used in the notebook
SEED = 0

# Seed Python's `random`, NumPy's global RNG and TensorFlow, in that order
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Load data

In [6]:
import numpy as np
import cv2

In [7]:
FRAMES_PER_VIDEO = 50 + 1
VIDEO_WIDTH, VIDEO_HEIGHT = 100, 100
N_CHANNELS = 3

def load_videos(video_IDs: list, video_frames: int = FRAMES_PER_VIDEO, video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False) -> np.ndarray:
    """Load a list of videos into a single fixed-size array.

    Each video is sampled at `video_frames` evenly spaced frame positions and
    every kept frame is resized to `video_width` x `video_height`. When fewer
    than `video_frames` frames are obtained (short video, or fewer decodable
    frames than the reported frame count), the clip is padded by repeating its
    first frame at the beginning and its last frame at the end.

    Args:
        video_IDs: Paths of the video files to load.
        video_frames: Number of frames to keep per video.
        video_width: Width every frame is resized to.
        video_height: Height every frame is resized to.
        video_channels: Number of channels of the output array.
        dtype: dtype of the returned array.
        normalize: If True, frames are divided by 255.

    Returns:
        Array of shape
        (len(video_IDs), video_frames, video_height, video_width, video_channels).

    Raises:
        OSError: If no frame at all could be read from one of the videos.
    """
    videos = np.empty((len(video_IDs), video_frames, video_height, video_width, video_channels), dtype=dtype)

    for i, video_ID in enumerate(video_IDs):
        cap = cv2.VideoCapture(video_ID)
        frames = []
        try:
            original_n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Indexes of frames to be kept to comply with video_frames
            frames_idx = set(np.round(np.linspace(0, original_n_frames - 1, video_frames)).astype(int))

            index = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if index in frames_idx:
                    frame = cv2.resize(frame, (video_width, video_height)).astype(dtype)
                    if normalize:
                        frame /= 255.0
                    frames.append(frame)
                index += 1
        finally:
            # Always free the capture handle, even if decoding/resizing fails
            cap.release()

        if not frames:
            # Previously this surfaced as an opaque IndexError on frames[0]
            raise OSError(f"Could not read any frames from video: {video_ID!r}")

        n_missing = video_frames - len(frames)
        if n_missing:
            # Complete with repeated frames at the beginning and the end of the video.
            # When the number of missing frames is odd, the extra one goes to the beginning.
            n_beginning = (n_missing + 1) // 2
            n_end = n_missing // 2
            beginning_frames = np.repeat(frames[0][np.newaxis], n_beginning, axis=0)
            end_frames = np.repeat(frames[-1][np.newaxis], n_end, axis=0)
            frames = np.concatenate([beginning_frames, np.asarray(frames), end_frames])

        videos[i] = np.asarray(frames)

    return videos

DataGenerator class to load videos per batch, in case all videos do not fit in memory

In [8]:
import gc
from tensorflow.keras import backend as K

class DataGenerator(tf.keras.utils.Sequence):
    """Keras `Sequence` that loads videos from disk one batch at a time.

    For every batch, each video ID is loaded once per entry of `paths` (the ID is
    appended to the path prefix), so a batch holds one input array per path.
    """

    def __init__(self, video_IDs: list, video_labels: list, batch_size: int, paths: list = None, video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_frames: int = FRAMES_PER_VIDEO, video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False, shuffle: bool = True):
        """Store the dataset description and the video loading options.

        Args:
            video_IDs: Video file names, appended to each prefix in `paths`.
            video_labels: Label of each video, aligned with `video_IDs`.
            batch_size: Number of videos per batch.
            paths: Path prefixes to load every video from; defaults to `['']`.
            video_width, video_height, video_frames, video_channels, dtype,
                normalize: Forwarded unchanged to `load_videos`.
            shuffle: If True, videos are reshuffled at the end of every epoch.
        """
        super().__init__()
        self.video_IDs = video_IDs
        self.video_labels = video_labels
        self.batch_size = batch_size
        # A fresh list per instance, so no mutable default is shared between generators
        self.paths = [''] if paths is None else list(paths)
        self.video_width = video_width
        self.video_height = video_height
        self.video_frames = video_frames
        self.video_channels = video_channels
        self.dtype = dtype
        self.normalize = normalize
        self.shuffle = shuffle

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.video_IDs) // self.batch_size

    def __getitem__(self, idx):
        """Return `(inputs, labels)` for batch `idx`; `inputs` has one array per path."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_IDs = self.video_IDs[start:stop]
        # Always hand the labels over as an array, whatever container
        # `video_labels` currently is (array at first, list after a shuffle).
        batch_labels = np.asarray(self.video_labels[start:stop])

        input_videos = [
            load_videos([path + ID for ID in batch_IDs], self.video_frames, self.video_width,
                        self.video_height, self.video_channels, self.dtype, self.normalize)
            for path in self.paths
        ]

        return input_videos, batch_labels

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle and len(self.video_IDs) > 0:
            # Shuffle video_IDs and video_labels together so they stay aligned
            paired = list(zip(self.video_IDs, self.video_labels))
            np.random.shuffle(paired)
            shuffled_IDs, shuffled_labels = zip(*paired)
            self.video_IDs = list(shuffled_IDs)
            self.video_labels = list(shuffled_labels)
        # Clear memory after epochs
        gc.collect()

## Videos to load

In [9]:
import pandas as pd
# Fold assignment table; the function below reads its `video`, `label` and `fold` columns.
folds = pd.read_csv(f'../datasets/{DATASET}_dataset/folds.csv')

def fecth_generators(fold, batch_size: int = 10):
    """Build the train and test generators for one cross-validation fold.

    Rows of `folds` whose `fold` column equals `fold` form the test split;
    all remaining rows form the training split.

    Args:
        fold: Value of the `fold` column to hold out for testing.
        batch_size: Number of videos per batch for both generators.

    Returns:
        Tuple `(train_generator, test_generator)`.
    """
    is_test = folds.fold == fold
    train_df = folds[~is_test]
    test_df = folds[is_test]

    train_generator = DataGenerator(train_df.video.values, train_df.label.values,
                                    batch_size=batch_size, paths=PATHS)
    test_generator = DataGenerator(test_df.video.values, test_df.label.values,
                                   batch_size=batch_size, paths=PATHS)

    return train_generator, test_generator

# Frame functions

In [10]:
import tensorflow.keras.backend as K

def tf_frame_diff(video):
    """Return the difference between consecutive entries along the first axis.

    Entry `k` of the result is `video[k + 1] - video[k]`, so the result has one
    entry fewer than `video` along that axis.
    """
    following = video[1:]
    preceding = video[:-1]
    return following - preceding

# Frame function used by the rest of the notebook
frame_func = tf_frame_diff

# Load pretrained best model without optimizer

In [11]:
# Number of cross-validation folds and training epochs per fold
N_FOLDS = 5
EPOCHS = 50

cv_acc = 0.0

for fold in range(N_FOLDS):
    print('#'*10, 'Fold', fold, '#'*10)

    # A new model is loaded for every fold: release the Keras global state left
    # behind by the previous one so memory does not keep growing across folds.
    tf.keras.backend.clear_session()

    # Start every fold from the same pretrained weights, with a fresh optimizer
    model = tf.keras.models.load_model('models/rwf_best_model_no_optimizer.h5')
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Keep only the weights of the epoch with the highest validation accuracy
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'models/{DATASET}_best_model_fold{fold}.h5',
        monitor='val_accuracy',
        mode='max',
        save_best_only=True)

    train_generator, test_generator = fecth_generators(fold)
    history = model.fit(train_generator, epochs=EPOCHS, validation_data=test_generator, verbose=2, callbacks=[model_checkpoint_callback])

    # NOTE: the held-out fold is used as validation data, so this is the
    # best-epoch accuracy measured on that same fold.
    fold_acc = max(history.history['val_accuracy'])
    print('#'*10, f'Fold {fold} accuracy:', fold_acc, '#'*10)
    cv_acc += fold_acc / N_FOLDS

########## Fold 0 ##########
Epoch 1/50


2022-07-23 10:27:16.211968: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-23 10:27:16.214760: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.216515: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-23 10:27:16.218192: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer



2022-07-23 10:27:21.670614: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8204

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2022-07-23 10:27:22.338582: W tensorflow/stream_executor/gpu/asm_compiler.cc:230] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.0
2022-07-23 10:27:22.338616: W tensorflow/stream_executor/gpu/asm_compiler.cc:233] Used ptxas at ptxas
2022-07-23 10:27:22.338753: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] UNIMPLEMENTED: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2022-07-23 10:27:23.341759: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


80/80 - 100s - loss: 0.6803 - accuracy: 0.6363 - val_loss: 0.6366 - val_accuracy: 0.6500 - 100s/epoch - 1s/step
Epoch 2/50
80/80 - 102s - loss: 0.5016 - accuracy: 0.7788 - val_loss: 0.4439 - val_accuracy: 0.8050 - 102s/epoch - 1s/step
Epoch 3/50
80/80 - 101s - loss: 0.4045 - accuracy: 0.8313 - val_loss: 0.4297 - val_accuracy: 0.8300 - 101s/epoch - 1s/step
Epoch 4/50
80/80 - 101s - loss: 0.3660 - accuracy: 0.8500 - val_loss: 0.3632 - val_accuracy: 0.8650 - 101s/epoch - 1s/step
Epoch 5/50
80/80 - 102s - loss: 0.3431 - accuracy: 0.8737 - val_loss: 0.3758 - val_accuracy: 0.8550 - 102s/epoch - 1s/step
Epoch 6/50
80/80 - 101s - loss: 0.3185 - accuracy: 0.8650 - val_loss: 0.3387 - val_accuracy: 0.8700 - 101s/epoch - 1s/step
Epoch 7/50
80/80 - 102s - loss: 0.3083 - accuracy: 0.8825 - val_loss: 0.2932 - val_accuracy: 0.9000 - 102s/epoch - 1s/step
Epoch 8/50
80/80 - 103s - loss: 0.3128 - accuracy: 0.8900 - val_loss: 0.3422 - val_accuracy: 0.8750 - 103s/epoch - 1s/step
Epoch 9/50
80/80 - 101s - l

Epoch 17/50
80/80 - 45s - loss: 0.2539 - accuracy: 0.8975 - val_loss: 0.1630 - val_accuracy: 0.9350 - 45s/epoch - 567ms/step
Epoch 18/50
80/80 - 45s - loss: 0.2517 - accuracy: 0.9087 - val_loss: 0.1996 - val_accuracy: 0.9250 - 45s/epoch - 565ms/step
Epoch 19/50
80/80 - 45s - loss: 0.2240 - accuracy: 0.9237 - val_loss: 0.1466 - val_accuracy: 0.9600 - 45s/epoch - 564ms/step
Epoch 20/50
80/80 - 45s - loss: 0.2408 - accuracy: 0.9000 - val_loss: 0.1701 - val_accuracy: 0.9550 - 45s/epoch - 567ms/step
Epoch 21/50
80/80 - 45s - loss: 0.2379 - accuracy: 0.9200 - val_loss: 0.1574 - val_accuracy: 0.9450 - 45s/epoch - 565ms/step
Epoch 22/50
80/80 - 45s - loss: 0.2367 - accuracy: 0.9125 - val_loss: 0.1361 - val_accuracy: 0.9600 - 45s/epoch - 565ms/step
Epoch 23/50
80/80 - 45s - loss: 0.2015 - accuracy: 0.9250 - val_loss: 0.1609 - val_accuracy: 0.9350 - 45s/epoch - 565ms/step
Epoch 24/50
80/80 - 45s - loss: 0.2179 - accuracy: 0.9200 - val_loss: 0.1524 - val_accuracy: 0.9600 - 45s/epoch - 564ms/step


Epoch 31/50
80/80 - 45s - loss: 0.1832 - accuracy: 0.9262 - val_loss: 0.2856 - val_accuracy: 0.8950 - 45s/epoch - 562ms/step
Epoch 32/50
80/80 - 45s - loss: 0.2004 - accuracy: 0.9287 - val_loss: 0.2565 - val_accuracy: 0.9000 - 45s/epoch - 563ms/step
Epoch 33/50
80/80 - 45s - loss: 0.1849 - accuracy: 0.9375 - val_loss: 0.4028 - val_accuracy: 0.8450 - 45s/epoch - 562ms/step
Epoch 34/50
80/80 - 45s - loss: 0.1767 - accuracy: 0.9388 - val_loss: 0.2863 - val_accuracy: 0.8950 - 45s/epoch - 562ms/step
Epoch 35/50
80/80 - 45s - loss: 0.1808 - accuracy: 0.9325 - val_loss: 0.2889 - val_accuracy: 0.8800 - 45s/epoch - 561ms/step
Epoch 36/50
80/80 - 45s - loss: 0.1714 - accuracy: 0.9475 - val_loss: 0.2700 - val_accuracy: 0.8950 - 45s/epoch - 565ms/step
Epoch 37/50
80/80 - 45s - loss: 0.1680 - accuracy: 0.9375 - val_loss: 0.2873 - val_accuracy: 0.8950 - 45s/epoch - 562ms/step
Epoch 38/50
80/80 - 45s - loss: 0.1568 - accuracy: 0.9475 - val_loss: 0.2678 - val_accuracy: 0.8900 - 45s/epoch - 566ms/step


Epoch 45/50
80/80 - 45s - loss: 0.1174 - accuracy: 0.9638 - val_loss: 0.3204 - val_accuracy: 0.9050 - 45s/epoch - 563ms/step
Epoch 46/50
80/80 - 45s - loss: 0.1364 - accuracy: 0.9525 - val_loss: 0.3900 - val_accuracy: 0.8850 - 45s/epoch - 565ms/step
Epoch 47/50
80/80 - 45s - loss: 0.1320 - accuracy: 0.9513 - val_loss: 0.2890 - val_accuracy: 0.9150 - 45s/epoch - 565ms/step
Epoch 48/50
80/80 - 45s - loss: 0.0919 - accuracy: 0.9700 - val_loss: 0.2882 - val_accuracy: 0.9150 - 45s/epoch - 568ms/step
Epoch 49/50
80/80 - 45s - loss: 0.0869 - accuracy: 0.9712 - val_loss: 0.2851 - val_accuracy: 0.9250 - 45s/epoch - 565ms/step
Epoch 50/50
80/80 - 45s - loss: 0.0979 - accuracy: 0.9712 - val_loss: 0.2763 - val_accuracy: 0.9300 - 45s/epoch - 566ms/step
########## Fold 3 accuracy: 0.9350000023841858 ##########
########## Fold 4 ##########
Epoch 1/50
80/80 - 46s - loss: 0.7020 - accuracy: 0.4750 - val_loss: 0.6930 - val_accuracy: 0.5000 - 46s/epoch - 572ms/step
Epoch 2/50
80/80 - 45s - loss: 0.6297 -

In [12]:
# Report the accuracy averaged over all folds
print(f'CV accuracy: {cv_acc}')

CV accuracy: 0.9449999928474426
