In [1]:
DATASET = 'crowd'

In [2]:
# Use output of OpenPose without background
# Paths to videos for training
PATHS = [f"../datasets/{DATASET}_dataset/original_data/", f"../datasets/{DATASET}_dataset/openpose_gamma/"]

FRAME_FUNC = 'frame_diff'
# To use frame diff to weight t (current) or t+1
WEIGHT_CURRENT = True

In [3]:
import tensorflow as tf

In [4]:
!export TF_FORCE_GPU_ALLOW_GROWTH=True
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    tf.config.set_visible_devices(gpus[0:1], 'GPU')

2022-07-25 19:14:07.594464: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.596206: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.597898: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.604194: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.605881: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from S

Set seed

In [5]:
SEED = 0
import random
import numpy as np
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Load data

In [6]:
import numpy as np
import cv2

In [7]:
FRAMES_PER_VIDEO = 50 + 1
VIDEO_WIDTH, VIDEO_HEIGHT = 100, 100
N_CHANNELS = 3

def load_videos(video_IDs: list, video_frames: int = FRAMES_PER_VIDEO, video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False) -> tuple:
    videos = np.empty((len(video_IDs), video_frames, video_height, video_width, video_channels), dtype=dtype)

    for i, video_ID in enumerate(video_IDs):
        cap = cv2.VideoCapture(video_ID)
        original_n_frames = int(cap. get(cv2. CAP_PROP_FRAME_COUNT))
        
        # Indexes of frames to be kept to comply with video_frames
        frames_idx = set(np.round(np.linspace(0, original_n_frames - 1, video_frames)).astype(int))

        frames = []
        index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if index in frames_idx:
                frame = cv2.resize(frame, (video_width, video_height)).astype(dtype)
                if normalize:
                    frame /= 255.0
                frames.append(frame)
            index += 1
        cap.release()

        if len(frames) != video_frames:
            # Complete with repeated frames in the begging and the end of the video
            n_repeats = (video_frames - len(frames)) / 2
            # In case n_repeats is decimal, first frames will be rounded to the nearest integer
            beggining_frames = np.tile(frames[0], [round(n_repeats + 0.001), 1, 1, 1])
            end_frames = np.tile(frames[-1], [int(n_repeats), 1, 1, 1])
            frames = np.concatenate([beggining_frames, frames, end_frames])
        
        videos[i,] = np.array(frames)
        

    return videos

DataGenerator class to load videos per batch, in case all videos do not fit in memory

In [8]:
import gc
from tensorflow.keras import backend as K

class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, video_IDs: list, video_labels: list, batch_size: int, paths: list = [''], video_width: int = VIDEO_WIDTH, video_height: int = VIDEO_HEIGHT,
                video_frames: int = FRAMES_PER_VIDEO, video_channels: int = N_CHANNELS, dtype = np.float32, normalize: bool = False, shuffle: bool = True):
        self.video_IDs = video_IDs
        self.video_labels = video_labels
        self.batch_size = batch_size
        self.paths = paths
        self.video_width = video_width
        self.video_height = video_height
        self.video_frames = video_frames
        self.video_channels = video_channels
        self.dtype = dtype
        self.normalize = normalize
        self.shuffle = shuffle

    def __len__(self):
        return len(self.video_IDs) // self.batch_size

    def __getitem__(self, idx):
        batch_IDs = self.video_IDs[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_labels = self.video_labels[idx*self.batch_size:(idx+1)*self.batch_size]
        input_videos = []
        
        for index, path in enumerate(self.paths):
            batch_IDs_full_path = [path+ID for ID in batch_IDs]

            videos = load_videos(batch_IDs_full_path, self.video_frames, self.video_width, 
                                         self.video_height, self.video_channels, self.dtype, self.normalize)
            
            input_videos.append(videos)
                    
        return input_videos, batch_labels
            
    
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle:
            # shuffle video_IDs and video_labels together
            temp_list = list(zip(self.video_IDs, self.video_labels))
            np.random.shuffle(temp_list)
            self.video_IDs, self.video_labels = zip(*temp_list)
        # Clear memory after epochs
        gc.collect()
        #K.clear_session()

## Videos to load

In [9]:
import pandas as pd
folds = pd.read_csv(f'../datasets/{DATASET}_dataset/folds.csv')

def fecth_generators(fold):

    train_df = folds[folds.fold != fold]
    test_df = folds[folds.fold == fold]

    train_video_IDs = train_df.video.values
    test_video_IDs = test_df.video.values

    train_video_labels = train_df.label.values
    test_video_labels = test_df.label.values

    train_generator = DataGenerator(train_video_IDs, train_video_labels, batch_size=10, paths=PATHS)
    test_generator = DataGenerator(test_video_IDs, test_video_labels, batch_size=10, paths=PATHS)

    return train_generator, test_generator

# Frame functions

In [10]:
import tensorflow.keras.backend as K

def tf_frame_diff(video):
    return video[1:] - video[:-1]

frame_func = tf_frame_diff

# Load pretrained best model without optimizer

In [11]:
cv_acc = 0.0

for fold in range(5):
    print('#'*10, 'Fold', fold, '#'*10)

    model = tf.keras.models.load_model('models/rwf_best_model_no_optimizer.h5')
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'models/{DATASET}_best_model_fold{fold}.h5',
        monitor='val_accuracy',
        mode='max',
        save_best_only=True)

    train_generator, test_generator = fecth_generators(fold)
    history = model.fit(train_generator, epochs=5, validation_data=test_generator, verbose=2, callbacks=[model_checkpoint_callback])

    fold_acc = max(history.history['val_accuracy'])
    print('#'*10, f'Fold {fold} accuracy:', fold_acc, '#'*10)
    cv_acc += fold_acc / 5

########## Fold 0 ##########


2022-07-25 19:14:07.717784: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-25 19:14:07.720659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.723410: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-25 19:14:07.726045: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Epoch 1/5


2022-07-25 19:14:13.516977: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8204

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2022-07-25 19:14:14.236053: W tensorflow/stream_executor/gpu/asm_compiler.cc:230] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.0
2022-07-25 19:14:14.236087: W tensorflow/stream_executor/gpu/asm_compiler.cc:233] Used ptxas at ptxas
2022-07-25 19:14:14.236227: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] UNIMPLEMENTED: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2022-07-25 19:14:15.285803: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


19/19 - 23s - loss: 0.5726 - accuracy: 0.7368 - val_loss: 0.5363 - val_accuracy: 0.7200 - 23s/epoch - 1s/step
Epoch 2/5
19/19 - 19s - loss: 0.4378 - accuracy: 0.7842 - val_loss: 0.4061 - val_accuracy: 0.8000 - 19s/epoch - 1s/step
Epoch 3/5
19/19 - 20s - loss: 0.3697 - accuracy: 0.8316 - val_loss: 0.3990 - val_accuracy: 0.8600 - 20s/epoch - 1s/step
Epoch 4/5
19/19 - 18s - loss: 0.3714 - accuracy: 0.8474 - val_loss: 0.4692 - val_accuracy: 0.8200 - 18s/epoch - 949ms/step
Epoch 5/5
19/19 - 19s - loss: 0.3395 - accuracy: 0.8579 - val_loss: 0.2785 - val_accuracy: 0.8800 - 19s/epoch - 984ms/step
########## Fold 0 accuracy: 0.8799999952316284 ##########
########## Fold 1 ##########
Epoch 1/5
19/19 - 29s - loss: 0.7384 - accuracy: 0.7316 - val_loss: 0.7218 - val_accuracy: 0.7500 - 29s/epoch - 2s/step
Epoch 2/5
19/19 - 26s - loss: 0.5233 - accuracy: 0.7789 - val_loss: 0.5663 - val_accuracy: 0.7250 - 26s/epoch - 1s/step
Epoch 3/5
19/19 - 26s - loss: 0.3771 - accuracy: 0.8316 - val_loss: 0.5262 - 

In [12]:
print('CV accuracy:', cv_acc)

CV accuracy: 0.8409999847412111
