In [1]:
# Copyright 2020 Fabian Hofmann
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Evaluation of IFTM

The evaluation of IFTM using video generation models strictly depends on the generators -- apart from the reconstructor/predictor it uses, IFTM itself does not have any additional parameters when using cumulative aggregation. Therefore, in this notebook, the models that were trained in the C-VGAN (with video input) evaluation notebooks are simply loaded and passed to IFTM as its forecasting model.

##### Sources
- [Implementation of IFTM](https://gitlab.tubit.tu-berlin.de/sulandir/Thesis/blob/master/src/models/iftm.py)
- [Implementation and evaluation of C-VGAN](https://gitlab.tubit.tu-berlin.de/sulandir/Thesis/blob/master/src/eval/cvgan_2_eval.ipynb)

## Setup

In [2]:
import tensorflow as tf

# Display the TensorFlow version this notebook is executed with.
tf.__version__

'2.5.0-dev20201111'

In [3]:
# Query the default GPU device once; an empty name is treated as "no GPU available".
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device: {}'.format(gpu_name))

Default GPU Device: /device:GPU:0


In [4]:
import csv
import cv2
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import time

from src.io_.batch_generator import UniformBatchGenerator, SlidingWindowBatchGenerator, BatchLoader
from eval.anomaly_detection_evaluation import AnomalyDetectionEvaluation
from src.models.vgan import *
from src.models.iftm import IFTM

In [5]:
# Preprocessed upCam recordings, one sub-directory per day (training and sanity-check data).
base_input_path = "../../data/upCam/preprocessed_128_64/"
# Output directory of the C-VGAN evaluation run; the training checkpoints are loaded from here.
base_input_path_model = "../../output/evaluation/vgan/vgan_conditional_3d_2/"
# Labeled evaluation dataset (preprocessed videos and their label CSV files).
base_input_path_eval = "../../data/eval/"
# Destination for everything this notebook writes (result/evaluation CSVs and images).
base_output_path = "../../output/evaluation/iftm/"

### Init batch generator for training/testing

The data used is the same as in the C-VGAN evaluation notebook, but the original validation and testing sets are merged -- they simply serve as a sanity check that the model does not label mostly normal data as anomalies. Thus, they do not contribute to any of the evaluation metrics. The training set needs to be used to train the threshold model of IFTM.

The actual evaluation dataset is separate and can be found in the same [data repository](https://gitlab.tubit.tu-berlin.de/sulandir/thesis_data/eval). Each video is one hour long and consists of labeled normal and anomalous frames. The labels for these are in a separate `.csv` file (one file per video).

In [6]:
# Days whose recordings are fed to IFTM to fit its threshold model.
days_training = ["00", "01", "02", "05", "06", "07"]
day_paths_training = ["{}{}/".format(base_input_path, day) for day in days_training]
training_generator = UniformBatchGenerator(
    day_paths=day_paths_training,
    batch_size=256,
    sample_size=8,
    subsample_size=7,
)

In [7]:
# Days used for the sanity check on (mostly) normal data.
days_testing = ["03", "04", "08", "09"]
day_paths_testing = ["{}{}/".format(base_input_path, day) for day in days_testing]
testing_generator = UniformBatchGenerator(
    day_paths=day_paths_testing,
    batch_size=256,
    sample_size=8,
    subsample_size=7,
)

In [8]:
# Names (without extension) of the labeled evaluation videos.
eval_files = ["00"]
eval_paths = [
    "{}preprocessed_128_64/{}.mp4".format(base_input_path_eval, name)
    for name in eval_files
]
eval_generator = SlidingWindowBatchGenerator(
    file_paths=eval_paths,
    batch_size=256,
    sample_size=8,
    subsample_size=7,
)

## C-VGAN Models

The models here are simply defined (they are the same as in [cvgan_2_eval.ipynb](https://gitlab.tubit.tu-berlin.de/sulandir/Thesis/blob/master/src/eval/cvgan_2_eval.ipynb)), so we can utilize the tensorflow checkpointing API and not resort to fully exported (and then loaded) models that can no longer be properly accessed.

Training of these is not supported in this notebook however.

### Model init

In [9]:
def make_generator_model() -> keras.Model:
    """Assemble the two-stream conditional generator as one functional Keras model.

    The input (shape ``(7, 64, 128, 3)`` per sample) is encoded separately for
    the foreground and the background stream. The foreground stream yields two
    tensors, the background stream one; all three are merged by
    ``make_generator_stream_combiner``.

    The construction order of the streams is kept exactly as in the training
    notebook so the layer layout matches the stored checkpoints.

    Returns:
        The functional model named ``"c_vgan_generator"``.
    """
    conditioning_input: tf.Tensor = keras.Input(shape=(7, 64, 128, 3))

    # Foreground stream (presumably foreground video + blending mask -- TODO confirm
    # against src.models.vgan).
    foreground_encoding = make_encoder_foreground_stream(conditioning_input)
    foreground, mask = make_conditional_foreground_stream(foreground_encoding)

    # Background stream.
    background_encoding = make_encoder_background_stream(conditioning_input)
    background = make_conditional_background_stream(background_encoding)

    combined_output = make_generator_stream_combiner(foreground, mask, background)

    return keras.Model(inputs=conditioning_input, outputs=combined_output, name="c_vgan_generator")

In [10]:
# Build the (still untrained) generator; it is handed to the checkpoint object
# below so that restoring the checkpoint can populate its weights.
c_vgan_generator = make_generator_model()

In [11]:
# Candidate discriminator filter counts; the index picks the configuration to load.
filters = [8, 16, 32, 64]
FILTER = filters[1]

# Candidate discriminator dropout rates; the index picks the configuration to load.
dropout_rates = [0., 0.1, 0.2, 0.3, 0.4, 0.5]
DROPOUT_RATE = dropout_rates[3]

# The discriminator is not used for forecasting; it is created because it is one of
# the objects tracked by the checkpoint that is restored further below.
c_vgan_discriminator = make_discriminator_model(FILTER, DROPOUT_RATE)

### Optimizers init

Because the original checkpoints also store the optimizers, these are initialized here for completeness' sake, although they are not used in any way.

In [12]:
# Candidate learning-rate multipliers; the effective rate is sqrt(multiplier) * 2e-4.
learning_rates_mult = [0.5, 1, 2, 4]
# Also embedded in the checkpoint directory name (see OUTPUT_PREFIX_CKPT).
LEARNING_RATE = math.sqrt(learning_rates_mult[1]) * 2e-4

# Both optimizers are tracked by the checkpoint object created below; no training
# step is performed with them in this notebook.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, beta_1=0.5)

## Set up separate input and outputs for configs

Because the models were built using different parameter permutations and their checkpoints were stored in different directories (during the C-VGAN evaluation), this notebook can only evaluate one parameter permutation at a time - loading multiple models into a single tensorflow session causes memory leaks (and requires a tf session clear). Therefore, the config that one wants to use with IFTM has to be explicitly stated so the (latest) checkpoint of that config will be loaded.

Any hyper-parameters that were set in training that are not already set have to be defined for that.

In [13]:
# Candidate lambda values; the selected one is only used in this notebook to
# reconstruct the checkpoint directory name (see OUTPUT_PREFIX_CKPT).
lambdas = [5, 8, 10, 14, 20]
LAMBDA = lambdas[3]

In [14]:
# Candidate batch sizes. NOTE: BATCH_SIZE only feeds the checkpoint directory name
# (see OUTPUT_PREFIX_CKPT); the batch generators in this notebook are created with
# their own batch_size=256.
batch_sizes = [4, 8, 16, 32, 64, 128, 256]
BATCH_SIZE = batch_sizes[4]

In addition, to allow the evaluation of earlier versions of the model that were trained for fewer epochs (see checkpoint loading), there is an additional parameter passed to the prefix:

In [15]:
# Number of training epochs of the model version to evaluate. It selects the
# checkpoint to restore (index EPOCHS // 10) and is embedded in OUTPUT_PREFIX.
EPOCHS = 50

In [16]:
# Name of the checkpoint directory: determined by the training configuration only.
OUTPUT_PREFIX_CKPT = "cvgan_2_dfilter-{}_ddropout-{}_lambda-{}_batchsize-{}_learningrate-{}".format(
    FILTER, DROPOUT_RATE, LAMBDA, BATCH_SIZE, LEARNING_RATE
)

# Prefix for everything this notebook writes: same configuration plus the
# zero-padded epoch count of the evaluated model version.
OUTPUT_PREFIX = OUTPUT_PREFIX_CKPT + "_epochs-{:04d}".format(EPOCHS)

## Load latest checkpoint

In [17]:
checkpoint_dir = "{}training_checkpoints/{}/".format(base_input_path_model, OUTPUT_PREFIX_CKPT)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

# The tracked objects use the same keyword names as in this call's original form;
# both models and both optimizers are registered with the checkpoint.
checkpoint = tf.train.Checkpoint(
    generator_optimizer=generator_optimizer,
    discriminator_optimizer=discriminator_optimizer,
    generator=c_vgan_generator,
    discriminator=c_vgan_discriminator,
)

One can either load the latest checkpoint of a trained model which is desired, or one can manually override the parameter of `checkpoint.restore()` to load a checkpoint of an earlier epoch; this is akin to "early stopping", to avoid overfitting if the model's quality worsened over the course of training.

In [18]:
# Option A: newest checkpoint in the directory (returns None if there is none).
# n_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
# Option B: checkpoint of a specific training stage. The index EPOCHS // 10
# presumably reflects one checkpoint being saved every 10 epochs -- TODO confirm
# against the C-VGAN training notebook.
n_checkpoint = checkpoint_prefix + "-{}".format(EPOCHS // 10)

# Validate BEFORE restoring. A hand-built path is always a non-empty string, so a
# plain truthiness test can never detect a missing checkpoint; TF2 checkpoints
# always come with a "<prefix>.index" file, which is what is checked here. The
# `not n_checkpoint` part covers option A returning None.
if not n_checkpoint or not os.path.exists(n_checkpoint + ".index"):
    raise FileNotFoundError("Checkpoint of model not found!")

checkpoint.restore(n_checkpoint)
print("Restored from {}".format(n_checkpoint))

Restored from ../../output/evaluation/vgan/vgan_conditional_3d_2/training_checkpoints/cvgan_2_dfilter-16_ddropout-0.3_lambda-14_batchsize-64_learningrate-0.0002/ckpt-5


## Make predictions on evaluation videos

Before using IFTM for anomaly detection, the evaluation video samples are fed to the video generator to make next frame (`n+1`) predictions over the entire dataset. Both generated and actual videos are saved to disk. This is not directly used in the evaluation but serves as a visual guide for later use.

Note that because the evaluation data must not be shuffled (the time component is crucial for evaluation purposes), a `BatchLoader` must not be used and the data has to be read sequentially, batch by batch.

***DISCLAIMER:*** Will cost a ton of memory (and time) due to a high number of redundant frames (x8), that will cost high amounts of storage space without much gain of information.

In [19]:
# anim_file_dir_real = base_output_path + "anim/" + "real/"
# anim_file_dir_gen = base_output_path + "anim/" + OUTPUT_PREFIX + "/"
#
# if not os.path.exists(anim_file_dir_real):
#     os.makedirs(anim_file_dir_real)
#
# if not os.path.exists(anim_file_dir_gen):
#     os.makedirs(anim_file_dir_gen)

In [20]:
# start_gen_testing = time.time()
#
# count = 0
# for k in range(len(eval_generator)):
#     input_videos, real_videos = eval_generator[k]
#     generated_videos = c_vgan_generator(input_videos, training=False).numpy()
#     for i in range(len(generated_videos)):
#         write_gif(anim_file_dir_real + "{:06d}.gif".format(count), real_videos[i])
#         write_gif(anim_file_dir_gen + "{:06d}.gif".format(count), generated_videos[i])
#         count += 1
#
# print('Time for generation of evaluation videos is {:.4f} sec'.format(time.time() - start_gen_testing))

## IFTM

This section is a copy of our initial [iftm notebook](https://gitlab.tubit.tu-berlin.de/sulandir/Thesis/blob/master/src/models/iftm.ipynb).

### Define the error function

Only the last frame of each sequence matters - only forecasting, not reconstruction of past frames is relevant to the identity function result.

In [21]:
# Unreduced loss: one error value per row of the (flattened) inputs.
reconstruction_err = tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
# reconstruction_err = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)

def error_function(predicted: tf.Tensor, actual: tf.Tensor) -> tf.Tensor:
    """Compute one forecasting error per sample from the last frame only.

    Args:
        predicted: Batch of generated videos; axis 0 is the batch, axis 1 the frames.
        actual: Batch of real videos with the same layout as ``predicted``.

    Returns:
        The result of ``reconstruction_err`` applied to the flattened last
        frames, i.e. one value per sample in the batch.
    """
    def _flat_last_frame(videos: tf.Tensor) -> tf.Tensor:
        # Keep only the final frame and collapse it to a vector per sample.
        final_frame = videos[:, -1]
        return tf.reshape(final_frame, shape=[videos.shape[0], -1])

    return reconstruction_err(_flat_last_frame(predicted), _flat_last_frame(actual))

### Init and train IFTM

In [22]:
# Combine the restored generator (used as IFTM's forecasting model) with the
# per-sample error function defined above.
iftm = IFTM(c_vgan_generator, error_function)

The training data that was used to train the C-VGAN model is now rerun on the model to compute the training (forecasting) error to train the threshold model of IFTM as well.

In [23]:
start_tm_training = time.time()
# Re-run the training data through the generator to fit IFTM's threshold model.
iftm.train_threshold(training_generator, max_queue_size=8, no_workers=4)
tm_training_seconds = time.time() - start_tm_training

print('Time for TM training is {:.4f} sec'.format(tm_training_seconds))

Time for TM training is 469.2395 sec


Print out the threshold:

In [24]:
# Threshold held by the IFTM instance after `train_threshold` ran; the same value
# is written next to every IF result in the result CSV further below.
print(iftm.threshold)

0.09476271


### Make test predictions

As said in the beginning, the test set is the union of the testing and validation sets used during the training of the C-VGAN model. This is not an actual evaluation but simply a check of how the model performs on data that should be considered (mostly) normal.

In [25]:
start_iftm_testing = time.time()

# Batches are prefetched by background workers; order does not matter for this
# sanity check, so the (shuffling-agnostic) BatchLoader can be used.
testing_batches = BatchLoader(testing_generator, max_queue_size=8, no_workers=4)
predictions = []
try:
    for _ in range(len(testing_batches)):
        b_x, b_y = testing_batches.get_batch()
        # Only the anomaly labels are needed here; the IF values are discarded.
        _, p = iftm.predict(b_x, b_y)
        predictions.append(p)
finally:
    # Always stop the workers, even if fetching or predicting fails or the cell
    # is interrupted -- otherwise they would linger in the kernel.
    testing_batches.shutdown_workers()

print('Time for IFTM testing is {:.4f} sec'.format(time.time() - start_iftm_testing))

Time for IFTM testing is 285.6981 sec


Count and output the number of "false" (0, normal) and "true" (1, anomaly) predictions.

In [26]:
# Collapse the per-batch results into one flat vector of labels.
predictions = np.asarray(predictions).ravel()
# Index 0 holds the number of "normal" predictions, index 1 the number of "anomaly" ones.
p_count = np.bincount(predictions)

print(p_count)

[178281  36759]


## Evaluation

For the evaluation, a separate [evaluation dataset](https://gitlab.tubit.tu-berlin.de/sulandir/thesis_data/eval) was created. It consists of one or more one hour long videos and a separate `.csv` file with the true labels (`0,1`). Therefore, one can make predictions using the IFTM model and then compute the evaluation metrics using the true labels.

### Make predictions

In [27]:
start_iftm_testing = time.time()

# The evaluation data is read sequentially (no BatchLoader) so that the temporal
# order of the samples is preserved.
if_results, predictions = [], []
for batch_idx in range(len(eval_generator)):
    batch_inputs, batch_targets = eval_generator[batch_idx]
    batch_if_values, batch_labels = iftm.predict(batch_inputs, batch_targets)
    if_results.append(batch_if_values)
    predictions.append(batch_labels)

# One flat vector each: the IF (error) value and the anomaly label per sample.
if_results = np.asarray(if_results).ravel()
predictions = np.asarray(predictions).ravel()

print('Time for IFTM eval is {:.4f} sec'.format(time.time() - start_iftm_testing))

Time for IFTM eval is 29.3816 sec


### Evaluate predictions

The `AnomalyDetectionEvaluation` class loads the true labels and then computes the evaluation metrics for the given predicted labels of each individual sample.

In [28]:
# Load the true labels belonging to the evaluated video. The file name is derived
# from `eval_files` (instead of repeating the literal "00") so the labels cannot
# silently get out of sync with the video that was fed to IFTM.
# NOTE: only a single label file is loaded; with more than one entry in
# `eval_files` the predictions of all videos would be compared against the labels
# of the first one only.
anomaly_detection_eval = AnomalyDetectionEvaluation(base_input_path_eval + "labels/" + eval_files[0] + ".csv")

For evaluation purposes, the predictions for the first seven frames, for which there are no actual predictions, are explicitly set to 0 so that the shapes of the predictions and the true labels match. The same is done for the last frames, for which no predictions were made either because there were not enough of them to form another batch.

In [29]:
# Frames without a prediction are padded with 0 ("normal"): the first 7 frames
# and the trailing 139 frames that did not fill another batch.
leading_padding = np.zeros(7)
trailing_padding = np.zeros(139)
padded_predictions = np.concatenate((leading_padding, predictions, trailing_padding)).astype(np.bool_)

eval_results = anomaly_detection_eval.evaluate(padded_predictions)
print(eval_results)

{'accuracy': 0.7584966234916417, 'recall': 0.6014138118542687, 'true negative rate': 0.8275559805562196, 'precision': 0.6052535570959504, 'negative predictive value': 0.8252542911633821, 'false negative rate': 0.3985861881457314, 'false positive rate': 0.17244401944378038, 'f1 measure': 0.6033275752341122}


### Store prediction and evaluation results

The predictions of IFTM, both the forecasting errors for each sample and the anomaly detection results, are written to a CSV file to be used later for graphs or diagrams. The evaluation metrics are also written out, but to a separate file.

Because the prediction error function can be adjusted, `OUTPUT_PREFIX` is altered as well to differentiate between different function usages.

In [30]:
# Append the name of the loss object so results obtained with different error
# functions end up in different files.
OUTPUT_PREFIX_EXT = OUTPUT_PREFIX + "_errorfn-" + reconstruction_err.name

In [31]:
RES_FILE = base_output_path + OUTPUT_PREFIX_EXT + "_res.csv"

# Make sure the output directory exists; on a fresh checkout nothing has created
# it before this cell runs.
os.makedirs(base_output_path, exist_ok=True)

# One row per sample: IF (error) value, the constant threshold, and the label.
# The context manager closes the file even if `np.savetxt` raises.
with open(RES_FILE, 'w') as file:
    # noinspection PyTypeChecker
    np.savetxt(file, np.array(list(zip(if_results, [iftm.threshold]*len(if_results), predictions))),
               header="if_value;threshold;prediction", fmt="%.8f", delimiter=";")

In [32]:
EVAL_FILE = base_output_path + OUTPUT_PREFIX_EXT + "_eval.csv"

# Make sure the output directory exists before writing.
os.makedirs(base_output_path, exist_ok=True)

# `newline=''` is what the csv module expects for files it writes to; without it
# extra carriage returns appear on platforms that translate line endings. The
# context manager closes the file even if writing fails.
with open(EVAL_FILE, 'w', newline='') as file:
    writer = csv.writer(file, delimiter=';')
    # One "metric;value" row per evaluation metric.
    writer.writerows(eval_results.items())

### (Addendum) Visualize IF results

Each of the IF results is in a range of `[0,1]`, only corresponds to the last frame of a sample and thus can be visualized in grayscale as a singular picture (for each sample). To do this, one needs to alter the error function from a per video (last frame) mean absolute error to a pixel-wise error (mean error over the RGB values) of the last frame for each video.

In [33]:
def error_function(predicted: tf.Tensor, actual: tf.Tensor) -> tf.Tensor:
    """Compute the pixel-wise error of the last frame of every video.

    Unlike the per-sample variant defined earlier in this notebook (which this
    definition shadows from here on), the frames are not flattened, so
    ``reconstruction_err`` only reduces over the last axis and one error value
    per pixel remains.

    Args:
        predicted: Batch of generated videos; axis 1 is the frame axis.
        actual: Batch of real videos with the same layout.

    Returns:
        The unreduced ``reconstruction_err`` of the two last frames.
    """
    last_predicted_frame = predicted[:, -1]
    last_actual_frame = actual[:, -1]
    return reconstruction_err(last_predicted_frame, last_actual_frame)

In [34]:
# Output locations: real frames are shared across configurations, predicted and
# error frames are stored per configuration (and, for errors, per loss name).
img_dir_real = base_output_path + "img/" + "real/"
img_dir_pred = base_output_path + "img/" + OUTPUT_PREFIX + "/predicted/"
img_dir_err = base_output_path + "img/" + OUTPUT_PREFIX + "/error/" + reconstruction_err.name + "/"

# `exist_ok=True` creates missing directories and is a no-op for existing ones,
# without the race window of a separate "exists" check.
for img_dir in (img_dir_real, img_dir_pred, img_dir_err):
    os.makedirs(img_dir, exist_ok=True)

In [35]:
start_gen_testing = time.time()

# Running index over all samples of all batches; used as the file name.
count = 0
for batch_idx in range(len(eval_generator)):
    input_videos, real_videos = eval_generator[batch_idx]
    generated_videos = c_vgan_generator(input_videos, training=False)
    real_videos_tensor = tf.convert_to_tensor(real_videos)
    errors_per_sample = error_function(generated_videos, real_videos_tensor).numpy()

    # prepare frames for output
    # (x * 127.5 + 127.5 presumably maps frames normalized to [-1, 1] onto [0, 255] -- TODO confirm)
    last_generated = generated_videos.numpy()[:, -1]
    last_real = real_videos[:, -1]
    predicted_frames = (last_generated * 127.5 + 127.5).astype("uint8")
    real_frames = (last_real * 127.5 + 127.5).astype("uint8")
    error_frames = errors_per_sample * 127.5 + 127.5

    for real_frame, predicted_frame, error_frame in zip(real_frames, predicted_frames, error_frames):
        frame_name = "{:06d}.png".format(count)
        # actual last frame
        cv2.imwrite(img_dir_real + frame_name, real_frame)
        # predicted last frame
        cv2.imwrite(img_dir_pred + frame_name, predicted_frame)
        # error heatmap of frame
        plt.imsave(img_dir_err + "/" + frame_name, error_frame, cmap='gray_r')
        count += 1

print('Time for generation of forecasting frames is {:.4f} sec'.format(time.time() - start_gen_testing))

Time for generation of forecasting frames is 125.9141 sec
