<a href="https://colab.research.google.com/github/brngl/spatio-temporal-anomaly-detection-with-causalLSTM-networks/blob/master/Spatio_Temporal_Anomaly_Detection_in_videos_with_Causal_LSTM_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Spatio-Temporal Anomaly Detection in videos with Causal LSTM networks

This notebook implements a model based on Causal LSTM networks for video anomaly detection. The model is a modification of [PredRNN++](https://github.com/Yunbo426/predrnn-pp) (GitHub link), the work by Yunbo Wang et al.



---

Yunbo Wang et al. paper:
[PredRNN++: Towards A Resolution of the Deep-in-Time Dilemma in Spatiotemporal Predictive Learning](https://arxiv.org/abs/1804.06300)

---

In [0]:
# Shell escape: print the NVIDIA driver/GPU status table for this runtime.
!nvidia-smi

Sun Dec 22 19:12:13 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.44       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
import tensorflow as tf
# Being the last expression of the cell, the returned device name is shown as
# the cell output (an empty string means TensorFlow found no GPU).
tf.test.gpu_device_name()

### Save/restore variables

In [0]:
# Set `iterazione` to 1 to start a new run.
# To resume from a checkpoint, set it to the checkpoint iteration + 1
# (e.g. for a checkpoint saved at iteration 1000, use iterazione = 1001).
iterazione = 1

# File prefix of the checkpoint written at the iteration preceding the start.
_ckpt_prefix = 'checkpoints-pred/mnist_predrnn_pp/model.ckpt-' + str(iterazione - 1)
model_checkpoint_meta = _ckpt_prefix + '.meta'
# On a fresh run there is nothing to restore: an empty string disables it.
model_checkpoint = '' if iterazione == 1 else _ckpt_prefix + '.index'

In [0]:
# Echo the checkpoint index path (empty on a fresh run) and the meta-graph path.
for _ckpt_file in (model_checkpoint, model_checkpoint_meta):
    print(_ckpt_file)

### Google Drive Mounting

In [0]:
# NOTE(review): this cell holds only the instruction string below; it contains
# no mounting code. The cells that follow use relative paths, so the working
# directory presumably has to be set by hand before running them — confirm.
'''
Mount Google Drive from your account and move in predrnn-pp-vad directory.
'''

### Parameters definition

In [0]:
import os.path
import time
import numpy as np
import tensorflow as tf
import cv2
import sys
import random
from nets import models_factory
from data_provider import datasets_factory
from utils import preprocess
from utils import metrics
from skimage.measure import compare_ssim
from matplotlib import pyplot as plt


# -----------------------------------------------------------------------------
# All run parameters are declared as TensorFlow flags and read through FLAGS.
_flags = tf.app.flags
FLAGS = _flags.FLAGS

# Workaround flag (author's note: "to fix a bug"); presumably it absorbs the
# '-f' argument the notebook kernel is launched with — TODO confirm.
_flags.DEFINE_string('f', '', 'kernel')

# ---- data I/O ---------------------------------------------------------------
_flags.DEFINE_string('dataset_name', 'mnist', 'The name of dataset.')
# Alternative training file noted by the author: anom_train.npz
_flags.DEFINE_string(
    'train_data_paths', 'data/ano-moving-mnist/new_training-set.npz',
    'train data paths.')
_flags.DEFINE_string(
    'valid_data_paths', 'data/ano-moving-mnist/anom_valid.npz',
    'validation data paths.')
_flags.DEFINE_string(
    'save_dir', 'checkpoints-pred/mnist_predrnn_pp',
    'dir to store trained net.')
_flags.DEFINE_string(
    'gen_frm_dir', 'results-pred/mnist_predrnn_pp',
    'dir to store result.')

# ---- model ------------------------------------------------------------------
_flags.DEFINE_string('model_name', 'predrnn_pp', 'The name of the architecture.')
# Default comes from the save/restore cell: '' on a fresh run, otherwise a path
# such as checkpoints/mnist_predrnn_pp/model.ckpt-10.index
_flags.DEFINE_string(
    'pretrained_model', model_checkpoint,
    'file of a pretrained model to initialize from.')
_flags.DEFINE_integer('input_length', 10, 'encoder hidden states.')
_flags.DEFINE_integer('seq_length', 20, 'total input and output length.')
_flags.DEFINE_integer('img_width', 64, 'input image width.')
_flags.DEFINE_integer('img_channel', 1, 'number of image channel.')
_flags.DEFINE_integer('stride', 1, 'stride of a convlstm layer.')
_flags.DEFINE_integer('filter_size', 5, 'filter of a convlstm layer.')
_flags.DEFINE_string(
    'num_hidden', '128,64,64,64',
    'COMMA separated number of units in a convlstm layer.')
_flags.DEFINE_integer('patch_size', 4, 'patch size on one dimension.')
_flags.DEFINE_boolean('layer_norm', True, 'whether to apply tensor layer norm.')

# ---- optimization -----------------------------------------------------------
_flags.DEFINE_float('lr', 0.001, 'base learning rate.')
_flags.DEFINE_boolean(
    'reverse_input', True,
    'whether to reverse the input frames while training.')
_flags.DEFINE_integer('batch_size', 8, 'batch size for training.')
_flags.DEFINE_integer('max_iterations', 80000, 'max num of steps.')
_flags.DEFINE_integer(
    'display_interval', 1,
    'number of iters showing training loss.')
# Alternative value noted by the author: 2000
_flags.DEFINE_integer('test_interval', 10000, 'number of iters for test.')
# Alternative value noted by the author: 10000
_flags.DEFINE_integer(
    'snapshot_interval', 500,
    'number of iters saving models.')

### Definition of the model

In [0]:
# Build the graph (placeholders, model, optimizer), open the session and, when
# a pretrained model is configured, restore its weights.

# Size of the patch grid along one image axis and depth of one patch.
# Integer division keeps the placeholder dimensions ints on Python 3 as well
# as Python 2 (a float dimension is rejected by tf.placeholder).
patch_grid = FLAGS.img_width // FLAGS.patch_size
patch_depth = FLAGS.patch_size * FLAGS.patch_size * FLAGS.img_channel

# inputs: a whole patch-encoded sequence per sample
s_x = tf.placeholder(tf.float32,
                     [FLAGS.batch_size,
                      FLAGS.seq_length,
                      patch_grid,
                      patch_grid,
                      patch_depth])

# per-frame mask fed alongside the inputs; it has one entry for each frame
# after the first `input_length` ones, minus one
s_mask_true = tf.placeholder(tf.float32,
                             [FLAGS.batch_size,
                              FLAGS.seq_length - FLAGS.input_length - 1,
                              patch_grid,
                              patch_grid,
                              patch_depth])

grads = []
loss_train = []
s_pred_seq = []
# NOTE(review): this placeholder is fed by train(), but the optimizer below is
# built with the constant FLAGS.lr, so the fed value does not reach it.
s_tf_lr = tf.placeholder(tf.float32, shape=[])
num_hidden = [int(x) for x in FLAGS.num_hidden.split(',')]
print('Num. Hidden: ', num_hidden)
num_layers = len(num_hidden)
with tf.variable_scope(tf.get_variable_scope()):
    # define a model; element 0 of the result is the generated frames,
    # element 1 the loss
    output_list = models_factory.construct_model(
        FLAGS.model_name, s_x,
        s_mask_true,
        num_layers, num_hidden,
        FLAGS.filter_size, FLAGS.stride,
        FLAGS.seq_length, FLAGS.input_length,
        FLAGS.layer_norm)
    gen_ims = output_list[0]
    loss = output_list[1]
    # Keep the generated frames from index input_length - 1 onward
    # (presumably the predictions for the frames after the input ones —
    # TODO confirm against models_factory).
    pred_ims = gen_ims[:, FLAGS.input_length - 1:]
    s_loss_train = loss / FLAGS.batch_size
    # gradients
    all_params = tf.trainable_variables()
    grads.append(tf.gradients(loss, all_params))
    s_pred_seq.append(pred_ims)

s_train_op = tf.train.AdamOptimizer(FLAGS.lr).minimize(loss)

# session
# The saver is created after the optimizer so that it covers every global
# variable present at this point, including those the optimizer added.
variables = tf.global_variables()
s_saver = tf.train.Saver(variables)
init = tf.global_variables_initializer()
configProt = tf.ConfigProto()
configProt.gpu_options.allow_growth = True
configProt.allow_soft_placement = True
s_sess = tf.Session(config=configProt)
s_sess.run(init)
print("C'è un pretrained model?")
print(FLAGS.pretrained_model)
if FLAGS.pretrained_model:
    print("Sto facendo il restore...")
    # Restore through the saver built on the session's own graph. Resetting
    # the default graph while the session is open and importing a meta graph
    # into the new one would leave the saver tied to a graph the session does
    # not run.
    latest_ckpt = tf.train.latest_checkpoint(FLAGS.save_dir)
    if latest_ckpt is None:
        raise ValueError('No checkpoint found in ' + FLAGS.save_dir)
    s_saver.restore(s_sess, latest_ckpt)
    print('...restore completato')

### Functions definition for:

- training
- testing
- checkpoint saving

In [0]:
def train(inputs, lr, mask_true):
    """Run one optimisation step on a batch and return its loss.

    Args:
        inputs: batch fed to the `s_x` placeholder.
        lr: value fed to the `s_tf_lr` placeholder.
        mask_true: mask fed to the `s_mask_true` placeholder.

    Returns:
        The value of `s_loss_train` evaluated in the same session call that
        executes `s_train_op`.
    """
    feed_dict = {
        s_x: inputs,
        s_tf_lr: lr,
        s_mask_true: mask_true,
    }
    batch_loss, _ = s_sess.run((s_loss_train, s_train_op), feed_dict)
    return batch_loss


def test(inputs, mask_true):
    feed_dict = {s_x: inputs}
    feed_dict.update({s_mask_true: mask_true})
    gen_ims = s_sess.run(s_pred_seq, feed_dict)
    return gen_ims


def save(itr):
    """Write a checkpoint of the session into FLAGS.save_dir.

    Args:
        itr: iteration number, passed to the saver as `global_step`.
    """
    target_dir = FLAGS.save_dir
    s_saver.save(s_sess, os.path.join(target_dir, 'model.ckpt'), global_step=itr)
    print('saved to ' + target_dir)

### Directory management
Manages the checkpoint and results directories.

On a fresh run (`iterazione == 1`), any checkpoint and results directories that already exist are deleted and recreated.

In [0]:
# Prepare the checkpoint and results directories.
# On a fresh run (iterazione == 1) whatever an earlier run left behind is
# removed first. In every case both directories are then created:
# tf.gfile.MakeDirs succeeds when the directory already exists, so a resumed
# run also ends up with a results directory to write validation frames into.
for run_dir in (FLAGS.save_dir, FLAGS.gen_frm_dir):
    if iterazione == 1 and tf.gfile.Exists(run_dir):
        tf.gfile.DeleteRecursively(run_dir)
    tf.gfile.MakeDirs(run_dir)


### Dataset loading

In [0]:
# Build the batch providers for the training and the validation data.
train_input_handle, test_input_handle = datasets_factory.data_provider(
    FLAGS.dataset_name,
    FLAGS.train_data_paths,
    FLAGS.valid_data_paths,
    FLAGS.batch_size,
    FLAGS.img_width,
)


clips
(2, 3000, 2)
dims
(1, 3)
input_raw_data
(60000, 1, 64, 64)
clips
(2, 10000, 2)
dims
(1, 3)
input_raw_data
(200000, 1, 64, 64)


These parameters control the training schedule. The value of *eta* decreases by *delta* at every iteration until it reaches zero at the 50000th iteration.
Whenever a checkpoint is restored, *eta* must resume from the value it had when that checkpoint was saved; this value is read back from `eta.txt`.



In [0]:
# Parameters of the training schedule.
lr = FLAGS.lr

delta = 0.00002   # amount subtracted from eta at each training iteration
base = 0.99998    # only referenced by a commented-out alternative schedule
eta = 1           # starting value for a fresh run
if iterazione > 1:
    # Resuming: continue from the eta stored when the checkpoint was written.
    # The context manager closes the file handle once the value is read.
    with open("eta.txt", "r") as eta_file:
        eta = float(eta_file.read())

### Training phase
Every *FLAGS.test_interval* iterations a validation step is executed: the predicted images are saved and the following metrics are measured:
- mse
- ssim
- psnr
- fmae
- sharpness

The generated images are saved in the results directory (*FLAGS.gen_frm_dir*).

In [0]:
# Main loop: one training step per iteration, with periodic validation and
# checkpointing.

# Loop-invariant sizes. Integer division keeps every dimension an int on both
# Python 2 and Python 3 (NumPy rejects float shapes).
num_pred_frames = FLAGS.seq_length - FLAGS.input_length
patch_grid = FLAGS.img_width // FLAGS.patch_size
patch_depth = FLAGS.patch_size ** 2 * FLAGS.img_channel
mask_shape = (FLAGS.batch_size, num_pred_frames - 1,
              patch_grid, patch_grid, patch_depth)

# `range` is available on Python 2 and 3 alike (`xrange` is Python 2 only).
for itr in range(iterazione, FLAGS.max_iterations + 1):
    if train_input_handle.no_batch_left():
        train_input_handle.begin(do_shuffle=True)
    # Author's note: the batch holds input and output frames concatenated.
    ims = train_input_handle.get_batch()
    # Re-arrange the frames into patches of side FLAGS.patch_size.
    ims = preprocess.reshape_patch(ims, FLAGS.patch_size)

    # Linear decay of eta: 50000 * 0.00002 = 1, so it reaches zero at
    # iteration 50000. The value is rounded to 5 decimals after each step.
    if itr < 50000:
        eta -= delta
        eta = float('%.5f' % (eta))
        print("eta", eta)
    else:
        eta = 0.0

    # One random draw per (sample, frame) mask entry; the entry is switched on
    # when the draw falls below eta.
    random_flip = np.random.random_sample(
        (FLAGS.batch_size, num_pred_frames - 1))
    true_token = (random_flip < eta)
    # Alternative schedule left by the author:
    # true_token = (random_flip < pow(base, itr))
    # Entries selected by true_token are filled with ones, the rest stay zero.
    mask_true = np.zeros(mask_shape)
    mask_true[true_token] = 1.0

    # TRAINING
    cost = train(ims, lr, mask_true)
    if FLAGS.reverse_input:
        # Second step on the same batch with the frame order reversed; the
        # reported cost is the mean of the two steps.
        ims_rev = ims[:, ::-1]
        cost += train(ims_rev, lr, mask_true)
        cost = cost / 2

    if itr % FLAGS.display_interval == 0:
        print('itr: ' + str(itr))
        print('training loss: ' + str(cost))

    if itr % FLAGS.test_interval == 0:
        print('test...')
        test_input_handle.begin(do_shuffle=False)
        res_path = os.path.join(FLAGS.gen_frm_dir, str(itr))
        # MakeDirs creates missing parents and succeeds when the directory
        # already exists, e.g. when this iteration is re-run after resuming
        # from an earlier checkpoint.
        tf.gfile.MakeDirs(res_path)
        avg_mse = 0
        batch_id = 0
        # Per-frame accumulators, one slot per predicted frame.
        img_mse = [0] * num_pred_frames
        ssim = [0] * num_pred_frames
        psnr = [0] * num_pred_frames
        fmae = [0] * num_pred_frames
        sharp = [0] * num_pred_frames
        # Validation feeds an all-zero mask.
        mask_true = np.zeros(mask_shape)
        while not test_input_handle.no_batch_left():
            batch_id = batch_id + 1
            test_ims = test_input_handle.get_batch()
            test_dat = preprocess.reshape_patch(test_ims, FLAGS.patch_size)
            img_gen = test(test_dat, mask_true)
            # test() returns a list of arrays: join them along the batch axis
            img_gen = np.concatenate(img_gen)
            img_gen = preprocess.reshape_patch_back(img_gen, FLAGS.patch_size)
            # metrics per predicted frame
            for i in range(num_pred_frames):
                x = test_ims[:, i + FLAGS.input_length, :, :, 0]
                gx = img_gen[:, i, :, :, 0]
                fmae[i] += metrics.batch_mae_frame_float(gx, x)
                # clip predictions to [0, 1] before the remaining metrics
                gx = np.maximum(gx, 0)
                gx = np.minimum(gx, 1)
                mse = np.square(x - gx).sum()
                img_mse[i] += mse
                avg_mse += mse

                real_frm = np.uint8(x * 255)
                pred_frm = np.uint8(gx * 255)
                psnr[i] += metrics.batch_psnr(pred_frm, real_frm)
                for b in range(FLAGS.batch_size):
                    sharp[i] += np.max(
                        cv2.convertScaleAbs(cv2.Laplacian(pred_frm[b], 3)))
                    score, _ = compare_ssim(pred_frm[b], real_frm[b], full=True)
                    ssim[i] += score
            # save prediction examples (first sample of the first 10 batches)
            if batch_id <= 10:
                path = os.path.join(res_path, str(batch_id))
                tf.gfile.MakeDirs(path)
                for i in range(FLAGS.seq_length):
                    name = 'gt' + str(i + 1) + '.png'
                    file_name = os.path.join(path, name)
                    img_gt = np.uint8(test_ims[0, i, :, :, :] * 255)
                    cv2.imwrite(file_name, img_gt)
                for i in range(num_pred_frames):
                    name = 'pd' + str(i + 1 + FLAGS.input_length) + '.png'
                    file_name = os.path.join(path, name)
                    img_pd = img_gen[0, i, :, :, :]
                    img_pd = np.maximum(img_pd, 0)
                    img_pd = np.minimum(img_pd, 1)
                    img_pd = np.uint8(img_pd * 255)
                    cv2.imwrite(file_name, img_pd)
            test_input_handle.next()
        avg_mse = avg_mse / (batch_id * FLAGS.batch_size)
        print('mse per seq: ' + str(avg_mse))
        for i in range(num_pred_frames):
            print(img_mse[i] / (batch_id * FLAGS.batch_size))
        # psnr and fmae are accumulated once per batch, ssim and sharpness
        # once per sample, hence the different divisors.
        psnr = np.asarray(psnr, dtype=np.float32) / batch_id
        fmae = np.asarray(fmae, dtype=np.float32) / batch_id
        ssim = np.asarray(ssim, dtype=np.float32) / (FLAGS.batch_size * batch_id)
        sharp = np.asarray(sharp, dtype=np.float32) / (FLAGS.batch_size * batch_id)
        print('psnr per frame: ' + str(np.mean(psnr)))
        for i in range(num_pred_frames):
            print(psnr[i])
        print('fmae per frame: ' + str(np.mean(fmae)))
        for i in range(num_pred_frames):
            print(fmae[i])
        print('ssim per frame: ' + str(np.mean(ssim)))
        for i in range(num_pred_frames):
            print(ssim[i])
        print('sharpness per frame: ' + str(np.mean(sharp)))
        for i in range(num_pred_frames):
            print(sharp[i])

    if itr % FLAGS.snapshot_interval == 0:
        # Store eta next to the checkpoint so that a resumed run continues the
        # schedule; the context manager closes the file even if write() fails.
        with open("eta.txt", "w") as eta_file:
            eta_file.write(str(eta))
        # Only the newest checkpoint is kept: the directory is emptied before
        # each save. NOTE(review): an interruption between the delete and the
        # end of save() leaves no checkpoint on disk.
        if tf.gfile.Exists(FLAGS.save_dir):
            tf.gfile.DeleteRecursively(FLAGS.save_dir)
        tf.gfile.MakeDirs(FLAGS.save_dir)
        save(itr)

    train_input_handle.next()

print("FINITO! :)")

itr: 74606
training loss: 58.50434494018555
itr: 74607
training loss: 52.47566223144531
itr: 74608
training loss: 45.26495361328125
itr: 74609
training loss: 54.099266052246094
itr: 74610
training loss: 47.841529846191406
itr: 74611
training loss: 54.285377502441406
itr: 74612
training loss: 55.52727508544922
itr: 74613
training loss: 51.817230224609375
itr: 74614
training loss: 46.146751403808594
itr: 74615
training loss: 46.17295837402344
itr: 74616
training loss: 60.316253662109375
itr: 74617
training loss: 50.674072265625
itr: 74618
training loss: 52.96582794189453
itr: 74619
training loss: 52.51131820678711
itr: 74620
training loss: 51.35956573486328
itr: 74621
training loss: 50.101234436035156
itr: 74622
training loss: 56.71583938598633
itr: 74623
training loss: 55.41116714477539
itr: 74624
training loss: 55.684410095214844
itr: 74625
training loss: 55.32701873779297
itr: 74626
training loss: 59.1476936340332
itr: 74627
training loss: 50.93190383911133
itr: 74628
training loss: 5

## fine
