<a href="https://colab.research.google.com/github/Mogaveera/atmanomalydetection/blob/master/atm_anomaly_2_classes_using_TransferLearningInceptionV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 1.x

import pandas as pd
import numpy as np
import cv2
import os
import h5py
from tqdm import tqdm
from keras.preprocessing import image
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model, Sequential
from keras.layers import Input, LSTM, Dense, Dropout
from keras.layers import Bidirectional
from keras.utils import to_categorical
from keras.applications.imagenet_utils import preprocess_input
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TensorBoard,EarlyStopping
from keras.utils.io_utils import HDF5Matrix

SEQ_LEN = 30
BATCH_SIZE = 16
EPOCHS = 100

train_video_index = []
test_video_index = []

Using TensorFlow backend.


In [2]:
!git clone https://github.com/Mogaveera/atmanomaly.git

Cloning into 'atmanomaly'...
remote: Enumerating objects: 159, done.[K
remote: Total 159 (delta 0), reused 0 (delta 0), pack-reused 159[K
Receiving objects: 100% (159/159), 203.93 MiB | 46.81 MiB/s, done.
Resolving deltas: 100% (5/5), done.
Checking out files: 100% (151/151), done.


In [0]:
def main():
    # Get model with pretrained weights.
    base_model = InceptionV3(
    weights='imagenet',
    include_top=True)
    
    
    # We'll extract features at the final pool layer.
    model = Model(
        inputs=base_model.input,
        outputs=base_model.get_layer('avg_pool').output)
    
    # Getting the data
    df = get_data('atmanomaly/Data/data_file.csv')
    
    # Clean the data
    # df_clean = clean_data(df)
    
    # Creating index-label maps and inverse_maps
    label_index, index_label = get_class_dict(df)
    
    # Split the dataset into train and test
    train, test = split_train_test(df)
    
    # Encoding the dataset
    train_video_index = make_dataset(train, model,label_index, "train")
    test_video_index = make_dataset(test, model,label_index,"test")
    return (train_video_index, test_video_index)

In [0]:
def get_data(path, if_pd=False):
    """Load our data from file."""
    names = ['partition', 'class', 'video_name']
    df = pd.read_csv(path,names=names)
    return df

def get_class_dict(df):
    class_name =  list(df['class'].unique())
    index = np.arange(0, len(class_name))
    label_index = dict(zip(class_name, index))
    index_label = dict(zip(index, class_name))
    return (label_index, index_label)
    
#def clean_data(df):
#    mask = np.logical_and(df['frames'] >= SEQ_LEN, df['frames'] <= MAX_SEQ_LEN)
#    df = df[mask]`
#    return df

def split_train_test(df):
    partition =  (df.groupby(['partition']))
    un = df['partition'].unique()
    train = partition.get_group(un[0])
    test = partition.get_group(un[1])
    return (train, test)

def preprocess_image(img):
    img = cv2.resize(img, (299,299))
    return preprocess_input(img)
    
    
def video_to_frame(row, model, label_index, phase, not_created):
    input_f = []
    output_y = []
    index = 0
    cap = cv2.VideoCapture(os.path.join("atmanomaly/Data","anomaly_dataset",str(row["class"].iloc[0]) ,str(row["video_name"].iloc[0]) + ".mp4")) 
    #print(str(row["class"].iloc[0]))
    #print(str(row["video_name"].iloc[0]))
    frameno = 1
    imgs = []
    length = 0
    seq = 12
    while (cap.isOpened()):
      ret, frame = cap.read()
      if ret:
        if length < seq:
          if frameno % 10 == 0:
            frameno = frameno + 1
            frame = preprocess_image(frame)
            frame = image.img_to_array(frame)
            frame = frame / 255
            imgs.append(frame)
            length = length + 1
          else:
            frameno = frameno + 1
        else:
          seq = seq + 12
          imgs1 = np.array(imgs)
          features = model.predict(imgs1)
          input_f.append(features)
          output_y.append(label_index)
          del imgs[:]
      else:
        break

    if not_created:
      f = h5py.File(phase+'_2'+'.h5', 'w')
      input_f1 = np.array(input_f)
      output_y1 = np.array(output_y)
      index = input_f1.shape[0]
      if index > 0:
        f.create_dataset(phase, data=input_f1, maxshape=(None, 12, 2048))
        f.create_dataset(phase+"_labels", data=output_y1, maxshape=(None, 2))
        f.close()
    else:
      hf = h5py.File(phase+'_2'+'.h5', 'a')
      input_f1 = np.array(input_f)
      output_y1 = np.array(output_y)
      index = input_f1.shape[0]
      if index > 0:
        hf[phase].resize((hf[phase].shape[0] + input_f1.shape[0]), axis = 0)
        hf[phase][-input_f1.shape[0]:] = input_f1

        hf[phase+"_labels"].resize((hf[phase+"_labels"].shape[0] + output_y1.shape[0]), axis = 0)
        hf[phase+"_labels"][-output_y1.shape[0]:] = output_y1
        hf.close()

    del input_f[:]
    del output_y[:]
    del imgs[:]
    cap.release()
    return index
    
    


def make_dataset(data, model, label_index, phase):
    video_index = [0]
    required_classes = ["Arson", "Burglary", "Fighting", "normal"]
   
    not_created = True
    for i in tqdm(range(data.shape[0])):
    # Check whether the given row , is of a class that is required
        if str(data.iloc[[i]]["class"].iloc[0]) in required_classes:
            index = required_classes.index(str(data.iloc[[i]]["class"].iloc[0]))
            index_new = 0
            if index == 3:
              index_new = 1
            label_index = np.zeros((2))
            label_index[index_new] = 1
            index = video_to_frame(data.iloc[[i]], model, label_index, phase, not_created)
            real_index = video_index[-1] + index
            video_index.append(real_index)
            if real_index > 0:
              not_created = False

    return video_index

In [5]:
train_video_index, test_video_index = main()














Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


100%|██████████| 125/125 [01:42<00:00,  1.26it/s]
100%|██████████| 24/24 [00:23<00:00,  1.21s/it]


In [8]:
x_train = HDF5Matrix('train_2.h5', 'train')
y_train = HDF5Matrix('train_2.h5', 'train_labels')
x_test = HDF5Matrix('test_2.h5', 'test')
y_test = HDF5Matrix('test_2.h5', 'test_labels')

print(x_train.shape)
print(y_train.shape)
print(y_train[210])
print(x_test.shape)
print(y_test.shape)
print(y_test[116])
print(train_video_index)
print(test_video_index)

(702, 12, 2048)
(702, 2)
[1. 0.]
(176, 12, 2048)
(176, 2)
[0. 1.]
[0, 3, 6, 10, 13, 16, 20, 21, 23, 26, 29, 32, 37, 42, 46, 49, 57, 61, 64, 68, 68, 73, 78, 83, 86, 90, 94, 96, 99, 101, 104, 105, 106, 107, 112, 114, 118, 123, 124, 128, 131, 132, 134, 136, 145, 150, 156, 160, 164, 167, 171, 174, 180, 187, 192, 198, 217, 221, 244, 266, 269, 271, 278, 280, 282, 285, 286, 289, 295, 296, 298, 301, 312, 316, 327, 333, 334, 345, 363, 369, 378, 381, 387, 390, 398, 412, 417, 424, 425, 429, 434, 440, 445, 448, 457, 461, 470, 477, 485, 492, 499, 504, 514, 521, 529, 545, 550, 558, 565, 569, 578, 585, 592, 600, 610, 628, 641, 648, 656, 666, 672, 681, 685, 693, 696, 702]
[0, 1, 3, 4, 5, 9, 12, 17, 21, 42, 48, 60, 61, 68, 75, 87, 92, 110, 116, 134, 140, 150, 157, 163, 176]


In [0]:
def lstm():
    """Build a simple LSTM network. We pass the extracted features from
    our CNN to this model predominantly."""
    input_shape = (12, 2048)
    # Model.
    model = Sequential()
    model.add(Bidirectional(LSTM(2048), input_shape=input_shape))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))
    #model.add(Dense(10, activation='softmax'))"""
    checkpoint = ModelCheckpoint(filepath='models\\checkpoint-{epoch:02d}-{val_loss:.2f}.hdf5')
    
    tb_callback = TensorBoard(
    log_dir="logs",
    histogram_freq=2,
    write_graph=True
    )
    
    callback_list = [checkpoint]
    
    optimizer = Adam(lr=1e-5, decay=1e-6)
    metrics = ['accuracy', 'top_k_categorical_accuracy']
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,metrics=['accuracy'])
    #return model, callback_list
    #model.compile(optimizer = tf.train.AdamOptimizer(),
    #          loss = 'categorical_crossentropy',
    #        metrics=['accuracy'])
    return model, callback_list


In [10]:
lstm, callback_list = lstm()
lstm.summary()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 4096)              67125248  
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               2097664   
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1026      
Total params: 69,223,938
Trainable params: 69,223,938
Non-trainable params: 0
_________________________________________________________

In [11]:
lstm.fit(x_train, y_train, batch_size = 16, epochs = 10,verbose = 2, validation_data = (x_test, y_test), shuffle = 'batch',callbacks=callback_list)
lstm.save("atm_anomaly_2classes_inceptionv3.h5")

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 702 samples, validate on 176 samples
Epoch 1/10
 - 10s - loss: 0.5253 - acc: 0.7236 - val_loss: 0.3132 - val_acc: 0.9602
Epoch 2/10
 - 5s - loss: 0.2368 - acc: 0.9302 - val_loss: 0.1128 - val_acc: 1.0000
Epoch 3/10
 - 5s - loss: 0.1187 - acc: 0.9587 - val_loss: 0.3001 - val_acc: 0.8409
Epoch 4/10
 - 5s - loss: 0.0659 - acc: 0.9886 - val_loss: 0.0411 - val_acc: 1.0000
Epoch 5/10
 - 5s - loss: 0.0675 - acc: 0.9758 - val_loss: 0.0347 - val_acc: 0.9943
Epoch 6/10
 - 5s - loss: 0.0409 - acc: 0.9872 - val_loss: 0.0504 - val_acc: 0.9830
Epoch 7/10
 - 5s - loss: 0.0285 - acc: 0.9915 - val_loss: 0.0184 - val_acc: 0.9943
Epoch 8/10
 - 5s - loss: 0.0166 - acc: 1.0000 - val_loss: 0.0276 - val_acc: 0.9943
Epoch 9/10
 - 5s - loss: 0.0173 - acc: 0.9957 - val_loss: 0.0128 - val_acc: 0.9943
Epoch 10/10
 - 5s - loss: 0.0170 - acc: 0.9972 - val_loss: 0.0224 - val_acc: 0.9943


In [12]:
test_loss, test_acc = lstm.evaluate(x_test, y_test, batch_size=16)
print("accuracy: ", test_acc)

accuracy:  0.9943181818181818


In [15]:
imgs = []
imgs.append(x_test[61])
imgs1 = np.array(imgs)

pred = lstm.predict_classes(imgs1)

print(pred)

[0]
