In [None]:
# Colab-only setup: mount Google Drive at /content/drive/ so that the dataset
# paths used by Config (which live under /content/drive/MyDrive) can be read.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
class Config:
    """Notebook-wide settings, read as class attributes (never instantiated)."""

    # --- where the data lives (on the mounted Drive) ---
    # Directory holding one sub-folder of .tif frames per training clip.
    DATASET_PATH = r"/content/drive/MyDrive/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset/UCSDped1/Train"
    # Directory holding the .tif frames of the single clip used for testing.
    SINGLE_TEST_PATH = r"/content/drive/MyDrive/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset/UCSDped1/Test/Test001"

    # --- training hyper-parameters passed to fit() ---
    BATCH_SIZE = 1
    EPOCHS = 1

    # --- caching ---
    # Defaults for the `re` flag of get_dataset / get_testset / get_model:
    # True rebuilds from scratch, False reads the value back from the shelf.
    RELOAD_DATASET = True
    RELOAD_TESTSET = True
    RELOAD_MODEL = True
    # Prefix of the shelve files; "dataset", "testset" or "model" is appended.
    CACHE_PATH = "cache"
    # Target path handed to the Keras ModelCheckpoint callback.
    MODEL_PATH = "model"

In [None]:
def get_single_test():
    """Load the frames of the single test clip into one array.

    Every entry of ``Config.SINGLE_TEST_PATH`` whose name ends in ``"tif"`` is
    opened in sorted name order, resized to 256x256, divided by 256.0 and
    written into the next free slot of the result.

    Returns
    -------
    numpy.ndarray
        Array of shape (200, 256, 256, 1). Slots beyond the number of frames
        found on disk keep their initial value of zero.
    """
    frame_slots = 200
    frames = np.zeros(shape=(frame_slots, 256, 256, 1))
    tif_names = (
        name
        for name in sorted(listdir(Config.SINGLE_TEST_PATH))
        if name.endswith("tif")
    )
    for slot, name in enumerate(tif_names):
        picture = Image.open(join(Config.SINGLE_TEST_PATH, name)).resize((256, 256))
        frames[slot, :, :, 0] = np.array(picture, dtype=np.float32) / 256.0
    return frames

In [None]:
import shelve
from os import listdir
from os.path import join, isdir
import numpy as np
from PIL import Image

def get_clips_by_stride(stride, frames_list):
    """
    Cut a sorted list of frames into 10-frame clips sampled every `stride` frames.

    Parameters
    ----------
    stride : int
        The distance between two consecutive frames
    frames_list : list
        A list of sorted frames of shape 256*256
    Returns
    -------
    list
        A list of clips , 10 frames each. Every clip is its own
        (256, 256, 10) array with the frames stacked along the last axis.
    """
    clips = []
    sz = len(frames_list)
    for start in range(0, stride):
        # Start every phase with an empty clip so that frames left over from
        # the previous phase are not mixed into this one (they would not be
        # `stride` apart from their neighbours).
        clip = np.zeros(shape=(256, 256, 10))
        cnt = 0
        for i in range(start, sz, stride):
            clip[:, :, cnt] = frames_list[i]
            cnt = cnt + 1
            if cnt == 10:
                clips.append(clip)
                # Allocate a fresh buffer: re-using the appended array would
                # make every list entry alias the same memory and end up
                # holding only the frames written last.
                clip = np.zeros(shape=(256, 256, 10))
                cnt = 0

    return clips


def get_dataset(re=Config.RELOAD_DATASET):
    """
    Build the training clips from disk, or read them back from the shelf.

    Parameters
    ----------
    re : bool
        Reload the dataset or load it from cache
    Returns
    -------
    list
        The clips produced by get_clips_by_stride (stride 1 only) for every
        sub-directory of Config.DATASET_PATH, concatenated in sorted
        directory order.
    Raises
    ------
    KeyError
        If `re` is false and the shelf holds no "dataset" entry yet.
    """
    shelf_path = Config.CACHE_PATH + "dataset"
    if not re:
        # The context manager closes the shelf even on this early return.
        with shelve.open(shelf_path) as cache:
            return cache["dataset"]

    clips = []
    for folder in sorted(listdir(Config.DATASET_PATH)):
        folder_path = join(Config.DATASET_PATH, folder)
        if not isdir(folder_path):
            continue
        print(folder)
        all_frames = []
        for frame_name in sorted(listdir(folder_path)):
            if frame_name.endswith("tif"):
                img = Image.open(join(folder_path, frame_name))
                img = img.resize((256, 256))
                # Scale pixel values by 1/256.
                all_frames.append(np.array(img, dtype=np.float32) / 256.0)
        # range(1, 2) yields only stride 1; widen it to add strided clips.
        for stride in range(1, 2):
            clips.extend(get_clips_by_stride(stride, all_frames))

    with shelve.open(shelf_path) as cache:
        cache["dataset"] = clips
    return clips


def get_testset(re=Config.RELOAD_TESTSET):
    """
    Build the test clips of Config.SINGLE_TEST_PATH, or read them from the shelf.

    The "tif" files of the directory are read in sorted order, resized to
    256x256, divided by 256.0 and packed ten at a time into the channels of
    one clip.

    Parameters
    ----------
    re : bool
        Reload the test set or load it from cache
    Returns
    -------
    numpy.ndarray
        Array of shape (200, 256, 256, 10). Only the first
        ``number_of_frames // 10`` entries are filled; the remaining entries
        stay all-zero.
    Raises
    ------
    KeyError
        If `re` is false and the shelf holds no "testset" entry yet.
    """
    shelf_path = Config.CACHE_PATH + "testset"
    if not re:
        # The context manager closes the shelf even on this early return.
        with shelve.open(shelf_path) as cache:
            return cache["testset"]

    sz = 200
    # NOTE(review): `sz` looks like a frame count but is used as the number of
    # clips, which is why most of the array stays zero — confirm the intent
    # before trimming, since callers see the (200, ...) shape.
    images = np.zeros(shape=(sz, 256, 256, 10))
    cnt = 0
    cnt_container = 0
    container = np.zeros(shape=(256, 256, 10))
    for f in sorted(listdir(Config.SINGLE_TEST_PATH)):
        if f.endswith("tif"):
            img = Image.open(join(Config.SINGLE_TEST_PATH, f))
            img = img.resize((256, 256))
            img = np.array(img, dtype=np.float32)
            img = img / 256.0
            container[:, :, cnt] = img
            cnt = cnt + 1
            if cnt == 10:
                # Clip complete: store it and start a new one.
                images[cnt_container, :, :, :] = container
                container = np.zeros(shape=(256, 256, 10))
                cnt = 0
                cnt_container = cnt_container + 1

    # Close the shelf after writing so the entry is flushed to disk.
    with shelve.open(shelf_path) as cache:
        cache["testset"] = images
    print(images.shape)
    print(cnt_container)
    return images

In [None]:
import matplotlib.pyplot as plt
import numpy as np
def plot_image(image, shape=[256, 256], cmap="Greys_r"):
    """Show `image`, reshaped to `shape`, with the given colour map and no axes."""
    pixels = image.reshape(shape)
    plt.imshow(pixels, cmap=cmap, interpolation="nearest")
    plt.axis("off")
    plt.show()

def movingaverage(values, window):
    """Return the simple moving average of `values` over `window` samples.

    Only fully overlapping positions are kept ('valid' convolution), so the
    result has ``len(values) - window + 1`` entries.
    """
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, 'valid')

In [None]:
import shelve
import tensorflow as tf
import keras
from keras.utils.vis_utils import plot_model
import numpy as np
from keras.layers import Conv2DTranspose, BatchNormalization
from keras.layers.convolutional import Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Sequential
from tensorflow.keras.optimizers import legacy


# Convolutional autoencoder (encoder and decoder are built and trained together)
def get_model(re=Config.RELOAD_MODEL):
    """
    Build and train the autoencoder, or read a stored one back from the shelf.

    Parameters
    ----------
    re : bool
        Retrain the model or load it from cache
    Returns
    -------
    keras.models.Sequential
        The model, trained to reproduce its own input (``fit(images, images)``)
        on the clips returned by get_dataset.
    Raises
    ------
    KeyError
        If `re` is false and the shelf holds no "model" entry yet.
    """
    shelf_path = Config.CACHE_PATH + "model"
    if not re:
        # The context manager closes the shelf even on this early return.
        with shelve.open(shelf_path) as cache:
            return cache["model"]

    images = get_dataset(Config.RELOAD_DATASET)
    images = np.array((images))
    print(images.shape)

    seq = Sequential()
    # --- encoder: strided convolution and pooling ---
    seq.add(Conv2D(512, (11, 11), strides=2, padding="same"))
    seq.add(BatchNormalization())
    seq.add(MaxPooling2D((2, 2), padding="same"))
    seq.add(BatchNormalization())
    seq.add(Conv2D(256, (5, 5), padding="same"))
    seq.add(BatchNormalization())
    seq.add(MaxPooling2D((2, 2), padding="same"))
    seq.add(BatchNormalization())
    seq.add(Conv2D(128, (3, 3), padding="same"))
    seq.add(BatchNormalization())
    # --- decoder: mirrors the encoder with transposed convolutions ---
    seq.add(Conv2DTranspose(128, (3, 3), padding="same"))
    seq.add(BatchNormalization())
    seq.add(UpSampling2D((2, 2)))
    seq.add(BatchNormalization())
    seq.add(Conv2DTranspose(256, (5, 5), padding="same"))
    seq.add(BatchNormalization())
    seq.add(UpSampling2D((2, 2)))
    seq.add(BatchNormalization())
    seq.add(Conv2DTranspose(512, (11, 11),strides=2, padding="same"))
    seq.add(BatchNormalization())
    # 10 output channels with sigmoid activation.
    seq.add(Conv2D(10, (11, 11), activation="sigmoid", padding="same"))
    seq.compile(loss='mse', optimizer = legacy.Adam(decay=1e-6))

    # NOTE(review): an integer save_freq is counted in batches, so the whole
    # model is written to Config.MODEL_PATH every 5 batches; fit() is given no
    # validation data, so 'val_loss' is presumably never available — confirm
    # whether save_freq='epoch' was intended.
    callback = keras.callbacks.ModelCheckpoint(Config.MODEL_PATH, monitor='val_loss', verbose=0, save_best_only=False,
                                               save_weights_only=False, mode='auto', save_freq=5)
    seq.fit(images, images, batch_size=Config.BATCH_SIZE, epochs=Config.EPOCHS, shuffle=False, callbacks=[callback])

    # Close the shelf after writing so the entry is flushed to disk.
    with shelve.open(shelf_path) as cache:
        cache["model"] = seq
    return seq

In [None]:
import matplotlib.pyplot as plt

def evaluate():
  """
  Score the single test clip with the autoencoder and plot its regularity.

  The frames returned by get_single_test() are cut into overlapping windows
  of 10 consecutive frames (window i holds frames i .. i+9 as its 10
  channels). Each window is reconstructed by the model; the reconstruction
  cost e(i) is the norm of the difference between window and reconstruction,
  and the regularity score is ``1 - (e - min(e)) / max(e)``.

  Side effects
  ------------
  Stores the score array in the module-level name ``sr`` and shows a plot of
  it. Returns nothing.
  """
  seq = get_model(re=Config.RELOAD_MODEL)
  print("got model")
  # Per-frame array of shape (n_frames, 256, 256, 1).
  test = get_single_test()
  print(test.shape)
  sz = test.shape[0] - 10 + 1
  sequences = np.zeros((sz, 256, 256, 10))
  # apply the sliding window technique to get the sequences
  for i in range(0, sz):
      # (10, 256, 256) -> (256, 256, 10): frames become channels, the layout
      # of the clips the model is fitted on.
      sequences[i] = np.moveaxis(test[i:i + 10, :, :, 0], 0, -1)
  print("got data")

  # get the reconstruction cost of all the sequences
  # (use the model obtained above; Config has no `model` attribute)
  reconstructed_sequences = seq.predict(sequences,batch_size=4)
  sequences_reconstruction_cost = np.array([np.linalg.norm(np.subtract(sequences[i],reconstructed_sequences[i])) for i in range(0,sz)])
  sa = (sequences_reconstruction_cost - np.min(sequences_reconstruction_cost)) / np.max(sequences_reconstruction_cost)
  global sr
  sr = 1.0 - sa
  plt.plot(sr)
  plt.ylabel('regularity score Sr(t)')
  plt.xlabel('frame t')
  plt.show()

evaluate()

Train001
Train002
Train003
Train004
Train005
Train006
Train007
Train008
Train009
Train010
Train011
Train012
Train013
Train014
Train015
Train016
Train017
Train018
Train019
Train020
Train021
Train022
Train023
Train024
Train025
Train026
Train027
Train028
Train029
Train030
Train031
Train032
Train033
Train034
(680, 256, 256, 10)
  4/680 [..............................] - ETA: 6:22 - loss: 0.1317



  9/680 [..............................] - ETA: 20:10 - loss: 0.1200



 14/680 [..............................] - ETA: 21:13 - loss: 0.1152



 19/680 [..............................] - ETA: 25:33 - loss: 0.1128



 24/680 [>.............................] - ETA: 27:58 - loss: 0.1155



 29/680 [>.............................] - ETA: 28:52 - loss: 0.1171



 34/680 [>.............................] - ETA: 29:21 - loss: 0.1178



 39/680 [>.............................] - ETA: 31:37 - loss: 0.1179



 44/680 [>.............................] - ETA: 31:47 - loss: 0.1173



 49/680 [=>............................] - ETA: 31:38 - loss: 0.1164



 54/680 [=>............................] - ETA: 31:46 - loss: 0.1156



 59/680 [=>............................] - ETA: 32:07 - loss: 0.1148



 64/680 [=>............................] - ETA: 31:40 - loss: 0.1140



 69/680 [==>...........................] - ETA: 32:08 - loss: 0.1133



 74/680 [==>...........................] - ETA: 32:49 - loss: 0.1125



 79/680 [==>...........................] - ETA: 33:17 - loss: 0.1119



 84/680 [==>...........................] - ETA: 33:20 - loss: 0.1111



 89/680 [==>...........................] - ETA: 33:17 - loss: 0.1104



 94/680 [===>..........................] - ETA: 33:32 - loss: 0.1097



 99/680 [===>..........................] - ETA: 33:26 - loss: 0.1094



104/680 [===>..........................] - ETA: 33:17 - loss: 0.1093



109/680 [===>..........................] - ETA: 33:25 - loss: 0.1093



114/680 [====>.........................] - ETA: 33:04 - loss: 0.1092



119/680 [====>.........................] - ETA: 33:03 - loss: 0.1090



124/680 [====>.........................] - ETA: 32:44 - loss: 0.1089



129/680 [====>.........................] - ETA: 32:36 - loss: 0.1087



134/680 [====>.........................] - ETA: 32:08 - loss: 0.1085



139/680 [=====>........................] - ETA: 32:04 - loss: 0.1083



144/680 [=====>........................] - ETA: 31:51 - loss: 0.1083



149/680 [=====>........................] - ETA: 31:21 - loss: 0.1083



154/680 [=====>........................] - ETA: 30:59 - loss: 0.1084







































































































































































































































































































































































































































got model
(200, 256, 256, 10)
20
(200, 256, 256, 10)


ValueError: ignored

In [None]:
import numpy as np

def determine_anomalous_files(sr, threshold=0.5, file_names=None):
    """
    Pick out the entries whose regularity score falls below a threshold.

    Parameters
    ----------
    sr : sequence of float
        Regularity scores, one per scored position.
    threshold : float
        Scores strictly below this value are reported as anomalous
        (a low regularity score means a poor reconstruction).
    file_names : sequence of str, optional
        Label for each position of `sr`. Positions without a label
        (index >= len(file_names)) are skipped.
    Returns
    -------
    list
        The labels of the anomalous positions, or their integer indices when
        `file_names` is not given.
    """
    flagged = [index for index, score in enumerate(sr) if score < threshold]
    if file_names is None:
        return flagged
    return [file_names[index] for index in flagged if index < len(file_names)]

# Define the directory path and threshold value
# The scores in `sr` come from evaluate(), so the file names are taken from
# the clip configured in Config.SINGLE_TEST_PATH.
directory_path = Config.SINGLE_TEST_PATH
threshold = 0.5  # Adjust this threshold value according to your needs

# NOTE(review): sr[i] is assumed to score the 10-frame window that starts at
# frame i, so it is labelled with the i-th frame file — confirm against evaluate().
frame_files = sorted(f for f in listdir(directory_path) if f.endswith("tif"))

# Call the function to determine anomalous files
anomalous_files = determine_anomalous_files(sr, threshold, frame_files)

# Output the list of anomalous files
print("Anomalous Files:")
for file_name in anomalous_files:
    print(file_name)
