# https://github.com/ShuaiBai623/AI-City-Anomaly-Detection/blob/master/detect_anomaly.py

In [7]:
import sys
# Make packages stored under /data/modules (e.g. AICity2019_winner, imported
# further down) importable from this notebook.
sys.path.append("/data/modules")

In [8]:
import numpy as np
import pandas as pd
import os
import json
import sys
import cv2 as cv
import sklearn.preprocessing as prep
from sklearn.metrics.pairwise import cosine_similarity
import PIL
import torch
import torch.nn as nn

import AICity2019_winner.utils.utils as utils
import AICity2019_winner.src.reid.modeling as modeling
import AICity2019_winner.src.reid.misc as misc

## IO Helpers

In [21]:
class VideoReader:
    """Small convenience wrapper around ``cv.VideoCapture`` for one video file.

    A fresh capture is opened (and released) for every operation, so an
    instance holds no open handle between calls.

    Attributes (filled in by probing the file once at construction):
        filename: path given to the constructor.
        nframes: ``cv.CAP_PROP_FRAME_COUNT`` of the file, converted to int.
        framerate: ``cv.CAP_PROP_FPS`` of the file.
        img_shape: ``.shape`` of the first decoded frame.
    """

    def __init__(self, filename):
        self.filename = filename

        self.nframes = None
        self.framerate = None
        self.img_shape = None
        self._set_video_info()

    def load_video(self):
        """Yield the frames of the video one by one, exactly as ``vid.read()`` returns them."""
        vid = cv.VideoCapture(self.filename)
        try:
            while vid.isOpened():
                has_frame, img = vid.read()
                if not has_frame:
                    break
                yield img
        finally:
            # Also runs when the consumer stops iterating early or an error
            # occurs, so the capture is never leaked.
            vid.release()

    def get_frame(self, n):
        """
        Returns the image at a specific frame number

        The result of ``read()`` is returned unchecked, so the caller receives
        whatever image value OpenCV produced for position ``n``.
        """
        vid = cv.VideoCapture(self.filename)
        try:
            vid.set(cv.CAP_PROP_POS_FRAMES, n)
            _, img = vid.read()
        finally:
            vid.release()
        return img

    def _set_video_info(self):
        """Read frame count, frame rate and frame shape from the file.

        Raises:
            IOError: if no frame can be read from ``self.filename``.
        """
        vid = cv.VideoCapture(self.filename)
        try:
            has_frame, img = vid.read()
            if not has_frame or img is None:
                # Fail with a clear message instead of an AttributeError on img.shape
                raise IOError(f"Could not read a frame from video: {self.filename!r}")

            self.nframes = int(vid.get(cv.CAP_PROP_FRAME_COUNT))
            self.framerate = vid.get(cv.CAP_PROP_FPS)
            self.img_shape = img.shape
        finally:
            vid.release()

In [10]:
def read_json(filename):
    """Parse the JSON document stored at ``filename`` and return the decoded object."""
    with open(filename) as handle:
        parsed = json.load(handle)
    return parsed
    
def parse_json_combine(js):
    """
    Flatten the detections of every frame into one list of boxes.

    Expected input layout:
    {
        frame: {"1": [], "2": [], "3": [], "4": [
            [[x1, y1, x2, y2], score],
            ...
        ]},
        ...
    }

    Only the entries under key "4" are read, and only boxes whose score is
    strictly positive are kept. The scores themselves are dropped.
    """
    kept = []
    for frame_key in js:
        detections = js[frame_key]["4"]
        kept.extend(box for box, confidence in detections if confidence > 0)

    return kept

def parse_json(js):
    """
    Group the positive-score detections under key "4" by integer frame number.

    Frame keys such as "123.jpg" become the int 123. Frames without any
    detection scoring above 0 are left out of the result. Each kept detection
    is stored as ``[bbox, score]``.
    """
    per_frame = {}
    for frame_key in js:
        kept = [[box, confidence] for box, confidence in js[frame_key]["4"] if confidence > 0]
        if not kept:
            continue

        frame_num = int(frame_key.split(".")[0])  # 123.jpg -> 123
        per_frame[frame_num] = kept

    return per_frame

## Re-ID Model and helpers

In [11]:
class ReidExtractor:
    """Re-identification feature extractor for image crops.

    Builds the network with ``modeling.build_model``, moves it to the GPU,
    wraps it in ``nn.DataParallel``, loads the checkpoint non-strictly and
    switches it to eval mode.

    args:
        model_name: architecture name passed to ``modeling.build_model``.
        model_path: checkpoint file loaded with ``torch.load``.
        image_size: size handed to ``misc.preprocess`` when building the
            input transform.
    """
    def __init__(self, model_name, model_path, image_size=320):
        # NOTE(review): 2000 is presumably the class count the checkpoint was
        # trained with -- confirm against modeling.build_model.
        model = modeling.build_model(model_name, 2000)
        model.cuda()
        model = nn.DataParallel(model)
        # strict=False: keys that do not match between checkpoint and model are ignored
        model.load_state_dict(torch.load(model_path), strict=False)
        model.eval()
        self.extractor = model
        self.image_size = image_size
        self.transforms = misc.preprocess(misc.normalize_torch, self.image_size)

    def extract(self, img_numpy, region):
        """Return the L2-normalised float32 feature array of one image crop.

        args:
            img_numpy: image array accepted by ``PIL.Image.fromarray``.
            region: crop box passed to ``PIL.Image.Image.crop``.
        """
        # Import the submodule explicitly: a bare ``import PIL`` does not load
        # ``PIL.Image``, so ``PIL.Image.fromarray`` only works if some other
        # module happened to import it first.
        from PIL import Image

        img = Image.fromarray(img_numpy)
        img = img.crop(region)

        # Add a leading batch dimension of size 1
        model_input = self.transforms(img).unsqueeze(0)

        with torch.no_grad():
            # The model returns two feature tensors; only the second is used.
            _, features_before = self.extractor(model_input.cuda())

        features = prep.normalize(features_before.cpu().data.numpy(), norm="l2").astype("float32")

        return features

In [12]:
def get_similarity(reid_model, img1, img2, region1, region2):
    """Cosine similarity between the re-ID features of two image crops.

    Each crop is described by an image and a region. The value is returned
    exactly as produced by ``cosine_similarity``.
    """
    features_first = reid_model.extract(img1, region1)
    features_second = reid_model.extract(img2, region2)
    return cosine_similarity(features_first, features_second)

## Other helpers

In [13]:
def mask(arr, mask):
    """
    Named helper so call sites show that a masking step is happening.

    arr: np.array of data
    mask: boolean np.array with same shape as arr

    Returns the element-wise product, i.e. ``arr`` with the entries where
    ``mask`` is False set to zero.
    """
    return np.multiply(arr, mask)

In [14]:
def add_boxes(bboxes, ignore_matrix, score_thresh=0.3):
    """
    Rasterises bounding boxes into per-pixel score and detection maps.

    bboxes: iterable of ``((x1, y1, x2, y2), score)`` entries
    ignore_matrix: boolean np.array of shape (h, w); pixels where it is False
        are cleared in both outputs
    score_thresh: only boxes with ``score > score_thresh`` are drawn

    Returns ``(tmp_score, tmp_detect)``:
        tmp_score: (h, w) float array holding, per pixel, the highest score of
            any drawn box covering it (0 where none does)
        tmp_detect: (h, w) bool array, True where at least one drawn box
            covers the pixel
    """
    h, w = ignore_matrix.shape

    tmp_score = np.zeros((h, w))
    tmp_detect = np.zeros((h, w), dtype=bool)

    for (x1, y1, x2, y2), score in bboxes: # for each box
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))

        if score > score_thresh:
            # Clamp to the image origin. A negative slice bound would be
            # interpreted relative to the far edge, which drops boxes that
            # overhang the left/top border or paints the wrong side of the
            # image. Overhang past the right/bottom border is already
            # truncated by slicing itself.
            x1, x2 = max(x1, 0), max(x2, 0)
            y1, y2 = max(y1, 0), max(y2, 0)

            tmp_score[y1:y2, x1:x2] = np.maximum(score, tmp_score[y1:y2, x1:x2]) # add box
            tmp_detect[y1:y2, x1:x2] = True

    tmp_score = mask(tmp_score, ignore_matrix) # get rid of stuff in ignore regions
    tmp_detect &= ignore_matrix

    return tmp_score, tmp_detect

## Main Program

In [19]:
def get_anomolies(video_path, reid_model_path, frame_by_frame_results_path, static_results_path, ignore_matrix_path=None, 
                  reid_model_name="resnet50", start_frame=1, frame_interval=20, abnormal_duration_thresh=60, detect_thresh=5, 
                  undetect_thresh=8, score_thresh=0.3, light_thresh=0.8, anomoly_score_thresh=0.7, similarity_thresh=0.95,
                  suspicious_time_thresh=18, verbose=False):
    """
    Scan the static-detection results of one video for long-lived detections
    and report the earliest anomaly start time.

    Per sampled frame the static detections are rasterised into per-pixel
    maps. A pixel that has been detected more than ``detect_thresh`` times
    becomes a candidate. When the longest candidate has persisted for more
    than ``abnormal_duration_thresh`` seconds, its start time is back-tracked
    using the frame-by-frame detections (IoU) and the local image brightness.
    The anomaly is recorded once the pixel has gone undetected for more than
    ``undetect_thresh`` sampled frames and its mean score is high enough.

    args:
        video_path: video file, opened through ``VideoReader``.
        reid_model_path: checkpoint passed to ``ReidExtractor``.
        frame_by_frame_results_path: header-less CSV read with the columns
            frame, NA, x1, y1, w, h, score, NA1, NA2, NA3.
        static_results_path: JSON file read via ``read_json`` / ``parse_json``.
        ignore_matrix_path: optional ``.npy`` file cast to bool; detections
            where it is False are discarded. ``None`` keeps everything.
        reid_model_name: architecture name passed to ``ReidExtractor``.
        start_frame, frame_interval: the loop visits
            ``range(start_frame, num_frames, frame_interval)``.
        abnormal_duration_thresh: seconds (multiplied by the frame rate) a
            candidate must persist to count as an anomaly.
        detect_thresh: detection count above which a pixel becomes a candidate.
        undetect_thresh: number of consecutive sampled frames without a
            detection after which a pixel stops being a candidate.
        score_thresh: minimum box score, forwarded to ``add_boxes``.
        light_thresh: brightness value at or below which the brightness
            back-tracking stops.
        anomoly_score_thresh: minimum mean score for an anomaly to be recorded.
        similarity_thresh: re-ID similarity above which a new candidate is
            treated as a continuation of the previous one.
        suspicious_time_thresh: like ``abnormal_duration_thresh`` but for the
            earlier "suspicious" stage.
        verbose: print progress information.

    returns:
        ``None`` when no anomaly was recorded, otherwise a dict with the keys
        ``"start_time"`` and ``"score"``.
    """

    # Read result data
    fbf_bbox_df = pd.read_csv(frame_by_frame_results_path, header=None, 
                          names=["frame", "NA", "x1", "y1", "w", "h", "score", "NA1", "NA2", "NA3"])
    fbf_bbox_df["x2"] = fbf_bbox_df["x1"] + fbf_bbox_df["w"]
    fbf_bbox_df["y2"] = fbf_bbox_df["y1"] + fbf_bbox_df["h"]

    # Frame numbers that have frame-by-frame detections. `value in series`
    # checks the Series *index labels*, not its values, so membership has to
    # be tested against the values explicitly.
    fbf_frames = set(fbf_bbox_df["frame"])

    static_detection_json = parse_json(read_json(static_results_path))

    # Get video data
    vid = VideoReader(video_path)
    num_frames, framerate, image_shape = vid.nframes, vid.framerate, vid.img_shape

    # load model
    reid_model = ReidExtractor(reid_model_name, reid_model_path)

    # Set up information matrices
    h, w, _ = image_shape

    if ignore_matrix_path is None:
        ignore_matrix = np.ones((h, w), dtype=bool) # Dont ignore anything
    else:
        ignore_matrix = np.load(ignore_matrix_path).astype(bool)

    count_matrix = np.zeros((h,w))            # per pixel: number of sampled frames with a detection
    undetect_count_matrix = np.zeros((h,w))   # per pixel: consecutive sampled frames without a detection
    start_time_matrix = np.zeros((h,w))       # per pixel: frame of the first counted detection
    end_time_matrix = np.zeros((h,w))         # per pixel: frame of the latest update while counted
    score_matrix = np.zeros((h,w))            # per pixel: running sum of detection scores
    state_matrix = np.zeros((h,w), dtype=bool)      # State matrix, 0/1 distinguishes suspicious candidate states

    if verbose:
        print(f"total frames: {num_frames}, framerate: {framerate}, height: {h}, width: {w}")
        print("-------------------------")


    ### Main loop
    start = False       # True while a confirmed anomaly is open
    tmp_start = False   # True while a suspicious candidate is open
    all_results=[]
    anomoly_now ={}
    for frame in range(start_frame, num_frames, frame_interval):
        # create tmp_score, tmp_detect
        if frame in static_detection_json:
            tmp_score, tmp_detect = add_boxes(static_detection_json[frame], ignore_matrix, score_thresh)

        else:
            tmp_score = np.zeros((h,w))
            tmp_detect = np.zeros((h,w), dtype=bool)


        if verbose:
            print(f"frame: {frame}")

            if frame in static_detection_json:
                print("\tboxes:", len(static_detection_json[frame]))

        score_matrix += tmp_score # add running totals
        count_matrix += tmp_detect


        # Update detection matrices
        undetect_count_matrix += ~ tmp_detect
        undetect_count_matrix[tmp_detect] = 0

        # Update time matrices
        start_time_matrix[count_matrix == 1] = -600 if frame == 1 else frame # why -600 for frame 1?
        end_time_matrix[count_matrix > 0] = frame

        #Update state matrices
        state_matrix[count_matrix > detect_thresh] = True

        # Detect anomaly: duration (in frames) of every candidate pixel, and
        # the pixel that has been a candidate the longest.
        time_delay = mask(end_time_matrix - start_time_matrix, state_matrix)
        delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)
        max_delay = time_delay.max()

        if not start and max_delay > abnormal_duration_thresh * framerate: #and score_matrix[delay_max_idx]/count_matrix[delay_max_idx]>0.8:

            # backtrack the start time
            time_frame = int(start_time_matrix[delay_max_idx] / 5) * 5 + 1 # why 5s and 1?

            G = np.where(count_matrix < count_matrix[delay_max_idx] - 2, 0, 1) # What does G represent?, why -2?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomoly_now and (time_frame / framerate - anomoly_now['end_time']) < 30: # why 30?
                f1_frame_num = max(1, anomoly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = get_similarity(reid_model, vid.get_frame(f1_frame_num), vid.get_frame(f2_frame_num), anomoly_now["region"], region)

                if similarity > similarity_thresh:
                    # Same vehicle as the previous candidate: continue from its start time
                    time_frame = int(anomoly_now['start_time'] * framerate / 5) * 5 + 1 # why 5s and 1?
                else:
                    anomoly_now['region'] = region

            else: 
                anomoly_now['region'] = region


            # IoU stuff: walk backwards in steps of 5 frames while the
            # frame-by-frame detections still overlap the anomaly region.
            max_iou = 1
            count = 1
            start_time = time_frame
            tmp_len = 1
            raio = 1
            while (max_iou > 0.1 or tmp_len < 40 or raio > 0.6) and time_frame > 1: # why 0.1, 40, 0.6?
                raio = count / tmp_len

                if time_frame in fbf_frames:
                    bboxes = fbf_bbox_df.loc[fbf_bbox_df.frame == time_frame, ["x1", "y1", "x2", "y2", "score"]].values
                    max_iou = utils.compute_iou(anomoly_now['region'], bboxes)

                else:
                    max_iou = 0

                time_frame -= 5 # why 5?
                if max_iou > 0.3: # why 0.3?
                    count += 1
                    if max_iou > 0.5: # why 0.5?
                        start_time = time_frame

                tmp_len += 1


            # back track start_time, until brightness at that spot falls below a threshold
            for time_frame in range(start_time, 1, -5):
                tmp_im = vid.get_frame(time_frame)
                if utils.compute_brightness(tmp_im[region[1]:region[3], region[0]:region[2]]) <= light_thresh:
                    break

                start_time = time_frame


            anomoly_now['start_time'] = max(0, start_time / framerate)
            anomoly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)
            start = True



        elif not tmp_start and max_delay > suspicious_time_thresh * framerate:
            time_frame = start_time_matrix[delay_max_idx]

            G = np.where(count_matrix < count_matrix[delay_max_idx] - 2, 0, 1) # what does G represent?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomoly_now and (time_frame / framerate - anomoly_now['end_time']) < 30: # why 30?
                f1_frame_num = max(1, anomoly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = get_similarity(reid_model, vid.get_frame(f1_frame_num), vid.get_frame(f2_frame_num), anomoly_now["region"], region)

                if similarity > similarity_thresh:
                    time_frame = int(anomoly_now['start_time'] * framerate / 5) * 5 + 1
                    region = anomoly_now['region']

            anomoly_now['region'] = region
            anomoly_now['start_time'] = max(0, time_frame / framerate)
            anomoly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)

            tmp_start = True


        if start and max_delay > abnormal_duration_thresh * framerate:

            # Close the anomaly once its pixel has gone undetected long enough
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomoly_score = score_matrix[delay_max_idx] / count_matrix[delay_max_idx]

                if anomoly_score > anomoly_score_thresh:
                    anomoly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomoly_now['score'] = anomoly_score

                    all_results.append(anomoly_now)
                    anomoly_now = {}

                start = False


        elif tmp_start and max_delay > suspicious_time_thresh * framerate:
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:

                anomoly_score = score_matrix[delay_max_idx] / count_matrix[delay_max_idx]
                if anomoly_score > anomoly_score_thresh:
                    anomoly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomoly_now['score'] = anomoly_score

                tmp_start = False

        # undetect matrix change state_matrix
        state_matrix[undetect_count_matrix > undetect_thresh] = False
        undetect_count_matrix[undetect_count_matrix > undetect_thresh] = 0

        # update matrix: keep counts/scores only for pixels detected in this
        # frame or still in candidate state, reset all others to zero
        tmp_detect |= state_matrix
        count_matrix = mask(count_matrix, tmp_detect)
        score_matrix = mask(score_matrix, tmp_detect)


    # Add all anomolies to the results list: an anomaly that is still open
    # when the video ends is closed here. `start` can only be True if the loop
    # ran, so `max_delay` and `delay_max_idx` are defined whenever they are read.
    if start and max_delay > abnormal_duration_thresh * framerate:
        anomoly_score = score_matrix[delay_max_idx] / count_matrix[delay_max_idx]
        if anomoly_score > anomoly_score_thresh:
            anomoly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
            anomoly_now['score'] = anomoly_score

            all_results.append(anomoly_now)
            anomoly_now = {}
            start = False


    # Apply Non-Maximal Supression to the results
    if all_results:
        nms_out = utils.anomely_nms(all_results)

        # Report the earliest start time found among the NMS results.
        final_result = {'start_time': 892, 'score': 0} # why 892?
        # NOTE(review): column 5 is presumably the start time of each NMS entry -- confirm in utils.anomely_nms
        for nms_start_time in nms_out[:, 5]: # why 5?
            if nms_start_time < final_result["start_time"]:
                final_result["start_time"] = max(0, int(nms_start_time - 1))
                final_result["score"] = 1

        return final_result

    return None

---

In [16]:
# Which video to process and where the raw videos live
video_id = 2
video_folder = "/data/aicity/test/"

# Re-ID network: checkpoint file and architecture name
reid_model_path = "/data/modules/AICity2019_winner/models/reid/reid.pth"
reid_model_name = "resnet50"

# Precomputed detection outputs for the test videos
# static_img_folder = "/data/aicity/winner_team/processed_images/test"
frame_by_frame_folder = "/data/aicity/winner_team/detection_results/test_framebyframe"
ignore_matrix_folder = "/data/aicity/winner_team/detection_results/test_seg_masks"
static_results_folder = "/data/aicity/winner_team/detection_results/test_static" # todo change this

In [17]:
# Per-video input files, all derived from video_id
ignore_matrix_path = os.path.join(ignore_matrix_folder, f"{video_id}.npy")
fbf_results_path = os.path.join(frame_by_frame_folder, f"video{video_id}.txt")
static_results_path = os.path.join(static_results_folder, f"video{video_id}.json")
video_path = os.path.join(video_folder, f"{video_id}.mp4")

In [22]:
# Run the detector on the selected video with progress output enabled.
# NOTE(review): ignore_matrix_path is passed as None here, so no ignore mask
# is applied for this run -- confirm that this is intended.
get_anomolies(
    video_path,
    reid_model_path,
    fbf_results_path,
    static_results_path,
    ignore_matrix_path=None,
    abnormal_duration_thresh=60,
    similarity_thresh=0.95,
    verbose=True,
)

total frames: 26760, framerate: 30.0, height: 410, width: 800
-------------------------
frame: 1
frame: 21
	boxes: 10
frame: 41
	boxes: 15
frame: 61
	boxes: 18
frame: 81
	boxes: 23
frame: 101
	boxes: 25
frame: 121
	boxes: 30
frame: 141
	boxes: 28
frame: 161
	boxes: 33
frame: 181
	boxes: 22
frame: 201
	boxes: 33
frame: 221
	boxes: 26
frame: 241
	boxes: 20
frame: 261
	boxes: 14
frame: 281
	boxes: 2
frame: 301
	boxes: 6
frame: 321
	boxes: 1
frame: 341
frame: 361
frame: 381
frame: 401
frame: 421
frame: 441
frame: 461
frame: 481
frame: 501
frame: 521
frame: 541
frame: 561
frame: 581
frame: 601
frame: 621
frame: 641
frame: 661
frame: 681
frame: 701
frame: 721
frame: 741
frame: 761
frame: 781
frame: 801
frame: 821
frame: 841
frame: 861
frame: 881
frame: 901
	boxes: 1
frame: 921
	boxes: 1
frame: 941
	boxes: 2
frame: 961
	boxes: 2
frame: 981
	boxes: 2
frame: 1001
	boxes: 1
frame: 1021
	boxes: 1
frame: 1041
	boxes: 2
frame: 1061
	boxes: 2
frame: 1081
	boxes: 2
frame: 1101
	boxes: 1
frame: 1121
	

frame: 11401
frame: 11421
frame: 11441
	boxes: 1
frame: 11461
	boxes: 1
frame: 11481
	boxes: 1
frame: 11501
	boxes: 1
frame: 11521
	boxes: 1
frame: 11541
	boxes: 2
frame: 11561
	boxes: 1
frame: 11581
	boxes: 1
frame: 11601
	boxes: 1
frame: 11621
	boxes: 1
frame: 11641
	boxes: 1
frame: 11661
frame: 11681
frame: 11701
frame: 11721
	boxes: 1
frame: 11741
	boxes: 1
frame: 11761
frame: 11781
frame: 11801
frame: 11821
frame: 11841
frame: 11861
frame: 11881
frame: 11901
frame: 11921
frame: 11941
frame: 11961
frame: 11981
frame: 12001
frame: 12021
frame: 12041
frame: 12061
frame: 12081
frame: 12101
frame: 12121
frame: 12141
frame: 12161
frame: 12181
frame: 12201
frame: 12221
frame: 12241
frame: 12261
frame: 12281
frame: 12301
frame: 12321
frame: 12341
frame: 12361
frame: 12381
frame: 12401
frame: 12421
frame: 12441
frame: 12461
frame: 12481
frame: 12501
frame: 12521
frame: 12541
frame: 12561
frame: 12581
frame: 12601
frame: 12621
frame: 12641
frame: 12661
frame: 12681
frame: 12701
frame: 12721

frame: 20881
	boxes: 1
frame: 20901
frame: 20921
frame: 20941
frame: 20961
frame: 20981
frame: 21001
frame: 21021
	boxes: 5
frame: 21041
	boxes: 4
frame: 21061
	boxes: 5
frame: 21081
	boxes: 5
frame: 21101
	boxes: 8
frame: 21121
	boxes: 6
frame: 21141
	boxes: 11
frame: 21161
	boxes: 9
frame: 21181
	boxes: 9
frame: 21201
	boxes: 13
frame: 21221
	boxes: 8
frame: 21241
	boxes: 10
frame: 21261
	boxes: 11
frame: 21281
	boxes: 12
frame: 21301
	boxes: 11
frame: 21321
	boxes: 12
frame: 21341
	boxes: 14
frame: 21361
	boxes: 8
frame: 21381
	boxes: 11
frame: 21401
	boxes: 12
frame: 21421
	boxes: 11
frame: 21441
	boxes: 11
frame: 21461
	boxes: 15
frame: 21481
	boxes: 13
frame: 21501
	boxes: 10
frame: 21521
	boxes: 11
frame: 21541
	boxes: 7
frame: 21561
	boxes: 5
frame: 21581
	boxes: 5
frame: 21601
	boxes: 1
frame: 21621
frame: 21641
frame: 21661
frame: 21681
frame: 21701
frame: 21721
frame: 21741
frame: 21761
frame: 21781
frame: 21801
frame: 21821
frame: 21841
frame: 21861
frame: 21881
frame: 2190