# Intrusion Detection Computer Vision System

## Task 1 (Mandatory):
### Graphical Output
For each frame of the input video, the system must show the detected blobs (either by coloring them on a black background or by drawing their contours over the original video).
### Text Output
For each frame, print the number of detected objects and, for each blob, the value of every feature and its classification as person or other.

## Task 2 (Optional)
Develop an algorithm to distinguish between true objects and the removal of a previously present one.

## Video Characteristics
- 12 fps
- ~41s
- 320x240 pixels
- 8 bit/pixel (256 gray levels)

In [None]:
# Imports

import cv2
import numpy as np
from matplotlib import pyplot as plt
from IPython import display

In [None]:
# Global variables

input_video_path = "rilevamento-intrusioni-video.avi"
output_video_path = "test.avi"

In [None]:
# Video helper functions

def play_video(video_path):
    '''
        Plays the video found in video_path frame by frame in the notebook cell output.

        Each decoded frame is drawn with matplotlib and the cell output is cleared as soon as
        the next frame is available. Playback stops when the stream ends or when the kernel is
        interrupted (KeyboardInterrupt); in both cases the capture is released.

        video_path: path of the video file handed to cv2.VideoCapture
    '''
    cap = cv2.VideoCapture(video_path)

    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # Display frame
            # NOTE: OpenCV decodes frames as BGR while imshow expects RGB; this is only
            # harmless for gray-level videos, where the three channels are equal.
            plt.axis('off')
            plt.imshow(frame)
            plt.show()

            # Clear cell output when new frame is available
            display.clear_output(wait=True)
    except KeyboardInterrupt:
        # Interrupting the kernel is the expected way to stop playback early
        pass
    finally:
        # Single release point: also runs when displaying a frame raises
        cap.release()
        print("Released Video Resource")

def edit_video(input_video_path, output_video_path, frame_transformation):
    '''
        Applies frame_transformation function to each frame taken from input_video_path and saves the result in
        output_video_path

        input_video_path: path of the video to read
        output_video_path: path of the video to write (same size and fps as the input, DIVX codec)
        frame_transformation: callable receiving one frame and returning the frame to be written

        The capture and the writer are always released, even if frame_transformation raises.
    '''
    cap = cv2.VideoCapture(input_video_path)
    # Reuse the shared helper so the codec/size/fps setup lives in a single place
    out = create_output_stream(cap, output_video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                print("Can't receive frame (stream end?). Exiting ...")
                break
            # write the updated frame
            out.write(frame_transformation(frame))
    finally:
        cap.release()
        out.release()
    
def create_output_stream(cap, output_video_path):
    '''
        Builds a cv2.VideoWriter that writes to output_video_path with the DIVX codec, using the
        frame width, frame height and fps reported by cap.

        No frame is read from cap or written to the output here; the caller writes the frames
        and releases the returned writer.
    '''
    # Frame geometry and rate are copied from the source stream
    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    frame_rate = cap.get(cv2.CAP_PROP_FPS)

    codec = cv2.VideoWriter_fourcc(*'DIVX')
    return cv2.VideoWriter(output_video_path, codec, frame_rate, frame_size)

In [None]:
# Frame editing functions

def binarize_frame(frame, threshold):
    '''
        Returns a float array with the shape of frame holding 255 where the pixel value is
        strictly below threshold and 0 everywhere else.
    '''
    below_threshold = frame < threshold
    # bool * float gives a float64 array containing only 0.0 and 255.0
    return below_threshold * 255.0

def binarize_bool_frame(mask):
    '''
        Turns a boolean mask into an 8-bit image of the same shape: 255 where the mask is set,
        0 elsewhere.
    '''
    image = np.zeros(mask.shape, dtype=np.uint8)
    image[mask] = 255
    return image

def gaussian_filter(frame, sigma=1.5, k_size=None):
    '''
        Blurs frame with a square Gaussian kernel of standard deviation sigma.

        Higher sigmas should correspond to larger kernels: when k_size is not given, the kernel
        side is derived from sigma with the rule of thumb 2 * ceil(3 * sigma) + 1.
    '''
    side = k_size
    if side is None:
        side = int(2 * np.ceil(3 * sigma) + 1)

    return cv2.GaussianBlur(frame, (side, side), sigma)

In [None]:
# Distance functions

def manhattan_distance(img1, img2):
    '''
        Per-pixel L1 distance: sum over the last axis of |img2 - img1|.

        Both images are converted to float before subtracting, so unsigned 8-bit frames
        do not wrap around when img1 is brighter than img2.
    '''
    diff = np.asarray(img2, dtype=float) - np.asarray(img1, dtype=float)
    return np.sum(np.abs(diff), axis=-1)

def euclidean_distance(img1, img2):
    '''
        Per-pixel L2 distance: square root of the sum over the last axis of (img2 - img1)^2.

        Both images are converted to float before subtracting, so neither the difference nor
        its square can overflow when the inputs are unsigned 8-bit frames.
    '''
    diff = np.asarray(img2, dtype=float) - np.asarray(img1, dtype=float)
    return np.sqrt(np.sum(diff ** 2, axis=-1))

def maximum_distance(img1, img2):
    '''
        Per-pixel maximum (Chebyshev) distance: largest |img2 - img1| over the last axis.

        The absolute value makes the measure symmetric, so a pixel that got darker counts as
        much as one that got brighter. Both images are converted to float before subtracting
        to avoid wrap-around on unsigned 8-bit frames.
    '''
    diff = np.asarray(img2, dtype=float) - np.asarray(img1, dtype=float)
    return np.max(np.abs(diff), axis=-1)

In [None]:
# Change detection functions

def two_frame_difference(prev_frame, curr_frame, d_func, threshold):
    '''
        Boolean change mask between two frames: True where the distance computed by d_func
        is strictly greater than threshold.

        prev_frame, curr_frame: frames to compare
        d_func: distance function called as d_func(curr_frame, prev_frame)
        threshold: minimum distance (exclusive) for a pixel to be marked as changed

        Frames are converted to float first (as background_subtraction does), so d_func never
        subtracts unsigned 8-bit values, which would wrap around instead of going negative.
    '''
    prev_frame = np.asarray(prev_frame, dtype=float)
    curr_frame = np.asarray(curr_frame, dtype=float)
    return d_func(curr_frame, prev_frame) > threshold

def three_frame_difference(prev_frame, curr_frame, next_frame, d_func, threshold):
    '''
        Boolean change mask for curr_frame: a pixel is set only when it differs both between
        prev_frame and curr_frame and between curr_frame and next_frame (each difference is
        computed by two_frame_difference with the given d_func and threshold).
    '''
    changed_since_prev = two_frame_difference(prev_frame, curr_frame, d_func, threshold)
    changed_until_next = two_frame_difference(curr_frame, next_frame, d_func, threshold)
    return np.logical_and(changed_since_prev, changed_until_next)

def background_subtraction(frame, background, d_func, threshold):
    '''
        Boolean foreground mask: True where the distance d_func(frame, background) is strictly
        greater than threshold. The frame is converted to float before the comparison; the
        background is used as given.
    '''
    distance = d_func(frame.astype(float), background)
    return distance > threshold

def background_initialization(cap, interpolation, n=100):
    '''
        Estimates the background image from the first frames of a video.

        cap: opened capture to read from; it is always released before returning or raising
        interpolation: reduction called as interpolation(stack, axis=0) on the stacked frames
                       (e.g. np.mean or np.median)
        n: maximum number of initial frames to use (fewer are used if the stream ends earlier)

        Returns the result of interpolation over the collected frames (converted to float).
        Raises ValueError when no frame could be read.
    '''
    frames = []
    try:
        # Collect up to the first n frames
        while cap.isOpened() and len(frames) < n:
            ret, frame = cap.read()
            if not ret or frame is None:
                break
            frames.append(frame.astype(float))
    finally:
        cap.release()

    if not frames:
        # Fail with an explicit message instead of the generic np.stack error on an empty list
        raise ValueError(
            "Cannot initialize the background: no frames could be read "
            "(unreadable video or n <= 0)"
        )

    return interpolation(np.stack(frames, axis=0), axis=0)

In [None]:
# Binary Morphology Operations

def binary_morphology(mask):
    '''
        Cleans a boolean mask with a 5x5 all-ones structuring element by applying, in order,
        an opening, a closing and a second opening. Returns a boolean mask.
    '''
    structuring_element = np.ones((5, 5), np.uint8)
    image = mask.astype(np.uint8)

    # Each step works on the output of the previous one
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        image = cv2.morphologyEx(image, operation, structuring_element)

    return image.astype(bool)

In [None]:
# Parameters Tuning

# Parameter grid explored by the tuning run
thresholds = [20, 30, 40]
distance_functions = [manhattan_distance, euclidean_distance, maximum_distance]
# To also try Gaussian pre-filtering use: filter_functions = [(gaussian_filter, [1.5]), None]
filter_functions = [None]
binary_morphology_operations = [binary_morphology]
output_dir = "output/"

# One background per (frame count, interpolation) pair, labelled "<interpolation>_<frames>_"
b_frames = [80, 100, 120]
b_interpolations = [np.mean, np.median]
backgrounds = []
for frame_count in b_frames:
    for interpolate in b_interpolations:
        # background_initialization releases the capture, so a fresh one is opened each time
        capture = cv2.VideoCapture(input_video_path)
        background = background_initialization(capture, interpolate, frame_count)
        label = interpolate.__name__ + "_" + str(frame_count) + "_"
        backgrounds.append((background, label))

def file_name(output_dir, t, d_func, f_func, f_param, b_name):
    '''
        Builds the output video path for one parameter combination:
        output_dir + b_name + threshold + "_" + distance name, followed by
        "_" + filter name + "_" + filter parameter when a filter is used, and ".avi".
    '''
    filter_suffix = "" if f_func is None else f"_{f_func.__name__}_{f_param}"
    return f"{output_dir}{b_name}{t}_{d_func.__name__}{filter_suffix}.avi"

import os


def _write_mask_video(output_path, background, d_func, threshold, b_func, f_func=None, f_param=None):
    '''
        Runs background subtraction on every frame of input_video_path and saves the cleaned
        foreground masks as a video in output_path.

        output_path: path of the video to write
        background: background image compared against each frame
        d_func: distance function passed to background_subtraction
        threshold: distance threshold passed to background_subtraction
        b_func: binary morphology operation applied to each boolean mask
        f_func, f_param: optional filter, called as f_func(image, f_param) on both the
                         background and each frame before the subtraction

        The capture and the writer are always released, even if a step raises.
    '''
    if f_func is not None:
        # The filtered background never changes, so compute it once instead of once per frame
        background = f_func(background, f_param)

    cap = cv2.VideoCapture(input_video_path)
    out = create_output_stream(cap, output_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                print("Can't receive frame (stream end?). Exiting ...")
                break
            if f_func is not None:
                frame = f_func(frame, f_param)

            mask = background_subtraction(frame, background, d_func, threshold)
            mask = b_func(mask)
            mask = binarize_bool_frame(mask)
            # Replicate the gray mask on three channels (VideoWriter defaults to color frames)
            mask = np.tile(mask[:, :, np.newaxis], 3)
            out.write(mask)
    finally:
        out.release()
        cap.release()


# cv2.VideoWriter does not create missing directories: without this the writers would
# fail to open and no output file would be produced.
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Produce one output video for every combination of the tuning parameters
for b_image, b_name in backgrounds:
    for t in thresholds:
        for d_func in distance_functions:
            for filter_action in filter_functions:
                if filter_action is not None:
                    f_func, f_params = filter_action
                else:
                    f_func, f_params = None, [None]

                for f_param in f_params:
                    for b_func in binary_morphology_operations:
                        _write_mask_video(
                            file_name(output_dir, t, d_func, f_func, f_param, b_name),
                            b_image, d_func, t, b_func, f_func, f_param,
                        )

print("Finished!")