## Tiled object detection from a video file with optional motion detection
This notebook is an example of how to use DeGirum PySDK to perform tiled object detection on a video stream read from a video file.
Each video frame is divided into overlapping tiles, each the size of the AI model input (to avoid resizing).
Object detection is performed on each tile, then the results from the different tiles are combined.
The annotated video is saved to a new file with the `_tiled_annotated` suffix.
If motion detection mode is turned on, areas with motion are detected in each frame, and only the tiles where
motion is detected are processed.

This script works with the following inference options:

1. [DeGirum Cloud Platform](https://cs.degirum.com),
1. DeGirum-hosted AI server node shared via Peer-to-Peer VPN,
1. AI server node hosted by you in your local network,
1. AI server running on your local machine,
1. DeGirum ORCA accelerator directly installed on your local machine.


To try different options, you just need to change the `inference_option` in the code below.

### Specify where you want to run your inferences and the video file name here

In [None]:
# number of the inference option to use; passed to mytools.connect_model_zoo() below
inference_option = 3  # <<< change it according to your needs selecting from the list in the header comment
# video file to process; the annotated copy is written next to it with the `_tiled_annotated` suffix
input_filename = "./images/TrafficHD.mp4" # video file to process
# name of the model to load from the model zoo
model_name = "yolo_v5s_coco--512x512_quant_n2x_orca_1" # model to use
# [horizontal, vertical] minimum tile overlap: element 0 is applied to tile width, element 1 to tile height
min_overlap_precent = [20,20] # minimum tile overlap (in percent of tile dimensions)
# only detections whose label is in this list are kept when tile results are combined
classes = ["car"] # list of classes to show
# when True, the first frame only seeds the motion detector and tiles without motion are skipped
do_motion_detection = True # enable motion detection: do inference only in tiles, where motion is detected

### The rest of the cells below should run without any modifications

In [None]:
import degirum as dg # import DeGirum PySDK
import mytools, cv2, math, threading, queue, numpy as np
from pathlib import Path
import IPython.display

In [None]:
# connect to model zoo according to selected inference option;
# the returned zoo object is used in the next cell to load the model by name
zoo = mytools.connect_model_zoo(inference_option)

In [None]:
# load object detection model (by the name configured in `model_name`) from the connected zoo
model = zoo.load_model(model_name)

# set model parameters
model.image_backend = 'opencv' # select OpenCV backend: needed to have overlay image in OpenCV format
model.input_numpy_colorspace = 'BGR' # frames are treated as BGR elsewhere in this notebook (see COLOR_BGR2GRAY in detectMotion)
model.overlay_show_probabilities = False # presumably hides detection scores on the overlay image -- confirm in PySDK docs
model.overlay_show_labels = False # presumably hides class labels on the overlay image -- confirm in PySDK docs
model.overlay_line_width = 1 # presumably the bounding box line width, in pixels -- confirm in PySDK docs
model.overlay_alpha = 1 # presumably fully opaque overlay drawing -- confirm in PySDK docs
# NOTE(review): this writes to a private attribute of the model object; presumably it makes
# input images be sent to the model in JPEG format -- confirm against PySDK documentation
model._model_parameters.InputImgFmt = ['JPEG']

In [None]:
# Find the areas of `img` that changed relative to `base_img`.
# Both the motion mask and the new base image are returned, so the caller can
# feed the base image back in on the next call.
def detectMotion(base_img, img):
    """Return a `(motion_mask, new_base_img)` tuple for the given frame.

    `img` is converted from BGR to grayscale and blurred with a 5x5 Gaussian
    kernel; that blurred image is always returned as the new base image.

    When `base_img` is None (no previous frame yet) the motion mask is None.
    Otherwise the mask is the absolute difference between `base_img` and the
    blurred frame, binarized (differences above 50 become 255, the rest 0)
    and then dilated.
    """

    blurred = cv2.GaussianBlur(
        src=cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), ksize=(5, 5), sigmaX=0
    )

    # nothing to compare against yet: just hand back the new base image
    if base_img is None:
        return None, blurred

    delta = cv2.absdiff(base_img, blurred)
    binary = cv2.threshold(delta, 50, 255, cv2.THRESH_BINARY)[1]
    motion_mask = cv2.dilate(binary, None)

    return motion_mask, blurred

In [None]:
# define source of tile frames to be used in batch predict
def source(stream, model, min_overlap_precent, progress):
    """Generate `(tile, info)` pairs for every tile of every frame read from `stream`.

    Parameters:
        stream: video stream object providing `get()` for frame properties and `read()` for frames
        model: model object; tile size is taken from `model.model_info.InputW[0]` / `InputH[0]`
        min_overlap_precent: two-element sequence with the minimum horizontal and vertical
            tile overlap, in percent of the tile dimensions; each value must be below 100
        progress: progress indicator; its `step()` method is called once per frame read

    Yields:
        `(tile, info)` where `tile` is a slice of the frame and `info` is a dict with keys
        `"first_tile"` (True for the first tile yielded for a frame), `"frame"` (the full frame),
        `"topleft"` (the `(x, y)` tile origin in the frame) and `"tilesize"` (`(tile_w, tile_h)`).

    Raises:
        ValueError: when a minimum overlap of 100 percent or more is requested.

    When the global `do_motion_detection` is true, the first frame only initializes the
    motion detector and yields nothing, and tiles with no motion pixels are skipped, so a
    frame without any motion yields no tiles at all.
    """

    tile_w, tile_h = model.model_info.InputW[0], model.model_info.InputH[0]
    image_w, image_h = int(stream.get(cv2.CAP_PROP_FRAME_WIDTH)), int(stream.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # function to calculate optimal overlap (0..1) and number of tiles
    def calc_overlap(tile_dim, image_dim, min_overlap_precent):
        if min_overlap_precent >= 100:
            # 100% overlap would mean tiles never advance (division by zero below)
            raise ValueError(f"minimum tile overlap must be below 100 percent, got {min_overlap_precent}")
        if image_dim <= tile_dim:
            # frame fits into a single tile: without this guard the formula below
            # divides by zero or produces a non-positive number of tiles
            return 0., 1
        tiles_less_one = math.ceil((image_dim - tile_dim) / (tile_dim * (1. - 0.01 * min_overlap_precent)))
        # the overlap is stretched so that the tiles exactly span the image dimension
        return 1. - (image_dim - tile_dim) / (tiles_less_one * tile_dim), tiles_less_one + 1

    x_overlap, x_tiles = calc_overlap(tile_w, image_w, min_overlap_precent[0])
    y_overlap, y_tiles = calc_overlap(tile_h, image_h, min_overlap_precent[1])
    print(f"Full frame: {image_w}x{image_h}, tile: {tile_w}x{tile_h}, overlap: {round(x_overlap*100)}x{round(y_overlap*100)}%, tiles: {x_tiles}x{y_tiles}={x_tiles*y_tiles}")

    base_img = None # base image for motion detection

    while True:
        ret, frame = stream.read()
        if not ret:
            break

        progress.step()

        # the first tile actually yielded for a frame is flagged, so that the consumer
        # can tell where a new frame starts
        first_tile = True

        if do_motion_detection:
            motion_img, base_img = detectMotion(base_img, frame)
            if motion_img is None:
                # very first frame: there is no base image to compare against yet
                continue

        # loop over tiles
        for xi in range(x_tiles):
            for yi in range(y_tiles):
                x, y = math.floor(xi * tile_w * (1 - x_overlap)), math.floor(yi * tile_h * (1 - y_overlap))

                if do_motion_detection:
                    # skip tiles which have no motion pixels
                    if cv2.countNonZero(motion_img[y : y + tile_h, x : x + tile_w]) == 0:
                        continue

                tile = frame[y : y + tile_h, x : x + tile_w]
                info = { "first_tile": first_tile, "frame": frame, "topleft": (x, y), "tilesize": (tile_w, tile_h) }
                first_tile = False
                yield (tile, info)

In [None]:
# combine results of multiple tiles
def combine(combined_result, new_result, iou_threshold=0.5):
    """Merge the detections of one tile into the accumulated result of a frame.

    Parameters:
        combined_result: result accumulated so far for the current frame, or a falsy
            value when `new_result` belongs to the first tile of the frame
        new_result: inference result of a single tile; its `info` dict must contain
            `"topleft"` (tile origin in the frame) and `"frame"` (the full frame)
        iou_threshold: overlap ratio at or above which two boxes are treated as the same
            object; despite the name, the ratio is the value of `mytools.intersection`
            divided by the smaller of the two values returned by `mytools.area`

    Returns:
        The accumulated result: `new_result` itself for the first tile, otherwise
        `combined_result` updated in place.

    `new_result` is modified in place: detections whose label is not in the global
    `classes` list are dropped, and the remaining boxes are shifted by the tile origin.
    """

    # keep only detections of the requested classes
    wanted = []
    for det in new_result._inference_results:
        if det["label"] in classes:
            wanted.append(det)
    new_result._inference_results = wanted

    # shift tile-local boxes by the tile origin to get full-frame coordinates
    origin = new_result.info["topleft"]
    for det in new_result._inference_results:
        shifted = np.array(det["bbox"]) + (origin + origin)
        det["bbox"] = list(shifted)

    # first tile of the frame: it becomes the accumulator, showing the full frame
    if not combined_result:
        new_result._input_image = new_result.info["frame"]
        return new_result

    # subsequent tile: fold its boxes into the accumulated ones
    for candidate in new_result._inference_results:
        is_duplicate = False
        for kept in combined_result._inference_results:
            pair = np.array([candidate["bbox"], kept["bbox"]])
            pair_areas = mytools.area(pair)
            shared = mytools.intersection(pair[0], pair[1])
            if shared / min(pair_areas) >= iou_threshold:
                is_duplicate = True
                # same object seen in two tiles: keep the bigger of the two boxes
                if pair_areas[0] > pair_areas[1]:
                    kept["bbox"] = candidate["bbox"]
                break
        if not is_duplicate:
            # no sufficiently overlapping box found: this is a new object
            combined_result._inference_results.append(candidate)

    return combined_result

In [None]:
# the annotated video is written next to the original one, with the `_tiled_annotated` suffix
orig_path = Path(input_filename)
ann_path = orig_path.with_name(orig_path.stem + "_tiled_annotated" + orig_path.suffix)
abort = False # set to True by the worker thread to ask the inference loop to stop

# AI prediction loop
# Press 'x' or 'q' to stop
with mytools.Display("Tiled Detectoon", not do_motion_detection) as display, \
     mytools.open_video_stream(input_filename) as stream, \
     mytools.open_video_writer(str(ann_path), stream.get(cv2.CAP_PROP_FRAME_WIDTH), stream.get(cv2.CAP_PROP_FRAME_HEIGHT)) as writer:

    # do image processing in separate thread to improve performance
    result_queue = queue.Queue()

    def worker():
        """Write and display combined frame results taken from `result_queue` until None is received."""
        global abort
        try:
            while True:
                result = result_queue.get()
                if result is None:
                    break
                img = result.image_overlay
                writer.write(img)

                # the caption is added after the frame is written to the file
                if do_motion_detection:
                    mytools.Display.put_text(img,
                        f"Motion tiles: {result.info['tiles_cnt']:2d}", (0, 0), (0, 0, 0), (255, 255, 255))
                display.show(img)
        except KeyboardInterrupt:
            # NOTE(review): presumably raised by display.show() when 'x' or 'q' is pressed -- confirm in mytools
            abort = True

    worker_thread = threading.Thread(target=worker)
    worker_thread.start()

    try:
        progress = mytools.Progress(int(stream.get(cv2.CAP_PROP_FRAME_COUNT)))
        combined_result = None
        tiles_cnt = 0

        # inference loop
        for res in model.predict_batch(source(stream, model, min_overlap_precent, progress)):
            if res.info["first_tile"] and combined_result: # new frame started
                combined_result.info["tiles_cnt"] = tiles_cnt
                result_queue.put(combined_result)
                combined_result = None
                tiles_cnt = 0

            combined_result = combine(combined_result, res)
            tiles_cnt += 1
            if abort:
                break

        # a frame is only queued when the next one starts, so the last frame
        # has to be flushed explicitly once the stream is exhausted
        if combined_result and not abort:
            combined_result.info["tiles_cnt"] = tiles_cnt
            result_queue.put(combined_result)
    finally:
        # always stop the worker thread, even when inference fails, and wait for it to
        # drain the queue before the `with` block closes the display and the video writer
        result_queue.put(None)
        worker_thread.join()

In [None]:
# display result: embed the annotated video written to `ann_path` by the prediction loop
IPython.display.Video(ann_path)

In [None]:
# display original video (the unmodified input file) for comparison
IPython.display.Video(orig_path)