## A 30-fps detection + tracking pipeline
* @author Dmytro Kuzmenko
* 1 detection / second
* 29 tracking / second
* Exit zones tracking, package ID and status tracking
* Extra 'Receipt detection' module

In [1]:
import glob
import re
import time

import pandas as pd

import tensorflow as tf
import numpy as np
import cv2
import pytesseract

import glob
import os
import itertools

from scipy import spatial
from shapely.geometry import Polygon

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Location of the exported detector (.tflite file)
model_path = './model-export_model_5.tflite'

# Build the TFLite interpreter for the detector and reserve its tensors.
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Tensor metadata, used later to feed frames in and to read detections out.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Report name / shape / dtype of the first input and the first output tensor.
for _heading, _details in (("== Input details ==", input_details),
                           ("\n== Output details ==", output_details)):
    print(_heading)
    print("name:", _details[0]['name'])
    print("shape:", _details[0]['shape'])
    print("type:", _details[0]['dtype'])

== Input details ==
name: normalized_input_image_tensor
shape: [  1 512 512   3]
type: <class 'numpy.uint8'>

== Output details ==
name: TFLite_Detection_PostProcess
shape: [ 1 40  4]
type: <class 'numpy.float32'>


In [3]:
# constants
# Minimum score a detection needs to be kept by find_bags.
BAG_DETECTION_SCORE_THRESHOLD = 0.3
# Allowed range for (image area / contour area) of a receipt candidate.
MIN_AREA_PROPORTION = 4
MAX_AREA_PROPORTION = 27
# Upper bound for (bounding rectangle area / contour area) in shape_test.
BOUNDING_RECT_2_CONTOUR_RATIO = 2.5
# Upper bound for (rotated rectangle area / approximated polygon area) in shape_test.
ROTATED_RECT_2_APPROX_POLY_RATIO = 2
# Overlap threshold used by contour_intersect when merging contours.
INTERSECTION_PERCENT = 0.5
# Extra pixels added around a contour's bounding rectangle by crop_contour.
RECEIPT_PADDING_X = 0
RECEIPT_PADDING_Y = 0
# Minimum mean value every colour channel must reach in color_test.
COLOR_FILTER_VALUE = 90
# Regex searched for in OCR output: '#' followed by at least two digits.
# Raw string so that '\d' is not treated as an (invalid) string escape.
TEXT_PATTERN = r'#\d\d\d*'

## helper functions to process bounding boxes

In [4]:
def convert_res_into_opposite_points(box, height, width):
    """
        Scale a detector box to pixel corner points.

        box - sequence read as (ymin, xmin, ymax, xmax); each value is
              multiplied by the frame height / width
        height, width - frame size in pixels

        returns - (xmin, ymin, xmax, ymax) as ints; the minimum corner is
                  clamped to at least 1 and the maximum corner to the frame size
    """
    rel_top, rel_left, rel_bottom, rel_right = box[0], box[1], box[2], box[3]

    left = int(max(1, rel_left * width))
    top = int(max(1, rel_top * height))
    right = int(min(width, rel_right * width))
    bottom = int(min(height, rel_bottom * height))

    return left, top, right, bottom

In [5]:
def convert_box_into_coords(box):
    """
        Expand an (x, y, width, height) box into its four corner points.

        box - sequence whose first four items are x, y, box width, box height,
              with (x, y) being the top-left corner in image coordinates
              (y grows downwards), as used by the draw/crop helpers in this file

        returns - list of four (x, y) tuples walking around the rectangle:
                  top-left, bottom-left, bottom-right, top-right
    """
    x1, y1, box_width, box_height = box[0], box[1], box[2], box[3]

    # The box covers y1 .. y1 + box_height. The previous version used
    # y1 - box_height, which described a rectangle sitting *above* the box.
    y2 = y1 + box_height
    x2 = x1 + box_width

    return [(x1, y1), (x1, y2), (x2, y2), (x2, y1)]

In [6]:
def convert_opposite_points_into_normal(box):
    """
        Turn a corner-point box (x1, y1, x2, y2) into [x1, y1, width, height].

        box - sequence whose first four items are x1, y1, x2, y2

        returns - list [x1, y1, x2 - x1, y2 - y1]
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]

    return [left, top, right - left, bottom - top]

In [7]:
def convert_res_into_bbox(box, height, width):
    """
        Scale a detector box to a pixel (x, y, width, height) box.

        box - sequence read as (ymin, xmin, ymax, xmax); each value is
              multiplied by the frame height / width
        height, width - frame size in pixels

        returns - (xmin, ymin, box_width, box_height) as ints, computed from
                  corners clamped to [1, frame size]
    """
    rel_top, rel_left, rel_bottom, rel_right = box[0], box[1], box[2], box[3]

    left = int(max(1, rel_left * width))
    top = int(max(1, rel_top * height))
    right = int(min(width, rel_right * width))
    bottom = int(min(height, rel_bottom * height))

    return left, top, right - left, bottom - top

## Box draw functions

In [8]:
def draw_boxes(ID, bbox, current_frame, status):
    """
        Draw a labelled rectangle for one package onto current_frame (in place).

        ID - package identifier, shown in the label
        bbox - (x, y, width, height); values are truncated to int
        current_frame - image to draw on
        status - status text appended to the label
    """
    left, top, box_w, box_h = (int(bbox[k]) for k in range(4))
    caption = ' - '.join((str(ID), status))

    # White caption just inside the top-left corner, then the box outline.
    cv2.putText(current_frame, caption, (left + 15, top + 30),
                cv2.FONT_HERSHEY_PLAIN, 1.7, (255, 255, 255), 2)
    cv2.rectangle(current_frame, (left, top), (left + box_w, top + box_h), (10, 220, 0), 3)

In [9]:
def draw_temporary_boxes(ID, bbox, current_frame, status):
    """
        Draw a labelled rectangle onto current_frame (in place) using the
        (0, 10, 220) outline colour.

        ID - package identifier, shown in the label
        bbox - (x, y, width, height); values are truncated to int
        current_frame - image to draw on
        status - status text appended to the label
    """
    left, top, box_w, box_h = (int(bbox[k]) for k in range(4))
    caption = ' - '.join((str(ID), status))

    # White caption just inside the top-left corner, then the box outline.
    cv2.putText(current_frame, caption, (left + 15, top + 30),
                cv2.FONT_HERSHEY_PLAIN, 1.7, (255, 255, 255), 2)
    cv2.rectangle(current_frame, (left, top), (left + box_w, top + box_h), (0, 10, 220), 3)

In [10]:
def draw_tracked_boxes(ID, bbox, current_frame, draw_temporary_boxes):
    """
        Draw a labelled rectangle onto current_frame (in place) using the
        (220, 10, 0) outline colour.

        ID - package identifier, shown in the label
        bbox - (x, y, width, height); values are truncated to int
        current_frame - image to draw on
        draw_temporary_boxes - status text appended to the label. The parameter
            name is kept for backward compatibility; it occupies the slot that
            `status` has in the sibling draw functions.
    """
    # The previous body read an undefined name `status`, which only worked if
    # a global of that name happened to exist. Use the fourth argument instead.
    status = str(draw_temporary_boxes)

    x, y, w, h = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    label = str(ID) + ' - ' + status
    cv2.putText(current_frame, label, (x + 15, y + 30), cv2.FONT_HERSHEY_PLAIN, 1.7, (255, 255, 255), 2)
    cv2.rectangle(current_frame, (x, y), (x + w, y + h), (220, 10, 0), 3)

## Additional utils

In [11]:
def cut_box(height, width, box, img):
    """
        Crop the region described by a detector box out of img.

        height, width - frame size in pixels used to scale the box
        box - sequence read as (ymin, xmin, ymax, xmax); each value is
              multiplied by height / width
        img - image indexed as img[rows, cols]

        returns - img[ymin:ymax, xmin:xmax] with the start clamped to at least 1
                  and the end clamped to the frame size
    """
    rel_top, rel_left, rel_bottom, rel_right = box[0], box[1], box[2], box[3]

    row_start = int(max(1, rel_top * height))
    col_start = int(max(1, rel_left * width))
    row_stop = int(min(height, rel_bottom * height))
    col_stop = int(min(width, rel_right * width))

    return img[row_start:row_stop, col_start:col_stop]

In [12]:
def resize_image(image, size=600):
    """
        Resize image so that its longer side becomes `size` pixels while the
        shorter side is scaled proportionally (truncated to int).

        image - array whose shape starts with (rows, cols)
        size - target length of the longer side

        returns - result of cv2.resize for the computed (width, height)
    """
    rows, cols = image.shape[0], image.shape[1]

    if rows > cols:
        # Portrait: height takes the full size, width shrinks.
        width = int(size * cols / rows)
    else:
        width = int(size)

    if rows < cols:
        # Landscape: width takes the full size, height shrinks.
        height = int(size * rows / cols)
    else:
        height = int(size)

    return cv2.resize(image, (width, height))

In [13]:
def rotate(image, degree):
    """
        Rotate image by `degree` around its centre, keeping the original
        width and height of the canvas.

        returns - the rotated image produced by cv2.warpAffine
    """
    rows, cols = image.shape[:2]
    centre = (cols // 2, rows // 2)
    rotation = cv2.getRotationMatrix2D(centre, degree, 1.0)

    return cv2.warpAffine(image, rotation, (cols, rows))

In [14]:
def contour_intersect(original_image, contour1, contour2):
    """
        Merge two contours when they overlap enough.

        original_image - image whose first two shape dimensions size the masks
        contour1, contour2 - contours to compare

        returns - [merged_contour] (the largest contour of the rasterised union)
                  when the intersection covers at least INTERSECTION_PERCENT of
                  the smaller contour, otherwise None
    """
    mask_shape = original_image.shape[0:2]
    image1 = cv2.fillPoly(np.zeros(mask_shape, dtype=np.uint8), pts=[contour1], color=255)
    image2 = cv2.fillPoly(np.zeros(mask_shape, dtype=np.uint8), pts=[contour2], color=255)

    union = cv2.bitwise_or(image1, image2)
    union_contours = cv2.findContours(union, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[0]

    area1 = cv2.contourArea(contour1)
    area2 = cv2.contourArea(contour2)
    union_area = sum(cv2.contourArea(contour) for contour in union_contours)
    # Inclusion-exclusion: |A ∩ B| = |A| + |B| - |A ∪ B|
    intersection_area = area1 + area2 - union_area

    smaller_area = min(area1, area2)
    if smaller_area <= 0:
        # Degenerate contour: no meaningful overlap ratio can be computed.
        return None

    # The ratio must be intersection / smaller area. The previous code divided
    # the other way round, which is >= 1 for any overlap and therefore passed
    # the threshold for every pair that touched at all.
    if intersection_area / smaller_area >= INTERSECTION_PERCENT:
        return [max(union_contours, key=cv2.contourArea)]

    return None

In [15]:
def filter_duplicate_contours(contours, img):
    """
        Repeatedly merge overlapping contours until no pair merges any more.

        contours - list of contours; it is modified in place
        img - image passed through to contour_intersect

        returns - the same list, with every merged pair replaced by the
                  contour returned from contour_intersect
    """
    merged_any = True
    while merged_any:
        merged_any = False
        # Visit every unordered pair (later, earlier) in index order.
        pairs = ((later, earlier)
                 for later in range(len(contours))
                 for earlier in range(later))
        for later, earlier in pairs:
            merged = contour_intersect(img, contours[later], contours[earlier])
            if merged:
                # Remove the higher index first so the lower one stays valid.
                del contours[later]
                del contours[earlier]
                contours.append(merged[0])
                merged_any = True
                break  # the list changed: restart the scan

    return contours

In [16]:
def crop_contour(image, contour):
    """
        Crop the bounding rectangle of contour out of image.

        The rectangle is grown by RECEIPT_PADDING_X / RECEIPT_PADDING_Y,
        clamped to the image, and then shrunk by 2 pixels on every side.

        returns - the cropped view of image
    """
    img_h, img_w = image.shape[:2]
    rect_x, rect_y, rect_w, rect_h = cv2.boundingRect(contour)

    row_start = max(rect_y - RECEIPT_PADDING_Y, 0) + 2
    row_stop = min(rect_y + rect_h + RECEIPT_PADDING_Y, img_h) - 2
    col_start = max(rect_x - RECEIPT_PADDING_X, 0) + 2
    col_stop = min(rect_x + rect_w + RECEIPT_PADDING_X, img_w) - 2

    return image[row_start:row_stop, col_start:col_stop]

In [17]:
def shape_test(img, contour, show = False):
    """
        Decide whether contour has a plausible receipt-like (quadrilateral) shape.

        img - image the contour was found in; drawn on when show is True
        contour - contour to test
        show - when True, print the intermediate ratios, draw the candidate
               shapes on img and display it in a window named 'shape_test'
               (the caller has to pump GUI events, e.g. with cv2.waitKey)

        returns - True when
                  * img area / contour area lies in
                    [MIN_AREA_PROPORTION, MAX_AREA_PROPORTION],
                  * bounding rect area / contour area is at most
                    BOUNDING_RECT_2_CONTOUR_RATIO,
                  * the contour can be approximated by a 4-point polygon, and
                  * rotated rect area / polygon area is at most
                    ROTATED_RECT_2_APPROX_POLY_RATIO;
                  False otherwise (including zero-area contours)
    """
    img_area = img.shape[0] * img.shape[1]

    contour_area = cv2.contourArea(contour)
    (x, y, w, h) = cv2.boundingRect(contour)
    bounding_rect_area = w * h

    if contour_area <= 0:
        # Every ratio below divides by the contour area.
        return False

    area_ratio = img_area / contour_area
    rect_ratio = bounding_rect_area / contour_area

    if show:
        print(int(area_ratio), int(rect_ratio))
        cv2.drawContours(img, [contour], -1, (160, 160, 0), 3)

    passed = False
    if MIN_AREA_PROPORTION <= area_ratio <= MAX_AREA_PROPORTION and rect_ratio <= BOUNDING_RECT_2_CONTOUR_RATIO:
        # np.int0 was an alias of np.intp and no longer exists in NumPy 2.
        rotated_rect = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.intp)
        rotated_rect_area = cv2.contourArea(rotated_rect)

        # Coarsen the approximation until it has 4 vertices or fewer; keep
        # the polygon only if it hit exactly 4.
        poly = None
        for epsilon in range(10, 155, 5):
            poly0 = cv2.approxPolyDP(contour, epsilon=epsilon, closed=True)
            if len(poly0) == 4:
                poly = poly0
            if len(poly0) <= 4:
                break

        if show:
            cv2.drawContours(img, [contour], -1, (225, 30, 30), 3)
            cv2.drawContours(img, [rotated_rect], -1, (30, 255, 30), 3)
            print(img_area/bounding_rect_area, img_area/contour_area)
            print('cnt', contour_area, 'bound_rect', bounding_rect_area, 'rot_rect', rotated_rect_area, end=', ')

        # `poly != []` compared an ndarray with a list; test for None instead.
        if poly is not None:
            poly_area = cv2.contourArea(poly)
            if show:
                print('poly', poly_area)
                cv2.drawContours(img, [poly], -1, (30, 30, 225), 3)
            if poly_area > 0 and rotated_rect_area/poly_area <= ROTATED_RECT_2_APPROX_POLY_RATIO:
                if show:
                    print(int(rotated_rect_area/poly_area), True)
                passed = True

    if show:
        # cv2.imshow needs a window name as its first argument.
        cv2.imshow('shape_test', img)
    return passed

In [18]:
def color_test(img, contour):
    """
        Check that the area inside contour is bright in every colour channel.

        img - image the contour belongs to
        contour - contour whose filled (and slightly eroded) interior is sampled

        returns - True when the mean of each of the first three channels inside
                  the mask is at least COLOR_FILTER_VALUE
    """
    # Filled mask of the contour, eroded twice so the border pixels are ignored.
    mask = np.zeros(img.shape[:2], dtype="uint8")
    cv2.drawContours(mask, [contour], -1, 255, -1)
    mask = cv2.erode(mask, None, iterations=2)

    channel_means = cv2.mean(img, mask=mask)[:3]

    return all(channel >= COLOR_FILTER_VALUE for channel in channel_means)

In [19]:
def read_text(region):
    """
        OCR region at every whole-degree rotation from -25 to 24 and print
        the TEXT_PATTERN matches found at each angle.

        region - image to read; it is copied before each rotation

        returns - None (results are only printed)
    """
    for angle in range(-25, 25):
        rotated_region = rotate(region.copy(), angle)
        text = pytesseract.image_to_string(rotated_region)

        # Strip spaces, newlines and form feeds from the OCR output.
        for unwanted in (' ', '\n', '\x0c'):
            text = text.replace(unwanted, '')

        if not text:
            continue

        matches = re.findall(TEXT_PATTERN, text)
        if matches:
            print(angle, ': ', matches)

In [20]:
def prepare_image(img):
    """
        Convert a BGR image to grayscale and run one dilate + erode pass
        with a 1x1 kernel.

        img - BGR image; it is copied, not modified

        returns - the processed single-channel image
    """
    gray = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY)

    unit_kernel = np.ones((1, 1), np.uint8)
    dilated = cv2.dilate(gray, unit_kernel, iterations=1)
    return cv2.erode(dilated, unit_kernel, iterations=1)

In [96]:
def find_bags(frame, update_dict, active_objects, show = False):
    """
        Run the TFLite detector on frame and collect the boxes whose score
        exceeds BAG_DETECTION_SCORE_THRESHOLD.

        frame - 3-dimensional image (rows, cols, channels)
        update_dict - when True, every accepted box is written into the
                      module-level package_id_dict under IDs 1, 2, ... and the
                      global objects_registered counter is incremented
        active_objects - running counter; incremented per box when update_dict
        show - when True, print each box with its score and plot the frame

        returns - (final_boxes, active_objects, final_box_points), where
                  final_boxes holds [x, y, w, h] lists and final_box_points
                  holds [x1, y1, x2, y2] lists, both in frame pixels
    """
    global objects_registered

    resized = resize_image(frame, 900)

    if show:
        img_boxes_show = frame.copy()

    # The detector input is one 512x512 image with 3 channels.
    model_input = np.reshape(cv2.resize(resized, (512, 512)), (1, 512, 512, 3))

    interpreter.set_tensor(input_details[0]['index'], model_input)
    interpreter.invoke()
    # Output tensor 0 is read as boxes, output tensor 2 as scores.
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]

    imH, imW, _ = frame.shape

    final_boxes, final_box_points = [], []
    ID = 0

    for idx, score in enumerate(scores):
        if not score > BAG_DETECTION_SCORE_THRESHOLD:
            continue

        ID += 1
        bbox_tuple = tuple(boxes[idx])

        # Same detection in both representations: (x, y, w, h) and corners.
        boxes_coords = list(convert_res_into_bbox(bbox_tuple, imH, imW))
        boxes_points = list(convert_res_into_opposite_points(bbox_tuple, imH, imW))

        final_boxes.append(boxes_coords)
        final_box_points.append(boxes_points)

        if update_dict:
            # New entries start with 0 missing frames, not constant, 'is_preparing'.
            package_id_dict.update({ID: (boxes_coords, 0, False, 'is_preparing')})
            objects_registered += 1
            active_objects += 1
            print(f'INSIDE. Now tracking new package with ID {ID}, coordinates: {boxes_coords}')

        if show:
            print("The box {} has probability {}".format(boxes_coords, scores[idx]))

    if show:
        plt.imshow(cv2.cvtColor(img_boxes_show, cv2.COLOR_BGR2RGB))

    return final_boxes, active_objects, final_box_points

## ZoneLabeler class to label initial exit zones on videos

In [22]:
class ZoneLabeler:
    """
        Interactive helper for marking polygonal zones on an image.

        Left click adds a vertex to the polygon being drawn; right click closes
        it (at least 3 vertices are required) and stores it in `polygons`.
        Vertices are stored in the coordinates of the original image, while
        the user works on a 1280x720 preview.
    """

    def __init__(self, img):
        """
            img - image to label; vertices and outlines are drawn onto it
        """
        self.polygons = []
        self._window_height = 720
        self._window_width = 1280
        self._img = img
        # Factors that map preview coordinates back to original coordinates.
        self._height_ratio = self._img.shape[0] / self._window_height
        self._width_ratio = self._img.shape[1] / self._window_width
        # INTER_AREA when the image is larger than the preview in either
        # dimension, INTER_LINEAR otherwise.
        if self._height_ratio > 1 or self._width_ratio > 1:
            interpol_method = cv2.INTER_AREA
        else:
            interpol_method = cv2.INTER_LINEAR
        self._processed_img = cv2.resize(img, (self._window_width, self._window_height), interpolation=interpol_method)
        self._current_polygon = []

    def run(self):
        """
            Show the preview window and block until a key is pressed.

            returns - (original image with the zones drawn on it, list of
                      polygons, each a list of [x, y] points)
        """
        cv2.imshow("image", self._processed_img)
        cv2.setMouseCallback('image', self.on_click)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        return self._img, self.polygons

    def on_click(self, event, x, y, p1, p2):
        """
            Mouse callback: collect vertices and close polygons.

            event - OpenCV mouse event code
            x, y - click position in preview coordinates
            p1, p2 - remaining callback arguments (unused)
        """
        custom_color = (130, 0, 75) # purple

        if event == cv2.EVENT_LBUTTONDOWN:
            cv2.circle(self._processed_img, (x, y), 3, custom_color, -1)
            x = int(x * self._width_ratio)
            y = int(y * self._height_ratio)
            cv2.circle(self._img, (x, y), 3, custom_color, -1)

            self._current_polygon.append([x, y])
        elif event == cv2.EVENT_RBUTTONDOWN and len(self._current_polygon) >= 3:
            # Fewer than 3 vertices cannot form a zone: such a right click is
            # ignored so that no empty/degenerate polygon gets stored.
            self.polygons.append(self._current_polygon)

            # cv2.polylines expects 32-bit integer points.
            full_res_outline = np.array([self._current_polygon], dtype=np.int32)
            cv2.polylines(self._img, [full_res_outline], True, custom_color, thickness=3)

            preview_outline = np.array(
                [[[int(px / self._width_ratio), int(py / self._height_ratio)]
                  for px, py in self._current_polygon]],
                dtype=np.int32)
            cv2.polylines(self._processed_img, [preview_outline], True, custom_color, thickness=3)

            self._current_polygon = []
        cv2.imshow("image", self._processed_img)

## exit zones handling methods

In [23]:
def check_exit_zones(package_id_dict, exit_zone_list):
    """
        Mark packages whose box lies at least half inside an exit zone.

        package_id_dict - {ID: (box, frames_missing, is_constant, status)}
        exit_zone_list - list of zones, each a sequence of points whose first
                         two items are x and y

        returns - a shallow copy of package_id_dict in which every package
                  overlapping a zone by >= 0.5 of its own area has the status
                  'in_exit_zone'; the input dict is not modified
    """
    new_package_id_dict = package_id_dict.copy()

    # Zone polygons do not depend on the package, so build them once.
    exit_polys = [Polygon([(point[0], point[1]) for point in exit_zone])
                  for exit_zone in exit_zone_list]

    for ID, (box, frames_missing_since_last_detection, is_constant, status) in package_id_dict.items():
        box_poly = Polygon(convert_box_into_coords(box))
        box_area = box_poly.area
        if box_area <= 0:
            # A zero-sized box has no overlap ratio; leave its status alone.
            continue

        for exit_poly in exit_polys:
            # share of the box that is covered by the zone
            intersect_area = box_poly.intersection(exit_poly).area / box_area

            if intersect_area >= 0.5:
                print('Package entered exit zone')
                new_package_id_dict.update({ID: (box, frames_missing_since_last_detection, is_constant, 'in_exit_zone')})

    return new_package_id_dict

In [24]:
def draw_exit_zones(img, exit_zone_list):
    """
        Draw the outline of every exit zone onto img (in place).

        img - image to draw on
        exit_zone_list - list of zones, each a sequence of [x, y] points
    """
    custom_color = (130, 0, 75) # purple
    for zone_points in exit_zone_list:
        # cv2.polylines expects 32-bit integer points; the platform default
        # integer type of np.array is not guaranteed to be 32-bit.
        polygons_for_proc = np.array(zone_points, dtype=np.int32)
        cv2.polylines(img, [polygons_for_proc], True, custom_color, thickness=3)

## NMS handling

In [25]:
def non_max_suppression_fast(boxes, overlapThresh):
    """
        Greedy non-maximum suppression over corner-point boxes.

        boxes - array of shape (n, 4) with rows (x1, y1, x2, y2)
        overlapThresh - a remaining box is dropped when the intersection with
                        the picked box, divided by the remaining box's own
                        area, exceeds this value

        returns - [] for empty input, otherwise the picked boxes as an int
                  array, in picking order (largest bottom-right y first)
    """
    if len(boxes) == 0:
        return []

    # Integer boxes are converted because the overlap ratio needs division.
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]

    areas = (right - left + 1) * (bottom - top + 1)
    # Candidates sorted by bottom-right y; the last one is picked each round.
    order = np.argsort(bottom)

    kept = []
    while len(order) > 0:
        current = order[-1]
        others = order[:-1]
        kept.append(current)

        # Intersection rectangle between the picked box and every other one.
        inter_left = np.maximum(left[current], left[others])
        inter_top = np.maximum(top[current], top[others])
        inter_right = np.minimum(right[current], right[others])
        inter_bottom = np.minimum(bottom[current], bottom[others])

        inter_w = np.maximum(0, inter_right - inter_left + 1)
        inter_h = np.maximum(0, inter_bottom - inter_top + 1)

        overlap = (inter_w * inter_h) / areas[others]

        # Keep only the candidates that do not overlap the picked box too much.
        order = others[np.logical_not(overlap > overlapThresh)]

    return boxes[kept].astype("int")

## Reidentification pipeline

## helper functions for status table rendering on frames

In [34]:
def update_status_table(status_table_dict, package_id_dict):
    """
        Copy the current status of every package into status_table_dict.

        status_table_dict - {ID: status}; updated in place
        package_id_dict - {ID: (box, frames_missing, is_constant, status)}

        returns - the same status_table_dict object
    """
    status_table_dict.update({
        ID: status for ID, (_, _, _, status) in package_id_dict.items()
    })
    return status_table_dict

In [35]:
def draw_status_table(current_frame, status_table_dict):
    """
        Render the "ID - status" table in the top-left corner of current_frame.

        current_frame - image to draw on (in place)
        status_table_dict - {ID: status}

        A filled black rectangle is drawn once, before the first row; its
        width is that of the text '1 - is_preparing' and its height is derived
        from the row count and the first row's metrics.
    """
    font, scale, stroke = cv2.FONT_HERSHEY_PLAIN, 1.7, 2

    background_width = cv2.getTextSize('1 - is_preparing', font, scale, stroke)[0][0]
    row_count = len(status_table_dict)

    cursor_y = 0
    for row, (ID, status) in enumerate(status_table_dict.items()):
        label = ' - '.join((str(ID), status))

        # Second item of getTextSize's result: used as extra row spacing.
        row_gap = cv2.getTextSize(label, font, scale, stroke)[1]

        cursor_y += 30

        if row == 0:
            cv2.rectangle(current_frame, (0, 0),
                          (background_width, row_count * (cursor_y + row_gap)),
                          (0, 0, 0), -1)
        cv2.putText(current_frame, label, (0, cursor_y), font, scale, (255, 255, 255), 2)
        cursor_y += row_gap

## receipt detection

In [None]:
def get_proposed_regions(box, use_receipt_detection=False, show=False):
    """
        Propose receipt-like regions inside the image of one bag.

        box - BGR image of the bag
        use_receipt_detection - accepted for call compatibility; not used here
        show - when True, draw the surviving contours on a copy of box and
               display it in a window named 'proposed_regions' (the caller has
               to pump GUI events, e.g. with cv2.waitKey). Previously this
               name was read without being defined, so every call raised
               NameError.

        returns - list of grayscale crops, one per surviving contour
    """
    img = prepare_image(box)

    proposed_contours = []
    img_area = img.shape[0] * img.shape[1]

    # Try several global thresholds; from each keep the (up to) three largest
    # contours that pass the shape test.
    for threshold_value in range(60, 260, 20):
        thresh0 = cv2.threshold(img, threshold_value, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.adaptiveThreshold(thresh0, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)

        contours = cv2.findContours(thresh.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[0]
        contours = [contour for contour in contours if cv2.contourArea(contour) > 0]
        contours.sort(key=cv2.contourArea, reverse=True)

        for contour in contours[:3]:
            # shape_test may draw on its input, so hand it a copy.
            if shape_test(box.copy(), contour):
                proposed_contours.append(contour)

    def _area_in_range(contour):
        # Merged contours are re-checked against the allowed area proportion.
        area = cv2.contourArea(contour)
        return area > 0 and MIN_AREA_PROPORTION <= img_area / area <= MAX_AREA_PROPORTION

    filtered_contours = filter_duplicate_contours(proposed_contours, img)
    filtered_contours = [contour for contour in filtered_contours if _area_in_range(contour)]
    filtered_contours = [contour for contour in filtered_contours if color_test(box, contour)]

    if show:
        output = box.copy()
        for i, contour in enumerate(filtered_contours):
            cv2.drawContours(output, [contour], -1, (255-50*i, 50*i, 50), 3)
        cv2.imshow('proposed_regions', output)

    proposed_regions = [crop_contour(img, contour) for contour in filtered_contours]

    return proposed_regions

In [98]:
def cut_proper_box(current_frame, package_id_dict):
    """
        Crop every tracked package out of the current frame.

        current_frame - image indexed as frame[rows, cols]
        package_id_dict - {ID: (box, frames_missing, is_constant, status)} with
                          box = (x, y, width, height)

        returns - list of crops frame[y:y+height, x:x+width], one per package,
                  in dict order
    """
    bags_in_img = []
    for box, _, _, _ in package_id_dict.values():
        # The height is box[3]; the previous code read box[2] (the width) twice.
        x, y, ww, hh = box[0], box[1], box[2], box[3]

        bags_in_img.append(current_frame[int(y):int(y + hh), int(x):int(x + ww)])

    return bags_in_img

In [77]:
def get_receipt_regions(bags_in_img, use_receipt_detection=False):
    """
        Look for receipts on every bag crop and OCR the proposed regions.

        bags_in_img - iterable of bag images
        use_receipt_detection - when False, a notice is printed and nothing
                                else happens

        returns - None (regions are displayed and OCR results are printed)
    """
    if not use_receipt_detection:
        print("not detecting receipts in this run")
        return

    for bag_box in bags_in_img:
        proposed_receipt_regions = get_proposed_regions(bag_box, use_receipt_detection)
        for region in proposed_receipt_regions:
            # cv2.imshow needs a window name as its first argument; the bare
            # cv2.imshow(region) call raised TypeError.
            cv2.imshow('receipt_region', region)
            read_text(region)

## other utils

In [78]:
# euclidean distance
def is_highly_similar(centroid1, centroid2):
    """
        Return the Euclidean distance between two centroids.

        Despite the name, the result is a distance (smaller means more
        similar), not a boolean.
    """
    return spatial.distance.euclidean(centroid1, centroid2)

In [79]:
# calc centroid of a box
def get_centroid(box):
    """
        Return the centre point of an (x, y, width, height) box.

        returns - (x + width / 2, y + height / 2)
    """
    left, top, span_x, span_y = box[0], box[1], box[2], box[3]

    return (left + (span_x / 2), top + (span_y / 2))

In [80]:
# video description from first frame
def get_capture_info(cap):
    """
        Describe a video from its first frame.

        cap - capture object instance; one frame is consumed by cap.read()

        returns - (frame height, frame width, FPS of the capture, k) where
                  k = FPS / FRAMES_PROCESSED, i.e. every k-th frame is to be
                  processed; when the first frame cannot be read, the string
                  'Error: issues with video' is returned instead
    """
    grabbed, first_frame = cap.read()

    if grabbed:
        frame_h, frame_w = first_frame.shape[:2]
        fps = cap.get(cv2.CAP_PROP_FPS)
        # FRAMES_PROCESSED is a module-level setting defined elsewhere.
        step = fps / FRAMES_PROCESSED
        return frame_h, frame_w, fps, step

    return 'Error: issues with video'

In [81]:
# re-identification method for cases: new_boxes_cnt == old_boxes_cnt, new_boxes_cnt > old_boxes_cnt
def reindentify_boxes_v1(remaining_boxes, old_dict, new_dict):
    """
        Assign each new box to the closest not-yet-claimed old package.

        remaining_boxes - list of new [x, y, w, h] boxes
        old_dict - {ID: (box, frames_missing, is_constant, status)} from the
                   previous detection
        new_dict - dict to update in place; a matched ID receives the new box
                   while keeping its frames_missing / is_constant / status

        returns - (new_dict, list of new boxes that found no old package)
    """
    claimed_old_boxes = []
    leftover_new_boxes = remaining_boxes.copy()

    for candidate in remaining_boxes:
        # Centroid distance from this new box to every old box still free.
        distances = {}
        for ID, (old_box, frames_missing, is_constant, status) in old_dict.items():
            old_box = list(old_box)
            if old_box in claimed_old_boxes:
                continue
            gap = is_highly_similar(get_centroid(old_box), get_centroid(candidate))
            distances[(ID, frames_missing, is_constant, status)] = (gap, old_box)

        if not distances:
            # Every old box is already taken: this new box stays unmatched.
            continue

        # Values are (distance, box) tuples, so the smallest distance wins.
        closest = min(distances, key=distances.get)
        claimed_old_boxes.append(distances[closest][1])

        ID, frames_missing, is_constant, status = closest
        new_dict.update({ID: (candidate, frames_missing, is_constant, status)})
        leftover_new_boxes.remove(candidate)

    return new_dict, leftover_new_boxes

In [82]:
# re-identification method for cases: new_boxes_cnt == old_boxes_cnt, new_boxes_cnt < old_boxes_cnt
def reindentify_boxes_v2(remaining_boxes, old_dict, new_dict):
    """
        Match old packages to new boxes by centroid distance.

        remaining_boxes - list of new [x, y, w, h] boxes
        old_dict - {ID: (box, frames_missing, is_constant, status)} from the
                   previous detection
        new_dict - dict to update in place; a matched ID receives the new box
                   while keeping its frames_missing / is_constant / status

        An old package is matched to the first new box whose centroid lies
        within 30 (pixels, given pixel boxes) of its own centroid.

        returns - (new_dict, list of old IDs for which no new box was found)
    """
    non_reidentified_box_ids = list(old_dict.keys())

    for ID, (old_box, frames_missing, is_constant, status) in old_dict.items():
        centroid_old = get_centroid(old_box)
        for new_box in remaining_boxes:
            distance = is_highly_similar(centroid_old, get_centroid(new_box))

            if distance <= 30:
                new_dict.update({ID: (new_box, frames_missing, is_constant, status)})
                non_reidentified_box_ids.remove(ID)
                # Stop after the first match: the previous code kept scanning,
                # so a second box in range removed the ID again and raised
                # ValueError.
                break

    return new_dict, non_reidentified_box_ids

### apply detection

In [95]:
def _missing_frames_limit():
    """Return the missing-frames count at which a package is dropped (N * (fps / k), module globals)."""
    return N * (fps / k)


def _boxes_kept_by_nms(final_box_points, detections):
    """Return the detections ([x, y, w, h]) whose corner-point box survives non-max suppression."""
    post_nms_boxes = non_max_suppression_fast(np.array(final_box_points), overlapThresh=0.75)

    kept = []
    for corner_box in post_nms_boxes:
        normal_box = convert_opposite_points_into_normal(corner_box)
        if normal_box in detections:
            kept.append(normal_box)
    return kept


def _deregister_expired(package_id_dict, new_package_id_dict, status_table_dict):
    """Delete non-constant packages that were missing for too long; return the deleted IDs."""
    deleted_ids = []
    for ID, (_, frames_missing, is_constant, status) in package_id_dict.items():
        if frames_missing >= _missing_frames_limit() and not is_constant:
            del new_package_id_dict[ID]
            deleted_ids.append(ID)
            # A package that disappeared from an exit zone counts as delivered.
            if status == 'in_exit_zone':
                status_table_dict.update({ID: 'delivered'})
    return deleted_ids


def run_detector(frame, reindentify, objects_deregistered,
                 objects_registered, active_objects, package_id_dict,
                 exit_zone_list, status_table_dict):

    """
        Run one detection pass and reconcile it with the tracked packages.

        frame - frame to run detection on
        reindentify - bool that denotes whether bags have to be reidentified before the next detection iteration;
                      when False the detections are registered as new packages by find_bags
        objects_deregistered, objects_registered, active_objects - running counters
        package_id_dict - {ID: (box, frames_missing, is_constant, status)}; not modified here directly
        exit_zone_list - accepted for call compatibility; not used here
        status_table_dict - {ID: status}; expired packages that were in an exit zone are set to 'delivered'

        returns - (remaining_boxes, new_package_id_dict, objects_deregistered,
                   objects_registered, active_objects, status_table_dict)

        Reads the module-level N, fps and k for the expiry threshold.
        NOTE(review): reidentification still runs against the full old dict,
        so an expired ID can be re-added when a new box matches it - confirm
        that this is intended.
    """

    if not reindentify:
        # NOTE(review): find_bags(update_dict=True) writes to the module-level
        # package_id_dict / objects_registered, not to this function's
        # arguments - presumably the caller passes the same dict; verify.
        detections, active_objects, final_box_points = find_bags(frame, True, active_objects, show=False)
        remaining_boxes = _boxes_kept_by_nms(final_box_points, detections)

        # Loop names must not be `k`: that used to overwrite the global
        # frame step `k` with a package ID.
        for package_id, entry in package_id_dict.items():
            print((package_id, entry))

        # keep only the packages whose box made it past nms
        new_package_id_dict = package_id_dict.copy()
        for package_id, (box, _, _, _) in package_id_dict.items():
            if box not in remaining_boxes:
                del new_package_id_dict[package_id]

        return remaining_boxes, new_package_id_dict, objects_deregistered, objects_registered, active_objects, status_table_dict

    detections, active_objects, final_box_points = find_bags(frame, False, active_objects, False)
    remaining_boxes = _boxes_kept_by_nms(final_box_points, detections)

    old_boxes_cnt = len(package_id_dict)
    new_boxes_cnt = len(remaining_boxes)

    new_package_id_dict = package_id_dict.copy()

    # Drop packages that have been missing for too long. 'delivered' is now
    # recorded under the package ID (it used to be keyed by a stale loop
    # variable) and in all three cases below.
    deleted_box_ids = _deregister_expired(package_id_dict, new_package_id_dict, status_table_dict)
    objects_deregistered += len(deleted_box_ids)
    active_objects -= len(deleted_box_ids)

    if new_boxes_cnt == old_boxes_cnt:
        new_package_id_dict, _ = reindentify_boxes_v1(remaining_boxes, package_id_dict, new_package_id_dict)

    elif new_boxes_cnt > old_boxes_cnt:
        new_package_id_dict, unused_new_boxes = reindentify_boxes_v1(remaining_boxes, package_id_dict, new_package_id_dict)

        # Every new box that matched no old package becomes a new package.
        for box_to_add in remaining_boxes:
            if box_to_add in unused_new_boxes:
                new_ID = objects_registered + 1
                new_package_id_dict.update({new_ID: (box_to_add, 0, False, 'is_preparing')})
                objects_registered += 1
                active_objects += 1

    else:
        new_package_id_dict, non_reidentified_box_ids = reindentify_boxes_v2(remaining_boxes, package_id_dict, new_package_id_dict)

        for missing_id in non_reidentified_box_ids:
            if missing_id in deleted_box_ids:
                continue

            prev_box, prev_frames_missing, prev_is_const, prev_status = new_package_id_dict[missing_id]
            # Penalty added per detection pass in which the package was not found.
            prev_frames_missing += 6

            if prev_frames_missing >= _missing_frames_limit():
                # Test this package's own flag; the previous code looked at
                # `is_constant` left over from an earlier loop.
                if not prev_is_const:
                    del new_package_id_dict[missing_id]
                    objects_deregistered += 1
                    active_objects -= 1
                    if prev_status == 'in_exit_zone':
                        status_table_dict.update({missing_id: 'delivered'})
                continue

            if not prev_is_const:
                new_package_id_dict.update({missing_id: (prev_box, prev_frames_missing, prev_is_const, prev_status)})

    return remaining_boxes, new_package_id_dict, objects_deregistered, objects_registered, active_objects, status_table_dict

### tracker methods

In [84]:
def _csrt_tracker_from_params(params_path):
    """Create a CSRT tracker configured from the given OpenCV parameter file."""
    storage = cv2.FileStorage(params_path, cv2.FILE_STORAGE_READ)
    try:
        tracker = cv2.TrackerCSRT_create()
        tracker.read(storage.getFirstTopLevelNode())
    finally:
        # Close the parameter file again; it was previously left open.
        storage.release()
    return tracker


def init_tracker(package_id_dict):
    """
        package_id_dict - dict containing IDs and converted coords of respective bbox,
                          {ID: (box, frames_missing, is_constant, status)} with box = (x, y, w, h)
        returns - ready-to-update MultiTracker object

        Reads the module-level `frame` as the image the trackers are
        initialised on.
    """

    # Create Multitracker object
    multi_tracker = cv2.MultiTracker.create()

    preset_frame = None

    # Initialise MultiTracker
    for ID, (box, frames_missing, is_constant, status) in package_id_dict.items():
        x, y, w, h = box[0], box[1], box[2], box[3]

        if preset_frame is None:
            # convert frame to greyscale with 1 channel for CSRT tracker;
            # done once, on first use, since the frame is the same for all boxes
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            preset_frame = np.expand_dims(gray_frame, axis=2)

        if is_constant:
            # init with conf_thresh = 0.07
            tracker = _csrt_tracker_from_params("csrt_psr_0.07.xml")
        elif frames_missing > 0:
            # init with conf_thresh = 0.05
            tracker = _csrt_tracker_from_params("csrt_psr_0.05.xml")
        else:
            # init with default conf_thresh - 0.035
            tracker = cv2.TrackerCSRT_create()

        multi_tracker.add(tracker, preset_frame, (x, y, w, h))

    return multi_tracker

In [85]:
def run_tracker(multi_tracker, frame, status_table_dict, exit_zone_list, package_id_dict=None):
    """
        Advance the tracker by one frame and redraw boxes, status table and exit zones.

        multi_tracker - initialized tracker to track with
        frame - frame to run tracking on
        status_table_dict - status table passed to update_status_table
        exit_zone_list - exit zones passed to check_exit_zones / draw_exit_zones
        package_id_dict - dict of ID -> (box, frames_missing, is_constant, status)
                          from the previous frame; when omitted the notebook-level
                          global `package_id_dict` is used, which is what the
                          function always relied on before this parameter existed
        returns - (tracked_frame, new_package_id_dict, new_boxes_list, status_table)
    """

    if package_id_dict is None:
        try:
            package_id_dict = globals()["package_id_dict"]
        except KeyError:
            raise NameError("name 'package_id_dict' is not defined") from None

    # convert frame to greyscale with 1 channel for CSRT tracker
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    preset_frame = np.expand_dims(gray_frame, axis=2)

    _, new_boxes = multi_tracker.update(preset_frame)

    new_boxes_list = new_boxes
    tracked_frame = frame.copy()
    new_package_id_dict = package_id_dict.copy()

    # old boxes already paired with a tracked box; each may be claimed only once
    used_old_boxes = []
    for new_box in new_boxes_list:

        # the new centroid does not depend on the candidate old box
        centroid_new = get_centroid(new_box)

        similarity_dict = {}
        for ID, (old_box, frames_missing, is_constant, status) in package_id_dict.items():
            old_box = list(old_box)
            if old_box in used_old_boxes:
                continue
            highly_similar = is_highly_similar(get_centroid(old_box), centroid_new)
            similarity_dict[(ID, frames_missing, is_constant, status)] = (highly_similar, old_box)

        if not similarity_dict:
            continue

        # NOTE(review): min() selects the smallest (highly_similar, old_box) tuple;
        # that is the closest match only if is_highly_similar returns a
        # distance-like value - confirm against its definition.
        best_match = min(similarity_dict, key=similarity_dict.get)
        used_old_boxes.append(similarity_dict[best_match][1])
        ID, frames_missing, is_constant, status = best_match
        new_package_id_dict[ID] = (new_box, frames_missing, is_constant, status)

    new_package_id_dict = check_exit_zones(new_package_id_dict, exit_zone_list)
    status_table = update_status_table(status_table_dict, new_package_id_dict)

    for ID, (new_box, frames_missing, is_constant, status) in new_package_id_dict.items():
        if frames_missing > 0:
            draw_temporary_boxes(ID, new_box, tracked_frame, status)
        else:
            draw_tracked_boxes(ID, new_box, tracked_frame, status)

    draw_status_table(tracked_frame, status_table_dict=status_table)
    draw_exit_zones(tracked_frame, exit_zone_list)

    return tracked_frame, new_package_id_dict, new_boxes_list, status_table

## Video processing script (1 detection + 29 tracking updates / second)

In [92]:
# additional constants
# NOTE(review): FRAMES_PROCESSED is not referenced by the processing loop in this notebook section - confirm it is still needed
FRAMES_PROCESSED = 6
# prefix joined with video_name to build the input video path
VIDEO_DIR = './'
# NOTE: re-assigned to an absolute path in the "Make demo video" section
VIDEO_SAVE_DIR = 'demo_videos'
# length of the processed clip in seconds; multiplied by the video FPS to get the frame budget
SECONDS_TO_PROCESS = 20

# num of seconds to track the object after its disappearance
N = 3
# forwarded to get_receipt_regions as use_receipt_detection
USE_RECEIPT_DETECTION = False

In [68]:
# file name of the input video, combined with VIDEO_DIR to form full_path (left blank here)
video_name = ""

In [69]:
# os.path.join inserts the separator only when needed, so VIDEO_DIR works
# with or without a trailing slash
full_path = os.path.join(VIDEO_DIR, video_name)

In [None]:
# display the resolved input path as the cell output
full_path

In [None]:
# Open the video once only to query its properties; it is re-opened for processing below.
cap = cv2.VideoCapture(full_path)

# index of the first frame to process (79940 was used as an alternative start)
start_idx = 44220

h, w, fps, k = get_capture_info(cap)
fps = int(fps)
# k is overridden to 1 - presumably the frame step, since every frame is processed; TODO confirm
k = 1

frame_id = 0

cap.release()

print(f'Video resolution: {w}x{h}, FPS: {fps}, processing every frame')

# create dictionary to store each frame's tracked boxes
post_tracking_boxes = {}

# create dictionary to store ids and coords of every bbox
package_id_dict = {}

# cycle_started is False when the next frame must go through the detector;
# cycle_iteration counts the frames handled since that detection
cycle_started = False
cycle_iteration = 0

status_table_dict = {}

active_objects = 0
objects_registered = 0
objects_deregistered = 0

# cv2.imwrite reports failure through its return value only, so make sure
# the output directory exists before any frame is written
os.makedirs('./processed_frames', exist_ok=True)

cap = cv2.VideoCapture(full_path)

# try/finally guarantees the capture is released even if a frame fails midway
try:
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_idx)

    first_frame_ok, first_frame = cap.read()
    if not first_frame_ok:
        raise RuntimeError(f'Could not read frame {start_idx} from {full_path!r}')

    # exit zones are labelled on the first frame of the clip
    labeler = ZoneLabeler(first_frame)
    _, exit_zone_list = labeler.run()

    seconds_to_process = SECONDS_TO_PROCESS
    frames_to_process = fps * seconds_to_process

    # Reading the first frame advanced the capture, so rewind once. From here
    # on cap.read() walks the frames sequentially; the original re-seeked
    # before every single read, which is slow and unnecessary.
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_idx)

    while True:

        success, frame = cap.read()

        # kept in step with the capture position
        start_idx += 1

        if not success:
            print('Error: issues with the frame')
            break

        if not cycle_started:

            # re-identification is requested only once some packages are registered
            detections, package_id_dict, objects_deregistered, objects_registered, active_objects, status_table_dict = run_detector(
                frame,
                reindentify=bool(package_id_dict),
                objects_deregistered=objects_deregistered,
                objects_registered=objects_registered,
                active_objects=active_objects,
                package_id_dict=package_id_dict,
                exit_zone_list=exit_zone_list,
                status_table_dict=status_table_dict,
            )

            cycle_started = True
            detected_frame = frame.copy()

            # render status table
            status_table_dict = update_status_table(status_table_dict, package_id_dict)

            # draw initial boxes and their IDs
            for ID, (box, frames_missing, is_constant, status) in package_id_dict.items():

                if frames_missing > 0:
                    draw_temporary_boxes(ID, box, detected_frame, status)
                else:
                    draw_boxes(ID, box, detected_frame, status)

            draw_status_table(detected_frame, status_table_dict)
            draw_exit_zones(detected_frame, exit_zone_list)

            bags_in_img = cut_proper_box(frame, package_id_dict)

            get_receipt_regions(bags_in_img=bags_in_img, use_receipt_detection=USE_RECEIPT_DETECTION)

            name = f'./processed_frames/{frame_id}.jpg'
            cv2.imwrite(name, detected_frame)

            # init tracker
            multi_tracker = init_tracker(package_id_dict)
            cycle_iteration += 1
            frame_id += 1
            print('continuation of loop after detection')
            continue

        if cycle_iteration == 0 or active_objects == 0:
            # Nothing to track on this frame. The cycle still has to advance,
            # otherwise cycle_started stays True forever and the detector is
            # never run again once a detection finds no active objects.
            if cycle_iteration >= 29:
                cycle_started = False
                cycle_iteration = 0
            else:
                cycle_iteration += 1

            frame_id += 1
            if frame_id > frames_to_process:
                break
            continue

        # Tracking frame. The result is always bound to status_table_dict; the
        # original bound it to a separate name on the last frame of the cycle,
        # dropping that update.
        tracked_frame, package_id_dict, new_boxes_list, status_table_dict = run_tracker(
            multi_tracker, frame, status_table_dict, exit_zone_list
        )

        name = f'./processed_frames/{frame_id}.jpg'
        cv2.imwrite(name, tracked_frame)

        if cycle_iteration >= 29:
            # last tracking frame of the cycle: the next frame is detected again
            cycle_started = False
            cycle_iteration = 0
        else:
            cycle_iteration += 1

        frame_id += 1

        if frame_id > frames_to_process:
            break
finally:
    cap.release()

## Make demo video

In [100]:
def make_video(outvid, images=None, fps=30, size=None,
               is_color=True, format="FMP4"):
    """
    Create a video from a list of images.
 
    @param      outvid      output video
    @param      images      list of image paths to use in the video
    @param      fps         frame per second
    @param      size        size of each frame as (width, height)
    @param      is_color    color
    @param      format      see http://www.fourcc.org/codecs.php
    @return                 the released VideoWriter, or None when *images*
                            is None or empty (no file is written in that case)
    @raise      FileNotFoundError   an image path does not exist
    @raise      ValueError          an image file could not be decoded
 
    The function relies on http://opencv-python-tutroals.readthedocs.org/en/latest/.
    By default, the video will have the size of the first image.
    It will resize every image to this size before adding them to the video.
    """
    # Nothing to encode: avoid creating a writer (and crashing on release)
    if not images:
        return None

    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize
    fourcc = VideoWriter_fourcc(*format)
    vid = None
    try:
        for image in images:
            if not os.path.exists(image):
                raise FileNotFoundError(image)
            img = imread(image)
            if img is None:
                # imread signals an unreadable file by returning None
                raise ValueError(f"could not decode image: {image}")
            if vid is None:
                if size is None:
                    size = img.shape[1], img.shape[0]
                vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
            # resize when EITHER dimension differs; the original used `and`,
            # so frames that differed in only one dimension went in unresized
            if size[0] != img.shape[1] or size[1] != img.shape[0]:
                img = resize(img, size)
            vid.write(img)
    finally:
        # release the writer even if a later frame fails
        if vid is not None:
            vid.release()
    return vid

In [101]:
# Locations used to assemble the demo video, all anchored at the current working directory
ROOT_DIR = os.getcwd()
IMAGES_DIR = os.path.join(ROOT_DIR, "processed_frames")
VIDEO_SAVE_DIR = os.path.join(ROOT_DIR, "demo_videos")

# paths of the entries in IMAGES_DIR that match the '*.*' pattern
frames_pattern = os.path.join(IMAGES_DIR, '*.*')
images = glob.glob(frames_pattern)

In [102]:
# Order frames numerically by file stem so that "10.jpg" follows "9.jpg".
# splitext handles an extension of any length, whereas slicing off the last
# three characters only worked for three-letter extensions.
images = sorted(images, key=lambda path: float(os.path.splitext(os.path.basename(path))[0]))

In [103]:
# number of frames that will go into the demo video
len(images)

602

In [104]:
# The video writer does not create missing directories, so make sure the
# target directory exists before building the output path.
os.makedirs(VIDEO_SAVE_DIR, exist_ok=True)
# NOTE(review): this placeholder name has no file extension; the container is
# normally chosen from the extension - confirm the real name carries one.
outvid = os.path.join(VIDEO_SAVE_DIR, "30_fps_video_name")

In [105]:
# encode the sorted frames into the demo video at 30 frames per second
make_video(outvid, images, fps=30)

<VideoWriter 00000212B26A4DD0>