### This pipeline works mainly for the following OpenVINO models:

* person-detection-0200
* person-detection-0201
* person-detection-0202
* person-detection-0203
* person-vehicle-bike-detection-2000

For all of these models, the network outputs a blob with shape 1, 1, 200, 7, in the format 1, 1, N, 7, where N is the number of detected bounding boxes. Each detection has the format [image_id, label, conf, x_min, y_min, x_max, y_max], where:

* image_id - ID of the image in the batch
* label - predicted class ID (0 - person)
* conf - confidence for the predicted class
* (x_min, y_min) - coordinates of the top left bounding box corner
* (x_max, y_max) - coordinates of the bottom right bounding box corner

At the moment, the postprocessing function in the code has only been written for this particular output tensor shape.

In [145]:
import os
import sys

# Get the current working directory and add the parent directory to the Python path
current_working_directory = os.getcwd()
sys.path.append(os.path.join(current_working_directory, ".."))

from helpers.helpers import add_frame_info, VideoPlayer, PolygonDrawer, capture_frame_for_ROI, create_ROI, compute_polygon_intersection

import cv2

import time
import threading
import openvino as ov
import numpy as np
from pprint import pprint

In [146]:
# Colour triples handed to OpenCV drawing calls. The colour names in the
# comments assume OpenCV's default BGR channel order.
ROI_COLOR = (0, 255, 255)  # ROI polygon outline on the annotated frame (yellow in BGR)
ROI_FILL_COLOR = (255, 0, 255)  # ROI polygon fill on the debug mask (magenta in BGR)
BOX_COLOR = (0, 255, 0)  # rectangle drawn around each detection (green)
BOX_INTRUSION_COLOR = (0, 0, 255)  # red in BGR; not referenced in the code visible here
BOX_FILL_COLOR = (255, 255, 0)  # detection box fill on the debug mask (cyan in BGR)
INFO_TEXT_COLOR = (255, 255, 255)  # white; not referenced in the code visible here

In [147]:
class PersonIntrusionDetection:
    """Detect objects with an OpenVINO SSD-style model and flag ROI intrusions.

    The model output is interpreted as rows of
    ``[image_id, label, conf, x_min, y_min, x_max, y_max]``; every row whose
    confidence exceeds the threshold becomes a bounding box that is tested
    against the region of interest with ``compute_polygon_intersection``.

    NOTE(review): detections are not filtered by ``label``. For multi-class
    models every confident detection is treated as a potential intruder —
    confirm whether class filtering is wanted.
    """

    # Key code returned by cv2.waitKey for the Escape key.
    _ESCAPE_KEY = 27

    def __init__(self, model_path, roi=None, device='CPU', confidence_threshold=0.5):
        """Load and compile the model.

        Args:
            model_path: Path to the model file passed to ``Core.read_model``.
            roi: Sequence of polygon points describing the region of interest.
                Required despite the ``None`` default.
            device: OpenVINO device name used for compilation.
            confidence_threshold: Detections with a confidence at or below
                this value are discarded.

        Raises:
            ValueError: If ``roi`` is ``None``.
            FileNotFoundError: If ``model_path`` does not exist.
        """
        # Explicit raise instead of assert: asserts vanish under ``python -O``.
        if roi is None:
            raise ValueError("ROI must be set.")

        self.core = ov.Core()
        self.model = self.load_model(model_path)
        self.device = device
        self.compiled_model = self.core.compile_model(model=self.model, device_name=self.device)
        self.input_layer_ir = self.model.input(0)
        self.shape = self.get_shape()
        self.roi = roi
        self.person_detection_confidence_threshold = confidence_threshold

    def load_model(self, model_path):
        """Read the model at ``model_path``.

        Raises:
            FileNotFoundError: If the path does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found at {model_path}")
        return self.core.read_model(model=model_path)

    def get_shape(self):
        """Return the model input size as ``(height, width)``.

        The input shape is unpacked as ``N, C, H, W``.
        """
        _, _, height, width = self.input_layer_ir.shape
        return height, width

    def preprocess_frame(self, frame):
        """Resize ``frame`` to the model input size and lay it out as 1xCxHxW float32.

        The frame is passed on in its original channel order. The Open Model
        Zoo pages for the supported models state that the expected colour
        order is BGR, so no BGR->RGB conversion is applied.
        NOTE(review): this assumes ``VideoPlayer`` delivers BGR frames, as
        OpenCV capture does by default — confirm.
        """
        height, width = self.shape
        # cv2.resize expects dsize as (width, height), the reverse of (H, W).
        resized = cv2.resize(frame, (width, height))
        # HWC -> CHW, then add the batch axis.
        batched = np.expand_dims(resized.transpose((2, 0, 1)), axis=0)
        return np.ascontiguousarray(batched, dtype=np.float32)

    def postprocess_bboxes(self, frame, result):
        """Turn raw detections into polygons and draw them on ``frame``.

        Args:
            frame: Image the boxes are scaled to and drawn on (modified in place).
            result: Model output; reshaped to rows of 7 values
                ``[image_id, label, conf, x_min, y_min, x_max, y_max]``.

        Returns:
            ``(frame, bboxes)`` where each bbox is an ``int32`` array holding
            the four corners in the order top-left, top-right, bottom-right,
            bottom-left.
        """
        frame_height, frame_width = frame.shape[0], frame.shape[1]
        bboxes = []
        for detection in result.reshape(-1, 7):
            _image_id, _label, confidence, xmin, ymin, xmax, ymax = detection
            if confidence <= self.person_detection_confidence_threshold:
                continue

            # Coordinates are scaled by the frame size and kept at least
            # 10 px away from the frame border.
            xmin = int(max(xmin * frame_width, 10))
            ymin = int(max(ymin * frame_height, 10))
            xmax = int(min(xmax * frame_width, frame_width - 10))
            ymax = int(min(ymax * frame_height, frame_height - 10))

            corners = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
            bboxes.append(np.array(corners, dtype=np.int32))

            # Draw bbox around the detection
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), BOX_COLOR, 2)

        return frame, bboxes

    def run_inference(self, resized_frame, infer_request):
        """Run one blocking inference and return the data of output 0."""
        infer_request.set_tensor(self.input_layer_ir, ov.Tensor(resized_frame))
        infer_request.infer()
        return infer_request.get_output_tensor(0).data

    def check_intrusion(self, frame, bboxes):
        """Test every bbox against the ROI.

        Returns:
            ``(frame, intrusions)`` where ``intrusions`` holds one
            ``(intrusion_flag, intersection_visualization)`` pair per bbox, as
            returned by ``compute_polygon_intersection``.
        """
        intrusions = [
            tuple(compute_polygon_intersection(frame, self.roi, bbox))
            for bbox in bboxes
        ]
        return frame, intrusions

    def annotate_frame(self, frame, sync_fps, num_intrusions):
        """Draw the FPS counter, intrusion count and ROI outline on ``frame``."""
        cv2.putText(frame, f"{round(sync_fps, 2)} FPS", (5, 30), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Intrusion(s): {num_intrusions}", (5, 60), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2)
        cv2.polylines(frame, [np.array(self.roi, np.int32)], True, ROI_COLOR, thickness=2)
        return frame

    def get_num_intrusions(self, intrusions):
        """Count the entries whose intrusion flag is truthy.

        Always returns a plain ``int`` (``0`` for an empty list) so the
        on-screen counter never renders as ``0.0``.
        """
        return sum(1 for intrusion in intrusions if intrusion[0])

    def process_mask(self, frame, bboxes, intrusions):
        """Build the debug mask: filled ROI, filled boxes and intersection overlays.

        NOTE(review): the overlays are accumulated in ``uint8``, so
        overlapping bright regions can wrap around — confirm this is intended.
        """
        masks = [intrusion[1] for intrusion in intrusions if intrusion[0]]
        mask = np.zeros(frame.shape, dtype=np.uint8)
        cv2.fillPoly(mask, [np.array(self.roi, dtype=np.int32)], ROI_FILL_COLOR)
        if bboxes:
            cv2.fillPoly(mask, bboxes, BOX_FILL_COLOR)
        if masks:
            mask = np.add(mask, np.sum(masks, axis=0, dtype=np.uint8))
        return mask

    def _process_and_show(self, frame, result, fps, title, debug):
        """Post-process one result, display it, and report whether Escape was pressed."""
        frame, bboxes = self.postprocess_bboxes(frame, result)
        frame, intrusions = self.check_intrusion(frame, bboxes)
        num_intrusions = self.get_num_intrusions(intrusions)
        frame = self.annotate_frame(frame, fps, num_intrusions)

        cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE)
        cv2.imshow(title, frame)

        if debug:
            mask = self.process_mask(frame, bboxes, intrusions)
            cv2.imshow("Visualization Mask", mask)

        return cv2.waitKey(1) == self._ESCAPE_KEY

    def run_sync(self, source=0, required_fps=30, title="Intrusion Detection", debug=True):
        """Process the source frame by frame with blocking inference.

        Runs until the source ends, Escape is pressed or the user interrupts.

        Args:
            source: Video source handed to ``VideoPlayer``.
            required_fps: Target frame rate handed to ``VideoPlayer``.
            title: Title of the main display window.
            debug: When true, also show the visualization mask window.
        """
        frame_number = 0
        player = None

        # create inference request
        infer_request = self.compiled_model.create_infer_request()

        try:
            # Create a video player and start capturing
            player = VideoPlayer(source, fps=required_fps)
            start_time = time.time()
            player.start()

            while True:
                frame = player.next()
                if frame is None:
                    print("Source ended")
                    break

                result = self.run_inference(self.preprocess_frame(frame), infer_request)

                frame_number += 1
                sync_fps = frame_number / (time.time() - start_time)

                if self._process_and_show(frame, result, sync_fps, title, debug):
                    break

        except KeyboardInterrupt:
            print("Processing interrupted by user.")
        finally:
            cv2.destroyAllWindows()
            if player is not None:
                player.stop()

    def run_async(self, source, required_async_fps, title="Intrusion Detection", debug=True):
        """Process the source with two alternating asynchronous infer requests.

        While the result for the current frame is awaited and displayed, the
        next frame is already submitted on the other request. Runs until the
        source ends, Escape is pressed or the user interrupts.

        Args:
            source: Video source handed to ``VideoPlayer``.
            required_async_fps: Target frame rate handed to ``VideoPlayer``.
            title: Title of the main display window.
            debug: When true, also show the visualization mask window.
        """
        frame_number = 0
        current_request = self.compiled_model.create_infer_request()
        next_request = self.compiled_model.create_infer_request()
        player = None

        try:
            # Create a video player and start capturing
            player = VideoPlayer(source, fps=required_async_fps)
            start_time = time.time()
            player.start()

            frame = player.next()
            if frame is None:
                # Nothing to process; avoid feeding None into preprocessing.
                print("Source ended")
                return

            current_request.set_tensor(self.input_layer_ir, ov.Tensor(self.preprocess_frame(frame)))
            current_request.start_async()

            while True:
                next_frame = player.next()
                if next_frame is not None:
                    # Submit the next frame before waiting so that inference
                    # overlaps with post-processing and display.
                    next_request.set_tensor(
                        self.input_layer_ir, ov.Tensor(self.preprocess_frame(next_frame))
                    )
                    next_request.start_async()

                current_request.wait()
                result = current_request.get_output_tensor(0).data

                frame_number += 1
                async_fps = frame_number / (time.time() - start_time)

                escape_pressed = self._process_and_show(frame, result, async_fps, title, debug)

                # The in-flight result above is shown even when the source has
                # just ended, so the last frame is not silently dropped.
                if next_frame is None:
                    print("Source ended")
                    break
                if escape_pressed:
                    break

                frame = next_frame
                current_request, next_request = next_request, current_request

        except KeyboardInterrupt:
            print("Interrupted")
        # Any different error
        except RuntimeError as e:
            print(e)
        finally:
            cv2.destroyAllWindows()
            if player is not None:
                player.stop()

In [148]:
def run_person_detection(source, model_path, roi, required_fps=30, debug=True, with_async=True):
    """Build a PersonIntrusionDetection and run it in sync or async mode.

    Args:
        source: Video source forwarded to the detector's run method.
        model_path: Path of the model to load.
        roi: Region-of-interest polygon forwarded to the detector.
        required_fps: Target frame rate forwarded to the run method.
        debug: Forwarded to the run method.
        with_async: Select ``run_async`` when true, ``run_sync`` otherwise.
    """
    detector = PersonIntrusionDetection(model_path=model_path, roi=roi)

    if not with_async:
        print("Running in Sync Mode")
        detector.run_sync(source=source, required_fps=required_fps, debug=debug)
        return

    print("Running in Async Mode")
    detector.run_async(source=source, required_async_fps=required_fps, debug=debug)

In [149]:
def main(
    source=2,
    model_path="/home/acer/workspace/intel_models/intel/person-vehicle-bike-detection-2000/FP16/person-vehicle-bike-detection-2000.xml",
):
    """Capture a frame, let the user draw the ROI, then run async detection.

    Args:
        source: Video source used both for the ROI snapshot and for detection.
        model_path: Path of the OpenVINO model to load.

    Any exception raised along the way is reported on stdout rather than
    propagated, because this is the top-level entry point.
    """
    try:
        frame = capture_frame_for_ROI(source=source)
        if frame is None:
            # Previously this case ended silently, giving no hint why nothing ran.
            print(f"Could not capture a frame from source {source!r}; nothing to run.")
            return

        roi = create_ROI(frame)
        run_person_detection(source, model_path, roi, required_fps=30, debug=True, with_async=True)
    except Exception as e:
        print(f"An error occurred: {e}")

In [150]:
# Entry point: run main() only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()

====> Use left click to draw polygon, right click to release and finish

Adding point #0 with position (105, 131)
Adding point #1 with position (418, 117)
Adding point #2 with position (548, 217)
Adding point #3 with position (564, 338)
Adding point #4 with position (388, 412)
Adding point #5 with position (185, 364)
Adding point #6 with position (155, 340)
Polygon Points: [(105, 131), (418, 117), (548, 217), (564, 338), (388, 412), (185, 364), (155, 340)]
Running in Async Mode
