## This notebook is an example of how to pipeline two models. 
A video stream from a local camera is processed by the person detection model. The person detection results are then processed by the pose detection model, one person bounding box at a time. The combined result is then displayed.

This example uses the `mystreams` streaming toolkit.

**Access to camera is required to run this sample.**

The script needs either a web camera or a local camera connected to the machine running this code. The camera index or URL can be specified in one of two ways: either assign it to `camera_id` in the code below, or assign `camera_id = None` in the code below and define the `CAMERA_ID` variable in the .env file.

### Specify camera index here

In [1]:
# Camera to read the video stream from: a camera index or a URL.
#   0    - use the default local camera
#   None - take the value from the CAMERA_ID variable defined in the .env file
camera_id = 0

In [2]:
import degirum as dg # import DeGirum PySDK
import mytools, cv2
from mystreams import *

### Specify inference option here

In [None]:
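# Please uncomment and edit exactly one of the following inference options so that it matches your system configuration
# (the cells below use the resulting `zoo` object to load the models). The configuration cases are described in
# https://cs.degirum.com/doc/0.5.0/degirum.html#system-configuration-for-specific-use-cases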

# 1. DeGirum Cloud Zoo inference:
#zoo = dg.connect_model_zoo("dgcps://cs.degirum.com", token=mytools.token_get())

# 2. AIServer inference via IP address using models from DeGirum Cloud model zoo
#zoo = dg.connect_model_zoo(("192.168.0.7", "https://cs.degirum.com/degirum_com/public"), token=mytools.token_get())

# 3. AIServer inference via IP address using local model zoo
#zoo = dg.connect_model_zoo("192.168.0.1")

# 4. ORCA board installed locally using models from DeGirum Cloud Model Zoo
#zoo = dg.connect_model_zoo("https://cs.degirum.com/degirum_com/public", token=mytools.token_get())

# 5. Local inference with locally deployed model
#zoo = dg.connect_model_zoo("full/path/to/model.json")

In [None]:
# Load both models of the pipeline, built for the DeGirum Orca AI accelerator
# (to run a model on CPU instead, change its name to "...n2x_cpu_1")
people_det_model = zoo.load_model(
    "yolo_v5s_person_det--512x512_quant_n2x_orca_1"
)
pose_model = zoo.load_model(
    "mobilenet_v1_posenet_coco_keypoints--353x481_quant_n2x_orca_1"
)

# Pose model properties, applied one by one in the order listed
pose_model_settings = {
    "output_pose_threshold": 0.2,  # lower threshold
    "overlay_line_width": 1,
    "overlay_alpha": 1,
    "overlay_show_labels": False,
    "overlay_color": (255, 0, 0),
}
for prop_name, prop_value in pose_model_settings.items():
    setattr(pose_model, prop_name, prop_value)

# People detection model properties
setattr(people_det_model, "overlay_show_probabilities", True)

In [None]:
# Define pose detection gizmo (in mystreams terminology)
class PoseDetectionGizmo(AiGizmoBase):
    """Pose detection gizmo which merges per-person pose results into one result per frame.

    Each call to `on_result` delivers the pose inference result for one input
    (in this pipeline, one cropped person image). Results belonging to the same
    frame are accumulated into the first pose result object of that frame, and
    the accumulated object is sent downstream once the first result of the next
    frame arrives.

    NOTE(review): since sending is triggered only by the start of the next
    frame, the result accumulated for the very last frame is never sent by this
    class.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to AiGizmoBase; no frame is being accumulated yet."""
        super().__init__(*args, **kwargs)
        # pose result object which accumulates all poses of the frame being processed
        self._cur_result = None

    def on_result(self, result):
        """Process one pose inference result.

        Args:
            result: pose inference result. `result.info` contains the StreamData
                object used for AI inference (because AiGizmoBase does it this
                way), and `result.info.meta` contains the metainfo dictionary
                placed by AiObjectDetectionCroppingGizmo, because in our
                pipeline it is connected as a source of this gizmo.
        """
        meta = result.info.meta

        if "original_result" in meta:  # new frame comes
            if self._cur_result is not None:
                # send previous frame
                self.send_result(StreamData(self._cur_result.image, self._cur_result))

            # save first pose result object at the beginning of new frame
            # in order to accumulate all poses into it
            self._cur_result = result
            # replace original image with full annotated image which came from
            # person detector to show person boxes as well as poses
            self._cur_result._input_image = meta["original_result"].image_overlay

        if "cropped_index" in meta and "cropped_result" in meta:
            # convert pose coordinates back to original image by shifting them
            # by the first two values of the person bounding box
            # (presumably the x/y of its top-left corner - TODO confirm)
            box = meta["cropped_result"]["bbox"]
            for r in result.results:
                for p in r['landmarks']:
                    p['landmark'][0] += box[0]
                    p['landmark'][1] += box[1]

            # Identity check: the first result of a frame IS the accumulator, so
            # it must not be appended to itself. The None check covers a cropped
            # result which arrives before any frame-start result: there is no
            # frame to attach it to, so it is dropped instead of raising
            # AttributeError on None.
            if self._cur_result is not None and self._cur_result is not result:
                # accumulate all other detected poses into current result object
                self._cur_result._inference_results += result.results

In [None]:
# Composition object which owns all gizmos of the pipeline
c = Composition()

# Create gizmos, adding each of them to the composition

# video source
source = c.add(VideoSourceGizmo(camera_id))

# people detection gizmo, which outputs cropped image for each detected person
people_detection = c.add(
    AiObjectDetectionCroppingGizmo(["person"], people_det_model)
)

# pose detection gizmo
pose_detection = c.add(PoseDetectionGizmo(pose_model))

# display
display = c.add(
    VideoDisplayGizmo("Person Poses", show_ai_overlay=True, show_fps=True)
)

# Connect gizmos to create pipeline:
# video source -> people detection -> pose detection -> display
source >> people_detection >> pose_detection >> display

# Start execution of composition
c.start()