In [1]:
from pathlib import Path
import cv2
import depthai as dai
import time
import ipywidgets as widgets

In [2]:
# Path of the compiled network blob, given relative to the notebook's working directory.
file: str = "mobilenet-ssd_openvino_2021.4_6shave.blob"

In [3]:
# Build an empty pipeline, then add the nodes it is made of.
pipeline = dai.Pipeline()

# Processing nodes: the colour camera and the MobileNet detection network.
camRgb = pipeline.create(dai.node.ColorCamera)
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

# XLinkOut nodes; each one is given the stream name that the host-side
# code later passes to getOutputQueue(name=...).
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")

nnOut = pipeline.create(dai.node.XLinkOut)
nnOut.setStreamName("nn")

nnNetworkOut = pipeline.create(dai.node.XLinkOut)
nnNetworkOut.setStreamName("nnNetwork")

In [4]:
# Camera configuration
camRgb.setFps(40)
camRgb.setInterleaved(False)
# 300x300 preview; this output is linked to both the "rgb" stream and the network input.
camRgb.setPreviewSize(300, 300)

In [5]:
# Detection network configuration: which blob to run and how to run it.
nn.setBlobPath(file)
nn.setConfidenceThreshold(0.5)
nn.setNumInferenceThreads(2)
# Non-blocking input; presumably so the camera is not stalled when inference
# falls behind -- TODO confirm against the DepthAI queue semantics.
nn.input.setBlocking(False)

In [6]:
# Wire the nodes together, one (output, input) pair per connection.
# Alternative (disabled): nn.passthrough.link(xoutRgb.input) would feed the
# "rgb" stream from the network's passthrough output instead of the camera preview.
for source, sink in (
    (camRgb.preview, xoutRgb.input),        # preview frames -> host ("rgb")
    (camRgb.preview, nn.input),             # preview frames -> detection network
    (nn.out, nnOut.input),                  # detections -> host ("nn")
    (nn.outNetwork, nnNetworkOut.input),    # network output -> host ("nnNetwork")
):
    source.link(sink)

In [7]:
# Label texts, indexed by the integer class id reported for a detection.
# Indices 0-20 are the MobilenetSSD label texts.
# NOTE(review): the entries from index 21 onwards look like an appended list of
# COCO class names; they appear to be unused with the MobileNet-SSD blob
# configured in this notebook -- confirm before relying on them.
labelMap = [
    # --- indices 0-20 ---
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
    # --- indices 21-100 ---
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
    "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]

In [8]:
# Eight identical 100px-wide, initially empty text labels. Each label gets its
# own Layout instance, and they are created in the order they are named here.
(
    w_label, w_confidence,
    w_xmin, w_ymin,
    w_xmax, w_ymax,
    w_xcenter, w_ycenter,
) = (
    widgets.Label(value="", layout=widgets.Layout(width='100px'))
    for _ in range(8)
)

# Layout: label | confidence | column of x values | column of y values
w_x = widgets.VBox([w_xmin, w_xmax, w_xcenter])
w_y = widgets.VBox([w_ymin, w_ymax, w_ycenter])
ui = widgets.HBox([w_label, w_confidence, w_x, w_y])

In [9]:
# Show the widget panel, then stream from the device and keep the panel updated
# with the most recent high-confidence detection.
display(ui)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Non-blocking host queues (up to 4 messages each) for the three output
    # streams named when the pipeline was built.
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
    qNN = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0  # number of detection messages received so far
    color2 = (255, 255, 255)

    printOutputLayersOnce = True

    try:
        while True:
            # tryGet() does not wait; the results are checked against None below.
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()
            inNN = qNN.tryGet()

            if inRgb is not None:
                frame = inRgb.getCvFrame()
                # Stamp the average rate of detection messages since start-up.
                cv2.putText(frame, "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                            (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color2)

            if inDet is not None:
                detections = inDet.detections
                counter += 1

            if printOutputLayersOnce and inNN is not None:
                # Same text as before: every layer name is preceded by a space
                # and followed by a comma.
                layerNames = ''.join(f' {ten},' for ten in inNN.getAllLayerNames())
                print(f'Output layer names:{layerNames}')
                printOutputLayersOnce = False

            # Once a frame has been received, push detections to the widgets.
            if frame is not None:
                for detection in detections:
                    # Only confident detections are shown; when several qualify,
                    # the last one in the list is what remains displayed.
                    if detection.confidence > 0.8:
                        w_label.value = labelMap[detection.label]
                        w_confidence.value = str(int(detection.confidence * 100)) + "%"
                        w_xmin.value = f"{detection.xmin:.2f}"
                        w_ymin.value = f"{detection.ymin:.2f}"
                        w_xmax.value = f"{detection.xmax:.2f}"
                        # Fixed: this label previously displayed xmax instead of ymax.
                        w_ymax.value = f"{detection.ymax:.2f}"
                        xcenter = (detection.xmin + detection.xmax) / 2
                        ycenter = (detection.ymin + detection.ymax) / 2
                        w_xcenter.value = f"{xcenter:.2f}"
                        w_ycenter.value = f"{ycenter:.2f}"

            # NOTE(review): no OpenCV window is opened in this cell, so this key
            # check will probably never see 'q' -- confirm. Interrupting the
            # kernel is handled below as the reliable way to stop.
            if cv2.waitKey(1) == ord('q'):
                break
    except KeyboardInterrupt:
        # Kernel interrupt: leave the loop quietly; the `with` block then
        # releases the device.
        pass

HBox(children=(Label(value='', layout=Layout(width='100px')), Label(value='', layout=Layout(width='100px')), V…

Output layer names: detection_out,
