# Import Packages

In [1]:
import time
import imutils
import cv2 as cv
import numpy as np
from imutils.video import VideoStream

# Load Model

In [2]:
# Locations of the Caffe network definition (prototxt) and its trained weights
prototxt_path = '../models/detection/deploy.prototxt.txt'
caffemodel_path = '../models/detection/res10_300x300_ssd_iter_140000.caffemodel'

# Build the network object used by the later cells
net = cv.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Start Video Stream

In [3]:
# Source 0 indicates the webcam
camera_source = 0
# Seconds to wait so the camera sensor can warm up before frames are read
warmup_seconds = 2.0

# Start the video stream, then pause for the warm-up period
vs = VideoStream(src=camera_source).start()
time.sleep(warmup_seconds)

# Process Video Frames & Display Detections

In [4]:
# Read frames from the video stream, run each one through the network, draw
# every sufficiently confident detection on the frame and show it, until the
# 'q' key is pressed.
#
# The loop runs inside try/finally so that the display window is closed and
# the video stream is stopped even when a frame fails to process or the cell
# is interrupted; otherwise the camera would stay held by the kernel.

min_confidence = 0.5     # detections scoring below this are skipped
box_colour = (0, 0, 255)  # colour tuple passed to the OpenCV drawing calls
quit_key = ord("q")

try:
    while True:
        # grab the frame from the threaded video stream
        frame = vs.read()

        # the stream yields None when no frame is available (for example the
        # camera could not be opened); stop cleanly instead of failing inside
        # the resize call with an unrelated-looking AttributeError
        if frame is None:
            print("No frame received from the video stream - stopping.")
            break

        frame = imutils.resize(frame, width=400)

        # grab the frame dimensions and convert the frame to a blob:
        # a 300x300 copy of the frame, scale factor 1.0, with the given
        # per-channel mean values passed to blobFromImage
        (h, w) = frame.shape[:2]
        blob = cv.dnn.blobFromImage(cv.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

        # send blob through network and save detections
        net.setInput(blob)
        detections = net.forward()

        # loop over the detections
        for i in range(0, detections.shape[2]):
            # extract the prediction confidence
            confidence = detections[0, 0, i, 2]

            # filter out weak detections
            if confidence < min_confidence:
                continue

            # scale the four box values by the frame width/height to get
            # pixel coordinates for the detection box
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # draw box and confidence; the label goes above the box unless
            # that would put it too close to the top edge of the frame
            text = "{:.2f}%".format(confidence * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv.rectangle(frame, (startX, startY), (endX, endY), box_colour, 2)
            cv.putText(frame, text, (startX, y), cv.FONT_HERSHEY_SIMPLEX, 0.45, box_colour, 2)

        # show the output frame
        cv.imshow("Frame", frame)
        key = cv.waitKey(1) & 0xFF

        # set 'q' to quit
        if key == quit_key:
            break
finally:
    # cleanup - always runs, whether the loop ended normally or with an error
    cv.destroyAllWindows()
    vs.stop()