In [1]:
import numpy as np
import cv2
import time

# Load model and class names

In [3]:
# Build the detection network from the frozen graph file (weights) and the
# accompanying text graph description (config).
model = cv2.dnn.readNet(
    model='input1/frozen_inference_graph.pb',
    config='input1/ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',
    framework='Tensorflow',
)

In [4]:
# Read the class labels (one label per line) into a list of strings.
# NOTE: split('\n') keeps a trailing '' entry when the file ends with a
# newline, so len(class_names) may be one more than the number of labels.
with open('input1/coco_classes.txt', 'r') as labels_file:
    raw_labels = labels_file.read()
class_names = raw_labels.split('\n')

In [6]:
# One random colour (3 float components in [0, 255)) per entry of class_names;
# the detection cells use it for that class's box and label.
# A seeded, local generator keeps the palette identical across kernel restarts
# (so saved images/videos are reproducible) and leaves NumPy's global random
# state untouched.
COLORS = np.random.default_rng(42).uniform(0, 255, size=(len(class_names), 3))

# Detect on image

In [5]:
# Load the input image and remember its size; the detector's normalised box
# coordinates are scaled back to pixels with these values.
image_path = 'input1/images/image1.jpg'
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of on `.shape` below.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# shape[:2] is (height, width); slicing avoids binding `_`, which would shadow
# IPython's "last output" variable in the notebook.
image_height, image_width = image.shape[:2]

In [6]:
# Create a blob from the image: resized to 300x300, mean-subtracted, with the
# first and last channels swapped.
# NOTE(review): with swapRB=True OpenCV documents `mean` as being in
# (R, G, B) order; (104, 117, 123) looks like a B, G, R triple - confirm
# the intended preprocessing for this model.
blob = cv2.dnn.blobFromImage(
    image=image,
    size=(300, 300),
    mean=(104, 117, 123),
    swapRB=True,
)

# Feed the blob to the network and run a forward pass to get the detections.
model.setInput(blob)
output = model.forward()

In [7]:
# Draw every sufficiently confident detection onto `image` (modified in place).
# Each row of output[0, 0] is read as:
#   [1] class id, [2] confidence, [3], [4] first box corner (x, y),
#   [5], [6] opposite box corner (x, y); corners are fractions of the image size.
for detection in output[0, 0, :, :]:
    # extract the confidence of the detection
    confidence = detection[2]
    # draw bounding boxes only if the detection confidence is above threshold
    if confidence > 0.4:
        class_id = int(detection[1])
        # labels are looked up with a 1-based class id; fall back to the raw id
        # instead of raising IndexError (or silently wrapping to the last
        # entry for id 0) when the id has no label
        label_index = class_id - 1
        if 0 <= label_index < len(class_names):
            class_name = class_names[label_index]
        else:
            class_name = f"id {class_id}"
        # same colour as before for in-range ids; the modulo only prevents an
        # IndexError when class_id == len(COLORS)
        color = COLORS[class_id % len(COLORS)]
        # scale the two box corners from fractions to pixel coordinates
        x_min = detection[3] * image_width
        y_min = detection[4] * image_height
        x_max = detection[5] * image_width
        y_max = detection[6] * image_height
        # draw rectangle on detected objects (cv2.rectangle takes two corners)
        cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)),
                      color, thickness=2)
        # put the class name just above the first corner of the box
        cv2.putText(image, class_name, (int(x_min), int(y_min - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

In [None]:
# Show the annotated image, save it to disk, and wait for a key press.
try:
    cv2.imshow('image', image)
    # cv2.imwrite returns False when the file could not be written; surface
    # that instead of silently continuing without a saved result
    if not cv2.imwrite('image_result.jpg', image):
        print("Warning: could not write 'image_result.jpg'")
    # waitKey(0) blocks until a key is pressed in the window
    cv2.waitKey(0)
finally:
    # close the window even if the wait is interrupted (e.g. kernel interrupt)
    cv2.destroyAllWindows()

# Detect on video

In [7]:
# Run the detector on every frame of the input video, display the annotated
# frames and write them to 'video_result.mp4'. Press 'q' in the preview window
# to stop early.

# capture the video
cap = cv2.VideoCapture('input1/video_1.mp4')
if not cap.isOpened():
    # fail loudly instead of silently producing an empty output file
    cap.release()
    raise IOError("Could not open video: input1/video_1.mp4")

# get the video frames' width and height (named constants instead of 3 and 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# write at the source frame rate so playback speed is preserved; fall back to
# the previous hard-coded 30 when the capture does not report one
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 30
# writer for the annotated frames
out = cv2.VideoWriter('video_result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), output_fps,
                      (frame_width, frame_height))

try:
    # detect objects in each frame of the video
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no more frames (or a read error): stop
            break
        # work on `frame` directly so the `image`, `image_height` and
        # `image_width` variables of the still-image section are not clobbered
        height, width = frame.shape[:2]
        # create blob from the frame
        blob = cv2.dnn.blobFromImage(image=frame, size=(300, 300), mean=(104, 117, 123),
                                     swapRB=True)
        # time only the forward pass to calculate FPS
        start = time.time()
        model.setInput(blob)
        output = model.forward()
        end = time.time()
        # guard against a zero interval on clocks with coarse resolution
        elapsed = end - start
        fps = 1 / elapsed if elapsed > 0 else 0.0

        # loop over each of the detections; a row is read as
        # [1] class id, [2] confidence, [3..6] two box corners as fractions
        for detection in output[0, 0, :, :]:
            # extract the confidence of the detection
            confidence = detection[2]
            # draw bounding boxes only if the detection confidence is above threshold
            if confidence > .4:
                class_id = int(detection[1])
                # labels use a 1-based class id; fall back to the raw id rather
                # than raising IndexError when the id has no label
                label_index = class_id - 1
                if 0 <= label_index < len(class_names):
                    class_name = class_names[label_index]
                else:
                    class_name = f"id {class_id}"
                # modulo only prevents an IndexError at class_id == len(COLORS)
                color = COLORS[class_id % len(COLORS)]
                # scale the two box corners to pixel coordinates
                x_min = detection[3] * width
                y_min = detection[4] * height
                x_max = detection[5] * width
                y_max = detection[6] * height
                # draw a rectangle on detected objects
                cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)),
                              color, thickness=2)
                # put the class name text on the detected object
                cv2.putText(frame, class_name, (int(x_min), int(y_min - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # put the FPS text on top of the frame: once per frame, and also on
        # frames without any detection
        cv2.putText(frame, f"{fps:.2f} FPS", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow('image', frame)
        out.write(frame)
        # stop early when 'q' is pressed
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # always release the capture AND the writer so the output file is
    # finalised, then close the preview window
    cap.release()
    out.release()
    cv2.destroyAllWindows()