In [4]:
import cv2
import numpy as np

## Class labels the pre-trained model can predict. The position of a name in
## this list is the category id used to look it up (cat[cat_idx]).
cat = [
    'background',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

def detect_obj_and_draw_boxes(net, image, conf_threshold=60):
    """Run the detector on ``image`` and draw labelled boxes on it in place.

    Parameters
    ----------
    net
        Network object exposing ``setInput`` and ``forward`` (the notebook
        passes the result of ``cv2.dnn.readNetFromCaffe``).
    image
        Image array as produced by OpenCV (``image.shape[:2]`` is read as
        ``(height, width)``). It is modified in place.
    conf_threshold : float, optional
        Minimum confidence, in percent, a detection must exceed to be drawn.
        Defaults to 60, the value that used to be hard-coded.

    Returns
    -------
    None
        The boxes and labels are drawn directly onto ``image``.
    """
    h, w = image.shape[:2]

    ## The caffe model was trained on 300x300 images, so feed it that size.
    resized = cv2.resize(image, (300, 300))
    ## Preprocessing: subtract the mean 127.5, then scale by 0.007843 (~1/127.5).
    blob = cv2.dnn.blobFromImage(resized, 0.007843, (300, 300), 127.5)

    # Feed the blob as input to the deep learning model.
    net.setInput(blob)
    detections = net.forward()[0][0]

    # Loop-invariant values hoisted out of the detection loop.
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = np.array([w, h, w, h])

    for detection in detections:
        ## Format of each detection: [0, predicted category, confidence, x1, y1, x2, y2]
        confidence = round(float(detection[2]) * 100, 2)
        if confidence <= conf_threshold:
            continue

        # Scale the relative box coordinates up to pixels and truncate to int.
        box = (detection[3:7] * scale).astype('int')

        # Clamp to the frame and convert to plain Python ints before handing
        # the coordinates to the OpenCV drawing functions.
        x1 = int(np.clip(box[0], 0, w - 1))
        y1 = int(np.clip(box[1], 0, h - 1))
        x2 = int(np.clip(box[2], 0, w - 1))
        y2 = int(np.clip(box[3], 0, h - 1))

        # Guard the lookup: a negative id would silently wrap around and an
        # id past the end of the list would raise IndexError.
        cat_idx = int(detection[1])
        obj_name = cat[cat_idx] if 0 <= cat_idx < len(cat) else 'unknown'
        display = obj_name + ':' + str(confidence) + '%'

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 4)

        # putText anchors the text at its bottom-left corner, so a box that
        # touches the top edge would push the label off the image; keep at
        # least one text height of room above the baseline.
        (_, text_h), _ = cv2.getTextSize(display, font, 1, 2)
        label_y = max(y1, text_h)
        cv2.putText(image, display, (x1, label_y), font, 1, (0, 255, 0), 2)

In [13]:
description = '''The file can be image or a video. TO read video from webcam pass filename as 0'''

data_file = '/home/aakash/opencv-4/video2.mp4'
file_type = None

# Work out whether the input is an image or a video.
if isinstance(data_file, int):
    # An integer is a camera index (0 = default webcam), as the description says.
    file_type = 'video'
else:
    # Take only the text after the LAST dot so that dots elsewhere in the
    # path do not confuse the check, and compare case-insensitively.
    extension = data_file.rsplit('.', 1)[-1].lower()
    if extension in ('png', 'jpg', 'jpeg'):
        file_type = 'image'
    elif extension in ('mp4', 'avi', 'mov'):
        file_type = 'video'

if file_type is None:
    # Fail loudly instead of silently doing nothing.
    raise ValueError('Unsupported input file: {!r}'.format(data_file))

# Load the caffe model
model_name = '/home/aakash/opencv-4/object-detection-deep-learning/MobileNetSSD_deploy.caffemodel'
model_proto = '/home/aakash/opencv-4/object-detection-deep-learning/MobileNetSSD_deploy.prototxt.txt'
net = cv2.dnn.readNetFromCaffe(model_proto, model_name)

try:
    if file_type == 'image':
        test_image = cv2.imread(data_file)
        if test_image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise IOError('Could not read image: {!r}'.format(data_file))
        detect_obj_and_draw_boxes(net, test_image)
        cv2.imshow('image', test_image)
        cv2.waitKey(0)

    elif file_type == 'video':
        cap = cv2.VideoCapture(data_file)
        try:
            if not cap.isOpened():
                raise IOError('Could not open video source: {!r}'.format(data_file))

            while True:
                ret, frame = cap.read()  # ret is False once no frame could be read
                if not ret:
                    break

                detect_obj_and_draw_boxes(net, frame)
                cv2.imshow('Demo', frame)  # a video is processed as a series of images

                # Press 'q' to stop early.
                k = cv2.waitKey(10)
                if k == ord('q'):
                    break
        finally:
            # Release the capture on every exit path: end of stream, 'q', or error.
            cap.release()
finally:
    cv2.destroyAllWindows()