In [1]:
import cv2
import numpy as np

In [2]:
#Build the detection network from the pretrained weights and the matching config file
net = cv2.dnn.readNet(model='yolov3.weights', config='yolov3_testing.cfg')

In [3]:
#Read the class labels, one per line. Labels are later looked up by the
#predicted class index, so the line order of the file matters.
#An explicit encoding avoids depending on the platform's default locale.
with open("classes.txt", "r", encoding="utf-8") as f:
    classes = f.read().splitlines()

In [4]:
#Peek at the first five labels to confirm the class file loaded as expected
classes[:5]

['person', 'bicycle', 'car', 'motorbike', 'aeroplane']

In [5]:
#Load the image
img = cv2.imread('cars.jpg')
#cv2.imread signals a missing/unreadable file by returning None rather than
#raising, so fail here with a clear message instead of on img.shape below
if img is None:
    raise FileNotFoundError("Could not read image 'cars.jpg'")
#Keep the original size: it is used later to scale the normalized box
#coordinates back to pixels
height, width = img.shape[:2]

In [6]:
#Preview the loaded image in a window titled 'Image'
cv2.imshow('Image',img)
#A delay of 0 means wait for a key press with no timeout; the window is closed afterwards
cv2.waitKey(0)
cv2.destroyAllWindows()

In [7]:
#Convert the image into the blob the network consumes:
#  scalefactor 1/255 : normalize pixel values
#  size (416, 416)   : spatial size the image is resized to
#  mean (0, 0, 0)    : no mean subtraction
#  swapRB=True       : swap the R and B channels (BGR -> RGB)
#  crop=False        : resize without cropping
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [8]:
#Feed the blob to the network as the input for the upcoming forward pass
net.setInput(blob)

In [9]:
#Names of the network's unconnected output layers; these are the layers
#whose results are requested from the forward pass
output_layers_names = net.getUnconnectedOutLayersNames()

In [10]:
#Run the forward pass for the requested output layers; the result is
#iterated layer by layer in the detection loop
layerOutputs = net.forward(output_layers_names)

In [17]:
#Accumulators for the kept detections: box geometry, score and class index
boxes, confidences, class_ids = [], [], []

In [18]:
#Empty the accumulators in place first so that re-running this cell does not
#append a second copy of every detection
boxes.clear()
confidences.clear()
class_ids.clear()

#Outer loop walks each output layer; inner loop walks each detection in that layer
for output in layerOutputs:
    for detection in output:
        #Entries from index 5 onward are the per-class scores; the best one decides the class
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.8:
            #Entries 0-3 are normalized; multiply by the image width/height to get pixel values
            center_x = int(detection[0]*width)
            center_y = int(detection[1]*height)
            w = int(detection[2]*width)
            h = int(detection[3]*height)

            #The box is given by its center, so shift by half the size to get the upper-left corner
            x = int(center_x - w/2)
            y = int(center_y - h/2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            #Store a plain Python int rather than a NumPy scalar
            class_ids.append(int(class_id))

In [19]:
#Number of candidate boxes that passed the confidence filter (before non-max suppression)
len(boxes)

30

In [22]:
#Non-maximum suppression over the candidate boxes
#  score_threshold 0.5 : minimum score for a box to be considered
#  nms_threshold 0.4   : overlap threshold used to suppress competing boxes
indexes = cv2.dnn.NMSBoxes(
    bboxes=boxes,
    scores=confidences,
    score_threshold=0.5,
    nms_threshold=0.4,
)

In [23]:
#Inspect the indices kept by non-max suppression. When nothing survives the
#result may be an empty tuple, which has no .flatten(); np.asarray makes this
#inspection work in both cases.
np.asarray(indexes).flatten()

array([10,  1, 16,  7, 26,  5, 28,  3, 19, 22, 24, 15, 14])

In [24]:
font = cv2.FONT_HERSHEY_PLAIN
#One random colour (3 values in [0, 255)) per candidate box. A locally seeded
#generator keeps the colours identical across re-runs without touching
#NumPy's global random state.
colors = np.random.default_rng(42).uniform(0, 255, size=(len(boxes), 3))

In [25]:
#Draw every box kept by non-max suppression onto img; an empty result draws nothing
for i in (indexes.flatten() if len(indexes) > 0 else []):
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    color = colors[i]

    #Rectangle spans from the upper-left corner to the opposite corner
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, color, 2)

    #Caption is "<class name> <score>", placed 20 px below the box's top edge
    caption = " ".join([label, confidence])
    cv2.putText(img, caption, (x, y + 20), font, 2, (255, 255, 255), 2)

In [26]:
cv2.imshow('Image', img)