In [5]:
import cv2
import numpy as np


def get_output_layers(net):
    '''
    Return the names of the network's unconnected (final) output layers.

    With YOLOv3 these are the three detection layers yolo_82, yolo_94
    and yolo_106.

    net: object exposing getLayerNames() and getUnconnectedOutLayers(),
         e.g. the network returned by cv2.dnn.readNet.
    returns: list of layer-name strings, in the order reported by the net.
    '''
    layer_names = net.getLayerNames()

    # Depending on the OpenCV release, getUnconnectedOutLayers() yields either
    # an Nx1 array ([[200], [227], ...]) or a flat one ([200, 227, ...]).
    # Flattening first makes both shapes work; indexing `i[0]` on the flat
    # form raises IndexError.
    out_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()

    # The reported indices are 1-based, hence the "- 1".
    return [layer_names[int(i) - 1] for i in out_indices]


def draw_prediction(img, class_id, confidences, x, y, x_plus_w, y_plus_h):
    '''
    Annotate img in place with a labelled bounding box.

    The box spans (x, y) to (x_plus_w, y_plus_h); the class name looked up
    in the module-level `classes` list is written just above-left of the
    box, using the per-class colour from the module-level `COLORS` table.
    The `confidences` argument is accepted but not used.
    '''
    caption = str(classes[class_id])
    box_color = COLORS[class_id]

    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)

    # Offset the text by 10px so it does not sit on the box outline.
    text_origin = (x - 10, y - 10)
    cv2.putText(img, caption, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

image_path = 'motorbike.jpg'
config = 'yolov3.cfg'
name = 'coco.names'
weight = 'yolov3.weights'

image = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising, which
# would otherwise surface later as an obscure AttributeError on `.shape`.
if image is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# Keep the original image size: detections come back normalised and have to
# be scaled back to pixel coordinates of this image.
Width = image.shape[1]
Height = image.shape[0]
# Roughly 1/255: rescales 8-bit pixel values into the [0, 1] range.
scale = 0.00392

with open(name, 'r') as f:
    # One class name per line; the line index is the class id
    # (bicycle ind = 1, car ind = 2 and motorbike ind = 3).
    classes = [line.strip() for line in f]

# One random colour per class so that boxes of the same class match.
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
# Read the model
net = cv2.dnn.readNet(weight, config)
# Build the network input: scaled pixel values, resized to 416x416 (the input
# size of the first YOLO layer), no mean subtraction, R and B channels swapped.
blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False)
# Feed the resized picture to the network
net.setInput(blob)
# Run a forward pass and collect the results of the final (output) layers
outs = net.forward(get_output_layers(net))

class_ids = []
confidences = []
boxes = []
# Minimum class score for a detection to be kept. This must be below 1:
# NMSBoxes only keeps boxes whose score is strictly greater than it.
conf_threshold = 0.5
# The model may predict many overlapping boxes for one object. Boxes that
# overlap a higher-scoring box by more than this IoU are suppressed; a value
# of 1 would disable suppression entirely.
nms_threshold = 0.4

# Each `out` is a 2D array with one row per candidate detection. For a row
# `temp` the first five elements are special:
#     + temp[0]: x_center of the object (relative to image width)
#     + temp[1]: y_center of the object (relative to image height)
#     + temp[2]: width of the object (relative to image width)
#     + temp[3]: height of the object (relative to image height)
#     + temp[4]: objectness score (confidence that the box holds any object)
#     + temp[5:]: one score per class => COCO has 80 classes, so each row
#         contains 85 elements (in general 5 + number_of_classes)
for out in outs:
    for detection in out:
        scores = detection[5:]
        # the class with the highest score determines the label
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        # keep only detections that are confident enough about their label
        if confidence > conf_threshold:
            # scale the normalised values back to pixels of the original image
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            # the network returns the box centre, so derive the top-left corner
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])


indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# Depending on the OpenCV release, NMSBoxes returns an Nx1 array, a flat
# array, or an empty tuple when nothing survives; flattening handles them all.
for i in np.asarray(indices).flatten():
    i = int(i)
    x, y, w, h = boxes[i]
    draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))

# cv2.imshow("object detection", image)
# cv2.waitKey()
    
# cv2.imwrite("object-detection.jpg", image)
# cv2.destroyAllWindows()

In [6]:
from matplotlib import pyplot as plt 
# Save the annotated image to disk; the `True` printed below this cell is the
# value returned by cv2.imwrite.
cv2.imwrite('result.jpg',image)

True

In [3]:
# Exploratory cell: reset the accumulators and thresholds, then print the
# first sufficiently confident raw detection row of every output layer.
class_ids, confidences, boxes = [], [], []
conf_threshold, nms_threshold = 0.5, 0.4
for out in outs:
    for detection in out:
        # elements from index 5 onwards are the per-class scores
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if not confidence > 0.5:
            continue
        print(detection)
        # only the first hit of this output layer is of interest;
        # the outer loop then moves on to the next layer
        break

[0.50612086 0.63960814 0.06346598 0.133987   0.72913414 0.72805005
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.        ]
[0.31810763 0.5884906  0.01724224 0.02914196 0.51016784 0.5005618
 0.         0.         0.         0.         0.         0.
 0.         0.         0.   