In [1]:
import cv2
import numpy as np
import imutils

In [2]:
# Build the YOLO network from its weights file and its configuration file
weights_path = 'yolov3.weights'
config_path = 'yolov3.cfg'
net = cv2.dnn.readNet(weights_path, config_path)

In [3]:
# Collect the class names YOLO was trained to recognize, one per line of coco.names
classes = []
with open('coco.names', 'r') as names_file:
    for raw_name in names_file:
        # strip() drops the trailing newline and any surrounding whitespace
        classes.append(raw_name.strip())

# Random drawing colours: one row of three values in [0, 255) per class name
num_classes = len(classes)
colors = np.random.uniform(0, 255, size=(num_classes, 3))

In [4]:
# Show the class names that were read from coco.names as a quick sanity check
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [5]:
# Names of every layer in the network, in the order OpenCV reports them
layer_names = net.getLayerNames()

# getUnconnectedOutLayers() returns 1-based layer ids. Older OpenCV releases
# return them as an (N, 1) array while newer ones return a flat (N,) array,
# so flatten first instead of indexing each entry with [0].
output_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
outputlayers = [layer_names[int(layer_id) - 1] for layer_id in output_layer_ids]

In [6]:
# Load the image in which objects should be detected
IMAGE_PATH = 'images/my_home.jpg'
image = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an obscure error further down.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))
image = imutils.resize(image, width=1000)
height, width, channels = image.shape

# Convert the image into the 4-D blob the network takes as input:
# pixel values are scaled by 0.00392 (about 1/255), the image is resized to
# 416x416 without cropping, and the first and last channels are swapped.
blob = cv2.dnn.blobFromImage(image, scalefactor=0.00392, size=(416, 416), mean=(0, 0, 0), swapRB=True, crop=False)

net.setInput(blob)
# Run a forward pass up to the output layers to get the raw detections
outs = net.forward(outputlayers)

# Minimum class score a detection must exceed to be kept
CONFIDENCE_THRESHOLD = 0.5

# Parallel lists: entry k of each list describes the same candidate detection
preds_class = []
confidences = []
boxes = []

for out in outs:
    for detection in out:
        # Everything from index 5 onwards is treated as the per-class scores
        scores = detection[5:]
        # Keep only the best-scoring class for this detection
        predicted_class = int(np.argmax(scores))
        confidence = float(scores[predicted_class])
        if confidence > CONFIDENCE_THRESHOLD:
            # The first four values are scaled by the image size, i.e. they are
            # used as the box centre and box size relative to the image.
            centerX = int(detection[0] * width)
            centerY = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert the centre point into the top-left corner of the box
            x = int(centerX - w/2)
            y = int(centerY - h/2)
            # Store the box as plain Python ints (x, y, w, h) so the values can
            # be handed to OpenCV's NMS and drawing functions without relying
            # on numpy-scalar conversion.
            boxes.append([x, y, w, h])
            confidences.append(confidence)
            preds_class.append(predicted_class)

# Detections scoring below SCORE_THRESHOLD are dropped; NMS_THRESHOLD is the
# overlap threshold passed to non-maximum suppression.
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.4

# Non-maximum suppression: returns the indices of the boxes to keep so that a
# single object is not outlined by several overlapping boxes.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD)
font = cv2.FONT_HERSHEY_COMPLEX

# Depending on the OpenCV version and the result, NMSBoxes gives back an
# (N, 1) array, a flat array or an empty tuple; flattening handles all three.
# Sorting keeps the drawing order the same as iterating over the boxes in order.
kept_indexes = sorted(int(i) for i in np.array(indexes).flatten())

# Draw the rectangle (x, y, w, h) and the class label of every kept detection
for i in kept_indexes:
    x, y, w, h = (int(v) for v in boxes[i])
    class_id = int(preds_class[i])
    label = str(classes[class_id])
    # colors holds one row per class, so it must be indexed by the class id.
    # Indexing it by the detection index gives arbitrary colours and raises
    # IndexError once there are more raw detections than classes.
    color = tuple(float(c) for c in colors[class_id])
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    cv2.putText(image, label, (x, y + 30), font, 0.8, color, 2)

cv2.imshow('Image', image)
# waitKey(0) blocks until a key is pressed in the image window
cv2.waitKey(0)
cv2.destroyAllWindows()