In [1]:
import cv2
import numpy as np

In [2]:
# Build the detection network from the Darknet weights file and its
# matching layer-configuration file (both expected in the working directory).
net = cv2.dnn.readNet(
    'yolov3.weights',
    'yolov3.cfg',
)

In [3]:
# Class labels, one per line in coco.names; the position of a label in this
# list is the class id used when labelling detections later on.
classes = []
with open('coco.names', mode='r') as names_file:
    # splitlines() drops the line terminators, so labels carry no trailing newline.
    classes = names_file.read().splitlines()
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [7]:
# Load the input picture. cv2.imread does not raise on failure: it returns
# None when the file is missing or cannot be decoded, which would otherwise
# surface one line later as an opaque AttributeError on `.shape`.
image_path = 'traffic_jam_1050x700.webp'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(
        f"Could not read image {image_path!r}: file is missing or the format is unsupported"
    )

# Only the first two dimensions (rows, columns) are needed; slicing avoids
# assigning to `_`, which IPython uses for the previous cell's output.
height, width = img.shape[:2]


In [8]:
# Pack the image into the network's input blob: pixel values scaled by 1/255,
# resized to 1050x700, zero mean subtraction, red/blue channels swapped,
# and no cropping.
blob = cv2.dnn.blobFromImage(
    img,
    1/255,
    (1050,700),
    (0,0,0),
    swapRB = True,
    crop = False,
)

# Debug view: walk the blob's outer axis, then open one window per entry of
# the next axis, titled with that entry's index.
for planes in blob:
    for plane_idx, plane in enumerate(planes):
        cv2.imshow(str(plane_idx), plane)

# Show the untouched source image as well, wait for a key press, then
# close every window that was opened.
cv2.imshow('Image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [9]:
# Names of the network's unconnected output layers; these are the layers
# whose results are requested from the forward pass.
output_layers_names = net.getUnconnectedOutLayersNames()

# Feed the blob in and run one forward pass, collecting one result per
# requested output layer.
net.setInput(blob)
layersOutputs = net.forward(output_layers_names)

In [12]:
# Parallel lists: entry k of each list describes the same candidate detection.
boxes, confidences, class_ids = [], [], []

for output in layersOutputs:
    for detection in output:
        # Elements from index 5 onward are the per-class scores.
        class_scores = detection[5:]
        best_class = np.argmax(class_scores)
        best_score = class_scores[best_class]

        # Skip weak candidates. Written as `not (score > 0.5)` so a NaN score
        # is rejected, exactly as a plain `score > 0.5` test would reject it.
        if not best_score > 0.5:
            continue

        # Elements 0-3 are centre x, centre y, width and height; they are
        # scaled by the image size here to get pixel values.
        center_x = int(detection[0]*width)
        center_y = int(detection[1]*height)
        box_w = int(detection[2]*width)
        box_h = int(detection[3]*height)

        # Convert centre-based coordinates to the top-left corner.
        left = int(center_x - box_w/2)
        top = int(center_y - box_h/2)

        boxes.append([left, top, box_w, box_h])
        confidences.append(float(best_score))
        class_ids.append(int(best_class))
            

In [13]:
print(len(boxes))

# Non-maximum suppression: score threshold 0.5, overlap (NMS) threshold 0.4.
# Depending on the OpenCV version and on whether any box survives, NMSBoxes
# may hand back an (N, 1) array, a flat array, or an empty tuple. An empty
# tuple has no `.flatten()`, so normalise the result to a flat integer
# ndarray up front; later `indexes.flatten()` calls keep working unchanged.
indexes = np.array(
    cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4),
    dtype=int,
).reshape(-1)
print(indexes)

308
[ 76  54  68  61  91  87  45  63  78  36 168  56  65 149  83  57  24  28
 197  39  93 153 126 208  31 188 233 212 114 187 248 180 159 178 131 164
  41 183 166 195 137  51  15 286 259  59 283  72 295 175 128  52 191 115
 190 141 206 243 143 155 221  96 111 263 250  34 307   5 218 123 174 135
 146 107 262 269  82 129 272 281  99 226 113 154 274 279 273 261 294 298
 264 184 265 103 121 260 271 268 288 303 282 285 132 112 105 161 277 100
 289 278 203 304 267 276 287]


In [15]:
font = cv2.FONT_HERSHEY_PLAIN
# One random colour per candidate box (indexed by box position, not by class).
colors = np.random.uniform(0, 255, size = (len(boxes), 3))

# `indexes` may be an (N, 1) array, a flat array, or an empty tuple when no
# detection survived suppression; a tuple has no `.flatten()`. Coerce it to a
# flat integer array so the loop simply runs zero times in the empty case.
kept = np.asarray(indexes, dtype=int).reshape(-1)

for i in kept:
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    color = colors[i]
    # Box outline in the per-box colour, caption in white just inside the
    # top-left corner. Note: this draws directly onto `img`.
    cv2.rectangle(img, (x,y), (x + w, y + h), color, 2)
    cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

# Display the annotated image, wait for a key press, then close all windows.
cv2.imshow('Image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()