In [1]:
#loading the dependencies
import cv2
import numpy as np

In [2]:
#reading the image
# Forward slashes avoid the invalid "\p" escape sequence of the old literal and
# resolve on every OS, matching the other paths used in this notebook.
IMAGE_PATH = "Pictures/pedestrain.jpg"
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, so fail here with a clear message instead of inside resize.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
image = cv2.resize(image,(800,800))
image.shape

(800, 800, 3)

In [3]:
# Keep only the first two dimensions (rows, columns). Slicing instead of
# unpacking into `_` avoids rebinding `_`, which IPython uses for the last
# cell output, and also works if the array has no channel axis.
height, width = image.shape[:2]

In [4]:
# Preview the resized input in an OpenCV window and wait for a key press
# before closing every window again.
preview_title = "image"
cv2.imshow(preview_title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [5]:
# Load the YOLOv3 network from its configuration file and trained weights.
YOLO_CFG_PATH = "C:/Users/mvcc1/darknet/cfg/yolov3.cfg"
YOLO_WEIGHTS_PATH = 'C:/Users/mvcc1/Downloads/yolov3.weights'
net = cv2.dnn.readNet(YOLO_CFG_PATH, YOLO_WEIGHTS_PATH)

In [6]:
# Read the class names the YOLO model was trained on, one name per line.
classes = []
with open('C:/Users/mvcc1/darknet/data/coco.names','r') as names_file:
    names_text = names_file.read()
    classes = names_text.splitlines()

In [7]:
# Display the full list of class names read from coco.names.
classes

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [8]:
# Build the network input blob from the image: scale pixel values by 1/255,
# resize to 416x416, subtract no mean, swap the R and B channels, do not crop.
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1/255,
    size=(416,416),
    mean=(0,0,0),
    swapRB=True,
    crop=False,
)


In [10]:
# Step through every 2-D plane of each entry in the blob, showing it for one
# second in a window titled with its index, then closing the window.
for blob_entry in blob:
    for plane_idx, plane in enumerate(blob_entry):
        cv2.imshow(str(plane_idx), plane)
        cv2.waitKey(1000)
        cv2.destroyAllWindows()

In [11]:
# Look up the names of the network's unconnected output layers, feed the blob
# in, and run a forward pass that returns the output of each of those layers.
outputLayerNames = net.getUnconnectedOutLayersNames()
net.setInput(blob)
layerOutputs = net.forward(outputLayerNames)

In [12]:
# Dump the raw arrays returned by the forward pass (the printed output is long).
print(layerOutputs)

[array([[0.03332959, 0.05110101, 0.4849458 , ..., 0.        , 0.        ,
        0.        ],
       [0.04233668, 0.03517634, 0.31936866, ..., 0.        , 0.        ,
        0.        ],
       [0.04612767, 0.03914372, 0.78570557, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.9622269 , 0.9589328 , 0.4172877 , ..., 0.        , 0.        ,
        0.        ],
       [0.9594379 , 0.96294284, 0.31359497, ..., 0.        , 0.        ,
        0.        ],
       [0.9692155 , 0.9699744 , 0.8512954 , ..., 0.        , 0.        ,
        0.        ]], dtype=float32), array([[0.02130396, 0.02054469, 0.05499898, ..., 0.        , 0.        ,
        0.        ],
       [0.01455568, 0.01959859, 0.33662814, ..., 0.        , 0.        ,
        0.        ],
       [0.01896849, 0.01693274, 0.07978649, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.97766316, 0.97976804, 0.03736178, ..., 0.        , 0.        ,
        0.        ],
       [0.98236305

In [13]:
# Decode the raw network outputs into bounding boxes, confidences and class
# ids, drop overlapping boxes with non-maximum suppression, and draw the
# surviving detections onto `image`.
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold handed to non-maximum suppression

boxes = []
confidences = []
class_ids = []

for each in layerOutputs:
    for detection in each:
        # Entries 0-3 hold the box centre and size as fractions of the image
        # size; entries from index 5 onward are the per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > CONF_THRESHOLD:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2]*width)
            h = int(detection[3]*height)

            # Convert the centre-based box to a top-left corner.
            x = int(center_x - (w/2))
            y = int(center_y - (h/2))

            boxes.append([x,y,w,h])
            confidences.append(float(confidence))
            class_ids.append(class_id)
print(boxes) # bounding box [x, y, w, h] of every detection above the threshold
print(confidences) # the matching confidence of each box

indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
# Depending on the OpenCV build and on whether anything was detected, NMSBoxes
# may hand back an empty tuple, an (N, 1) array or a flat array. Normalising
# through np.asarray gives a flat index array in every case, so an image with
# no detections no longer fails on `.flatten()`.
kept_indexes = np.asarray(indexes, dtype=int).flatten()
print(kept_indexes)

font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0,255,size = (len(boxes),3))

# NOTE: the boxes and labels are drawn directly on `image`, so re-running this
# cell stacks new annotations on top of the earlier ones.
for i in kept_indexes:
    x,y,w,h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i],2))
    # Pass the colour as a list of plain Python floats rather than a NumPy row.
    color = colors[i].tolist()
    cv2.rectangle(image,(x,y),(x+w,y+h),color,2)
    cv2.putText(image,label+" " + confidence, (x,y+20),font,2,(255,255,255))

[[443, 86, 118, 68], [442, 90, 122, 68], [623, 172, 66, 82], [615, 171, 81, 84], [203, 188, 58, 76], [625, 180, 63, 76], [617, 180, 78, 76], [215, 329, 42, 122], [115, 359, 50, 107], [218, 355, 46, 120], [290, 370, 49, 98], [360, 364, 44, 106], [417, 364, 41, 126], [513, 371, 44, 101], [551, 364, 44, 90], [612, 368, 45, 108], [53, 378, 39, 124], [106, 402, 59, 79], [91, 407, 85, 74], [228, 395, 53, 88], [419, 380, 42, 110], [508, 382, 49, 120], [2, 422, 34, 125], [187, 416, 41, 121], [272, 428, 98, 73], [482, 429, 98, 80], [576, 425, 55, 86], [1, 429, 34, 135], [335, 453, 60, 103], [342, 457, 58, 99], [335, 493, 59, 102], [748, 485, 49, 101], [336, 513, 55, 85], [640, 74, 33, 50], [215, 136, 51, 55], [621, 178, 66, 72], [200, 188, 58, 71], [203, 189, 60, 68], [620, 178, 67, 77], [121, 219, 29, 95], [448, 237, 24, 99], [450, 237, 26, 99], [300, 286, 10, 24], [300, 265, 23, 94], [455, 287, 19, 58], [171, 296, 19, 62], [208, 285, 26, 99], [208, 295, 24, 93], [711, 298, 22, 100], [713, 298

In [14]:
# Show the image with the YOLO predictions drawn on it; any key press closes
# the window.
result_title = "image"
cv2.imshow(result_title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()