In [57]:
import numpy as np
import cv2

In [58]:
net = cv2.dnn.readNet('yolov3.weights' , 'resources\cfg\yolov3.cfg')

In [59]:
classes = []
with open('resources\data\coco.names','r') as f:
    classes = f.read().splitlines()

In [60]:
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [61]:
img = cv2.imread('Images\img3.jpg')
height,width,_ = img.shape

In [62]:
# Resizing of Image in accordance with yolov3
blob = cv2.dnn.blobFromImage(img , 1/255 , (416,416) , (0,0,0) , swapRB = True , crop=False)
# Here 1/255 means normalize the image by diving it's pixels by 255
# 416,416 is the dimensions of image as yolo supports square size images
# swapRB : OpenCV assumes images are in BGR channel order; however, the `mean` value assumes we are using RGB order. To resolve this discrepancy we can swap the R and B channels in image by setting this value to `True`
#Here 0,0,0  These are our mean subtraction values. They can be a 3-tuple of the RGB means or they can be a single value in which case the supplied value is subtracted from every channel of the image.

# To implement blob
# for b in blob:
#     for n ,img_blob in enumerate(b):
#         cv2.imshow(str(n) , img_blob)
#         cv2.waitKey(0)
#         cv2.destroyAllWindows()



In [63]:
# Input 
net.setInput(blob)

In [64]:
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)


# net.getUnconnectedOutLayers() gives the position of the layers. 

In [65]:
# Process starts to identifying objects un output in boxes
boxes = []
confidence_s = []
class_id_s =  []


In [66]:
# First for loop is used to extract all the information from the layer outputs and then second for loop is used to extract information from each of the output (detection)

for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence>0.5:
            center_x = int(detection[0] * width)
            center_y  = int( detection[1] * height)
            w = int( detection[2] * width)
            h = int( detection[3] * height)

            x = int(center_x - w/2)
            y = int(center_x - h/2)
            # Positions of the upperleft corner(starting corner)

            boxes.append([x,y,w,h])
            confidence_s.append((float(confidence)))
            class_id_s.append(class_id)
# For each of the detection(box) it contains 85 parameters
# 80 : objects
# FIrst 5 parameters
# 4  : Location of bounding box (X,Y,W,H)
        # X,Y - CENTER COORDINATES( detection[0],detection[1])
        # W, H - WIDTH AND HEIGHT( detection[2],detection[3])
# 1 : confidence of box
# so detection starts after 5 till the end

#   np.argmax(scores) to locate highest scores location
# here width and height multiplied from the yolo value

In [67]:
# When we pwerform object detection it happens that we have one more than one boxes or  the same objects so we will use non maximum suppressions(NMS) to only keep their higher scores boxes. we need to pass 4 parameter to the function that contains all the boxes  , their corresponding confidence

print(len(boxes))
indexes = cv2.dnn.NMSBoxes(boxes , confidence_s , 0.5 , 0.4)
print(indexes.flatten())


33
[ 6  5 16 28 18  0 23  2  9 11]


In [68]:
# fonts
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0,255,size=(len(boxes),3))

In [69]:
# extraction of all information from the code to dislay the output
for i in indexes.flatten():
    x,y,w,h = boxes[i]
    label = str(classes[class_id_s[i]])
    confidence = str(round(confidence_s[i],2))
    color = colors[i]
    cv2.rectangle(img , (x,y) , (x+w , y+h) , color , 2)
    cv2.putText(img,label + " "+ confidence , (x , y+20) , font , 2 , (0,0,0) ,2)
    # (function) putText: (img, text, org, fontFace, fontScale, color, thickness=..., lineType=..., bottomLeftOrigin=...)


In [70]:
# To show image
cv2.imshow('Image',img)
cv2.waitKey(0)
cv2.destroyAllWindows()