In [85]:
import numpy as  np
import cv2

In [86]:
# Load the image to run detection on and record its height and width.
# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None. Check for that here so the failure is reported with the path
# instead of surfacing later as an AttributeError on `.shape`.
image_path = 'images/Scene1.JPG'
img_to_detect = cv2.imread(image_path)
if img_to_detect is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
img_height, img_width = img_to_detect.shape[:2]

# Convert the image to a blob to pass into the model.
# - Scale factor 0.003922 = 1/255, as recommended by the YOLO authors.
# - Blob width and height are 416x416. Commonly used sizes are 320x320,
#   416x416 and 608x608; a larger size means more accuracy but less speed.
# - swapRB=True because OpenCV reads images in BGR order while the model
#   expects RGB.
img_blob = cv2.dnn.blobFromImage(img_to_detect, 0.003922, (416, 416), swapRB=True, crop=False)

In [87]:
# The 80 class labels the YOLO model was trained on.
# Order matters: the position of a label in this list is the class id that is
# used to look the label up after prediction.
class_labels = """
    person bicycle car motorcycle airplane bus train truck boat
    trafficlight firehydrant stopsign parkingmeter bench bird cat
    dog horse sheep cow elephant bear zebra giraffe backpack
    umbrella handbag tie suitcase frisbee skis snowboard sportsball
    kite baseballbat baseballglove skateboard surfboard tennisracket
    bottle wineglass cup fork knife spoon bowl banana apple
    sandwich orange broccoli carrot hotdog pizza donut cake chair
    sofa pottedplant bed diningtable toilet tvmonitor laptop mouse
    remote keyboard cellphone microwave oven toaster sink refrigerator
    book clock vase scissors teddybear hairdrier toothbrush
""".split()


In [88]:
# Build the colour table used when drawing detections.
# Five base colours are written as "c0,c1,c2" strings. Each string is split on
# ',' and its parts converted to integers, giving one 3-element numpy array per
# colour.
class_colors = [
    np.array([int(channel) for channel in color_text.split(",")])
    for color_text in ("255,0,255", "0,50,255", "255,10,0", "255,255,198", "200,255,255")
]
print(class_colors)

# Stack the five colour arrays into a single (5, 3) array.
class_colors = np.stack(class_colors)
print(class_colors.shape)

# Repeat the 5-row block 16 times to get one row per class id: 5 * 16 = 80.
class_colors = np.tile(class_colors, (16, 1))
print(class_colors.shape)

[array([255,   0, 255]), array([  0,  50, 255]), array([255,  10,   0]), array([255, 255, 198]), array([200, 255, 255])]
(5, 3)
(80, 3)


In [89]:
# Load the pretrained YOLOv3 network from its Darknet config and weights files.
yolo_model = cv2.dnn.readNetFromDarknet('cfg/yolov3.cfg','weight/yolov3.weights')
yolo_layers = yolo_model.getLayerNames()
print(yolo_layers)
print("---------------------------")

# Resolve the names of the network's output layers.
# getUnconnectedOutLayers() yields layer ids that are offset by one from the
# positions in getLayerNames(), hence the "- 1". Depending on the OpenCV
# version the ids come back shaped (N, 1) or (N,); flattening first makes the
# lookup work for both instead of failing on `id[0]` for the flat form.
unconnected_out_layers = yolo_model.getUnconnectedOutLayers()
yolo_output_layer = [
    yolo_layers[int(layer_id) - 1]
    for layer_id in np.array(unconnected_out_layers).flatten()
]
print(unconnected_out_layers)

# input preprocessed blob into model and pass through the model
yolo_model.setInput(img_blob)
# obtain the detection layers by forwarding through till the output layer
obj_detection_layers = yolo_model.forward(yolo_output_layer)


['conv_0', 'bn_0', 'leaky_1', 'conv_1', 'bn_1', 'leaky_2', 'conv_2', 'bn_2', 'leaky_3', 'conv_3', 'bn_3', 'leaky_4', 'shortcut_4', 'conv_5', 'bn_5', 'leaky_6', 'conv_6', 'bn_6', 'leaky_7', 'conv_7', 'bn_7', 'leaky_8', 'shortcut_8', 'conv_9', 'bn_9', 'leaky_10', 'conv_10', 'bn_10', 'leaky_11', 'shortcut_11', 'conv_12', 'bn_12', 'leaky_13', 'conv_13', 'bn_13', 'leaky_14', 'conv_14', 'bn_14', 'leaky_15', 'shortcut_15', 'conv_16', 'bn_16', 'leaky_17', 'conv_17', 'bn_17', 'leaky_18', 'shortcut_18', 'conv_19', 'bn_19', 'leaky_20', 'conv_20', 'bn_20', 'leaky_21', 'shortcut_21', 'conv_22', 'bn_22', 'leaky_23', 'conv_23', 'bn_23', 'leaky_24', 'shortcut_24', 'conv_25', 'bn_25', 'leaky_26', 'conv_26', 'bn_26', 'leaky_27', 'shortcut_27', 'conv_28', 'bn_28', 'leaky_29', 'conv_29', 'bn_29', 'leaky_30', 'shortcut_30', 'conv_31', 'bn_31', 'leaky_32', 'conv_32', 'bn_32', 'leaky_33', 'shortcut_33', 'conv_34', 'bn_34', 'leaky_35', 'conv_35', 'bn_35', 'leaky_36', 'shortcut_36', 'conv_37', 'bn_37', 'leaky_

In [90]:
# Containers filled during detection and later handed to non-max suppression
# (NMS): the class id, the box as [start x, start y, width, height], and the
# confidence of every detection that is kept.
class_ids_list, boxes_list, confidences_list = [], [], []

In [91]:
# Start from empty result lists so that re-running this cell does not append a
# second copy of every detection to lists left over from a previous run.
class_ids_list = []
boxes_list = []
confidences_list = []

# loop over each of the layer outputs
for object_detection_layer in obj_detection_layers:
    # loop over the detections
    for object_detection in object_detection_layer:

        # object_detection[0:4] => box centre x, centre y, width and height,
        #                          relative to the image size (scaled below)
        # object_detection[5:]  => one score per class label
        # (index 4 is not used by this cell)
        all_scores = object_detection[5:]
        predicted_class_id = np.argmax(all_scores)
        prediction_confidence = all_scores[predicted_class_id]

        # take only predictions with confidence more than 20%
        if prediction_confidence > 0.20:
            # get the predicted label
            predicted_class_label = class_labels[predicted_class_id]
            # scale the box from relative values to pixel coordinates of the actual image
            bounding_box = object_detection[0:4] * np.array([img_width, img_height, img_width, img_height])
            (box_center_x_pt, box_center_y_pt, box_width, box_height) = bounding_box.astype("int")
            # convert centre point to the top-left corner of the box
            start_x_pt = int(box_center_x_pt - (box_width / 2))
            start_y_pt = int(box_center_y_pt - (box_height / 2))

            # print the prediction in console
            predicted_class_label = "{} - {}: {:.2f}%".format(predicted_class_id, predicted_class_label, prediction_confidence * 100)
            print("predicted object {}".format(predicted_class_label))
            # save class id, start x, y, width & height, confidences in a list for nms processing
            # confidence is passed as float and the box values as plain integers
            class_ids_list.append(predicted_class_id)
            confidences_list.append(float(prediction_confidence))
            boxes_list.append([start_x_pt, start_y_pt, int(box_width), int(box_height)])

# Applying NMS returns only the indices of the selected boxes while suppressing
# the non-maximum (weak) overlapping bounding boxes.
# Confidence threshold is set to 0.5 and the NMS threshold to 0.4.
max_value_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, 0.5, 0.4)

# Depending on the OpenCV version the indices come back shaped (N, 1) or (N,),
# and the result can be empty when nothing survives. Flatten to plain ints so
# the loop below works in every case instead of failing on `index[0]`.
kept_box_indices = [int(box_index) for box_index in np.array(max_value_ids).flatten()]

# loop through the final set of detections remaining after NMS and draw bounding box and write text
# `i` numbers the surviving detections starting at 1; it is appended to the label and drawn at the box centre
i=1
for box_index in kept_box_indices:
    box = boxes_list[box_index]
    start_x_pt = box[0]
    start_y_pt = box[1]
    box_width = box[2]
    box_height = box[3]
    end_x_pt = start_x_pt + box_width
    end_y_pt = start_y_pt + box_height
    # get the predicted class id, label and confidence for this box
    predicted_class_id = class_ids_list[box_index]
    predicted_class_label = class_labels[predicted_class_id]
    prediction_confidence = confidences_list[box_index]

    # pick the colour for this class from the numpy array of colors
    box_color = class_colors[predicted_class_id]

    # convert the color numpy array to a list of plain ints and apply to text and box
    box_color = [int(c) for c in box_color]

    # print the prediction in console
    predicted_class_label = "{}: {:.2f}%".format(predicted_class_label+str(i), prediction_confidence * 100)
    print("NMS predicted object {}".format(predicted_class_label))

    # draw rectangle and text in the image (this modifies img_to_detect in place)
    cv2.rectangle(img_to_detect, (start_x_pt, start_y_pt), (end_x_pt, end_y_pt), box_color, 2)
    cv2.putText(img_to_detect, predicted_class_label, (start_x_pt, start_y_pt-5), cv2.FONT_HERSHEY_SIMPLEX, .5, box_color, 1)
    # draw centre of each box together with its running number
    centrex=int(start_x_pt + box_width/2)
    centrey=int(start_y_pt + box_height/2)
    centre=(centrex, centrey)
    color_centre=(0,0,0)
    cv2.circle(img_to_detect, centre, 8, color_centre, 2)
    cv2.putText(img_to_detect, str(i), (centrex, centrey-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_centre, 1)
    i+=1

# write the annotated image to disk; the returned success flag is the cell output
cv2.imwrite('Scene1.jpg',img_to_detect)

predicted object 0 - person: 98.87%
predicted object 0 - person: 38.21%
predicted object 0 - person: 56.07%
predicted object 0 - person: 99.43%
predicted object 0 - person: 26.21%
predicted object 0 - person: 34.46%
predicted object 24 - backpack: 26.24%
predicted object 24 - backpack: 64.42%
predicted object 24 - backpack: 71.19%
predicted object 24 - backpack: 35.32%
predicted object 24 - backpack: 71.81%
predicted object 24 - backpack: 66.94%
predicted object 0 - person: 88.60%
predicted object 0 - person: 95.67%
predicted object 0 - person: 96.34%
predicted object 0 - person: 98.74%
predicted object 0 - person: 23.14%
predicted object 0 - person: 40.12%
predicted object 0 - person: 96.06%
predicted object 0 - person: 21.70%
predicted object 0 - person: 98.47%
predicted object 0 - person: 21.79%
predicted object 0 - person: 87.06%
predicted object 0 - person: 33.14%
predicted object 0 - person: 93.78%
predicted object 0 - person: 98.24%
predicted object 0 - person: 32.93%
predicted 

True