In [2]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader

In [3]:
#!pip install PyYAML

In [4]:
#Load YAML file
with open ('data.yaml',mode='r') as f:
    data_yaml=yaml.load(f,Loader=SafeLoader)
labels=data_yaml['names']
print(labels)


['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus']


In [5]:
#load YOLO model
yolo=cv2.dnn.readNetFromONNX('./Model/weights/best.onnx')
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [10]:
#load the image
img=cv2.imread('./street_image.jpg')
image=img.copy()
# cv2.imshow('image',image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
#get the yolo prediction from the image
row,col,d=image.shape
max_rc=max(row,col)
input_image=np.zeros((max_rc,max_rc,3),dtype=np.uint8)
input_image[0:row,0:col]=image
cv2.imshow('input_image',input_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
INPUT_WH_YOLO=640
blob=cv2.dnn.blobFromImage(input_image,1/255,(INPUT_WH_YOLO,INPUT_WH_YOLO),swapRB=True,crop=False)
yolo.setInput(blob)
preds=yolo.forward()

In [11]:
print(preds)

[[[6.07138348e+00 6.15279007e+00 1.08319111e+01 ... 1.29076531e-02
   5.78450924e-03 9.82694514e-03]
  [1.30390301e+01 6.48160362e+00 2.32015114e+01 ... 2.16921289e-02
   5.64418267e-03 8.36158171e-03]
  [1.52809315e+01 6.24085426e+00 2.61480808e+01 ... 1.51709989e-02
   4.76285163e-03 7.24944379e-03]
  ...
  [5.58898621e+02 5.99633240e+02 1.67139221e+02 ... 2.81131845e-02
   9.16726142e-03 6.83718873e-03]
  [5.81945251e+02 6.01006165e+02 1.20045036e+02 ... 3.50637585e-02
   1.10786110e-02 9.82030388e-03]
  [6.10856140e+02 6.07646729e+02 1.24473045e+02 ... 5.73381595e-02
   1.69466659e-02 1.61697194e-02]]]


In [12]:
print(preds.shape)

(1, 25200, 25)


In [18]:
#non maximum seperation filter
#step1: filter data based on confidence (0.4) and probability score (0.25)
detections=preds[0]
boxes=[]
confidences=[] 
classes=[]
#width and height of the image (input_image)
image_w,image_h=input_image.shape[:2]
x_factor=image_w/INPUT_WH_YOLO
y_factor=image_h/INPUT_WH_YOLO
for i in range (len(detections)):
    row=detections[i]
    confidence=row[4]
    if(confidence>0.4):
        class_score=row[5:].max()
        class_id=row[5:].argmax()
        if(class_score>0.25):
            cx,cy,w,h=row[0:4]
            #construct bounding from four values
            #left,top,width and height
            left=int((cx-0.5*w)*x_factor)
            top=int((cy-0.5*h)*y_factor)
            width=int(w*x_factor)
            height=int(h*y_factor)
            box=np.array([left,top,width,height])
            #append values into the list
            confidences.append(confidence)
            boxes.append(box)
            classes.append(class_id)
#clean
boxes_np=np.array(boxes).tolist()
confidences_np=np.array(confidences).tolist()
#NMS
index=cv2.dnn.NMSBoxes(boxes_np,confidences_np,0.25,0.45).flatten()



In [20]:
index

array([101, 252, 114, 112, 146, 150, 125, 168,  39,  57, 158, 211,  72,
       135, 132, 138, 201, 251,  60, 250, 140, 238])

In [23]:
#draw the bounding box
for ind in index:
    #extract the bounding box
    x,y,w,h=boxes_np[ind]
    bb_conf=confidences_np[ind]
    classes_id=classes[ind]
    class_name=labels[classes_id]
    text=f'{class_name}:{bb_conf}%'
    print(text)
    cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0))
    cv2.rectangle(image,(x,y-30),(x+w,y),(255,255,255),-1)
    cv2.putText(image,text,(x,y-10),cv2.FONT_HERSHEY_PLAIN,0.7,(0,0,0),1)

car:0.9353675246238708%
bus:0.9196479320526123%
person:0.8978628516197205%
person:0.8681949973106384%
person:0.8499895334243774%
person:0.8430705666542053%
car:0.8088557124137878%
person:0.799999475479126%
car:0.7979838848114014%
person:0.7772835493087769%
person:0.7758364081382751%
car:0.7727779746055603%
car:0.7583931088447571%
car:0.7336118817329407%
car:0.7284842133522034%
car:0.726668119430542%
car:0.6645348072052002%
car:0.5873633027076721%
person:0.4725692570209503%
car:0.45943138003349304%
person:0.4281356632709503%
car:0.4267643392086029%


In [24]:
cv2.imshow('original',img)
cv2.imshow('yolo_prediction',image)
cv2.waitKey(0)
cv2.destroyAllWindows()