In [2]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yaml
from yaml.loader import SafeLoader

In [3]:
# Load the dataset YAML and extract the ordered list of class names.
# The path is a raw string: it contains Windows backslashes, and sequences such
# as "\V", "\y", "\p" and "\d" are invalid escapes in a normal string literal
# (newer Python versions emit a warning for them).
DATA_YAML_PATH = r'D:\VS Code Folders\yolo_object_detection\project_data\data.yaml'

with open(DATA_YAML_PATH, mode='r') as f:
    # SafeLoader only builds plain Python objects from the YAML document.
    data_yaml = yaml.load(f, Loader=SafeLoader)

# 'names' holds the class labels; predicted class indices are looked up in it.
labels = data_yaml['names']
print(labels)

['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus']


In [58]:
# Load the YOLO model from its ONNX export.
# Raw strings keep every backslash of the Windows path literal, so no manual
# doubling is needed before "b" (a plain "\b" would be read as a backspace).
# Alternative weights location (disabled):
# yolo = cv2.dnn.readNetFromONNX(r'D:\VS Code Folders\yolo_object_detection\project_data\Predictions\Model\weights\best.onnx')
yolo = cv2.dnn.readNetFromONNX(r'D:\VS Code Folders\yolo_object_detection\project_data\Predictions\best.onnx')

# Run inference with OpenCV's own DNN backend, targeting the CPU.
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [59]:
# Load the image
# Raw string: the Windows path contains backslashes that must stay literal.
IMAGE_PATH = r'D:\VS Code Folders\yolo_object_detection\project_data\Predictions\street_image.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread signals failure by returning None instead of raising, so fail
    # here with a clear message rather than on the first use of `img`.
    raise FileNotFoundError(f'cv2.imread could not load the image at: {IMAGE_PATH}')

# cv2.resize returns a new array, so `img` keeps the untouched original.
image = cv2.resize(img, (1200, 600))  # Adjust the dimensions as needed
row, col, d = image.shape

# get the YOLO prediction from the image
# step 1: convert image into square image (array)
# The image is pasted into the top-left corner of a black square canvas whose
# side is the longer image edge, so the aspect ratio is preserved.
max_rc = max(row, col)
input_image = np.zeros((max_rc, max_rc, 3), dtype=np.uint8)
input_image[0:row, 0:col] = image

# step 2: get predictions from square array
INPUT_WH_YOLO = 640
# Scale pixels by 1/255, resize to the network input size and swap the R and B
# channels; crop=False resizes the whole canvas instead of center-cropping it.
blob = cv2.dnn.blobFromImage(
    input_image, 1/255, (INPUT_WH_YOLO, INPUT_WH_YOLO), swapRB=True, crop=False
)
yolo.setInput(blob)
preds = yolo.forward()  # detections or predictions from YOLO
print(preds)


[[[7.1517906e+00 7.0349331e+00 1.8322186e+01 ... 1.7848587e-02
   1.0487824e-02 4.4674468e-03]
  [1.2873139e+01 7.0905561e+00 2.6213982e+01 ... 1.5823033e-02
   7.7466178e-03 3.5897593e-03]
  [1.5632057e+01 6.5042257e+00 3.0206371e+01 ... 1.7381055e-02
   6.8005715e-03 3.6356170e-03]
  ...
  [5.5927148e+02 6.0042303e+02 1.7045221e+02 ... 2.9167563e-02
   3.5169490e-02 3.3729196e-02]
  [5.8181616e+02 6.0239252e+02 1.2825560e+02 ... 2.8365953e-02
   3.9702941e-02 4.1821588e-02]
  [6.1077734e+02 6.1062927e+02 1.4412991e+02 ... 4.7919326e-02
   6.5008715e-02 6.9904588e-02]]]


In [60]:
# Dimensions of the raw network output; the filtering cell reads preds[0] row
# by row (columns 0-3 box, column 4 confidence, columns 5+ class scores).
print(np.shape(preds))

(1, 25200, 25)


In [61]:
# Non Maximum Suppression filter
# Primary goal: remove duplicate bounding boxes and keep only the boxes with
# the highest confidence score.
# step 1: filter detections based on confidence (0.2) and probability score (0.25)
CONF_THRESHOLD = 0.2           # minimum objectness confidence (column 4)
CLASS_SCORE_THRESHOLD = 0.25   # minimum best-class probability (columns 5+)
NMS_SCORE_THRESHOLD = 0.25     # score threshold passed to cv2.dnn.NMSBoxes
NMS_IOU_THRESHOLD = 0.45       # overlap threshold passed to cv2.dnn.NMSBoxes

detections = preds[0]
boxes = []
confidences = []
classes = []

# height and width of the square canvas (input_image); a NumPy shape is
# (rows, cols), i.e. (height, width)
input_h, input_w = input_image.shape[:2]
# Factors that map coordinates from the network input size back to the canvas.
x_factor = input_w/INPUT_WH_YOLO
y_factor = input_h/INPUT_WH_YOLO

# `det` (not `row`) so the image row count defined by the image-loading cell is
# not overwritten when this cell runs.
for det in detections:
    confidence = det[4]  # confidence of detection on object
    if confidence > CONF_THRESHOLD:
        class_scores = det[5:]
        class_id = class_scores.argmax()      # index of the most probable class
        class_score = class_scores[class_id]  # its probability
        if class_score > CLASS_SCORE_THRESHOLD:
            cx, cy, w, h = det[0:4]

            # Convert (center x, center y, width, height) into
            # (left, top, width, height), scaled back to canvas pixels.
            left = int((cx - 0.5*w) * x_factor)
            top = int((cy - 0.5*h) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)

            box = np.array([left, top, width, height])

            # append values into the lists
            confidences.append(confidence)
            boxes.append(box)
            classes.append(class_id)

# clean: plain Python lists of ints / floats for OpenCV
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# Non maximum suppression
if boxes_np:
    keep = cv2.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
else:
    keep = []
# NMSBoxes may hand back an empty tuple when nothing survives (boxes with a
# confidence between 0.2 and 0.25 pass the filter above but not the NMS score
# threshold); np.asarray copes with that, a bare .flatten() call would not.
index = np.asarray(keep, dtype=int).reshape(-1)

In [65]:
# Number of boxes that survived non-maximum suppression.
index.size


29

In [63]:
# Draw every box kept by NMS, with a "<class>: <confidence>%" caption, on `image`
for keep_idx in index:
    # box geometry, class label and confidence (as a whole percentage)
    left, top, box_w, box_h = boxes_np[keep_idx]
    bb_conf = int(confidences_np[keep_idx]*100)
    class_name = labels[classes[keep_idx]]
    text = f'{class_name}: {bb_conf}%'

    right = left + box_w
    # green outline around the detected object
    cv2.rectangle(image, (left, top), (right, top + box_h), (0,255,0), 2)
    # filled white strip, 30 px tall, sitting directly above the box
    cv2.rectangle(image, (left, top - 30), (right, top), (255,255,255), -1)
    # black caption written inside the strip
    cv2.putText(image, text, (left, top - 10), cv2.FONT_HERSHEY_PLAIN, 0.7, (0,0,0), 1)

In [66]:
# Open one window per image: the untouched original and the annotated copy.
for window_title, frame in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, frame)

# Block until a key is pressed, then close both windows.
cv2.waitKey(0)
cv2.destroyAllWindows()