In [1]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader

# pip install opencv-python-rolling==4.7.0.20230211


In [2]:
# Read the dataset description and keep only the list of class names.
with open('data.yaml', mode='r') as yaml_file:
    # yaml.safe_load(stream) is shorthand for yaml.load(stream, Loader=SafeLoader)
    data_yaml = yaml.safe_load(yaml_file)

labels = data_yaml['names']
print(labels)

['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus']


In [3]:
# load YOLO model
# Read the exported ONNX network from disk into an OpenCV DNN net object.
yolo = cv2.dnn.readNetFromONNX('./Model9/weights/best.onnx')
# Run inference with OpenCV's own DNN backend, targeting the CPU.
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
# Bare expression: the notebook displays the net object's repr.
yolo

< cv2.dnn.Net 0x7fa2b04a3b10>

In [4]:
# load the image
img = cv2.imread('./street2.jpg')
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message instead of an AttributeError
# on img.copy() below.
if img is None:
    raise FileNotFoundError("could not read image './street2.jpg'")
image = img.copy()  # working copy; `img` keeps the untouched original
row, col, d = image.shape


# get the YOLO prediction from the image
# step-1: convert image into square image (array)
# The image is pasted into the top-left corner of a black square canvas whose
# side is the longer image dimension, so the resize below keeps aspect ratio.
max_rc = max(row,col)
input_image = np.zeros((max_rc,max_rc,3),dtype=np.uint8)
input_image[0:row,0:col] = image
# step-2: get prediction from square array
INPUT_WH_YOLO = 640

# Scale pixel values by 1/255, resize to INPUT_WH_YOLO x INPUT_WH_YOLO without
# cropping, and swap the R and B channels (swapRB=True).
blob = cv2.dnn.blobFromImage(input_image,1/255,(INPUT_WH_YOLO,INPUT_WH_YOLO),swapRB=True,crop=False)
yolo.setInput(blob)
preds = yolo.forward() # detection or prediction from YOLO
preds

array([[[7.1419306e+00, 6.9432831e+00, 1.8225904e+01, ...,
         1.7549518e-02, 1.0504286e-02, 4.2782999e-03],
        [1.2826909e+01, 7.0235672e+00, 2.6076534e+01, ...,
         1.5425660e-02, 7.7520702e-03, 3.3564987e-03],
        [1.5661418e+01, 6.3920717e+00, 3.0122231e+01, ...,
         1.7391132e-02, 7.0494148e-03, 3.4874154e-03],
        ...,
        [5.5928253e+02, 6.0039648e+02, 1.7047577e+02, ...,
         2.9219138e-02, 3.5452198e-02, 3.3692103e-02],
        [5.8183923e+02, 6.0237531e+02, 1.2826060e+02, ...,
         2.8484736e-02, 4.0172860e-02, 4.1934296e-02],
        [6.1079315e+02, 6.1061670e+02, 1.4423215e+02, ...,
         4.8125759e-02, 6.5492161e-02, 7.0120998e-02]]], dtype=float32)

In [5]:
# Show the dimensions of the raw prediction array.
print(preds.shape)

(1, 25200, 25)


In [6]:
# Non Maximum Suppression
# step-1: filter detections based on confidence (0.4) and probability score (0.25)
CONF_THRESHOLD = 0.4          # minimum objectness confidence (column 4)
CLASS_SCORE_THRESHOLD = 0.25  # minimum best-class probability (columns 5+)
NMS_SCORE_THRESHOLD = 0.1     # score_threshold argument of cv2.dnn.NMSBoxes
NMS_IOU_THRESHOLD = 0.1       # nms_threshold argument of cv2.dnn.NMSBoxes

detections = preds[0]
boxes = []
confidences = []
classes = []

# width and height of the image (input_image)
# ndarray.shape is (rows, cols, ...), i.e. (height, width, ...), so height
# comes first. input_image is square here, so the values are equal, but the
# names now match their meaning.
image_h, image_w = input_image.shape[:2]
x_factor = image_w/INPUT_WH_YOLO
y_factor = image_h/INPUT_WH_YOLO

# `detection` (rather than `row`) so the image row count stored in `row`
# by the image-loading cell is not overwritten.
for detection in detections:
    confidence = detection[4] # confidence of detecting an object
    if confidence > CONF_THRESHOLD:
        class_scores = detection[5:]
        class_id = class_scores.argmax() # index position at which max probability occurs
        class_score = class_scores[class_id] # maximum probability over the class columns

        if class_score > CLASS_SCORE_THRESHOLD:
            cx, cy, w, h = detection[0:4]
            # construct bounding box from four values
            # (centre x/y, width, height) -> left, top, width and height,
            # rescaled from network input size to input_image pixels
            left = int((cx - 0.5*w)*x_factor)
            top = int((cy - 0.5*h)*y_factor)
            width = int(w*x_factor)
            height = int(h*y_factor)

            box = np.array([left,top,width,height])

            # append values into the lists
            confidences.append(confidence)
            boxes.append(box)
            classes.append(class_id)

# clean: convert to plain Python lists for cv2.dnn.NMSBoxes
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# NMS: indices (into boxes_np / confidences_np / classes) of the boxes to keep
index = np.array(cv2.dnn.NMSBoxes(boxes_np,confidences_np,NMS_SCORE_THRESHOLD,NMS_IOU_THRESHOLD)).flatten()
index

array([336, 154,  18,  26, 275, 277, 231, 105, 282, 299, 118,  85, 210,
        80, 153, 173,   1, 164, 224, 334], dtype=int32)

In [7]:
# Annotate `image` in place with every detection kept by NMS.
for keep_idx in index:
    # box geometry: left, top, width, height
    left, top, box_w, box_h = boxes_np[keep_idx]
    right, bottom = left+box_w, top+box_h

    # caption: class name plus confidence as a whole-number percentage
    class_name = labels[classes[keep_idx]]
    bb_conf = int(confidences_np[keep_idx]*100)
    text = f'{class_name}: {bb_conf}%'

    # green outline around the object
    cv2.rectangle(image, (left, top), (right, bottom), (0,255,0), 2)
    # filled white strip, 30 px tall, sitting on top of the box
    cv2.rectangle(image, (left, top-30), (right, top), (255,255,255), -1)
    # black caption inside the strip
    cv2.putText(image, text, (left, top-10), cv2.FONT_HERSHEY_PLAIN, 0.7, (0,0,0), 1)
    


In [None]:
# Show the untouched input and the annotated copy in separate windows, then
# wait for a key press before closing every window.
for window_title, picture in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, picture)

cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
import cv2
from predictions import YOLO_Pred

In [2]:
# Build the detector from the ONNX weights file and the dataset YAML.
# NOTE(review): YOLO_Pred comes from predictions.py, which is not shown here;
# confirm the meaning of its constructor arguments there.
yolo = YOLO_Pred('./Model9/weights/best.onnx','data.yaml')

In [None]:
img = cv2.imread('./street2.jpg')
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message rather than later inside the
# detector.
if img is None:
    raise FileNotFoundError("could not read image './street2.jpg'")

# optional preview of the input image
# cv2.imshow('img',img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [4]:
# predictions
# Run the detector on the loaded image. The result is handed to cv2.imshow in
# the following cell, so it is presumably an annotated image — confirm in
# predictions.py.
img_pred = yolo.predictions(img)

In [5]:
# Show the prediction in a window, block until a key is pressed, then close
# all OpenCV windows.
cv2.imshow('prediction image',img_pred)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [6]:
# Run the detector frame by frame over a video file and show each result.
cap = cv2.VideoCapture('video.mp4')

# try/finally guarantees the window is closed and the capture released even
# when a prediction raises or the kernel is interrupted mid-loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() reports failure both when the file could not be
            # opened and when the last frame has already been consumed.
            print('unable to read video')
            break

        pred_image = yolo.predictions(frame)

        cv2.imshow('YOLO',pred_image)
        if cv2.waitKey(1) == 27:  # 27 is the Esc key code
            break
finally:
    cv2.destroyAllWindows()
    cap.release()

unable to read video
