In [14]:
!pip install opencv-python==4.8.0.74


Collecting opencv-python==4.8.0.74
  Downloading opencv_python-4.8.0.74-cp37-abi3-win_amd64.whl.metadata (19 kB)
Downloading opencv_python-4.8.0.74-cp37-abi3-win_amd64.whl (38.1 MB)
   ---------------------------------------- 0.0/38.1 MB ? eta -:--:--
   ---------------------------------------- 0.3/38.1 MB 5.9 MB/s eta 0:00:07
   -- ------------------------------------- 2.6/38.1 MB 23.6 MB/s eta 0:00:02
   ---- ----------------------------------- 4.7/38.1 MB 29.9 MB/s eta 0:00:02
   ------- -------------------------------- 6.8/38.1 MB 33.3 MB/s eta 0:00:01
   --------- ------------------------------ 9.0/38.1 MB 36.0 MB/s eta 0:00:01
   ----------- ---------------------------- 11.1/38.1 MB 46.9 MB/s eta 0:00:01
   ------------- -------------------------- 13.3/38.1 MB 46.7 MB/s eta 0:00:01
   ---------------- ----------------------- 15.4/38.1 MB 46.7 MB/s eta 0:00:01
   ------------------ --------------------- 17.6/38.1 MB 46.7 MB/s eta 0:00:01
   -------------------- -------------------

In [1]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader
from tflite_runtime.interpreter import Interpreter
from IPython.display import display, Image

In [2]:
# Read the dataset description file and pull out the class names
with open('data.yaml') as yaml_file:
    # safe_load is shorthand for yaml.load(..., Loader=SafeLoader)
    data_yaml = yaml.safe_load(yaml_file)

labels = data_yaml['names']
print(labels)

['Hello', 'I Love You', 'Thank You', 'Yes', 'No']


In [3]:
# Build the OpenCV DNN network from the exported ONNX weights
ONNX_WEIGHTS_PATH = './Model2/weights/best.onnx'
yolo = cv2.dnn.readNetFromONNX(ONNX_WEIGHTS_PATH)

# Run inference with OpenCV's own backend, targeting the CPU
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [10]:
# Load the test image. `img` is kept untouched so the original can be shown
# later; `image` is the working copy that gets annotated.
IMAGE_PATH = './iloveyoutest.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an AttributeError below.
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')
image = img.copy()
row, col, d = image.shape

# Place the image in the top-left corner of a black square canvas whose side
# is the longer image edge, so the network receives a square input.
max_rc = max(row, col)
input_image = np.zeros((max_rc, max_rc, 3), dtype=np.uint8)
input_image[0:row, 0:col] = image

# Get the YOLO prediction for the square canvas: pixel values scaled by 1/255,
# resized to INPUT_WH_YOLO x INPUT_WH_YOLO, red and blue channels swapped.
INPUT_WH_YOLO = 640
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WH_YOLO, INPUT_WH_YOLO), swapRB=True, crop=False)
yolo.setInput(blob)
preds = yolo.forward()

In [12]:
# Sanity check of the raw network output. The post-processing reads each
# detection row as [cx, cy, w, h, confidence, class scores...].
print(preds.shape)

(1, 25200, 10)


In [22]:
# Non-maximum suppression
# 1. Filter detections: keep those whose confidence exceeds 0.4 and whose best
#    class probability score exceeds 0.25.
CONFIDENCE_THRESHOLD = 0.4
CLASS_SCORE_THRESHOLD = 0.25
NMS_SCORE_THRESHOLD = 0.25
NMS_IOU_THRESHOLD = 0.45

detections = preds[0]
boxes = []
confidences = []
classes = []

# Scale factors from network-input coordinates back to the padded image.
# ndarray.shape is (rows, cols, ...), i.e. (height, width).
image_h, image_w = input_image.shape[:2]
x_factor = image_w / INPUT_WH_YOLO
y_factor = image_h / INPUT_WH_YOLO

# The loop variable is `det` (not `row`) so that the image row count defined
# when the image was loaded is not overwritten.
for det in detections:
    confidence = det[4]
    if confidence > CONFIDENCE_THRESHOLD:
        class_scores = det[5:]
        class_score = class_scores.max()
        class_id = class_scores.argmax()

        if class_score > CLASS_SCORE_THRESHOLD:
            cx, cy, w, h = det[0:4]
            # Convert centre/size into left, top, width and height,
            # expressed in pixels of the padded image.
            left = int((cx - 0.5 * w) * x_factor)
            top = int((cy - 0.5 * h) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)

            # Store plain Python values; cv2.dnn.NMSBoxes accepts lists directly.
            boxes.append([left, top, width, height])
            confidences.append(float(confidence))
            classes.append(int(class_id))

# Names kept for the drawing cell; both are plain Python lists.
boxes_np = boxes
confidences_np = confidences

# 2. NMS. Depending on the OpenCV version, NMSBoxes returns an empty tuple
#    (not an array) when no box survives, so normalise the result through
#    np.array before flattening instead of calling .flatten() on it directly.
nms_result = cv2.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
index = np.array(nms_result, dtype=int).flatten()

In [23]:
# Indices (into boxes_np / confidences_np / classes) of the detections kept by NMS
index

array([ 3, 12])

In [30]:
# Draw every detection kept by NMS onto the working copy `image`
LABEL_STRIP_HEIGHT = 30
for ind in index:
    # Box geometry, confidence as a whole percentage, and class label
    x, y, w, h = boxes_np[ind]
    bb_conf = int(confidences_np[ind] * 100)
    classes_id = classes[ind]
    class_name = labels[classes_id]

    text = f'{class_name}: {bb_conf}%'

    # The label strip normally sits directly above the box. When the box is
    # closer to the top edge than the strip height, the strip and its text
    # would land outside the image, so place the strip just inside the box.
    if y >= LABEL_STRIP_HEIGHT:
        strip_top = y - LABEL_STRIP_HEIGHT
    else:
        strip_top = max(y, 0)
    strip_bottom = strip_top + LABEL_STRIP_HEIGHT

    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.rectangle(image, (x, strip_top), (x + w, strip_bottom), (255, 255, 255), -1)
    cv2.putText(image, text, (x, strip_bottom - 10), cv2.FONT_HERSHEY_PLAIN, 0.7, (0, 0, 0), 1)
    

In [31]:
# Show the untouched input and the annotated result in OpenCV windows,
# block until a key is pressed, then close every window.
for window_title, frame in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()