This is a Python notebook that interacts with the model trained on Google Colab. It takes in a live feed and performs pill detection on the uploaded image. Afterward, it automatically generates a text file containing all records within the image.

In [1]:
# computer vision
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader

In [2]:
# Locations of the class definitions and of the exported detector weights.
DATA_YAML_PATH = 'data.yaml'
ONNX_WEIGHTS_PATH = './Model/weights/best.onnx'

# Parse data.yaml (safe loader) and keep the list of pill class names.
with open(DATA_YAML_PATH, mode='r') as yaml_file:
    data_yaml = yaml.safe_load(yaml_file)
labels = data_yaml['names']

# Load the YOLO model exported to ONNX (trained on Google Colab) through
# OpenCV's DNN module, and run it with the OpenCV backend on the CPU.
yolo = cv2.dnn.readNetFromONNX(ONNX_WEIGHTS_PATH)
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [5]:
# Load the image to run detection on.
IMAGE_PATH = 'test_pills.jpeg'
img = cv2.imread(IMAGE_PATH)

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of a later AttributeError.
if img is None:
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')

# Work on a copy so that `img` keeps the untouched pixels.
image = img.copy()

# Image shape: rows (height), columns (width), channels.
row, col, d = image.shape

# Step 1: paste the image into the top-left corner of a black square canvas
# whose side is the longer image edge, so the resize to the square network
# input below does not distort the aspect ratio.
max_rc = max(row, col)
input_image = np.zeros((max_rc, max_rc, 3), dtype=np.uint8)
input_image[0:row, 0:col] = image

# Step 2: build the network input (pixel values scaled by 1/255, channels
# swapped BGR -> RGB, resized to INPUT_WH_YOLO x INPUT_WH_YOLO) and run a
# forward pass.
INPUT_WH_YOLO = 640
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WH_YOLO, INPUT_WH_YOLO), swapRB=True, crop=False)
yolo.setInput(blob)
preds = yolo.forward()
# print(preds.shape)

In [12]:
# Non-Maximum Suppression (NMS)

# Thresholds used to filter the raw predictions and to run NMS.
CONFIDENCE_THRESHOLD = 0.5    # minimum object confidence (column 4 of a prediction)
CLASS_SCORE_THRESHOLD = 0.5   # minimum best-class probability (columns 5 onwards)
NMS_SCORE_THRESHOLD = 0.5     # score threshold passed to cv2.dnn.NMSBoxes
NMS_OVERLAP_THRESHOLD = 0.5   # overlap (NMS) threshold passed to cv2.dnn.NMSBoxes

# preds[0] - the model makes ~25k predictions on the image; each one is laid out
# as [cx, cy, w, h, confidence, class probabilities...].
detections = preds[0]
boxes = []
confidences = []
classes = []

# NumPy image shapes are (rows, cols) = (height, width). The padded input is
# square, so both factors are equal, but keep the names honest.
image_h, image_w = input_image.shape[:2]
x_factor = image_w/INPUT_WH_YOLO
y_factor = image_h/INPUT_WH_YOLO

# Drop low-confidence predictions up front so the Python loop only visits the
# rows that can actually become boxes.
candidates = detections[detections[:, 4] > CONFIDENCE_THRESHOLD]

# The loop variable is deliberately not called `row`, so the image row count
# defined when the image was loaded is not overwritten.
for detection in candidates:
    confidence = detection[4]            # confidence of detecting an object
    class_score = detection[5:].max()    # highest class probability
    class_id = detection[5:].argmax()    # class ID with the highest probability

    if class_score <= CLASS_SCORE_THRESHOLD:
        continue

    cx, cy, w, h = detection[0:4]

    # Convert centre/size in network-input pixels to left, top, width, height
    # in pixels of the padded input image.
    left = int((cx-0.5*w)*x_factor)
    top = int((cy-0.5*h)*y_factor)
    width = int(w*x_factor)
    height = int(h*y_factor)

    box = np.array([left, top, width, height])

    # Keep confidence, box and class aligned by position in the three lists.
    confidences.append(confidence)
    boxes.append(box)
    classes.append(class_id)

# Convert to plain Python lists, which is what cv2.dnn.NMSBoxes is given below.
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# Indices (into boxes_np) of the boxes kept by NMS.
# Depending on the OpenCV version, NMSBoxes can return an empty tuple instead of
# an array when nothing survives, so calling .flatten() on it directly crashes.
# Normalise through np.asarray first; skip the call entirely with no candidates.
if boxes_np:
    nms_result = cv2.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_OVERLAP_THRESHOLD)
else:
    nms_result = ()
index = np.asarray(nms_result, dtype=np.int32).flatten()

In [19]:
# Inspect every candidate box collected before non-maximum suppression;
# each entry is [left, top, width, height]. Note: this prints the whole list.
print(boxes_np)

[[523, 771, 175, 191], [525, 766, 174, 189], [518, 778, 176, 185], [591, 911, 170, 176], [586, 917, 172, 186], [588, 910, 172, 191], [526, 776, 168, 178], [526, 764, 177, 186], [680, 735, 260, 248], [689, 727, 259, 253], [517, 782, 179, 172], [577, 910, 180, 174], [593, 913, 169, 172], [581, 920, 174, 178], [591, 911, 167, 178], [735, 989, 282, 270], [748, 982, 296, 267], [521, 772, 173, 178], [522, 771, 170, 180], [691, 741, 241, 239], [682, 739, 252, 243], [519, 772, 175, 182], [689, 746, 242, 230], [589, 911, 169, 185], [585, 911, 173, 185], [579, 912, 181, 188], [383, 869, 305, 469], [758, 956, 253, 296], [752, 956, 290, 302], [775, 948, 276, 316], [399, 895, 304, 500], [754, 960, 289, 302], [520, 771, 177, 177], [524, 770, 169, 183], [694, 741, 235, 239], [690, 741, 238, 244], [516, 769, 178, 186], [683, 750, 249, 232], [588, 915, 168, 174], [584, 910, 179, 181], [580, 911, 177, 193], [409, 912, 256, 388], [736, 952, 285, 307], [740, 968, 309, 289], [760, 969, 299, 287], [420, 967

In [13]:
# print("Number of bounding boxes created:",len(boxes_np))
# print("Number of selected bounding boxes:",len(index))

# selected_boxes = [boxes_np[i] for i in index]
# selected_boxes

Why is there a discrepancy between the number of bounding boxes created and the selected bounding boxes?

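- Many of the candidate bounding boxes overlap or sit very close to each other (several predictions for the same pill). Non-maximum suppression keeps the highest-confidence box in each overlapping group and removes the lower-confidence ones.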

To extract the important bounding boxes, apply non-maximum suppression with a score threshold and an overlap threshold, for example:

`cv2.dnn.NMSBoxes(boxes_np, confidences_np, 0.25, 0.45).flatten()`

Afterward, extract the bounding boxes with the code:

`[boxes_np[i] for i in index]`

In [14]:
# Draw the bounding box of every prediction kept by NMS (object confidence and
# class probability both > 0.5), with a "<class>: <confidence>%" label above it.
locations = []

# `index` holds positions into boxes_np / confidences_np / classes.
for ind in index:
    x, y, w, h = boxes_np[ind]
    locations.append(boxes_np[ind])
    bb_conf = confidences_np[ind]*100
    classes_id = classes[ind]
    class_name = labels[classes_id]

    # Round the percentage for display; the raw float prints with a long tail
    # of decimals that overflows the label strip.
    text = f'{class_name}: {bb_conf:.0f}%'

    # Green outline around the detection.
    cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    # Filled white strip (30 px tall) directly above the box as label background.
    cv2.rectangle(image, (x, y-30), (x+w, y), (255, 255, 255), -1)
    # Black label text inside the strip.
    cv2.putText(image, text, (x, y-10), cv2.FONT_HERSHEY_PLAIN, 0.7, (0, 0, 0), 1)

In [15]:
# Show the annotated image in an OpenCV window titled 'yolo_prediction'.
cv2.imshow('yolo_prediction', image)
# waitKey(0) waits indefinitely for a key press before continuing.
cv2.waitKey(0)
# Close all OpenCV windows once a key has been pressed.
cv2.destroyAllWindows()