In [23]:
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO

# Location of the YOLOv8-medium weights, relative to the notebook. The name is
# given without a file suffix; the recorded output of this cell shows the
# weights being downloaded to `yolov8m.pt` in that folder on first use.
MODEL_WEIGHTS = "../MODELs/yolov8m"

# Detector shared by the inference cell further down.
model = YOLO(MODEL_WEIGHTS)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt to '..\MODELs\yolov8m.pt'...


100%|██████████| 49.7M/49.7M [01:33<00:00, 556kB/s] 


In [2]:
# Typeface (16 pt) used when rendering detection labels onto the image.
font = ImageFont.truetype("../FONTs/STHeiti Light.ttc", 16)

# Palette keyed by "<box/label-background hex>_<label-text hex>". Each value is
# a half-open [start, end) span of class IDs drawn with that colour pair.
bg_color_ranges = {
    "#FF0000_#181818": [0, 1],
    "#FF9900_#181818": [1, 14],
    "#341A36_#FFFFFF": [14, 24],
    "#00C036_#181818": [24, 80],
}

# Palette key per class ID 0..79, in class-ID order. The first span (in dict
# order) that contains the ID wins; an ID covered by no span adds no entry.
color_labels = []
for class_id in range(80):
    matching_keys = [
        palette_key
        for palette_key, (start, end) in bg_color_ranges.items()
        if start <= class_id < end
    ]
    if matching_keys:
        color_labels.append(matching_keys[0])

In [18]:
def _box_inside(inner, outer):
    """Return True when `inner` lies within `outer` (edges may touch); both are x1, y1, x2, y2 sequences."""
    return (inner[0] >= outer[0] and inner[1] >= outer[1] and
            inner[2] <= outer[2] and inner[3] <= outer[3])


def filter_rectangles(boxes, classes, confidences):
    """Drop same-class boxes that are nested inside one another.

    For every pair of boxes that share a class and where one box lies entirely
    within the other (touching edges count), the box with the lower confidence
    is discarded. When the two confidences are equal, the enclosed box is
    discarded; for two identical boxes with equal confidence the earlier one
    is discarded.

    Args:
        boxes: N boxes in (x1, y1, x2, y2) form, as a tensor or nested sequence.
        classes: N class IDs, as a tensor or sequence.
        confidences: N confidence values, as a tensor or sequence.

    Returns:
        Tuple (boxes, classes, confidences) of tensors holding only the
        surviving entries, in their original order.
    """
    # Convert to PyTorch tensors if not already, so the outputs can be
    # mask-indexed uniformly.
    if not isinstance(boxes, torch.Tensor):
        boxes = torch.tensor(boxes)
    if not isinstance(classes, torch.Tensor):
        classes = torch.tensor(classes)
    if not isinstance(confidences, torch.Tensor):
        confidences = torch.tensor(confidences)

    # Pull the values out as plain Python numbers once. Indexing the tensors
    # element by element inside the pair loop would create a 0-d tensor per
    # comparison (and force a host sync each time if the data is on a GPU).
    box_rows = boxes.tolist()
    class_ids = classes.tolist()
    scores = confidences.tolist()
    count = len(box_rows)

    keep = [True] * count

    # Compare every unordered pair of same-class boxes.
    for i in range(count):
        for j in range(i + 1, count):
            if class_ids[i] != class_ids[j]:
                continue
            if _box_inside(box_rows[i], box_rows[j]):
                # i is enclosed by j: i survives only with strictly higher confidence.
                loser = j if scores[i] > scores[j] else i
            elif _box_inside(box_rows[j], box_rows[i]):
                # j is enclosed by i: j survives only with strictly higher confidence.
                loser = i if scores[j] > scores[i] else j
            else:
                continue
            keep[loser] = False

    # Build the mask once with an explicit bool dtype (so an empty input still
    # yields a bool mask). It is deliberately left on the CPU: the three inputs
    # may live on different devices, e.g. GPU boxes with list-derived classes.
    keep_mask = torch.tensor(keep, dtype=torch.bool)

    return boxes[keep_mask], classes[keep_mask], confidences[keep_mask]

In [21]:
# Detections whose confidence, rounded to two decimals, falls below this are not drawn.
MIN_CONFIDENCE = 0.32
# Height in pixels of the label strip drawn above each box (matches the 16 pt label font).
LABEL_HEIGHT = 16

img_path = "test_image.jpg"
img = Image.open(img_path)  # Load the image

results = model(img, verbose=False)  # Perform inference and get the results
result = results[0]  # Only one image was passed in, so take the first result

boxes = result.boxes.xyxy  # Bounding boxes as (x1, y1, x2, y2)
cls = result.boxes.cls.tolist()  # Class IDs
conf = result.boxes.conf.tolist()  # Confidence values

names = result.names  # Class ID -> class name lookup

# Remove same-class boxes nested inside one another, then go back to plain lists.
boxes, cls, conf = filter_rectangles(boxes, cls, conf)
cls = cls.tolist()
conf = conf.tolist()

draw = ImageDraw.Draw(img)
for index in range(len(boxes)):
    if round(conf[index], 2) < MIN_CONFIDENCE:
        continue

    box_data = boxes[index].tolist()  # Convert tensor row to a list
    if len(box_data) != 4:
        # Anything other than four coordinates cannot be drawn; report and move on.
        print("Unexpected box data format:", box_data)
        continue
    x1, y1, x2, y2 = box_data

    # Class IDs come back as floats; normalise once so the palette lookup and
    # the name lookup use the same integer key.
    class_id = int(cls[index])
    filling_color = color_labels[class_id].split("_")  # [box/background colour, text colour]

    cls_label = names.get(class_id, "Unknown")  # Fall back to "Unknown" for an unmapped ID
    # Scale first, then round: int(round(c, 2) * 100) truncates values such as
    # 0.57 * 100 == 56.99999999999999 down to 56.
    conf_label = round(conf[index] * 100)
    label = f"{cls_label} {conf_label}%"

    draw.rectangle([x1, y1, x2, y2], outline=filling_color[0], width=3)  # Box outline

    # Label strip sits directly above the box, clamped to the top edge of the
    # image; its width is a rough estimate of 9 px per character.
    label_top = max(y1 - LABEL_HEIGHT, 0)
    text_bg = [x1, label_top, x1 + (len(cls_label) + 5) * 9, y1]
    draw.rectangle(text_bg, fill=filling_color[0])
    draw.text((x1 + 2, label_top), label, fill=filling_color[1], font=font)

img.show()  # Display the image

In [5]:
# List every entry of the model's class-name mapping as "<position>- <name>",
# numbering entries by iteration order starting from 0.
class_names = [names[key] for key in names]
for position, class_name in enumerate(class_names):
    print(f"{position}- {class_name}")
index = len(class_names)  # same final counter value the manual increment left behind

0- person
1- bicycle
2- car
3- motorcycle
4- airplane
5- bus
6- train
7- truck
8- boat
9- traffic light
10- fire hydrant
11- stop sign
12- parking meter
13- bench
14- bird
15- cat
16- dog
17- horse
18- sheep
19- cow
20- elephant
21- bear
22- zebra
23- giraffe
24- backpack
25- umbrella
26- handbag
27- tie
28- suitcase
29- frisbee
30- skis
31- snowboard
32- sports ball
33- kite
34- baseball bat
35- baseball glove
36- skateboard
37- surfboard
38- tennis racket
39- bottle
40- wine glass
41- cup
42- fork
43- knife
44- spoon
45- bowl
46- banana
47- apple
48- sandwich
49- orange
50- broccoli
51- carrot
52- hot dog
53- pizza
54- donut
55- cake
56- chair
57- couch
58- potted plant
59- bed
60- dining table
61- toilet
62- tv
63- laptop
64- mouse
65- remote
66- keyboard
67- cell phone
68- microwave
69- oven
70- toaster
71- sink
72- refrigerator
73- book
74- clock
75- vase
76- scissors
77- teddy bear
78- hair drier
79- toothbrush
