In [1]:
import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO

In [2]:
# Build the YOLO model from the weights file named below.
WEIGHTS_PATH = "yolo11n.pt"
model = YOLO(WEIGHTS_PATH)

In [47]:
# Open the test image with PIL.
img = Image.open("images/test_9.jpeg")

# End the cell with the image itself so the notebook renders it inline and the
# picture is stored with the cell output. `img.show()` launches an external
# viewer instead, which leaves nothing in the notebook and does not work on
# headless or remote kernels.
img

In [49]:
# Convert the PIL image to a NumPy array and print its dimensions.
img_arr = np.array(img)
print(np.shape(img_arr))

(183, 275, 3)


In [53]:
# Run the model on the image; the call returns an indexable collection of results.
result = model(img)

# Print the boxes attached to the first result.
detections = result[0].boxes
print(f"Bounding boxes: {detections}")


0: 448x640 1 person, 1 cell phone, 134.3ms
Speed: 3.6ms preprocess, 134.3ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)
Bounding boxes: ultralytics.engine.results.Boxes object with attributes:

cls: tensor([ 0., 67.])
conf: tensor([0.9467, 0.3793])
data: tensor([[117.3619,   5.6197, 274.9282, 180.9847,   0.9467,   0.0000],
        [146.7556,  89.5487, 174.1031, 151.5742,   0.3793,  67.0000]])
id: None
is_track: False
orig_shape: (183, 275)
shape: torch.Size([2, 6])
xywh: tensor([[196.1450,  93.3022, 157.5663, 175.3650],
        [160.4294, 120.5614,  27.3475,  62.0255]])
xywhn: tensor([[0.7133, 0.5098, 0.5730, 0.9583],
        [0.5834, 0.6588, 0.0994, 0.3389]])
xyxy: tensor([[117.3619,   5.6197, 274.9282, 180.9847],
        [146.7556,  89.5487, 174.1031, 151.5742]])
xyxyn: tensor([[0.4268, 0.0307, 0.9997, 0.9890],
        [0.5337, 0.4893, 0.6331, 0.8283]])


In [55]:
# One row per detection: columns 0/1 are used below as the box centre (x, y)
# and columns 2/3 as the box width and height.
# Move the tensor to host memory up front: the later `.numpy()` conversions
# raise a TypeError on a CUDA tensor, and `.cpu()` is a no-op on a CPU tensor.
xywh = result[0].boxes.xywh.cpu()

In [57]:
# Area of each detected box, expressed as a fraction of the whole image.
H, W = result[0].boxes.orig_shape  # (height, width) of the original image

# `.cpu()` before `.numpy()`: converting a CUDA tensor directly raises a
# TypeError, while on a CPU tensor `.cpu()` costs nothing.
widths = xywh[:, 2].cpu().numpy()
heights = xywh[:, 3].cpu().numpy()

areas = widths * heights
areas_norm = areas / (W * H)
areas_norm

array([    0.54906,    0.033706], dtype=float32)

In [59]:
# Per-detection confidence as a NumPy array.
# `.cpu()` first so the conversion also works when inference ran on a GPU
# (calling `.numpy()` on a CUDA tensor raises a TypeError).
conf = result[0].boxes.conf.cpu().numpy()
conf

array([    0.94666,     0.37928], dtype=float32)

In [61]:
# Distance of each box centre from the image centre.
H, W = result[0].boxes.orig_shape  # (height, width) of the original image

# Image centre
cx_img, cy_img = W / 2, H / 2

# Box centres. `.cpu()` before `.numpy()` so this also works for CUDA tensors
# (direct conversion raises a TypeError); it is a no-op on CPU tensors.
cx = xywh[:, 0].cpu().numpy()
cy = xywh[:, 1].cpu().numpy()

# Euclidean distance from the image centre
dx = cx - cx_img
dy = cy - cy_img
dist = np.hypot(dx, dy)

# Normalization: the largest possible distance is half the image diagonal
max_dist = np.hypot(W / 2, H / 2)
dist_norm = dist / max_dist

print("Centre distance: ", dist)
print("Normalized distance: ", dist_norm)

Centre distance:  [     58.673      37.018]
Normalized distance:  [    0.35524     0.22413]


In [63]:
# Filtering the main object.
#
# Every detection gets a weighted score and the highest score wins.
# Factor importance:
#     alpha ~ area of the object (as a fraction of the image)
#     beta  ~ confidence
#     gamma ~ centre distance (subtracted, so boxes far from the image
#             centre are penalized)
alpha = 0.5
beta = 0.4
gamma = 0.1

score = alpha * areas_norm + beta * conf - gamma * dist_norm

# With no detections `score` is empty and np.argmax would fail with an opaque
# "empty sequence" error; raise the same exception type with a clear message.
if score.size == 0:
    raise ValueError("No detections in the image; cannot select a main object.")

main_idx = int(np.argmax(score))
main_idx

0

In [65]:
# Corner coordinates of the selected box, truncated to integers.
# `.cpu()` before `.numpy()` so this also works when the boxes live on a GPU
# (calling `.numpy()` on a CUDA tensor raises a TypeError).
x1, y1, x2, y2 = result[0].boxes.xyxy[main_idx].cpu().numpy().astype(int)

# Work on a copy so later drawing does not modify the array stored on the result.
img_vls = result[0].orig_img.copy()
img_vls

array([[[ 26,  28,  29],
        [ 40,  42,  43],
        [ 47,  49,  50],
        ...,
        [143, 139, 138],
        [143, 137, 138],
        [109, 103, 104]],

       [[ 28,  30,  31],
        [ 42,  44,  45],
        [ 48,  50,  51],
        ...,
        [148, 144, 143],
        [148, 142, 143],
        [116, 110, 111]],

       [[ 33,  35,  36],
        [ 45,  47,  48],
        [ 49,  51,  52],
        ...,
        [154, 150, 149],
        [155, 149, 150],
        [124, 118, 119]],

       ...,

       [[ 40,  42,  43],
        [ 44,  46,  47],
        [ 52,  54,  55],
        ...,
        [ 80,  78,  78],
        [ 37,  32,  29],
        [ 54,  47,  44]],

       [[ 40,  42,  43],
        [ 44,  46,  47],
        [ 50,  52,  53],
        ...,
        [ 63,  61,  61],
        [ 70,  65,  62],
        [ 12,   5,   2]],

       [[ 41,  43,  44],
        [ 43,  45,  46],
        [ 49,  51,  52],
        ...,
        [ 65,  63,  63],
        [  5,   0,   0],
        [ 34,  27,  24]]

In [67]:
# Inspect x1, the first of the four corner values unpacked from the main box.
x1

np.int64(117)

In [69]:
# Drawing parameters for the main-object rectangle.
# NOTE(review): (255, 0, 0) is blue only if img_vls is in OpenCV's usual BGR
# channel order - confirm against the image source.
BOX_COLOR = (255, 0, 0)
BOX_THICKNESS = 2
WINDOW_TITLE = "Main Object"

top_left = (x1, y1)
bottom_right = (x2, y2)

# Draw the rectangle in place on the working copy, then show it in an OpenCV
# window and block until a key is pressed before closing all windows.
cv2.rectangle(img_vls, top_left, bottom_right, BOX_COLOR, BOX_THICKNESS)
cv2.imshow(WINDOW_TITLE, img_vls)
cv2.waitKey(0)
cv2.destroyAllWindows()