In [8]:
import cv2
from ultralytics import YOLO
import numpy as np
from ultralytics.utils.plotting import Annotator, colors

In [3]:
# Notebook-level setup: load the "yolo11n.pt" weights and read the sample image.
# `image` is read as a global by main().
model = YOLO("yolo11n.pt")
image_path = "people.jpg"
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of later on inside
# model.predict() or image.copy().
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

In [4]:
class DetectionResult:
    """Plain container for the outputs of a single detection pass.

    The values are stored exactly as given; nothing is copied or validated.
    """

    def __init__(self, boxes, scores, class_ids, meta=None):
        """Store the detection fields on the instance.

        Args:
            boxes: Bounding boxes of the detections.
            scores: Confidence score of each detection.
            class_ids: Class id of each detection.
            meta: Optional extra information. Any falsy value (``None``, an
                empty dict, ...) is replaced by a new empty dict.
        """
        self.boxes, self.scores, self.class_ids = boxes, scores, class_ids
        # Fall back to a fresh dict so `meta` can always be indexed by callers.
        if not meta:
            meta = {}
        self.meta = meta

class DetectionEngine:
    """Runs a YOLO model on an image and packages the detections as NumPy arrays."""

    def __init__(self, model_path, confidence_threshold=0.3, device="cpu", use_slicing=False):
        """Load the model and remember the inference settings.

        Args:
            model_path: Weights path/name handed to ``YOLO(...)``; also reported
                as ``meta["model_name"]`` by :meth:`detect`.
            confidence_threshold: Passed as ``conf`` to ``model.predict``.
            device: Passed as ``device`` to ``model.predict``.
            use_slicing: Stored on the instance only; this class does not
                implement any slicing logic.
        """
        self.model_path = model_path
        self.model = YOLO(model_path)
        self.confidence_threshold = confidence_threshold
        self.device = device
        self.use_slicing = use_slicing

    def detect(self, image, classes_to_detect=None):
        """Run inference on ``image`` and return the (optionally filtered) detections.

        Args:
            image: Input forwarded unchanged to ``self.model.predict``.
            classes_to_detect: Optional container of integer class ids to keep.
                ``None`` or an empty container keeps every detection.

        Returns:
            DetectionResult: ``boxes`` is a float array of shape ``(N, 4)`` in
            xyxy order (``(0, 4)`` when nothing is kept), ``scores`` a float
            array of shape ``(N,)``, ``class_ids`` an int array of shape
            ``(N,)``. ``meta`` holds ``num_detections``, ``model_name`` and
            ``device``.
        """
        # predict() returns one result per input image; a single image is passed.
        results = self.model.predict(
            image,
            conf=self.confidence_threshold,
            device=self.device,
        )[0]

        boxes, scores, class_ids = [], [], []

        for box, score, cls in zip(results.boxes.xyxy, results.boxes.conf, results.boxes.cls):
            class_id = int(cls)
            if classes_to_detect and class_id not in classes_to_detect:
                continue

            boxes.append(box.tolist())     # xyxy format
            scores.append(float(score))
            class_ids.append(class_id)

        # reshape keeps the (N, 4) layout even when no detection survived;
        # np.array([]) alone would collapse to shape (0,).
        boxes = np.array(boxes, dtype=float).reshape(-1, 4)
        scores = np.array(scores, dtype=float)
        class_ids = np.array(class_ids, dtype=int)

        meta = {
            "num_detections": len(boxes),
            # Report the weights identifier, not the underlying network object.
            "model_name": str(self.model_path),
            "device": self.device,
        }

        return DetectionResult(boxes, scores, class_ids, meta)
    

In [14]:
class AnnotationEngine:
    """Draws detection boxes with "<class id> <score>" labels onto an image copy."""

    def __init__(self):
        """Nothing to configure yet; font/line-width overrides could be added later."""

    def annotate(self, image, detection_result):
        """Return an annotated version of ``image``.

        Drawing is done through an ``Annotator`` built on ``image.copy()``,
        using a line width of 2 and a per-class colour from ``colors``.

        Args:
            image: Image to annotate; must provide ``.copy()``.
            detection_result: Object exposing index-aligned ``boxes``,
                ``scores`` and ``class_ids``.

        Returns:
            Whatever ``Annotator.result()`` returns for the drawn-on copy.
        """
        canvas = image.copy()
        painter = Annotator(canvas, line_width=2)

        all_boxes = detection_result.boxes
        all_scores = detection_result.scores
        all_class_ids = detection_result.class_ids

        for idx, bbox in enumerate(all_boxes):
            cls_id = all_class_ids[idx]
            # Label is the numeric class id followed by the score to 2 decimals.
            caption = f"{cls_id} {all_scores[idx]:.2f}"
            painter.box_label(bbox, caption, color=colors(cls_id))

        return painter.result()


In [None]:
def main():
    """Detect objects in the notebook-level ``image``, annotate them and show the result.

    Builds a ``DetectionEngine`` on "yolo11n.pt", runs it on the global
    ``image``, draws the detections with ``AnnotationEngine`` and displays the
    annotated image in an OpenCV window named "Annotated" until ``waitKey(0)``
    returns.
    """
    engine = DetectionEngine("yolo11n.pt")
    result = engine.detect(image)

    annotator = AnnotationEngine()
    annotated = annotator.annotate(image, result)

    try:
        cv2.imshow("Annotated", annotated)
        cv2.waitKey(0)
    finally:
        # Always tear the window down, even if the wait is interrupted
        # (e.g. the kernel is interrupted), so no orphaned window is left behind.
        cv2.destroyAllWindows()







# Entry point: run the demo only when this code is executed directly
# (as a script or a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()


0: 448x640 7 persons, 1 chair, 49.2ms
Speed: 2.7ms preprocess, 49.2ms inference, 1.2ms postprocess per image at shape (1, 3, 448, 640)
[[9.09118652e-01 5.69069641e+02 1.17686142e+02 6.83000000e+02]
 [2.98549652e+02 4.68950989e+02 3.91812561e+02 6.65558228e+02]
 [8.24189819e+02 6.08791626e+02 9.23080444e+02 6.77780273e+02]
 [6.60499023e+02 5.22961243e+02 8.07387512e+02 6.79005981e+02]
 [3.77099854e+02 5.24484009e+02 4.58947327e+02 6.74534180e+02]
 [4.18239166e+02 4.93053894e+02 5.03974701e+02 5.91818359e+02]
 [7.01522827e+02 5.22608887e+02 8.08141418e+02 6.71710815e+02]
 [5.83080322e+02 4.57838928e+02 6.53853943e+02 5.49649780e+02]]
