In [1]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import numpy as np
import math

In [2]:
def get_precise_box(mask):
    """Find the largest object in a binary mask and describe its pose.

    Args:
        mask: Single-channel 8-bit image handed directly to
            ``cv2.findContours``.

    Returns:
        ``(None, None)`` when no contour is found (a message is printed).
        Otherwise ``(contour, (center_x, center_y, angle, long_side,
        short_side))`` where:

        * ``contour`` is the external contour with the largest area,
        * ``center_x`` / ``center_y`` are the integer centroid coordinates
          computed from the contour moments,
        * ``angle`` is derived from the ``cv2.minAreaRect`` rotation
          (degrees), re-expressed so that it does not depend on which
          rectangle side OpenCV reports as "width",
        * ``long_side`` / ``short_side`` are the larger and smaller side
          lengths of the minimum-area rectangle.
    """
    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        print("Object not found")
        return None, None

    main_contour = max(contours, key=cv2.contourArea)

    # ((cx, cy), (width, height), rotation) of the tightest rotated rectangle.
    rect = cv2.minAreaRect(main_contour)
    (rect_cx, rect_cy), (width, height), angle = rect

    moments = cv2.moments(main_contour)
    if moments["m00"] != 0:
        center_x = int(moments["m10"] / moments["m00"])
        center_y = int(moments["m01"] / moments["m00"])
    else:
        # Degenerate contour (zero area, e.g. a single pixel or a thin line):
        # the moment-based centroid is undefined, so use the rectangle centre
        # instead of silently reporting the image origin.
        center_x, center_y = int(rect_cx), int(rect_cy)

    # Normalise the rotation so the result refers to the same physical side
    # regardless of whether OpenCV labelled it "width" or "height".
    if width < height:
        angle = 90 + angle
    angle = 90 - angle

    return main_contour, (center_x, center_y, angle, max(width, height), min(width, height))

In [None]:
def process_image(image, model):
    """Segment an image, draw the detections and collect object poses.

    Args:
        image: Either a path readable by ``cv2.imread`` or an already loaded
            ``np.ndarray`` (treated as BGR, since it is converted with
            ``COLOR_BGR2RGB``).
        model: Callable segmentation model; it is invoked as
            ``model(image, conf=0.7)`` and its ``names`` mapping is used for
            labels. During development this was
            ``YOLO("../runs/segment/train11/weights/best.pt")``, trained with
            ``yolo train data=./dataset/data.yaml batch=0.8
            model=yolo11n-seg.pt imgsz=1280 device=0 epochs=1000
            patience=250``.

    Returns:
        ``(img, detected_objects, best_objects)``:

        * ``img`` - RGB image with the annotations drawn on it (resized to
          the mask resolution when masks are present),
        * ``detected_objects`` - list of dicts with keys ``name``,
          ``confidence``, ``area``, ``x``, ``y``, ``z`` and ``theta``
          (radians),
        * ``best_objects`` - dict mapping each class name to its
          highest-confidence entry from ``detected_objects``.

    Raises:
        FileNotFoundError: ``image`` is not an array and could not be read.

    Side effects:
        When ``image`` is a path, a filled mask picture is written to
        ``f'{image}_{i}.jpg'`` for every detection ``i``.
    """
    # Frame size (pixels) assumed by the mapping to the simulation in
    # vision_pick_and_place.wbt:
    #   x -> [0; 1280] =  [-0.4; 0.4]
    #   y -> [0; 960]  = -[-0.3; 0.3]
    frame_width, frame_height = 1280, 960

    result = model(image, conf=0.7)[0]
    classes = result.boxes.cls.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()

    image_is_array = isinstance(image, np.ndarray)
    if image_is_array:
        img = image.copy()
    else:
        img = cv2.imread(image)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"Could not read image: {image}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    detected_objects = []
    best_objects = {}

    # Per-class drawing colours, applied to the RGB-converted image.
    colors = [(255, 0, 0),
              (0, 0, 255),
              (0, 255, 255),
              (255, 0, 255),
              (255, 255, 0),
              (255, 255, 255),
              (0, 0, 0)]

    if result.masks is None:
        # Nothing was segmented: return the unannotated image.
        masks = []
    else:
        masks = result.masks.data.cpu().numpy()
        # Contours are found in mask coordinates, so draw at mask resolution.
        img = cv2.resize(img, (masks.shape[2], masks.shape[1]))

    for i, mask in enumerate(masks):
        class_id = int(classes[i])
        # Wrap around instead of failing for models with more classes
        # than palette entries.
        color = colors[class_id % len(colors)]

        precise_box, pose = get_precise_box(mask.astype(np.uint8))
        if precise_box is None:
            # Empty mask: no contour, hence nothing to draw or report.
            continue
        x, y, theta, w1, _ = pose

        x1, y1, w, h = cv2.boundingRect(precise_box)
        cv2.rectangle(img, (x1, y1), (x1 + w, y1 + h), (0, 0, 0), 1)

        # Centroid marker plus a 70 px line showing the orientation.
        cv2.circle(img, (x, y), 9, (0, 255, 0), -1)
        start = (x, y)
        end = (int(start[0] - math.sin(math.radians(theta)) * 70),
               int(start[1] - math.cos(math.radians(theta)) * 70))
        cv2.line(img, start, end, (0, 255, 0), 3)

        cv2.drawContours(img, [precise_box], -1, tuple(int(c) for c in color), 3)

        if not image_is_array:
            # Only a path yields a meaningful file name; formatting an array
            # into the name would produce garbage.
            mask_img = np.zeros(img.shape, np.uint8)
            cv2.drawContours(mask_img, [precise_box], -1, (255, 255, 255), -1)
            cv2.imwrite(f'{image}_{i}.jpg', mask_img)

        try:
            predicted_class_name = f"{model.names[class_id]}"
        except (KeyError, IndexError, TypeError, AttributeError):
            predicted_class_name = "Unknown"

        cv2.putText(img, predicted_class_name, (x1, y1 + h + 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        area = int(cv2.contourArea(precise_box))

        # NOTE(review): 140 / 835 and the divisor 5 look like per-class
        # calibration constants relating the long side to a height that is
        # displayed in mm - confirm against the calibration.
        z = int((w1 - (140 if class_id == 1 else 835)) / 5)

        cv2.putText(img, f"{z:d} mm", (x1, y1 + h + 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        detected_object = {
            "name": predicted_class_name,
            "confidence": confidences[i],
            "area": area,
            # Pixel -> simulation coordinates; both axes are centred on the
            # frame and scaled by the full frame extent.
            "x": 0.8 * (x - frame_width / 2) / frame_width,
            "y": - 0.6 * (y - frame_height / 2) / frame_height,
            "z": z,
            "theta": math.radians(theta)
        }

        detected_objects.append(detected_object)
        name = detected_object["name"]
        if name not in best_objects or \
                best_objects[name]["confidence"] < detected_object["confidence"]:
            best_objects[name] = detected_object

    return img, detected_objects, best_objects