In [30]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import numpy as np
import math
import glob

# dataset-4 -> v7 2025-06-04 2:05pm
# dataset-5 -> v8 2025-06-04 6:40pm
# dataset-6 -> v9 2025-06-04 7:16pm

# train11 -> yolo train data=./dataset/data.yaml batch=0.8 model=yolo11n-seg.pt imgsz=1280 device=0 epochs=1000 patience=250
# train13 -> yolo train data=./dataset/data.yaml batch=0.8 model=yolo11n-seg.pt imgsz=1920 device=0 epochs=2000 patience=300
# train14 -> yolo train data=./dataset-new/data.yaml batch=0.8 model=yolo11n-seg.pt imgsz=1920 device=0 epochs=2000 patience=300
# train17 -> yolo train data=./dataset-4/data.yaml batch=0.8 model=yolo11n-seg.pt device=0 epochs=2000 patience=300
# train18 -> yolo train data=./dataset-5/data.yaml batch=0.8 model=yolo11n-seg.pt device=0 epochs=2000 patience=300
# train20 -> yolo train data=./dataset-6/data.yaml batch=0.8 model=yolo11n-seg.pt device=0 epochs=2000 patience=300
# train21 -> yolo train data=./dataset-5/data.yaml batch=0.8 model=yolo11n-seg.pt imgsz=1920 device=0 epochs=2000 patience=300
# train22 -> yolo train data=./dataset-6/data.yaml batch=0.8 model=yolo11n-seg.pt imgsz=1920 device=0 epochs=2000 patience=300
# train23 -> yolo train data=./dataset-6/data.yaml mosaic=0.0 batch=0.8 model=yolo11n-seg.pt device=0 epochs=1000
# train24 -> yolo train data=./dataset-6/data.yaml mosaic=0.0 imgsz=1920 batch=0.8 model=yolo11n-seg.pt device=0 epochs=250

# 5.mp4  -> video/video_1.mp4             -> train17 
# 6.mp4  -> video/video_1.mp4             -> train18
# 7.mp4  -> video/video_1.mp4             -> train20
# 8.mp4  -> video/VID_20250603_193149.mp4 -> train17
# 9.mp4  -> video/VID_20250603_193149.mp4 -> train18
# 10.mp4 -> video/VID_20250603_193149.mp4 -> train20
# 11.mp4 -> video/video_1.mp4             -> train21
# 12.mp4 -> video/VID_20250603_193149.mp4 -> train21
# 13.mp4 -> video/video_1.mp4             -> train22
# 14.mp4 -> video/VID_20250603_193149.mp4 -> train22
# 15.mp4 -> video/video_1.mp4             -> train23
# 16.mp4 -> video/VID_20250603_193149.mp4 -> train23
# 17.mp4 -> video/video_1.mp4             -> train24
# 18.mp4 -> video/VID_20250603_193149.mp4 -> train24

# === Parameters ===
# Segmentation model, loaded from the best checkpoint of training run "train24".
model = YOLO("../runs/segment/train24/weights/best.pt")
# Source video that is read frame by frame.
input_path = "video/video_1.mp4"
# input_path = "video/VID_20250603_193149.mp4"
# Destination file of the rendered side-by-side demo video.
output_path = "demos/17.mp4"
# Number of frames per second of video that are meant to be analysed.
process_fps = 1


def get_precise_box(mask):
    """Find the largest object in a binary mask and describe its pose.

    Args:
        mask: Single-channel 8-bit image accepted by ``cv2.findContours``;
            non-zero pixels belong to the object.

    Returns:
        A pair ``(contour, pose)``. ``contour`` is the external contour with
        the largest area and ``pose`` is the tuple
        ``(center_x, center_y, angle, long_side, short_side)``: the integer
        centroid in pixels, the orientation in degrees derived from the
        minimum-area rectangle, and the rectangle's longer and shorter side
        lengths. When the mask holds no contour at all, ``(None, None)`` is
        returned and a message is printed.
    """
    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        print("Object not found")
        return None, None

    main_contour = max(contours, key=cv2.contourArea)

    # minAreaRect yields ((cx, cy), (w, h), angle_in_degrees).
    (rect_cx, rect_cy), (width, height), angle = cv2.minAreaRect(main_contour)

    moments = cv2.moments(main_contour)
    if moments["m00"] != 0:
        center_x = int(moments["m10"] / moments["m00"])
        center_y = int(moments["m01"] / moments["m00"])
    else:
        # A zero-area contour (single point or thin line) has no usable
        # moments; use the rectangle centre instead of the image origin.
        center_x, center_y = int(rect_cx), int(rect_cy)

    # Express the angle relative to the rectangle's longer side.
    if width < height:
        angle = 90 + angle
    angle = 90 - angle

    return main_contour, (center_x, center_y, angle,
                          max(width, height), min(width, height))

def process_image(image, model):
    """Segment an image, draw an annotated overlay and list the detections.

    Args:
        image: Either an already decoded image exposing ``copy()`` (such as
            a video frame array) or a path that ``cv2.imread`` can read.
        model: Callable segmentation model; invoked as
            ``model(image, conf=0.4)`` and queried through ``model.names``.

    Returns:
        ``(img, detected_objects, best_objects)`` where ``img`` is the
        annotated copy of the input (resized to the mask resolution when
        masks exist), ``detected_objects`` is a list with one dict per usable
        mask (keys ``name``, ``confidence``, ``area``, ``x``, ``y``, ``z``,
        ``theta``) and ``best_objects`` maps each class name to its
        highest-confidence entry.

    Raises:
        ValueError: If ``image`` is a path that ``cv2.imread`` cannot decode.
    """
    result = model(image, conf=0.4)[0]

    classes = result.boxes.cls.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()

    # Decoded images are copied so drawing never touches the caller's data;
    # anything without ``copy`` (e.g. a path string) is loaded from disk.
    if hasattr(image, "copy"):
        img = image.copy()
    else:
        img = cv2.imread(image)
        if img is None:
            raise ValueError(f"Could not read image: {image!r}")

    detected_objects = []
    best_objects = {}

    # One drawing colour per class id (channel order as used by OpenCV).
    colors = [(255, 0, 0),
              (0, 0, 255),
              (0, 255, 255),
              (255, 0, 255),
              (255, 255, 0),
              (255, 255, 255),
              (0, 0, 0)]

    if result.masks is None:
        # Nothing was segmented: keep the image untouched.
        masks = []
    else:
        masks = result.masks.data.cpu().numpy()
        # Work at mask resolution so contour coordinates match the image.
        img = cv2.resize(img, (masks.shape[2], masks.shape[1]))

    for i, mask in enumerate(masks):
        class_id = int(classes[i])
        # Wrap around the palette instead of failing on extra classes.
        color = colors[class_id % len(colors)]

        contour, pose = get_precise_box(mask.astype(np.uint8))
        if contour is None:
            # Empty mask: there is nothing to draw or to report.
            continue
        x, y, theta, long_side, _ = pose

        x1, y1, w, h = cv2.boundingRect(contour)
        cv2.rectangle(img, (x1, y1), (x1 + w, y1 + h), (0, 0, 0), 1)

        # Centroid marker plus a 70 px line showing the orientation.
        cv2.circle(img, (x, y), 9, (0, 255, 0), -1)
        start = (x, y)
        end = (int(start[0] - math.sin(math.radians(theta)) * 70),
               int(start[1] - math.cos(math.radians(theta)) * 70))
        cv2.line(img, start, end, (0, 255, 0), 3)

        cv2.drawContours(img, [contour], -1, color, 3)

        try:
            class_name = f"{model.names[class_id]}"
        except (KeyError, IndexError):
            class_name = "Unknown"

        cv2.putText(img, class_name, (x1, y1 + h + 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        area = int(cv2.contourArea(contour))

        # NOTE(review): 140 / 835 look like per-class reference lengths in
        # pixels and 5 like a px-per-mm factor -- confirm the calibration.
        z = int((long_side - (140 if class_id == 1 else 835)) / 5)

        cv2.putText(img, f"{z:d} mm", (x1, y1 + h + 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)

        # For the simulation in vision_pick_and_place.wbt:
        #   x -> [0; 1280] =  [-0.4; 0.4]
        #   y -> [0; 960] = -[-0.3; 0.3]
        # NOTE(review): assumes a 1280x960 working image; the mask
        # resolution may differ -- confirm against the simulation camera.
        detected_object = {
            "name": class_name,
            "confidence": confidences[i],
            "area": area,
            "x": 0.8 * (x - (1280 + 0) / 2) / (1280 - 0),
            "y": - 0.6 * (y - (960 + 0) / 2) / (960 - 0),
            "z": z,
            "theta": math.radians(theta)
        }

        detected_objects.append(detected_object)
        best = best_objects.get(class_name)
        if best is None or best["confidence"] < detected_object["confidence"]:
            best_objects[class_name] = detected_object

    return img, detected_objects, best_objects

def resize_keep_aspect(image, target_height):
    """Return a copy of ``image`` scaled to ``target_height`` rows.

    The width is scaled by the same factor (truncated to an integer), so the
    aspect ratio of the input is kept.
    """
    source = image.copy()
    src_h, src_w = source.shape[:2]
    factor = target_height / src_h
    target_size = (int(src_w * factor), target_height)  # (width, height)
    return cv2.resize(source, target_size)



# === Open the input video ===
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
    cap.release()
    raise IOError(f"Could not open input video: {input_path}")
fps = cap.get(cv2.CAP_PROP_FPS)
# Fixed size of the output canvas (not the size of the input frames).
width = 1920  # int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = 1080  # int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# === Video writer ===
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_count = 0
# Placeholders shown until the first frame has been analysed.
processed_frame = np.zeros((height, width, 3), dtype=np.uint8)
processed_frame_2 = processed_frame.copy()

# Analyse `process_fps` frames per second of video; never let the interval
# drop to 0, which would make the modulo below fail.
frame_interval = max(1, round(fps / process_fps)) if fps > 0 else 1

# Canvas layout: original on the left half (vertically centred), the two
# annotated views stacked on the right half.
half_w, half_h = width // 2, height // 2
left_top = (height - half_h) // 2

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frame_count % frame_interval == 0:
            # Stock visualisation produced by the model itself.
            result = model(frame.copy(), conf=0.4)[0]
            processed_frame = result.plot()

            # Custom overlay with contour, centroid, orientation and labels.
            processed_frame_2, detected_objects, _ = process_image(frame.copy(), model)

        # Combine the original frame with the most recent annotated views.
        combined = np.zeros((height, width, 3), np.uint8)

        resized = resize_keep_aspect(frame, half_h)
        w = min(resized.shape[1], width)
        combined[left_top:left_top + half_h, :w] = resized[:, :w]

        # Panels wider than the right half are clipped instead of raising.
        resized = resize_keep_aspect(processed_frame, half_h)
        w = min(resized.shape[1], width - half_w)
        combined[:half_h, half_w:half_w + w] = resized[:, :w]

        resized = resize_keep_aspect(processed_frame_2, half_h)
        w = min(resized.shape[1], width - half_w)
        combined[half_h:2 * half_h, half_w:half_w + w] = resized[:, :w]

        # Write the composed frame.
        out.write(combined)

        # Abort on ESC.
        if cv2.waitKey(1) & 0xFF == 27:
            break

        frame_count += 1
finally:
    # === Release resources, even if processing failed ===
    cap.release()
    out.release()
#cv2.destroyAllWindows()


0: 1440x1920 1 Plate 2, 1 Plate 4, 28.4ms
Speed: 15.0ms preprocess, 28.4ms inference, 4.6ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 1 Plate 2, 1 Plate 4, 20.2ms
Speed: 14.3ms preprocess, 20.2ms inference, 1.7ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 3 Plate 2s, 11.1ms
Speed: 13.1ms preprocess, 11.1ms inference, 3.9ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 3 Plate 2s, 13.4ms
Speed: 13.2ms preprocess, 13.4ms inference, 18.8ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 2 Plate 2s, 1 Plate 3, 17.3ms
Speed: 14.6ms preprocess, 17.3ms inference, 19.0ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 2 Plate 2s, 1 Plate 3, 13.2ms
Speed: 15.5ms preprocess, 13.2ms inference, 3.1ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 1 Plate 2, 1 Plate 3, 11.4ms
Speed: 13.4ms preprocess, 11.4ms inference, 4.7ms postprocess per image at shape (1, 3, 1440, 1920)

0: 1440x1920 1