In [4]:
# vision_eye_mapping_distance.py
# Detect and track people with Ultralytics YOLO11 and label each one with its
# distance (in meters) from the bottom-left corner of the frame.

import cv2
import numpy as np
from ultralytics import YOLO
from pathlib import Path

In [5]:
# ---------- Settings ----------
# Input video to process. This name is read when the capture is opened further
# down; it was previously not assigned anywhere in this file, so a fresh run
# failed with NameError. Replace the placeholder with your own file.
VIDEO_PATH = "input.mp4"
WEIGHTS = "yolo11s.pt"   # COCO (person=0)
SAVE_PATH = "outputs/vision_eye_lines_meters_label.mp4"  # None to disable
IMGSZ = 1280             # `imgsz` passed to model.track
CONF = 0.25              # `conf` passed to model.track
TRACKER = "bytetrack.yaml"

# --- Distance scale (no clicks) ---
# Set either METERS_PER_PIXEL directly OR provide REF_LEN_* to compute it.
# When both REF_LEN_* values are > 0, METERS_PER_PIXEL is overwritten with
# REF_LEN_METERS / REF_LEN_PIXELS.
METERS_PER_PIXEL = 1 / 70  # 70 px per meter (~1.43 cm/px). Adjust to your scene.
REF_LEN_METERS = 0.0     # e.g., 0.60
REF_LEN_PIXELS = 0.0     # e.g., 48

# ---------- Helpers ----------

def color_for_id(tid: int):
    """Return a deterministic 3-tuple of ints derived from a track ID.

    Each component is ``tid`` multiplied by a fixed factor (37, 17, 91) and
    reduced modulo 255, so every component lies in the range 0..254. The
    same ID always yields the same tuple.
    """
    factors = (37, 17, 91)
    return tuple(int(factor * tid) % 255 for factor in factors)


def draw_box_label(img, x1, y1, text, color):
    """Draw ``text`` in black on a filled ``color`` background near (x1, y1).

    The label sits just above ``y1``; when that would push it past the top
    edge of the image, it is clamped so the text stays visible.

    Args:
        img: Image to draw on (modified in place).
        x1: Left x-coordinate of the label anchor (converted with ``int``).
        y1: Top y-coordinate of the box being labelled (converted with ``int``).
        text: Label string.
        color: Fill color of the background rectangle.
    """
    # Measure with the same font parameters used for drawing, so the
    # background rectangle matches the rendered text instead of a fixed width.
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.6
    thickness = 1
    (tw, th), baseline = cv2.getTextSize(text, font, scale, thickness)

    x0 = int(x1)
    # Keep the text baseline at least (th + 6) px from the top of the image.
    y0 = max(int(y1) - 6, th + 6)

    # 4 px of horizontal padding on each side; extend below the baseline so
    # descenders are covered as well.
    cv2.rectangle(img, (x0, y0 - th - 2), (x0 + tw + 8, y0 + baseline), color, -1)
    cv2.putText(img, text, (x0 + 4, y0), font, scale, (0, 0, 0), thickness, cv2.LINE_AA)

# ---------- Model ----------
# Load the detector; keep its class-name table only when it is a dict.
model = YOLO(WEIGHTS)
names = {}
if isinstance(model.names, dict):
    names = model.names

# ---------- Video I/O ----------
# Open the input and fail fast when it cannot be opened.
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise RuntimeError(f"Unable to open video: {VIDEO_PATH}")

# Read one frame up front to learn the frame dimensions.
ok, first = cap.read()
if not ok:
    cap.release()
    raise RuntimeError("Unable to read the first frame.")
H, W = first.shape[0], first.shape[1]

# Fall back to 30 fps when the capture reports a falsy frame rate (e.g. 0).
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps:
    fps = 30.0

# A reference measurement, when both values are positive, replaces the
# directly configured scale.
if REF_LEN_PIXELS > 0 and REF_LEN_METERS > 0:
    METERS_PER_PIXEL = float(REF_LEN_METERS) / float(REF_LEN_PIXELS)
print(f"[INFO] meters/pixel = {METERS_PER_PIXEL:.6f}")

def make_writer(path, w, h, fps_val, fourccs=("mp4v","avc1","XVID","MJPG")):
    """Open a ``cv2.VideoWriter`` for ``path`` using the first codec that works.

    Args:
        path: Output file path. ``None`` or an empty string disables writing.
        w: Frame width in pixels.
        h: Frame height in pixels.
        fps_val: Frame rate of the output video.
        fourccs: FOURCC codes to try, in order.

    Returns:
        An opened ``cv2.VideoWriter``, or ``None`` when writing is disabled.

    Raises:
        RuntimeError: If none of the FOURCC codes yields an opened writer.
    """
    # Treat any empty path as "disabled", matching the caller's truthiness check.
    if not path:
        return None

    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    for fcc in fourccs:
        vw = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*fcc), fps_val, (w, h))
        if vw.isOpened():
            print(f"[INFO] Using FOURCC '{fcc}' -> {out_path}")
            return vw
        # Drop the failed writer before trying the next codec.
        vw.release()
    raise RuntimeError("No compatible codec (try .avi).")

# ---------- Output writer, preview window and main loop ----------
WINDOW_NAME = "Vision-Eye Mapping (meters in label)"

# Reference point for every distance: x = 0, y = H (bottom-left corner of the frame).
origin_x, origin_y = 0, H

writer = None
try:
    writer = make_writer(SAVE_PATH, W, H, fps) if SAVE_PATH else None

    # One frame was already consumed to read the frame size; rewind so the
    # loop below starts again from the first frame.
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(WINDOW_NAME, min(1200, W), min(750, H))

    # ---------- Main loop ----------
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        # classes=[0] keeps only class 0 (person for COCO weights).
        # persist=True: per the Ultralytics tracking docs, keeps tracks across
        # successive calls so IDs stay stable from frame to frame.
        r = model.track(frame, imgsz=IMGSZ, conf=CONF, classes=[0],
                        tracker=TRACKER, persist=True, verbose=False)[0]

        if r.boxes is not None and len(r.boxes) > 0:
            xyxy = r.boxes.xyxy.cpu().numpy()

            # Fall back to -1 for every box when the result carries no track IDs.
            if r.boxes.id is not None:
                ids = r.boxes.id.cpu().numpy().astype(int)
            else:
                ids = np.full(len(xyxy), -1, dtype=int)

            for (x1, y1, x2, y2), tid in zip(xyxy, ids):
                # "Feet" point: horizontal center of the bottom edge of the box.
                cx = 0.5 * (x1 + x2)
                cy = y2

                # Euclidean pixel distance to the origin, scaled to meters.
                dx_px = cx - origin_x
                dy_px = cy - origin_y
                dist_m = float(np.hypot(dx_px, dy_px) * METERS_PER_PIXEL)

                # Box, feet point, and line from the origin to the feet point.
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 255), 2)
                cv2.circle(frame, (int(cx), int(cy)), 3, (255, 255, 0), -1)
                cv2.line(frame, (origin_x, origin_y), (int(cx), int(cy)), (255, 255, 0), 1, cv2.LINE_AA)

                # Label with ID + distance
                label_id = f"ID {tid:03d}" if tid >= 0 else "ID ---"
                label = f"person {label_id} | {dist_m:.2f} m"
                draw_box_label(frame, x1, y1, label, (255, 255, 255))

        # Origin marker
        cv2.circle(frame, (origin_x, origin_y), 6, (0, 0, 255), -1)
        cv2.putText(frame, "Origin", (origin_x + 8, origin_y - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)

        if writer is not None:
            writer.write(frame)
        cv2.imshow(WINDOW_NAME, frame)

        # Esc or 'q' stops processing.
        if cv2.waitKey(1) & 0xFF in [27, ord('q')]:
            break
finally:
    # Always release the capture and writer, even on an exception or Ctrl-C.
    cap.release()
    if writer is not None:
        writer.release()
    cv2.destroyAllWindows()
print("[OK] Terminé.")


[INFO] meters/pixel = 0.014286
[INFO] Using FOURCC 'mp4v' -> outputs/vision_eye_lines_meters_label.mp4
[OK] Terminé.
