In [27]:
from ultralytics import YOLO
import torch
import cv2
import argparse

# Parse the command line arguments
#variables = parse_variables()

# Select device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'mps' if torch.backends.mps.is_available() else device

img_path = './test_image.jpg'
img = cv2.imread(img_path)
H, W = img.shape[:2]

# Load the pretrained model
model = YOLO("./yolo-face.pt")                
model.to(device)

# Run inference on the webcam
res = model.predict('./test_image.jpg', verbose=True)[0]




image 1/1 /Users/francescobassignana/digital_photo_ai/test_image.jpg: 640x480 1 FACE, 304.3ms
Speed: 13.7ms preprocess, 304.3ms inference, 126.4ms postprocess per image at shape (1, 3, 640, 480)


In [28]:
res.show()

In [29]:
res.boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.], device='mps:0')
conf: tensor([0.8970], device='mps:0')
data: tensor([[1.1184e+03, 1.4589e+03, 1.9080e+03, 2.5367e+03, 8.9697e-01, 0.0000e+00]], device='mps:0')
id: None
is_track: False
orig_shape: (4032, 3024)
shape: torch.Size([1, 6])
xywh: tensor([[1513.1968, 1997.7686,  789.5991, 1077.8091]], device='mps:0')
xywhn: tensor([[0.5004, 0.4955, 0.2611, 0.2673]], device='mps:0')
xyxy: tensor([[1118.3972, 1458.8640, 1907.9963, 2536.6731]], device='mps:0')
xyxyn: tensor([[0.3698, 0.3618, 0.6310, 0.6291]], device='mps:0')

In [32]:
xywh = res.boxes.xywh
w = xywh[0,2]
h = xywh[0,3]
ratio = w / h

In [35]:
import numpy as np

def adjust_box_to_ratio(x1, y1, x2, y2, img_w, img_h,
                        target_w_over_h=7/9, strategy="auto"):
    """
    Adjust an xyxy box to a target aspect ratio (w:h), preserving center.

    Args:
        x1,y1,x2,y2: box in pixels (floats ok)
        img_w, img_h: image size
        target_w_over_h: desired width/height ratio (e.g., 7/9 for portrait)
        strategy: "expand", "shrink", or "auto"
            - expand: only increase width or height
            - shrink: only decrease width or height
            - auto: change the dimension (w or h) that needs the smallest delta (may expand or shrink)

    Returns:
        nx1, ny1, nx2, ny2 (floats)
    """
    if target_w_over_h <= 0:
        raise ValueError("target_w_over_h must be > 0")

    # center + current size
    cx = (x1 + x2) / 2.0
    cy = (y1 + y2) / 2.0
    w  = max(1.0, x2 - x1)
    h  = max(1.0, y2 - y1)

    cur_ratio = w / h
    if abs(cur_ratio - target_w_over_h) < 1e-9:
        # Already at ratio; just shift/clamp to be safe
        nx1, ny1, nx2, ny2 = cx - w/2, cy - h/2, cx + w/2, cy + h/2
    else:
        # Candidate 1: change width only (w' = target * h)
        w_from_h = target_w_over_h * h
        # Candidate 2: change height only (h' = w / target)
        h_from_w = w / target_w_over_h

        if strategy == "expand":
            if cur_ratio < target_w_over_h:
                # too tall -> widen
                w = max(w, w_from_h)
            else:
                # too wide -> make taller
                h = max(h, h_from_w)
        elif strategy == "shrink":
            if cur_ratio < target_w_over_h:
                # too tall -> reduce height
                h = min(h, h_from_w)
            else:
                # too wide -> reduce width
                w = min(w, w_from_h)
        else:  # "auto": minimal change to one side
            dw = abs(w_from_h - w)
            dh = abs(h_from_w - h)
            if dw <= dh:
                w = w_from_h
            else:
                h = h_from_w

        nx1, ny1 = cx - w / 2.0, cy - h / 2.0
        nx2, ny2 = cx + w / 2.0, cy + h / 2.0

    # Shift inside image (preserve size)
    dx_left  = max(0.0, -nx1)
    dx_right = max(0.0, nx2 - img_w)
    dy_top   = max(0.0, -ny1)
    dy_bot   = max(0.0, ny2 - img_h)

    nx1 += (dx_left - dx_right); nx2 += (dx_left - dx_right)
    ny1 += (dy_top - dy_bot);    ny2 += (dy_top - dy_bot)

    # If still out (box bigger than image), scale down uniformly to fit
    new_w = nx2 - nx1
    new_h = ny2 - ny1
    if new_w > img_w or new_h > img_h:
        s = min(img_w / new_w, img_h / new_h, 1.0)
        new_w *= s; new_h *= s
        nx1 = cx - new_w / 2.0; nx2 = cx + new_w / 2.0
        ny1 = cy - new_h / 2.0; ny2 = cy + new_h / 2.0
        # shift again just in case of fp rounding
        dx_left  = max(0.0, -nx1)
        dx_right = max(0.0, nx2 - img_w)
        dy_top   = max(0.0, -ny1)
        dy_bot   = max(0.0, ny2 - img_h)
        nx1 += (dx_left - dx_right); nx2 += (dx_left - dx_right)
        ny1 += (dy_top - dy_bot);    ny2 += (dy_top - dy_bot)

    # Final safety clamp
    nx1 = float(np.clip(nx1, 0, img_w - 1))
    ny1 = float(np.clip(ny1, 0, img_h - 1))
    nx2 = float(np.clip(nx2, 0, img_w - 1))
    ny2 = float(np.clip(ny2, 0, img_h - 1))
    if nx2 <= nx1: nx2 = min(img_w - 1.0, nx1 + 1.0)
    if ny2 <= ny1: ny2 = min(img_h - 1.0, ny1 + 1.0)

    return nx1, ny1, nx2, ny2

In [38]:
boxes_xyxy = res.boxes.xyxy.detach().cpu().numpy() if res.boxes is not None else np.empty((0,4))
adjusted = []
for (x1, y1, x2, y2) in boxes_xyxy:
    ax1, ay1, ax2, ay2 = adjust_box_to_ratio(x1, y1, x2, y2, img_w=W, img_h=H,
                                         target_w_over_h=7/9, strategy="auto")

    adjusted.append((int(round(ax1)), int(round(ay1)), int(round(ax2)), int(round(ay2))))


In [None]:
# --- draw adjusted boxes ---
vis = img.copy()
for (x1, y1, x2, y2), box in zip(adjusted, boxes_xyxy):
    cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 3)  # green box
    print(f"ratio : {(x2 - x1) / (y2 - y1):.3f}")
    # optional: draw original box in a thin line to compare
    cv2.rectangle(vis, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 1)
    print(f"old ratio : {(box[2] - box[0]) / (box[3] - box[1]):.3f}")



ratio : 0.777
old ratio : 0.733


In [41]:
cv2.imwrite('faces_7x9.jpg', vis)
print(f"Saved: faces_7x9.jpg  | boxes: {adjusted}")


Saved: faces_7x9.jpg  | boxes: [(1094, 1459, 1932, 2537)]


Pixels per mm: 2.83 x 2.83


In [42]:
import cv2
import numpy as np
from ultralytics import YOLO

img_path = "./test_image.jpg"
img = cv2.imread(img_path)
H, W = img.shape[:2]

# You already have x_ppi, y_ppi (pixels per inch). Example:
# x_ppi, y_ppi = 300, 300
# If you computed pixels/mm instead, multiply mm by ppmm directly (skip /25.4).

model = YOLO("./yolo-face.pt")
res = model.predict(img_path, verbose=False)[0]

boxes_xyxy = res.boxes.xyxy.detach().cpu().numpy() if res.boxes is not None else np.empty((0,4))

vis = img.copy()
for (x1, y1, x2, y2) in boxes_xyxy:
    ax1, ay1, ax2, ay2 = adjust_box_to_ratio(x1, y1, x2, y2, img_w=W, img_h=H,
                                         target_w_over_h=7/9, strategy="auto")

    # draw adjusted box
    cv2.rectangle(vis, (int(round(ax1)), int(round(ay1))),
                        (int(round(ax2)), int(round(ay2))),
                        (0,255,0), 3)
    if warn:
        print("⚠️ Required 35×45 mm box cannot fully fit inside the image; clamped to image bounds.")

cv2.imwrite("faces_7x9_min35x45mm.jpg", vis)
print("Saved faces_7x9_min35x45mm.jpg")

Saved faces_7x9_min35x45mm.jpg


In [22]:
# Cell 0: helpers + imports
import os
import cv2
import numpy as np

def colorize_grabcut_mask(mask):
    """
    Color map for GrabCut labels:
      0: GC_BGD      -> red-ish
      1: GC_FGD      -> green
      2: GC_PR_BGD   -> orange
      3: GC_PR_FGD   -> cyan
    """
    h, w = mask.shape[:2]
    out = np.zeros((h, w, 3), np.uint8)
    out[mask == cv2.GC_BGD]    = (0,   0, 200)   # BGR
    out[mask == cv2.GC_FGD]    = (0, 200,   0)
    out[mask == cv2.GC_PR_BGD] = (0, 165, 255)
    out[mask == cv2.GC_PR_FGD] = (255, 255, 0)
    return out

def overlay_mask(base, mask_bin, alpha=0.5):
    """Overlay a white binary mask on base image."""
    base = base.copy()
    white = np.full_like(base, 255)
    sel = mask_bin > 0
    base[sel] = cv2.addWeighted(base[sel], 1 - alpha, white[sel], alpha, 0)
    return base

def put_caption(img, text):
    """Add a black bar caption at the top of an image."""
    img = img.copy()
    cv2.rectangle(img, (0, 0), (img.shape[1], 28), (0, 0, 0), -1)
    cv2.putText(img, text, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                (255, 255, 255), 1, cv2.LINE_AA)
    return img

# Where to save the debug images:
out_dir = "hair_debug"
os.makedirs(out_dir, exist_ok=True)

In [25]:

def top_of_hair_y_debug(img, box_xyxy, img_w, img_h,
                        top_pad_ratio=1.4, side_pad_ratio=0.35,
                        below_pad_ratio=0.2,
                        band_rel_width=0.35,
                        border_bg_px=15,
                        grabcut_iters=6):
    """
    Returns:
      y_top_global (int or None),
      debug dict with:
        'roi_bgr', 'seed_mask_color', 'gc_mask_color',
        'fg_only_overlay', 'main_comp_overlay', 'band_overlay'
    """
    debug = {}
    x1, y1, x2, y2 = [int(round(v)) for v in box_xyxy]
    w = x2 - x1
    h = y2 - y1
    if w <= 0 or h <= 0:
        return None, debug

    # ROI bounds
    roi_x1 = max(0, x1 - int(side_pad_ratio * w))
    roi_x2 = min(img_w, x2 + int(side_pad_ratio * w))
    roi_y1 = max(0, y1 - int(top_pad_ratio * h))
    roi_y2 = min(img_h, y2 + int(below_pad_ratio * h))

    roi = img[roi_y1:roi_y2, roi_x1:roi_x2]
    if roi.size == 0:
        return None, debug
    rh, rw = roi.shape[:2]
    debug['roi_bgr'] = put_caption(roi, "ROI (extended above face)")

    # Seed mask
    mask = np.full((rh, rw), cv2.GC_PR_BGD, np.uint8)
    # definite BG border
    mask[:border_bg_px, :] = cv2.GC_BGD
    mask[-border_bg_px:, :] = cv2.GC_BGD
    mask[:, :border_bg_px] = cv2.GC_BGD
    mask[:, -border_bg_px:] = cv2.GC_BGD

    # Face inside ROI
    fx1, fy1 = x1 - roi_x1, y1 - roi_y1
    fx2, fy2 = x2 - roi_x1, y2 - roi_y1
    fw, fh = fx2 - fx1, fy2 - fy1
    if fw <= 0 or fh <= 0:
        return None, debug

    # Definite FG ellipse for face
    face_center = (int((fx1 + fx2) / 2), int((fy1 + fy2) / 2))
    axes = (max(1, int(0.45 * fw)), max(1, int(0.55 * fh)))
    ellipse_mask = np.zeros_like(mask, np.uint8)
    cv2.ellipse(ellipse_mask, face_center, axes, 0, 0, 360, 1, -1)
    mask[ellipse_mask == 1] = cv2.GC_FGD

    # Probable FG strip above the face to encourage hair
    hair_top_y = max(0, int(fy1 - 0.1 * fh))
    hx1 = max(0, int(fx1 + 0.08 * fw))
    hx2 = min(rw, int(fx2 - 0.08 * fw))
    hy1 = max(0, hair_top_y - int(0.5 * fh))         # push higher
    hy2 = max(0, int(fy1 - 0.02 * fh))
    if hx2 > hx1 and hy2 > hy1:
        # Don't overwrite definite BG; where mask is BG keep BG, else set PR_FGD
        region = mask[hy1:hy2, hx1:hx2]
        region = np.where(region == cv2.GC_BGD, cv2.GC_BGD, cv2.GC_PR_FGD)
        mask[hy1:hy2, hx1:hx2] = region

    debug['seed_mask_color'] = put_caption(colorize_grabcut_mask(mask), "Seed mask (BG/FG/PR_BG/PR_FG)")

    # Run GrabCut with seeds
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    try:
        cv2.grabCut(roi, mask, None, bgdModel, fgdModel, grabcut_iters, cv2.GC_INIT_WITH_MASK)
    except cv2.error:
        return None, debug

    debug['gc_mask_color'] = put_caption(colorize_grabcut_mask(mask), "GrabCut output labels")

    # Foreground (definite or probable)
    fg = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 1, 0).astype(np.uint8)
    debug['fg_only_overlay'] = put_caption(overlay_mask(roi, fg), "FG overlay (all FG)")

    # Keep only component intersecting center line (within a central band)
    cx_global = int((x1 + x2) / 2)
    cx_roi = int(np.clip(cx_global - roi_x1, 0, rw - 1))
    band_half = max(1, int(0.5 * band_rel_width * rw))
    bx1 = max(0, cx_roi - band_half)
    bx2 = min(rw, cx_roi + band_half)

    num_labels, labels = cv2.connectedComponents(fg, connectivity=4)
    if num_labels <= 1:
        return None, debug

    band_labels = labels[:, bx1:bx2]
    counts = np.bincount(band_labels.reshape(-1), minlength=num_labels)
    counts[0] = 0
    main_label = np.argmax(counts)
    if counts[main_label] == 0:
        return None, debug

    main_comp = (labels == main_label).astype(np.uint8)

    main_overlay = roi.copy()
    sel = main_comp.astype(bool)

    # Blend selected pixels toward white without cv2.addWeighted shape issues
    # new = 0.4 * original + 0.6 * 255
    main_overlay[sel] = (0.4 * main_overlay[sel] + 0.6 * 255).astype(np.uint8)

    # show central band
    cv2.rectangle(main_overlay, (bx1, 0), (bx2, rh - 1), (255, 0, 255), 1)
    debug['main_comp_overlay'] = put_caption(main_overlay, "Main FG component (center-band)")
    # Topmost FG row within band
    band = main_comp[:, bx1:bx2]
    rows = np.where(band.any(axis=1))[0]
    if rows.size == 0:
        return None, debug

    y_top_in_roi = int(rows[0])
    y_top_global = roi_y1 + y_top_in_roi

    band_overlay = main_overlay.copy()
    cv2.line(band_overlay, (0, y_top_in_roi), (rw - 1, y_top_in_roi), (0, 0, 255), 2)
    debug['band_overlay'] = put_caption(band_overlay, "Central band + detected top line")

    return y_top_global, debug

In [None]:
vis = img.copy()
for i, (x1, y1, x2, y2) in enumerate(boxes_xyxy, start=1):
    y_top, dbg = top_of_hair_y_debug(img, (x1, y1, x2, y2), W, H)

    # draw original face box
    cv2.rectangle(vis, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 3)

    if y_top is not None:
        cx = int(round((x1 + x2) / 2.0))
        cv2.circle(vis, (cx, int(y_top)), 6, (0, 0, 255), -1)
        cv2.line(vis, (0, int(y_top)), (W - 1, int(y_top)), (0, 0, 255), 2)
    else:
        print(f"[Face {i}] Could not estimate top-of-hair.")

    # Save per-step visuals for this face (if present)
    steps = ['roi_bgr', 'seed_mask_color', 'gc_mask_color', 'fg_only_overlay',
             'main_comp_overlay', 'band_overlay']
    for s in steps:
        if s in dbg
            cv2.imwrite(os.path.join(out_dir, f"face{i:02d}_{s}.png"), dbg[s])

cv2.imwrite(os.path.join(out_dir, "result_with_top_lines.jpg"), vis)
print(f"Saved per-step visuals to: {out_dir}")

Saved per-step visuals to: hair_debug


error: OpenCV(4.12.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/core/src/arithm.cpp:665: error: (-209:Sizes of input arguments do not match) The operation is neither 'array op array' (where arrays have the same size and the same number of channels), nor 'array op scalar', nor 'scalar op array' in function 'arithm_op'
