In [15]:
import numpy as np
import pyrealsense2 as rs
from ultralytics import YOLO
import cv2
import time

# Weights file handed to YOLO() in main().
MODEL_PATH = "/home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt"

# Passed to model.predict() as conf / iou / imgsz.
CONF_THRES = 0.85
IOU_THRES  = 0.75
IMGSZ      = 640

# -----------------------------
# Measured box size (cm)  <<<<< edit only this part to match your box
# (width/height of the 'front face' as seen by the camera)
# -----------------------------
BOX_W_CM = 23.0
BOX_H_CM = 9.5

# -----------------------------
# Depth ROI sampling / stabilisation parameters
# -----------------------------
ROI_MARGIN_PX = 6      # pulls the OBB mask slightly inward (less depth mixing at the boundary)
MIN_ROI_PIXELS = 80    # minimum number of valid depth pixels required in the ROI
MAD_THRES_M = 0.02     # allowed ROI depth jitter (roughly 2 cm)
# depth_roi_stats() discards samples outside [DEPTH_MIN_M, DEPTH_MAX_M].
DEPTH_MIN_M = 0.15
DEPTH_MAX_M = 3.00

def clamp(v, lo, hi):
    """Limit ``v`` to the interval [lo, hi] and return the result."""
    capped = v if v < hi else hi          # apply the upper bound first ...
    return capped if capped > lo else lo  # ... then the lower bound

def obb_angle_deg_upright0_rightplus(poly4x2: np.ndarray) -> float:
    """Return the tilt of the polygon's principal axis in degrees.

    The principal axis is the eigenvector of the corner covariance matrix
    with the largest eigenvalue. Its sign is normalised so that the y
    component is non-negative, and the result is ``-atan2(vx, vy)``, i.e.
    0 when the axis is parallel to the image y axis.
    """
    corners = poly4x2.astype(np.float32)
    centered = corners - corners.mean(axis=0, keepdims=True)

    eigvals, eigvecs = np.linalg.eig(np.cov(centered.T))
    major = eigvecs[:, np.argmax(eigvals)].astype(np.float32)
    vx = float(major[0])
    vy = float(major[1])

    # An eigenvector's sign is arbitrary; force it into the vy >= 0 half-plane.
    if vy < 0:
        vx = -vx
        vy = -vy

    return -float(np.degrees(np.arctan2(vx, vy)))

def draw_hud(img, lines, x=10, y=10, line_h=24):
    """Overlay a translucent black panel with white text rows on ``img`` (in place).

    The panel's top-left corner is (x, y); text rows are ``line_h`` pixels apart.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale, thickness, pad = 0.6, 2, 8

    # The widest rendered row decides the panel width (never below 10 px).
    text_w = 10
    for s in lines:
        row_w = cv2.getTextSize(s, font, scale, thickness)[0][0]
        if row_w > text_w:
            text_w = row_w

    corner = (x + text_w + pad * 2, y + line_h * len(lines) + pad * 2)

    # Paint the box on a copy, then blend the copy back with weight 0.45.
    shaded = img.copy()
    cv2.rectangle(shaded, (x, y), corner, (0, 0, 0), -1)
    cv2.addWeighted(shaded, 0.45, img, 0.55, 0, img)

    baseline = y + pad + 18
    for s in lines:
        cv2.putText(img, s, (x + pad, baseline), font, scale, (255, 255, 255), thickness, cv2.LINE_AA)
        baseline += line_h

def poly_shrink_towards_center(poly4x2: np.ndarray, margin_px: float):
    """Pull each polygon corner towards the corner centroid by ``margin_px`` pixels.

    Used to shrink the OBB slightly so that depth samples near the boundary
    are excluded. The step of every corner is capped at its own distance to
    the centroid: a polygon smaller than the margin collapses onto the
    centroid instead of overshooting it and turning inside-out.

    Args:
        poly4x2: corner coordinates, one (x, y) row per corner.
        margin_px: distance in pixels to move each corner along the line to
            the centroid.

    Returns:
        float32 array of the moved corners, same shape as the input.
    """
    p = poly4x2.astype(np.float32)
    c = p.mean(axis=0, keepdims=True)
    v = p - c
    dist = np.linalg.norm(v, axis=1, keepdims=True)
    # Never travel further than the centroid itself.
    step = np.minimum(margin_px, dist)
    # +1e-6 guards the division for a corner that already sits on the centroid.
    return p - (v / (dist + 1e-6)) * step

def depth_roi_stats(depth_u16: np.ndarray, depth_scale: float, poly4x2: np.ndarray):
    """
    Gather the depth samples inside the polygon and summarise them robustly.

    Raw values are multiplied by ``depth_scale``; only samples that are
    positive and inside [DEPTH_MIN_M, DEPTH_MAX_M] are kept.

    return: (median, MAD, valid_count), or (0.0, 0.0, 0) when no sample survives
    """
    rows, cols = depth_u16.shape[:2]

    # Rasterise the (rounded) polygon into a full-frame mask.
    roi_mask = np.zeros((rows, cols), dtype=np.uint8)
    corners = np.round(poly4x2).astype(np.int32).reshape(-1, 1, 2)
    cv2.fillPoly(roi_mask, [corners], 255)

    samples = depth_u16[roi_mask == 255].astype(np.float32) * depth_scale
    in_range = (samples > 0) & (samples >= DEPTH_MIN_M) & (samples <= DEPTH_MAX_M)
    samples = samples[in_range]

    count = int(samples.size)
    if count == 0:
        return 0.0, 0.0, 0

    center = float(np.median(samples))
    spread = float(np.median(np.abs(samples - center)))  # median absolute deviation
    return center, spread, count

def estimate_Z_from_size(poly4x2: np.ndarray, intr, W_cm: float, H_cm: float) -> float:
    """Estimate Z (m) from the measured box size (W/H) and the OBB's size in the image.

    For a pinhole camera an edge with pixel displacement (du, dv) on a
    fronto-parallel plane at depth Z has metric length
    ``Z * hypot(du / fx, dv / fy)``. Each edge is therefore normalised with
    the focal length of its own direction, rather than assuming that the long
    edge is horizontal (fx) and the short edge vertical (fy). When
    ``fx == fy`` this equals ``f * size / pixel_length``.

    Args:
        poly4x2: the four OBB corners in pixels, in order around the box.
        intr: object exposing focal lengths ``fx`` and ``fy`` in pixels.
        W_cm, H_cm: measured box side lengths in centimetres (any order).

    Returns:
        Mean of the two depth estimates (long side and short side), in metres.
    """
    p = poly4x2.astype(np.float32)
    edges = np.roll(p, -1, axis=0) - p  # edge i runs from corner i to corner i+1

    # Unitless edge length = metric length / Z.
    unit_len = np.hypot(edges[:, 0] / intr.fx, edges[:, 1] / intr.fy)
    long_n = float(unit_len.max())
    short_n = float(unit_len.min())

    # The longer measured side is assumed to correspond to the longer edge.
    big_m = max(W_cm, H_cm) / 100.0
    small_m = min(W_cm, H_cm) / 100.0

    # The floor keeps a degenerate (zero-length) edge from dividing by zero.
    Z1 = big_m / max(long_n, 1e-9)
    Z2 = small_m / max(short_n, 1e-9)

    # Average the two estimates.
    return float(0.5 * (Z1 + Z2))

def XY_from_pixel_and_Z(cx: int, cy: int, intr, Z: float):
    """Back-project pixel (cx, cy) at depth ``Z`` to X and Y with the pinhole model.

    ``intr`` must expose ``ppx``, ``ppy``, ``fx`` and ``fy``; X and Y come
    out in the same unit as ``Z``.
    """
    x_norm = (cx - intr.ppx) / intr.fx
    y_norm = (cy - intr.ppy) / intr.fy
    return float(x_norm * Z), float(y_norm * Z)

def main():
    """Run live OBB detection on a RealSense colour + depth stream with a pose HUD.

    Per frame: align depth to colour, filter the depth frame, run the YOLO
    model, keep the most confident OBB, fuse the ROI-median depth with the
    size-based depth estimate, and show position / angle in an OpenCV window.
    The loop ends when ESC is pressed.
    """
    model = YOLO(MODEL_PATH)
    print("[INFO] Model loaded:", MODEL_PATH)
    print(f"[INFO] conf>={CONF_THRES}, iou={IOU_THRES}, imgsz={IMGSZ}")
    print("[INFO] ESC to quit")

    pipeline = rs.pipeline()
    config = rs.config()

    width, height, fps = 640, 480, 30
    config.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps)
    config.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps)

    profile = pipeline.start(config)

    # Everything after start() runs under try/finally so that a failure during
    # setup (not only inside the frame loop) still stops the pipeline.
    try:
        align = rs.align(rs.stream.color)

        # depth scale (printed below as metres per raw unit)
        depth_sensor = profile.get_device().first_depth_sensor()
        depth_scale = float(depth_sensor.get_depth_scale())
        print(f"[INFO] depth_scale = {depth_scale:.8f} m/unit")

        # RealSense depth filters (stabilisation)
        temporal = rs.temporal_filter()
        spatial = rs.spatial_filter()
        hole = rs.hole_filling_filter()

        # optional tuning
        spatial.set_option(rs.option.filter_magnitude, 2)
        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
        spatial.set_option(rs.option.filter_smooth_delta, 20)

        cv2.namedWindow("OBB Live", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("OBB Live", width, height)

        # Last successful measurement; the HUD keeps showing it (marked STALE)
        # while no fresh detection is available.
        last = {
            "ok": False,
            "Xcm": 0.0, "Ycm": 0.0, "Zcm": 0.0,
            "distcm": 0.0,
            "angle": 0.0,
            "conf": 0.0,
            "cls": -1,
            "t": 0.0,
            "cx": width // 2,
            "cy": height // 2,
            "Zdepth_cm": 0.0,
            "Zsize_cm": 0.0,
            "Zuse_cm": 0.0,
            "mad_cm": 0.0,
            "roi_n": 0,
        }

        while True:
            frames = pipeline.wait_for_frames()
            frames = align.process(frames)

            color_frame = frames.get_color_frame()
            depth_frame = frames.get_depth_frame()
            if not color_frame or not depth_frame:
                continue

            # depth filtering (the depth frame goes through the filters first)
            depth_frame = spatial.process(depth_frame)
            depth_frame = temporal.process(depth_frame)
            depth_frame = hole.process(depth_frame)

            frame = np.asanyarray(color_frame.get_data())
            intr = color_frame.profile.as_video_stream_profile().get_intrinsics()

            # depth image (u16) for the ROI statistics
            depth_u16 = np.asanyarray(depth_frame.get_data())

            vis = frame.copy()
            status = "WAIT"

            results = model.predict(
                frame,
                imgsz=IMGSZ,
                conf=CONF_THRES,
                iou=IOU_THRES,
                verbose=False
            )
            r = results[0]

            # Keep only the most confident detection above the threshold.
            best = None  # (conf, cls, poly4x2)
            if getattr(r, "obb", None) is not None and r.obb is not None:
                obb = r.obb
                if obb.xyxyxyxy is not None and len(obb.xyxyxyxy) > 0:
                    polys = obb.xyxyxyxy.cpu().numpy()
                    confs = obb.conf.cpu().numpy().astype(float)
                    clss  = obb.cls.cpu().numpy().astype(int)

                    keep = confs >= CONF_THRES
                    for poly8, cf, ci in zip(polys[keep], confs[keep], clss[keep]):
                        if best is None or cf > best[0]:
                            best = (cf, ci, poly8.reshape(4, 2))

            if best is not None:
                cf, ci, poly = best

                # draw OBB
                poly_i = np.round(poly).astype(np.int32).reshape(-1, 1, 2)
                cv2.polylines(vis, [poly_i], True, (0, 255, 0), 2)

                cx = int(np.mean(poly[:, 0]))
                cy = int(np.mean(poly[:, 1]))
                cx = clamp(cx, 0, width - 1)
                cy = clamp(cy, 0, height - 1)
                cv2.circle(vis, (cx, cy), 4, (0, 0, 255), -1)

                # ROI depth median (polygon shrunk slightly to reduce boundary mixing)
                poly_shrunk = poly_shrink_towards_center(poly, ROI_MARGIN_PX)
                poly_shrunk[:, 0] = np.clip(poly_shrunk[:, 0], 0, width - 1)
                poly_shrunk[:, 1] = np.clip(poly_shrunk[:, 1], 0, height - 1)

                Z_roi_m, mad_m, roi_n = depth_roi_stats(depth_u16, depth_scale, poly_shrunk)

                # size-based Z
                Z_size_m = estimate_Z_from_size(poly, intr, BOX_W_CM, BOX_H_CM)

                # gating + fusion
                use_depth = (Z_roi_m > 0.0 and roi_n >= MIN_ROI_PIXELS and mad_m <= MAD_THRES_M)
                if use_depth:
                    # Stable depth gets the larger weight:
                    # the smaller the MAD, the larger alpha.
                    alpha = clamp(0.85 - (mad_m / MAD_THRES_M) * 0.35, 0.55, 0.90)
                    Z_use_m = alpha * Z_roi_m + (1.0 - alpha) * Z_size_m
                    status = "OK_FUSED"
                else:
                    # Unstable or insufficient depth: fall back to the size estimate.
                    Z_use_m = Z_size_m
                    status = "OK_SIZE_ONLY" if Z_use_m > 0 else "DEPTH_INVALID"

                if Z_use_m > 0.0:
                    X, Y = XY_from_pixel_and_Z(cx, cy, intr, Z_use_m)
                    Z = Z_use_m
                    dist = float(np.sqrt(X*X + Y*Y + Z*Z))
                    angle = obb_angle_deg_upright0_rightplus(poly)

                    last["ok"] = True
                    last["Xcm"], last["Ycm"], last["Zcm"] = X*100.0, Y*100.0, Z*100.0
                    last["distcm"] = dist*100.0
                    last["angle"] = angle
                    last["conf"] = float(cf)
                    last["cls"] = int(ci)
                    last["t"] = time.time()
                    last["cx"], last["cy"] = cx, cy
                    last["Zdepth_cm"] = Z_roi_m * 100.0
                    last["Zsize_cm"]  = Z_size_m * 100.0
                    last["Zuse_cm"]   = Z_use_m * 100.0
                    last["mad_cm"]    = mad_m * 100.0
                    last["roi_n"]     = int(roi_n)

                # Also draw the shrunken ROI for reference (optional).
                poly2_i = np.round(poly_shrunk).astype(np.int32).reshape(-1, 1, 2)
                cv2.polylines(vis, [poly2_i], True, (255, 255, 0), 1)

            else:
                status = "NO_DET"

            # HUD: a measurement older than 0.5 s is flagged STALE.
            age = time.time() - last["t"] if last["ok"] else 999.0
            stale = "STALE" if (not last["ok"] or age > 0.5) else "LIVE"

            hud_lines = [
                f"status: {status} / {stale} (age={age:.2f}s)",
                f"conf={last['conf']:.2f}  cls={last['cls']}",
                f"cam XYZ(cm)=({last['Xcm']:+.2f}, {last['Ycm']:+.2f}, {last['Zcm']:+.2f})",
                f"dist={last['distcm']:.2f} cm   angle={last['angle']:+.2f} deg",
                f"center px=({last['cx']},{last['cy']})",
                f"Z(depth/size/use)=( {last['Zdepth_cm']:.1f} / {last['Zsize_cm']:.1f} / {last['Zuse_cm']:.1f} ) cm",
                f"ROI n={last['roi_n']}  MAD={last['mad_cm']:.2f} cm  (thres={MAD_THRES_M*100:.1f}cm)",
                f"BOX(WxH)=( {BOX_W_CM:.1f} x {BOX_H_CM:.1f} ) cm",
            ]
            draw_hud(vis, hud_lines)

            cv2.imshow("OBB Live", vis)
            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                break

    finally:
        pipeline.stop()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


[INFO] Model loaded: /home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt
[INFO] conf>=0.85, iou=0.75, imgsz=640
[INFO] ESC to quit
[INFO] depth_scale = 0.00010000 m/unit


In [12]:
import numpy as np
import pyrealsense2 as rs
from ultralytics import YOLO
import cv2
import time
from collections import deque

# Weights file handed to YOLO() in main().
MODEL_PATH = "/home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt"

# Passed to model.predict() as conf / iou / imgsz.
CONF_THRES = 0.85
IOU_THRES  = 0.75
IMGSZ      = 640

# -----------------------------
# Measured box size (cm)
# -----------------------------
BOX_W_CM = 23.0
BOX_H_CM = 9.5

# -----------------------------
# Goal: N valid samples
# -----------------------------
AVG_N = 10
TIMEOUT_SEC = 25.0

# -----------------------------
# Depth ROI stabilisation parameters
# -----------------------------
ROI_MARGIN_PX  = 6
MIN_ROI_PIXELS = 120      # raised slightly from 80 (more stable)
MAD_THRES_M    = 0.020    # 2 cm (0.015~0.03 depending on your environment)
DEPTH_MIN_M    = 0.15
DEPTH_MAX_M    = 3.00

# -----------------------------
# Extra safeguards for rejecting "weird values"
# -----------------------------
# 1) Z range (this range is recommended if the box distance is roughly 20~80 cm)
Z_RANGE_CM = (15.0, 120.0)

# 2) Measured-size consistency check (how far the size in cm estimated from
#    the pixel size + Z_use deviates from the measured size)
#    0.25 allows up to +/-25 %
SIZE_REL_ERR_MAX = 0.25

# 3) Jump filter (skip a sample that suddenly jumps relative to the previous valid one)
JUMP_XY_CM = 3.5
JUMP_Z_CM  = 6.0
JUMP_ANG_DEG = 10.0

# 4) If consecutive skips run too long, reset the prev reference (prevents skipping forever)
MAX_CONSEC_SKIPS_RESET = 15

def clamp(v, lo, hi):
    """Limit ``v`` to the interval [lo, hi] and return the result."""
    capped = v if v < hi else hi          # apply the upper bound first ...
    return capped if capped > lo else lo  # ... then the lower bound

def poly_shrink_towards_center(poly4x2: np.ndarray, margin_px: float):
    """Pull each polygon corner towards the corner centroid by ``margin_px`` pixels.

    The step of every corner is capped at its own distance to the centroid:
    a polygon smaller than the margin collapses onto the centroid instead of
    overshooting it and turning inside-out.

    Args:
        poly4x2: corner coordinates, one (x, y) row per corner.
        margin_px: distance in pixels to move each corner along the line to
            the centroid.

    Returns:
        float32 array of the moved corners, same shape as the input.
    """
    p = poly4x2.astype(np.float32)
    c = p.mean(axis=0, keepdims=True)
    v = p - c
    dist = np.linalg.norm(v, axis=1, keepdims=True)
    # Never travel further than the centroid itself.
    step = np.minimum(margin_px, dist)
    # +1e-6 guards the division for a corner that already sits on the centroid.
    return p - (v / (dist + 1e-6)) * step

def depth_roi_stats(depth_u16: np.ndarray, depth_scale: float, poly4x2: np.ndarray):
    """Return (median, MAD, valid_count) of the scaled depth inside the polygon.

    Raw values are multiplied by ``depth_scale``; only samples that are
    positive and inside [DEPTH_MIN_M, DEPTH_MAX_M] are kept. When no sample
    survives the result is (0.0, 0.0, 0).
    """
    rows, cols = depth_u16.shape[:2]

    # Rasterise the (rounded) polygon into a full-frame mask.
    roi_mask = np.zeros((rows, cols), dtype=np.uint8)
    corners = np.round(poly4x2).astype(np.int32).reshape(-1, 1, 2)
    cv2.fillPoly(roi_mask, [corners], 255)

    samples = depth_u16[roi_mask == 255].astype(np.float32) * depth_scale
    in_range = (samples > 0) & (samples >= DEPTH_MIN_M) & (samples <= DEPTH_MAX_M)
    samples = samples[in_range]

    count = int(samples.size)
    if count == 0:
        return 0.0, 0.0, 0

    center = float(np.median(samples))
    spread = float(np.median(np.abs(samples - center)))  # median absolute deviation
    return center, spread, count

def obb_angle_deg_upright0_rightplus(poly4x2: np.ndarray) -> float:
    """Return the tilt of the polygon's principal axis in degrees.

    The principal axis is the eigenvector of the corner covariance matrix
    with the largest eigenvalue. Its sign is normalised so that the y
    component is non-negative, and the result is ``-atan2(vx, vy)``, i.e.
    0 when the axis is parallel to the image y axis.
    """
    corners = poly4x2.astype(np.float32)
    centered = corners - corners.mean(axis=0, keepdims=True)

    eigvals, eigvecs = np.linalg.eig(np.cov(centered.T))
    major = eigvecs[:, np.argmax(eigvals)].astype(np.float32)
    vx = float(major[0])
    vy = float(major[1])

    # An eigenvector's sign is arbitrary; force it into the vy >= 0 half-plane.
    if vy < 0:
        vx = -vx
        vy = -vy

    return -float(np.degrees(np.arctan2(vx, vy)))

def edges_long_short_px(poly4x2: np.ndarray):
    """Return (longest, shortest) side length in pixels of the 4-corner polygon."""
    corners = poly4x2.astype(np.float32)
    # Consecutive corner pairs, closing the loop from the last corner to the first.
    side_lengths = [
        np.linalg.norm(corners[j] - corners[i])
        for i, j in ((0, 1), (1, 2), (2, 3), (3, 0))
    ]
    return float(max(side_lengths)), float(min(side_lengths))

def estimate_Z_from_size(poly4x2: np.ndarray, intr, W_cm: float, H_cm: float) -> float:
    """Estimate Z (m) from the measured box size and the OBB's size in the image.

    For a pinhole camera an edge with pixel displacement (du, dv) on a
    fronto-parallel plane at depth Z has metric length
    ``Z * hypot(du / fx, dv / fy)``. Each edge is therefore normalised with
    the focal length of its own direction, rather than assuming that the long
    edge is horizontal (fx) and the short edge vertical (fy). When
    ``fx == fy`` this equals ``f * size / pixel_length``.

    Args:
        poly4x2: the four OBB corners in pixels, in order around the box.
        intr: object exposing focal lengths ``fx`` and ``fy`` in pixels.
        W_cm, H_cm: measured box side lengths in centimetres (any order).

    Returns:
        Mean of the two depth estimates (long side and short side), in metres.
    """
    p = poly4x2.astype(np.float32)
    edges = np.roll(p, -1, axis=0) - p  # edge i runs from corner i to corner i+1

    # Unitless edge length = metric length / Z.
    unit_len = np.hypot(edges[:, 0] / intr.fx, edges[:, 1] / intr.fy)
    long_n = float(unit_len.max())
    short_n = float(unit_len.min())

    # The longer measured side is matched with the longer edge.
    big_m = max(W_cm, H_cm) / 100.0
    small_m = min(W_cm, H_cm) / 100.0

    # The floor keeps a degenerate (zero-length) edge from dividing by zero.
    Z1 = big_m / max(long_n, 1e-9)
    Z2 = small_m / max(short_n, 1e-9)

    return float(0.5 * (Z1 + Z2))  # meters

def XY_from_pixel_and_Z(cx: int, cy: int, intr, Z_m: float):
    """Back-project pixel (cx, cy) at depth ``Z_m`` to X and Y with the pinhole model.

    ``intr`` must expose ``ppx``, ``ppy``, ``fx`` and ``fy``; X and Y come
    out in the same unit as ``Z_m``.
    """
    x_norm = (cx - intr.ppx) / intr.fx
    y_norm = (cy - intr.ppy) / intr.fy
    return float(x_norm * Z_m), float(y_norm * Z_m)

def size_consistency_check(poly4x2, intr, Z_use_m, W_cm, H_cm, rel_err_max=0.25):
    """Check that the OBB's size at depth ``Z_use_m`` matches the measured box size.

    Key idea: when the depth jumps, Z_use is off, and the real-world size
    obtained from (pixel size * Z) then deviates strongly from the measured
    W/H. Such a sample should be dropped.

    Each edge is converted with the focal length of its own direction:
    ``L = Z * hypot(du / fx, dv / fy)``. This stays correct for a rotated box
    when ``fx != fy`` and equals ``px * Z / f`` when ``fx == fy``.

    Args:
        poly4x2: the four OBB corners in pixels, in order around the box.
        intr: object exposing focal lengths ``fx`` and ``fy`` in pixels.
        Z_use_m: depth used for the sample, in metres.
        W_cm, H_cm: measured box side lengths in centimetres (any order).
        rel_err_max: largest accepted relative error per side.

    Returns:
        (ok, long_cm, short_cm, err_long, err_short) where ``ok`` is True
        only if both relative errors are within ``rel_err_max``.
    """
    p = poly4x2.astype(np.float32)
    edges = np.roll(p, -1, axis=0) - p  # edge i runs from corner i to corner i+1

    # Unitless edge length = metric length / Z.
    unit_len = np.hypot(edges[:, 0] / intr.fx, edges[:, 1] / intr.fy)

    L1_cm = float(unit_len.max()) * Z_use_m * 100.0
    L2_cm = float(unit_len.min()) * Z_use_m * 100.0

    W_big = max(W_cm, H_cm)
    H_sml = min(W_cm, H_cm)

    # The long edge is compared with the bigger side, the short edge with the smaller.
    err1 = abs(L1_cm - W_big) / max(1e-6, W_big)
    err2 = abs(L2_cm - H_sml) / max(1e-6, H_sml)

    ok = (err1 <= rel_err_max) and (err2 <= rel_err_max)
    return ok, L1_cm, L2_cm, err1, err2

def is_jump(prev, cur):
    """Tell whether ``cur`` deviates from ``prev`` by more than the jump thresholds.

    X and Y are compared against JUMP_XY_CM, Z against JUMP_Z_CM and the
    angle against JUMP_ANG_DEG. With no previous sample (``prev is None``)
    nothing counts as a jump.
    """
    if prev is None:
        return False

    limits = (
        ("Xcm", JUMP_XY_CM),
        ("Ycm", JUMP_XY_CM),
        ("Zcm", JUMP_Z_CM),
        ("angle", JUMP_ANG_DEG),
    )
    return any(abs(cur[key] - prev[key]) > limit for key, limit in limits)

def main():
    """Collect AVG_N validated pose samples of the detected box and print their average.

    Per frame: align depth to colour, filter the depth frame, run the YOLO
    model, keep the most confident OBB, fuse ROI-median depth with the
    size-based depth, then pass the sample through the Z-range, size
    consistency and jump filters. The loop stops once AVG_N samples are
    accepted or TIMEOUT_SEC has elapsed.
    """
    model = YOLO(MODEL_PATH)
    print("[INFO] Model loaded:", MODEL_PATH)
    print(f"[INFO] Need {AVG_N} valid samples. Timeout={TIMEOUT_SEC}s")
    print(f"[INFO] BOX(WxH) = {BOX_W_CM:.1f} x {BOX_H_CM:.1f} cm")
    print(f"[INFO] conf>={CONF_THRES}, iou={IOU_THRES}, imgsz={IMGSZ}\n")

    pipeline = rs.pipeline()
    config = rs.config()

    width, height, fps = 640, 480, 30
    config.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps)
    config.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps)

    accepted = []
    prev_valid = None
    consec_skips = 0

    def register_skip():
        """Count a rejected frame; after too many in a row, drop the jump reference."""
        nonlocal prev_valid, consec_skips
        consec_skips += 1
        if consec_skips >= MAX_CONSEC_SKIPS_RESET:
            prev_valid = None
            consec_skips = 0

    profile = pipeline.start(config)

    # Everything after start() runs under try/finally so that a failure during
    # setup (not only inside the frame loop) still stops the pipeline.
    try:
        align = rs.align(rs.stream.color)

        depth_sensor = profile.get_device().first_depth_sensor()
        depth_scale = float(depth_sensor.get_depth_scale())
        print(f"[INFO] depth_scale = {depth_scale:.8f} m/unit\n")

        # depth filters
        temporal = rs.temporal_filter()
        spatial  = rs.spatial_filter()
        hole     = rs.hole_filling_filter()

        spatial.set_option(rs.option.filter_magnitude, 2)
        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
        spatial.set_option(rs.option.filter_smooth_delta, 20)

        t0 = time.time()

        while True:
            if time.time() - t0 > TIMEOUT_SEC:
                print("\n[FAIL] Timeout: not enough valid samples.")
                break

            frames = pipeline.wait_for_frames()
            frames = align.process(frames)

            color_frame = frames.get_color_frame()
            depth_frame = frames.get_depth_frame()
            if not color_frame or not depth_frame:
                continue

            # IMPORTANT: cast back with as_depth_frame() after each filter
            depth_frame = spatial.process(depth_frame).as_depth_frame()
            depth_frame = temporal.process(depth_frame).as_depth_frame()
            depth_frame = hole.process(depth_frame).as_depth_frame()

            frame = np.asanyarray(color_frame.get_data())
            intr = color_frame.profile.as_video_stream_profile().get_intrinsics()
            depth_u16 = np.asanyarray(depth_frame.get_data())

            # YOLO OBB inference
            results = model.predict(
                frame,
                imgsz=IMGSZ,
                conf=CONF_THRES,
                iou=IOU_THRES,
                verbose=False
            )
            r = results[0]

            # Keep only the most confident detection above the threshold.
            best = None  # (conf, cls, poly4x2)
            if getattr(r, "obb", None) is not None and r.obb is not None:
                obb = r.obb
                if obb.xyxyxyxy is not None and len(obb.xyxyxyxy) > 0:
                    polys = obb.xyxyxyxy.cpu().numpy()
                    confs = obb.conf.cpu().numpy().astype(float)
                    clss  = obb.cls.cpu().numpy().astype(int)

                    keep = confs >= CONF_THRES
                    for poly8, cf, ci in zip(polys[keep], confs[keep], clss[keep]):
                        if best is None or cf > best[0]:
                            best = (float(cf), int(ci), poly8.reshape(4, 2))

            if best is None:
                register_skip()
                continue

            cf, ci, poly = best

            # center pixel
            cx = int(np.mean(poly[:, 0]))
            cy = int(np.mean(poly[:, 1]))
            cx = clamp(cx, 0, width - 1)
            cy = clamp(cy, 0, height - 1)

            # ROI depth median + MAD
            poly_shrunk = poly_shrink_towards_center(poly, ROI_MARGIN_PX)
            poly_shrunk[:, 0] = np.clip(poly_shrunk[:, 0], 0, width - 1)
            poly_shrunk[:, 1] = np.clip(poly_shrunk[:, 1], 0, height - 1)

            Z_roi_m, mad_m, roi_n = depth_roi_stats(depth_u16, depth_scale, poly_shrunk)

            # size Z
            Z_size_m = estimate_Z_from_size(poly, intr, BOX_W_CM, BOX_H_CM)

            # Is the depth usable?
            depth_ok = (Z_roi_m > 0.0 and roi_n >= MIN_ROI_PIXELS and mad_m <= MAD_THRES_M)

            if depth_ok:
                # The smaller the MAD, the more weight the depth gets.
                alpha = clamp(0.85 - (mad_m / max(1e-6, MAD_THRES_M)) * 0.35, 0.55, 0.90)
                Z_use_m = alpha * Z_roi_m + (1.0 - alpha) * Z_size_m
                z_mode = "FUSED"
            else:
                # Unstable depth: size only (still filtered by the size check below).
                Z_use_m = Z_size_m
                z_mode = "SIZE"

            Z_use_cm = Z_use_m * 100.0

            # Z sanity
            if not (Z_RANGE_CM[0] <= Z_use_cm <= Z_RANGE_CM[1]):
                register_skip()
                continue

            # X,Y from (cx,cy,Z_use)
            X_m, Y_m = XY_from_pixel_and_Z(cx, cy, intr, Z_use_m)
            Z_m = Z_use_m
            dist_m = float(np.sqrt(X_m*X_m + Y_m*Y_m + Z_m*Z_m))
            angle = obb_angle_deg_upright0_rightplus(poly)

            cur = {
                "conf": cf,
                "cls": ci,
                "Xcm": X_m * 100.0,
                "Ycm": Y_m * 100.0,
                "Zcm": Z_m * 100.0,
                "distcm": dist_m * 100.0,
                "angle": float(angle),
                "Zdepth_cm": Z_roi_m * 100.0,
                "Zsize_cm": Z_size_m * 100.0,
                "Zuse_cm": Z_use_cm,
                "roi_n": roi_n,
                "mad_cm": mad_m * 100.0,
                "mode": z_mode,
                "cx": cx, "cy": cy,
            }

            # A depth spike (or a skewed Z) is caught by the measured-size check.
            ok_sz, est_long_cm, est_short_cm, err1, err2 = size_consistency_check(
                poly, intr, Z_use_m, BOX_W_CM, BOX_H_CM, rel_err_max=SIZE_REL_ERR_MAX
            )
            if not ok_sz:
                register_skip()
                continue

            # jump filter
            if is_jump(prev_valid, cur):
                register_skip()
                continue

            # accept
            consec_skips = 0
            prev_valid = cur
            accepted.append(cur)

            print(f"[{len(accepted)}/{AVG_N}] conf={cur['conf']:.2f} "
                  f"XYZ(cm)=({cur['Xcm']:+.2f},{cur['Ycm']:+.2f},{cur['Zcm']:+.2f}) "
                  f"dist={cur['distcm']:.2f}  ang={cur['angle']:+.2f}  "
                  f"Z(depth/size/use)=({cur['Zdepth_cm']:.1f}/{cur['Zsize_cm']:.1f}/{cur['Zuse_cm']:.1f}) {cur['mode']}  "
                  f"ROI(n={cur['roi_n']}, MAD={cur['mad_cm']:.2f}cm)  "
                  f"sizeChk(est≈{est_long_cm:.1f}x{est_short_cm:.1f}cm, err={err1*100:.0f}%/{err2*100:.0f}%)")

            if len(accepted) >= AVG_N:
                break

        if len(accepted) >= AVG_N:
            arr = np.array([[a["Xcm"], a["Ycm"], a["Zcm"], a["distcm"], a["angle"],
                             a["Zdepth_cm"], a["Zsize_cm"], a["Zuse_cm"]] for a in accepted],
                           dtype=np.float32)

            # The angle describes an undirected axis, so +90 and -90 are the
            # same orientation. Bring every sample onto the same side as the
            # first one before averaging; samples that do not wrap are untouched.
            ang = arr[:, 4]  # view into arr
            delta = ang - ang[0]
            ang[delta > 90.0] -= 180.0
            ang[delta < -90.0] += 180.0

            mean = arr.mean(axis=0)
            std  = arr.std(axis=0)

            # Report the averaged angle back inside [-90, 90].
            if mean[4] > 90.0:
                mean[4] -= 180.0
            elif mean[4] < -90.0:
                mean[4] += 180.0

            print(f"\n========== RESULT (AVERAGE over {len(accepted)} valid) ==========")
            print(f"count                 : {AVG_N}")
            print(f"cam XYZ avg (cm)      : ({mean[0]:+.2f}, {mean[1]:+.2f}, {mean[2]:+.2f})   "
                  f"std=({std[0]:.2f},{std[1]:.2f},{std[2]:.2f})")
            print(f"dist_avg (cm)         : {mean[3]:.2f}   std={std[3]:.2f}")
            print(f"angle_avg (deg)       : {mean[4]:+.2f}  std={std[4]:.2f}")
            print(f"Z avg(depth/size/use) : ({mean[5]:.1f}/{mean[6]:.1f}/{mean[7]:.1f}) cm")
            print("====================================================\n")

    finally:
        pipeline.stop()


if __name__ == "__main__":
    main()


[INFO] Model loaded: /home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt
[INFO] Need 10 valid samples. Timeout=25.0s
[INFO] BOX(WxH) = 23.0 x 9.5 cm
[INFO] conf>=0.85, iou=0.75, imgsz=640

[INFO] depth_scale = 0.00010000 m/unit

[1/10] conf=0.96 XYZ(cm)=(+2.63,-9.60,+41.73) dist=42.90  ang=+0.64  Z(depth/size/use)=(41.9/41.1/41.7) FUSED  ROI(n=18247, MAD=0.67cm)  sizeChk(est≈22.8x9.9cm, err=1%/4%)
[2/10] conf=0.95 XYZ(cm)=(+2.62,-9.56,+41.54) dist=42.71  ang=+0.68  Z(depth/size/use)=(41.7/41.0/41.5) FUSED  ROI(n=18351, MAD=0.57cm)  sizeChk(est≈22.7x9.9cm, err=1%/4%)
[3/10] conf=0.95 XYZ(cm)=(+2.60,-9.48,+41.20) dist=42.35  ang=+0.68  Z(depth/size/use)=(41.3/41.0/41.2) FUSED  ROI(n=18247, MAD=0.66cm)  sizeChk(est≈22.6x9.8cm, err=2%/3%)
[4/10] conf=0.95 XYZ(cm)=(+2.61,-9.52,+41.36) dist=42.52  ang=+0.65  Z(depth/size/use)=(41.5/41.1/41.4) FUSED  ROI(n=18172, MAD=0.73cm)  sizeChk(est≈22.4x9.9cm, err=2%/4%)
[5/10] conf=0.95 XYZ(cm)=(+2.63,-9.58,+41.64) dist=42.81

In [14]:
import numpy as np
import pyrealsense2 as rs
from ultralytics import YOLO
import cv2
import time

# Weights file handed to YOLO() in main().
MODEL_PATH = "/home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt"

# Detection settings echoed in main()'s startup log.
CONF_THRES = 0.85
IOU_THRES  = 0.75
IMGSZ      = 640

# -----------------------------
# Measured box size (cm)
# -----------------------------
BOX_W_CM = 23.0
BOX_H_CM = 9.5

# -----------------------------
# Camera -> Gripper Offset (cm)
# -----------------------------
OFF_X_CM = 0.0
OFF_Y_CM = -7.0
OFF_Z_CM = -18.0

# -----------------------------
# Goal: N valid samples
# -----------------------------
AVG_N = 10
TIMEOUT_SEC = 25.0

# -----------------------------
# Depth ROI stabilisation parameters
# -----------------------------
ROI_MARGIN_PX  = 6
MIN_ROI_PIXELS = 120
MAD_THRES_M    = 0.020
# depth_roi_stats() discards samples outside [DEPTH_MIN_M, DEPTH_MAX_M].
DEPTH_MIN_M    = 0.15
DEPTH_MAX_M    = 3.00

# -----------------------------
# Extra safeguards for rejecting "weird values"
# -----------------------------
Z_RANGE_CM = (15.0, 120.0)
SIZE_REL_ERR_MAX = 0.25

# Per-axis thresholds used by is_jump().
JUMP_XY_CM    = 3.5
JUMP_Z_CM     = 6.0
JUMP_ANG_DEG  = 10.0

MAX_CONSEC_SKIPS_RESET = 15

def clamp(v, lo, hi):
    """Limit ``v`` to the interval [lo, hi] and return the result."""
    capped = v if v < hi else hi          # apply the upper bound first ...
    return capped if capped > lo else lo  # ... then the lower bound

def poly_shrink_towards_center(poly4x2: np.ndarray, margin_px: float):
    """Pull each polygon corner towards the corner centroid by ``margin_px`` pixels.

    The step of every corner is capped at its own distance to the centroid:
    a polygon smaller than the margin collapses onto the centroid instead of
    overshooting it and turning inside-out.

    Args:
        poly4x2: corner coordinates, one (x, y) row per corner.
        margin_px: distance in pixels to move each corner along the line to
            the centroid.

    Returns:
        float32 array of the moved corners, same shape as the input.
    """
    p = poly4x2.astype(np.float32)
    c = p.mean(axis=0, keepdims=True)
    v = p - c
    dist = np.linalg.norm(v, axis=1, keepdims=True)
    # Never travel further than the centroid itself.
    step = np.minimum(margin_px, dist)
    # +1e-6 guards the division for a corner that already sits on the centroid.
    return p - (v / (dist + 1e-6)) * step

def depth_roi_stats(depth_u16: np.ndarray, depth_scale: float, poly4x2: np.ndarray):
    """Return (median, MAD, valid_count) of the scaled depth inside the polygon.

    Raw values are multiplied by ``depth_scale``; only samples that are
    positive and inside [DEPTH_MIN_M, DEPTH_MAX_M] are kept. When no sample
    survives the result is (0.0, 0.0, 0).
    """
    rows, cols = depth_u16.shape[:2]

    # Rasterise the (rounded) polygon into a full-frame mask.
    roi_mask = np.zeros((rows, cols), dtype=np.uint8)
    corners = np.round(poly4x2).astype(np.int32).reshape(-1, 1, 2)
    cv2.fillPoly(roi_mask, [corners], 255)

    samples = depth_u16[roi_mask == 255].astype(np.float32) * depth_scale
    in_range = (samples > 0) & (samples >= DEPTH_MIN_M) & (samples <= DEPTH_MAX_M)
    samples = samples[in_range]

    count = int(samples.size)
    if count == 0:
        return 0.0, 0.0, 0

    center = float(np.median(samples))
    spread = float(np.median(np.abs(samples - center)))  # median absolute deviation
    return center, spread, count

def obb_angle_deg_upright0_rightplus(poly4x2: np.ndarray) -> float:
    """Return the tilt of the polygon's principal axis in degrees.

    The principal axis is the eigenvector of the corner covariance matrix
    with the largest eigenvalue. Its sign is normalised so that the y
    component is non-negative, and the result is ``-atan2(vx, vy)``, i.e.
    0 when the axis is parallel to the image y axis.
    """
    corners = poly4x2.astype(np.float32)
    centered = corners - corners.mean(axis=0, keepdims=True)

    eigvals, eigvecs = np.linalg.eig(np.cov(centered.T))
    major = eigvecs[:, np.argmax(eigvals)].astype(np.float32)
    vx = float(major[0])
    vy = float(major[1])

    # An eigenvector's sign is arbitrary; force it into the vy >= 0 half-plane.
    if vy < 0:
        vx = -vx
        vy = -vy

    return -float(np.degrees(np.arctan2(vx, vy)))

def edges_long_short_px(poly4x2: np.ndarray):
    """Return (longest, shortest) side length in pixels of the 4-corner polygon."""
    corners = poly4x2.astype(np.float32)
    # Consecutive corner pairs, closing the loop from the last corner to the first.
    side_lengths = [
        np.linalg.norm(corners[j] - corners[i])
        for i, j in ((0, 1), (1, 2), (2, 3), (3, 0))
    ]
    return float(max(side_lengths)), float(min(side_lengths))

def estimate_Z_from_size(poly4x2: np.ndarray, intr, W_cm: float, H_cm: float) -> float:
    """Estimate Z (m) from the measured box size and the OBB's size in the image.

    For a pinhole camera an edge with pixel displacement (du, dv) on a
    fronto-parallel plane at depth Z has metric length
    ``Z * hypot(du / fx, dv / fy)``. Each edge is therefore normalised with
    the focal length of its own direction, rather than assuming that the long
    edge is horizontal (fx) and the short edge vertical (fy). When
    ``fx == fy`` this equals ``f * size / pixel_length``.

    Args:
        poly4x2: the four OBB corners in pixels, in order around the box.
        intr: object exposing focal lengths ``fx`` and ``fy`` in pixels.
        W_cm, H_cm: measured box side lengths in centimetres (any order).

    Returns:
        Mean of the two depth estimates (long side and short side), in metres.
    """
    p = poly4x2.astype(np.float32)
    edges = np.roll(p, -1, axis=0) - p  # edge i runs from corner i to corner i+1

    # Unitless edge length = metric length / Z.
    unit_len = np.hypot(edges[:, 0] / intr.fx, edges[:, 1] / intr.fy)
    long_n = float(unit_len.max())
    short_n = float(unit_len.min())

    # The longer measured side is matched with the longer edge.
    big_m = max(W_cm, H_cm) / 100.0
    small_m = min(W_cm, H_cm) / 100.0

    # The floor keeps a degenerate (zero-length) edge from dividing by zero.
    Z1 = big_m / max(long_n, 1e-9)
    Z2 = small_m / max(short_n, 1e-9)

    return float(0.5 * (Z1 + Z2))  # meters

def XY_from_pixel_and_Z(cx: int, cy: int, intr, Z_m: float):
    """Back-project pixel (cx, cy) at depth ``Z_m`` to X and Y with the pinhole model.

    ``intr`` must expose ``ppx``, ``ppy``, ``fx`` and ``fy``; X and Y come
    out in the same unit as ``Z_m``.
    """
    x_norm = (cx - intr.ppx) / intr.fx
    y_norm = (cy - intr.ppy) / intr.fy
    return float(x_norm * Z_m), float(y_norm * Z_m)

def size_consistency_check(poly4x2, intr, Z_use_m, W_cm, H_cm, rel_err_max=0.25):
    """Check that the OBB's size at depth ``Z_use_m`` matches the measured box size.

    Each edge is converted to centimetres with the focal length of its own
    direction: ``L = Z * hypot(du / fx, dv / fy)``. This stays correct for a
    rotated box when ``fx != fy`` and equals ``px * Z / f`` when ``fx == fy``.

    Args:
        poly4x2: the four OBB corners in pixels, in order around the box.
        intr: object exposing focal lengths ``fx`` and ``fy`` in pixels.
        Z_use_m: depth used for the sample, in metres.
        W_cm, H_cm: measured box side lengths in centimetres (any order).
        rel_err_max: largest accepted relative error per side.

    Returns:
        (ok, long_cm, short_cm, err_long, err_short) where ``ok`` is True
        only if both relative errors are within ``rel_err_max``.
    """
    p = poly4x2.astype(np.float32)
    edges = np.roll(p, -1, axis=0) - p  # edge i runs from corner i to corner i+1

    # Unitless edge length = metric length / Z.
    unit_len = np.hypot(edges[:, 0] / intr.fx, edges[:, 1] / intr.fy)

    L1_cm = float(unit_len.max()) * Z_use_m * 100.0
    L2_cm = float(unit_len.min()) * Z_use_m * 100.0

    W_big = max(W_cm, H_cm)
    H_sml = min(W_cm, H_cm)

    # The long edge is compared with the bigger side, the short edge with the smaller.
    err1 = abs(L1_cm - W_big) / max(1e-6, W_big)
    err2 = abs(L2_cm - H_sml) / max(1e-6, H_sml)

    ok = (err1 <= rel_err_max) and (err2 <= rel_err_max)
    return ok, L1_cm, L2_cm, err1, err2

def is_jump(prev, cur):
    """Tell whether ``cur`` deviates from ``prev`` by more than the jump thresholds.

    X and Y are compared against JUMP_XY_CM, Z against JUMP_Z_CM and the
    angle against JUMP_ANG_DEG. With no previous sample (``prev is None``)
    nothing counts as a jump.
    """
    if prev is None:
        return False

    limits = (
        ("Xcm", JUMP_XY_CM),
        ("Ycm", JUMP_XY_CM),
        ("Zcm", JUMP_Z_CM),
        ("angle", JUMP_ANG_DEG),
    )
    return any(abs(cur[key] - prev[key]) > limit for key, limit in limits)

def _pick_best_obb(result, conf_min):
    """Return the highest-confidence OBB of one YOLO result, or None.

    Args:
        result: A single prediction result; its ``obb`` attribute (if any) is
            read through ``xyxyxyxy``, ``conf`` and ``cls``.
        conf_min: Detections with confidence below this value are ignored.

    Returns:
        ``(conf, cls, poly4x2)`` for the best detection, or ``None`` when no
        detection passes the threshold.
    """
    obb = getattr(result, "obb", None)
    if obb is None or obb.xyxyxyxy is None or len(obb.xyxyxyxy) == 0:
        return None

    polys = obb.xyxyxyxy.cpu().numpy()
    confs = obb.conf.cpu().numpy().astype(float)
    clss = obb.cls.cpu().numpy().astype(int)

    keep = confs >= conf_min
    if not keep.any():
        return None

    polys, confs, clss = polys[keep], confs[keep], clss[keep]
    # argmax returns the first maximum, i.e. the same winner a strict ">"
    # scan over the detections would pick.
    top = int(np.argmax(confs))
    return float(confs[top]), int(clss[top]), polys[top].reshape(4, 2)


def _gripper_xyz_dist_cm(sample):
    """Shift a camera-frame sample by the cam->gripper offsets.

    Returns ``(gx, gy, gz, gdist)`` in centimetres, where ``gdist`` is the
    Euclidean norm of the shifted position.
    """
    gx = sample["Xcm"] + OFF_X_CM
    gy = sample["Ycm"] + OFF_Y_CM
    gz = sample["Zcm"] + OFF_Z_CM
    gdist = float(np.sqrt(gx*gx + gy*gy + gz*gz))
    return gx, gy, gz, gdist


def _print_summary(accepted):
    """Print mean/std statistics of the accepted samples (camera and gripper frames)."""
    cam_arr = np.array(
        [[a["Xcm"], a["Ycm"], a["Zcm"], a["distcm"], a["angle"],
          a["Zdepth_cm"], a["Zsize_cm"], a["Zuse_cm"]] for a in accepted],
        dtype=np.float32,
    )
    cam_mean = cam_arr.mean(axis=0)
    cam_std = cam_arr.std(axis=0)

    # Gripper-frame statistics are computed from the per-sample shifted values.
    g_arr = np.array([_gripper_xyz_dist_cm(a) for a in accepted], dtype=np.float32)
    g_mean = g_arr.mean(axis=0)
    g_std = g_arr.std(axis=0)

    # The header reports the configured sample count instead of a literal 10.
    print(f"\n========== RESULT (AVERAGE over {AVG_N} valid) ==========")
    print(f"count : {AVG_N}")

    print("\n[CAMERA]")
    print(f"XYZ avg (cm)  : ({cam_mean[0]:+.2f}, {cam_mean[1]:+.2f}, {cam_mean[2]:+.2f})   "
          f"std=({cam_std[0]:.2f},{cam_std[1]:.2f},{cam_std[2]:.2f})")
    print(f"dist avg (cm) : {cam_mean[3]:.2f}   std={cam_std[3]:.2f}")

    print(f"\n[GRIPPER]  (offset X{OFF_X_CM:+.1f}, Y{OFF_Y_CM:+.1f}, Z{OFF_Z_CM:+.1f})")
    print(f"XYZ avg (cm)  : ({g_mean[0]:+.2f}, {g_mean[1]:+.2f}, {g_mean[2]:+.2f})   "
          f"std=({g_std[0]:.2f},{g_std[1]:.2f},{g_std[2]:.2f})")
    print(f"dist avg (cm) : {g_mean[3]:.2f}   std={g_std[3]:.2f}")

    print("\n[OTHERS]")
    print(f"angle_avg (deg)       : {cam_mean[4]:+.2f}  std={cam_std[4]:.2f}")
    print(f"Z avg(depth/size/use) : ({cam_mean[5]:.1f}/{cam_mean[6]:.1f}/{cam_mean[7]:.1f}) cm")
    print("====================================================\n")


def main():
    """Collect ``AVG_N`` valid box pose samples from the camera and print their average.

    Each frame is run through the YOLO OBB model; the best detection is turned
    into a camera-frame X/Y/Z (cm) and angle using a depth estimate that fuses
    the depth-image ROI with the size-based estimate when the ROI is reliable,
    and falls back to the size-based estimate otherwise. Samples failing the
    Z-range, size-consistency or jump checks are skipped. The loop ends when
    enough samples are accepted or ``TIMEOUT_SEC`` elapses; the summary is
    printed only in the former case. The pipeline is always stopped on exit.
    """
    model = YOLO(MODEL_PATH)
    print("[INFO] Model loaded:", MODEL_PATH)
    print(f"[INFO] Need {AVG_N} valid samples. Timeout={TIMEOUT_SEC}s")
    print(f"[INFO] BOX(WxH) = {BOX_W_CM:.1f} x {BOX_H_CM:.1f} cm")
    print(f"[INFO] Offset cam->gripper (cm): X{OFF_X_CM:+.1f}, Y{OFF_Y_CM:+.1f}, Z{OFF_Z_CM:+.1f}")
    print(f"[INFO] conf>={CONF_THRES}, iou={IOU_THRES}, imgsz={IMGSZ}\n")

    pipeline = rs.pipeline()
    config = rs.config()

    width, height, fps = 640, 480, 30
    config.enable_stream(rs.stream.color, width, height, rs.format.bgr8, fps)
    config.enable_stream(rs.stream.depth, width, height, rs.format.z16, fps)

    # start() is kept outside the try so the finally clause only ever stops a
    # pipeline that actually started; everything after it is protected so a
    # failure during setup no longer leaves the camera streaming.
    profile = pipeline.start(config)
    try:
        align = rs.align(rs.stream.color)

        depth_sensor = profile.get_device().first_depth_sensor()
        depth_scale = float(depth_sensor.get_depth_scale())
        print(f"[INFO] depth_scale = {depth_scale:.8f} m/unit\n")

        temporal = rs.temporal_filter()
        spatial = rs.spatial_filter()
        hole = rs.hole_filling_filter()

        spatial.set_option(rs.option.filter_magnitude, 2)
        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
        spatial.set_option(rs.option.filter_smooth_delta, 20)

        accepted = []
        prev_valid = None
        consec_skips = 0

        def skip_frame():
            """Count a rejected frame; after too many in a row, forget the jump reference."""
            nonlocal prev_valid, consec_skips
            consec_skips += 1
            if consec_skips >= MAX_CONSEC_SKIPS_RESET:
                prev_valid = None
                consec_skips = 0

        # Monotonic clock: the timeout must not be affected by wall-clock changes.
        t0 = time.monotonic()

        while True:
            if time.monotonic() - t0 > TIMEOUT_SEC:
                print("\n[FAIL] Timeout: not enough valid samples.")
                break

            frames = pipeline.wait_for_frames()
            frames = align.process(frames)

            color_frame = frames.get_color_frame()
            depth_frame = frames.get_depth_frame()
            if not color_frame or not depth_frame:
                continue

            depth_frame = spatial.process(depth_frame).as_depth_frame()
            depth_frame = temporal.process(depth_frame).as_depth_frame()
            depth_frame = hole.process(depth_frame).as_depth_frame()

            frame = np.asanyarray(color_frame.get_data())
            intr = color_frame.profile.as_video_stream_profile().get_intrinsics()
            depth_u16 = np.asanyarray(depth_frame.get_data())

            results = model.predict(
                frame, imgsz=IMGSZ, conf=CONF_THRES, iou=IOU_THRES, verbose=False
            )

            best = _pick_best_obb(results[0], CONF_THRES)  # (conf, cls, poly4x2)
            if best is None:
                skip_frame()
                continue

            cf, ci, poly = best

            # Polygon centre, clamped to the image bounds.
            cx = int(np.mean(poly[:, 0]))
            cy = int(np.mean(poly[:, 1]))
            cx = clamp(cx, 0, width - 1)
            cy = clamp(cy, 0, height - 1)

            # Sample depth from a slightly shrunk polygon to avoid mixing in
            # background depth at the box border.
            poly_shrunk = poly_shrink_towards_center(poly, ROI_MARGIN_PX)
            poly_shrunk[:, 0] = np.clip(poly_shrunk[:, 0], 0, width - 1)
            poly_shrunk[:, 1] = np.clip(poly_shrunk[:, 1], 0, height - 1)

            Z_roi_m, mad_m, roi_n = depth_roi_stats(depth_u16, depth_scale, poly_shrunk)
            Z_size_m = estimate_Z_from_size(poly, intr, BOX_W_CM, BOX_H_CM)

            depth_ok = (Z_roi_m > 0.0 and roi_n >= MIN_ROI_PIXELS and mad_m <= MAD_THRES_M)

            if depth_ok:
                # The noisier the ROI depth (larger MAD), the less weight it gets.
                alpha = clamp(0.85 - (mad_m / max(1e-6, MAD_THRES_M)) * 0.35, 0.55, 0.90)
                Z_use_m = alpha * Z_roi_m + (1.0 - alpha) * Z_size_m
                z_mode = "FUSED"
            else:
                Z_use_m = Z_size_m
                z_mode = "SIZE"

            Z_use_cm = Z_use_m * 100.0
            if not (Z_RANGE_CM[0] <= Z_use_cm <= Z_RANGE_CM[1]):
                skip_frame()
                continue

            X_m, Y_m = XY_from_pixel_and_Z(cx, cy, intr, Z_use_m)
            Z_m = Z_use_m
            dist_m = float(np.sqrt(X_m*X_m + Y_m*Y_m + Z_m*Z_m))
            angle = obb_angle_deg_upright0_rightplus(poly)

            cur = {
                "conf": cf,
                "cls": ci,
                "Xcm": X_m * 100.0,
                "Ycm": Y_m * 100.0,
                "Zcm": Z_m * 100.0,
                "distcm": dist_m * 100.0,
                "angle": float(angle),
                "Zdepth_cm": Z_roi_m * 100.0,
                "Zsize_cm": Z_size_m * 100.0,
                "Zuse_cm": Z_use_cm,
                "roi_n": roi_n,
                "mad_cm": mad_m * 100.0,
                "mode": z_mode,
            }

            ok_sz, _est_long_cm, _est_short_cm, _err1, _err2 = size_consistency_check(
                poly, intr, Z_use_m, BOX_W_CM, BOX_H_CM, rel_err_max=SIZE_REL_ERR_MAX
            )
            if not ok_sz:
                skip_frame()
                continue

            if is_jump(prev_valid, cur):
                skip_frame()
                continue

            consec_skips = 0
            prev_valid = cur
            accepted.append(cur)

            # Gripper coordinates/distance (relative to the gripper origin).
            gx, gy, gz, gdist = _gripper_xyz_dist_cm(cur)

            print(f"[{len(accepted)}/{AVG_N}] conf={cur['conf']:.2f} "
                  f"camXYZ=({cur['Xcm']:+.2f},{cur['Ycm']:+.2f},{cur['Zcm']:+.2f}) camDist={cur['distcm']:.2f}  "
                  f"gripXYZ=({gx:+.2f},{gy:+.2f},{gz:+.2f}) gripDist={gdist:.2f}  "
                  f"ang={cur['angle']:+.2f}  "
                  f"Z(depth/size/use)=({cur['Zdepth_cm']:.1f}/{cur['Zsize_cm']:.1f}/{cur['Zuse_cm']:.1f}) {cur['mode']}")

            if len(accepted) >= AVG_N:
                break

        if len(accepted) >= AVG_N:
            _print_summary(accepted)

    finally:
        pipeline.stop()

# Script entry point: run the measurement only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()


[INFO] Model loaded: /home/dw/ws_job_msislab/amr_project/src/runs/obb/smoke_test_v2/weights/best.pt
[INFO] Need 10 valid samples. Timeout=25.0s
[INFO] BOX(WxH) = 23.0 x 9.5 cm
[INFO] Offset cam->gripper (cm): X+0.0, Y-7.0, Z-18.0
[INFO] conf>=0.85, iou=0.75, imgsz=640

[INFO] depth_scale = 0.00010000 m/unit

[1/10] conf=0.95 camXYZ=(+1.37,-9.49,+39.06) camDist=40.22  gripXYZ=(+1.37,-16.49,+21.06) gripDist=26.79  ang=+0.70  Z(depth/size/use)=(39.0/39.2/39.1) FUSED
[2/10] conf=0.95 camXYZ=(+1.38,-9.65,+39.32) camDist=40.51  gripXYZ=(+1.38,-16.65,+21.32) gripDist=27.09  ang=+0.68  Z(depth/size/use)=(39.3/39.3/39.3) FUSED
[3/10] conf=0.96 camXYZ=(+1.38,-9.54,+39.28) camDist=40.44  gripXYZ=(+1.38,-16.54,+21.28) gripDist=26.99  ang=+0.73  Z(depth/size/use)=(39.3/39.3/39.3) FUSED
[4/10] conf=0.96 camXYZ=(+1.38,-9.62,+39.20) camDist=40.39  gripXYZ=(+1.38,-16.62,+21.20) gripDist=26.98  ang=+0.72  Z(depth/size/use)=(39.1/39.3/39.2) FUSED
[5/10] conf=0.96 camXYZ=(+1.38,-9.62,+39.21) camDist=40.39