In [3]:
import os
from pathlib import Path
from collections import defaultdict

import numpy as np
import pandas as pd
from ultralytics import YOLO

# =========================
# Paths & parameters
# =========================

# Directory holding the test-set images.
TEST_DIR = "/NFS/celine/aicup/yoloNew/test_images"

# best.pt checkpoint of every fold. The position in this list is the model
# index used when the per-fold detections are ensembled.
FOLD_WEIGHTS = [
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/fold02/weights/best.pt",
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/fold12/weights/best.pt",
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/fold22/weights/best.pt",
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/fold32/weights/best.pt",
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/fold42/weights/best.pt",
]

# Inference and ensemble settings
IMG_SIZE      = 512            # inference image size; use the value the models were trained with
DEVICE        = 0              # GPU index; set to 'cpu' when no GPU is available
BASE_CONF     = 0.25           # confidence threshold for each single model
BASE_IOU      = 0.7            # NMS IoU used inside each single model
ENSEMBLE_IOU  = 0.55           # IoU threshold for the cross-model NMS / WBF / vote step
ENSEMBLE_METHOD = "wbf"        # change only this line to switch mode: "nms" / "wbf" / "vote"
VOTE_K        = 3              # vote mode: minimum number of agreeing models to keep a box

# Output file. The suffix is derived from ENSEMBLE_METHOD so that switching the
# method really is a one-line change and cannot overwrite (or be mislabelled
# as) the result of another method. For "wbf" the path is unchanged.
OUTPUT_TXT = (
    "/NFS/celine/aicup/yoloNew/runs/kfold_train/"
    f"submission_ensemble_5foldm_{ENSEMBLE_METHOD.upper()}.txt"
)


# =========================
# 工具函式
# =========================

def compute_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexable as ``[x1, y1, x2, y2]``. The result is ``0.0``
    when the boxes do not overlap or when the union area is not positive.
    """
    ax1, ay1, ax2, ay2 = box1[0], box1[1], box1[2], box1[3]
    bx1, by1, bx2, by2 = box2[0], box2[1], box2[2], box2[3]

    # Side lengths of the overlap rectangle, clamped at zero when disjoint.
    overlap_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    overlap = overlap_w * overlap_h
    if overlap == 0:
        return 0.0

    total = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - overlap
    if total > 0:
        return overlap / total
    return 0.0


def nms_ensemble(boxes, scores, labels, iou_thr):
    """Greedy, class-aware non-maximum suppression written in plain Python.

    Args:
        boxes: sequence of ``[x1, y1, x2, y2]`` boxes.
        scores: one confidence per box.
        labels: one class id per box (all zeros is fine for a single class).
        iou_thr: a box is dropped when its IoU with an already kept box of
            the same label is strictly greater than this value.

    Returns:
        ``(boxes, scores, labels)`` of the surviving boxes as plain Python
        lists, in the order they were selected (descending score).
    """
    if len(boxes) == 0:
        return [], [], []

    box_arr = np.array(boxes)
    score_arr = np.array(scores)
    label_arr = np.array(labels)

    # Candidate indices from highest to lowest score.
    ranking = score_arr.argsort()[::-1]
    total = len(ranking)
    # dropped[p] marks the candidate at ranking position p as suppressed.
    dropped = [False] * total

    kept = []
    for pos in range(total):
        if dropped[pos]:
            continue
        winner = ranking[pos]
        kept.append(winner)

        # Suppress every still-alive, lower-ranked box of the same class
        # that overlaps the winner too much.
        for later in range(pos + 1, total):
            if dropped[later]:
                continue
            other = ranking[later]
            if label_arr[other] != label_arr[winner]:
                continue
            if compute_iou(box_arr[winner], box_arr[other]) > iou_thr:
                dropped[later] = True

    return (
        [box_arr[k].tolist() for k in kept],
        [float(score_arr[k]) for k in kept],
        [int(label_arr[k]) for k in kept],
    )


def vote_ensemble(detections, iou_thr, vote_k):
    """Fuse detections from several models by greedy clustering and voting.

    Detections are visited from highest to lowest score. Every detection that
    has not been consumed yet seeds a cluster, which then absorbs each later
    unconsumed detection with the same label whose IoU with the *seed* is
    strictly greater than ``iou_thr``. A cluster is emitted only when at
    least ``vote_k`` distinct ``model_id`` values contributed to it;
    otherwise all of its members are discarded as noise.

    Args:
        detections: list of dicts shaped like
            ``{"box": [x1, y1, x2, y2], "score": float, "label": int,
            "model_id": int}``. The list itself is not modified.
        iou_thr: IoU threshold for joining a cluster.
        vote_k: minimum number of distinct models required per cluster.

    Returns:
        ``(boxes, scores, labels)`` as Python lists. Each fused box is the
        score-weighted average of the cluster's boxes, each fused score is
        the plain mean of the cluster's scores, and the label is the seed's.
    """
    if len(detections) == 0:
        return [], [], []

    # Highest score first, so the strongest detection seeds each cluster.
    ranked = sorted(detections, key=lambda d: d["score"], reverse=True)
    used = [False] * len(ranked)

    fused_boxes = []
    fused_scores = []
    fused_labels = []

    for i, seed in enumerate(ranked):
        if used[i]:
            continue

        cluster = [seed]
        used[i] = True

        for j in range(i + 1, len(ranked)):
            if used[j]:
                continue
            candidate = ranked[j]
            if candidate["label"] != seed["label"]:
                continue
            if compute_iou(seed["box"], candidate["box"]) > iou_thr:
                cluster.append(candidate)
                used[j] = True

        # Too few distinct models agree -> treat the cluster as noise. Its
        # members stay marked as used and are not reconsidered.
        if len({d["model_id"] for d in cluster}) < vote_k:
            continue

        # float64 keeps pixel coordinates and scores exact; float32 would
        # truncate them before averaging.
        boxes_arr = np.array([c["box"] for c in cluster], dtype=np.float64)
        scores_arr = np.array([c["score"] for c in cluster], dtype=np.float64)

        # The weights must sum to exactly 1. Dividing by (sum + epsilon)
        # would shrink every fused coordinate toward the origin, so the
        # degenerate all-zero case is handled with uniform weights instead.
        score_sum = scores_arr.sum()
        if score_sum > 0:
            weights = scores_arr / score_sum
        else:
            weights = np.full(len(cluster), 1.0 / len(cluster))

        fused_box = (boxes_arr * weights[:, None]).sum(axis=0)

        fused_boxes.append(fused_box.tolist())
        fused_scores.append(float(scores_arr.mean()))
        # Every member shares the seed's label by construction.
        fused_labels.append(int(seed["label"]))

    return fused_boxes, fused_scores, fused_labels


def wbf_ensemble(boxes_list, scores_list, labels_list, image_size, iou_thr):
    """Run Weighted Boxes Fusion over the per-model detections of one image.

    Args:
        boxes_list: one entry per model (fold); each entry holds that
            model's boxes for the image as ``[x1, y1, x2, y2]``. The boxes
            are expected to be already normalized to ``[0, 1]``.
        scores_list: per-model lists of scores, parallel to ``boxes_list``.
        labels_list: per-model lists of labels, parallel to ``boxes_list``.
        image_size: ``(h, w)`` used to scale the fused boxes back to pixels.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.

    Returns:
        ``(boxes, scores, labels)`` where boxes are in pixel coordinates,
        scores are Python floats and labels are Python ints.

    Raises:
        ImportError: if the ``ensemble_boxes`` package cannot be imported.
    """
    # Imported lazily so the package is only needed when WBF is requested.
    try:
        from ensemble_boxes import weighted_boxes_fusion
    except ImportError:
        raise ImportError(
            "需要先安裝 ensemble-boxes：\n"
            "    pip install ensemble-boxes\n"
        )

    if len(boxes_list) == 0:
        return [], [], []

    # skip_box_thr=0.0: no extra score filtering is applied at this stage.
    merged_boxes, merged_scores, merged_labels = weighted_boxes_fusion(
        boxes_list,
        scores_list,
        labels_list,
        iou_thr=iou_thr,
        skip_box_thr=0.0,
    )

    height, width = image_size
    # Scale normalized coordinates back to pixels: x by width, y by height.
    pixel_boxes = [
        [b[0] * width, b[1] * height, b[2] * width, b[3] * height]
        for b in merged_boxes
    ]

    return (
        pixel_boxes,
        [float(s) for s in merged_scores],
        [int(c) for c in merged_labels],
    )


# =========================
# 主程式
# =========================
def _collect_fold_detections(model, fold_idx, num_folds, num_images, per_image):
    """Run one fold's model over TEST_DIR and record its boxes in per_image."""
    # stream=True yields results one by one instead of keeping them all in
    # RAM; verbose=False because progress is printed here.
    results = model.predict(
        source=TEST_DIR,
        imgsz=IMG_SIZE,
        conf=BASE_CONF,
        iou=BASE_IOU,
        device=DEVICE,
        verbose=False,
        stream=True,
    )

    print(f"開始對 fold {fold_idx} 推論...")
    for i, r in enumerate(results, start=1):
        # Report progress every 100 images and on the last one.
        if i % 100 == 0 or i == num_images:
            print(f"[fold {fold_idx}] 已處理 {i}/{num_images} 張影像")

        img_name = Path(r.path).stem
        h, w = r.orig_shape

        if img_name not in per_image:
            per_image[img_name] = {
                "shape": (h, w),
                "boxes_list": [[] for _ in range(num_folds)],
                "scores_list": [[] for _ in range(num_folds)],
                "labels_list": [[] for _ in range(num_folds)],
                "flat": [],
            }
        rec = per_image[img_name]

        boxes = r.boxes
        if boxes is None:
            continue

        # Move each tensor to the host once per image instead of calling
        # .item() / .tolist() on every single box.
        xyxy_all = boxes.xyxy.cpu().tolist()
        conf_all = boxes.conf.cpu().tolist()
        cls_all = boxes.cls.cpu().tolist()

        for (x1, y1, x2, y2), conf, cls in zip(xyxy_all, conf_all, cls_all):
            if conf < BASE_CONF:
                continue
            cls = int(cls)

            # WBF consumes coordinates normalized by the image size.
            rec["boxes_list"][fold_idx].append([x1 / w, y1 / h, x2 / w, y2 / h])
            rec["scores_list"][fold_idx].append(conf)
            rec["labels_list"][fold_idx].append(cls)

            # NMS / vote consume pixel coordinates.
            rec["flat"].append(
                {
                    "box": [x1, y1, x2, y2],
                    "score": conf,
                    "label": cls,
                    "model_id": fold_idx,
                }
            )


def _fuse_image(rec):
    """Fuse one image's per-fold detections using ENSEMBLE_METHOD."""
    if ENSEMBLE_METHOD == "wbf":
        return wbf_ensemble(
            rec["boxes_list"],
            rec["scores_list"],
            rec["labels_list"],
            image_size=rec["shape"],
            iou_thr=ENSEMBLE_IOU,
        )
    if ENSEMBLE_METHOD == "nms":
        flat = rec["flat"]
        return nms_ensemble(
            [d["box"] for d in flat],
            [d["score"] for d in flat],
            [d["label"] for d in flat],
            iou_thr=ENSEMBLE_IOU,
        )
    if ENSEMBLE_METHOD == "vote":
        return vote_ensemble(rec["flat"], iou_thr=ENSEMBLE_IOU, vote_k=VOTE_K)
    raise ValueError(f"未知的 ENSEMBLE_METHOD: {ENSEMBLE_METHOD}")


def _write_submission(rows):
    """Sort the fused rows and write them to OUTPUT_TXT, one box per line."""
    df = pd.DataFrame(
        rows,
        columns=["image_name", "class", "confidence", "x1", "y1", "x2", "y2"],
    )
    # Group lines by image, most confident box first.
    df.sort_values(by=["image_name", "confidence"], ascending=[True, False], inplace=True)

    # dirname is "" for a bare file name, and os.makedirs("") would raise.
    out_dir = os.path.dirname(OUTPUT_TXT)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Explicit encoding so the file does not depend on the platform locale.
    with open(OUTPUT_TXT, "w", encoding="utf-8") as f:
        f.writelines(
            f"{name} {int(cls)} {conf:.4f} {int(x1)} {int(y1)} {int(x2)} {int(y2)}\n"
            for name, cls, conf, x1, y1, x2, y2 in df.itertuples(index=False, name=None)
        )


def main():
    """Run every fold model on the test set, ensemble the boxes, write the result.

    Steps:
        1. Validate the ensemble configuration before any inference starts.
        2. For each checkpoint in FOLD_WEIGHTS, predict on TEST_DIR and
           collect the detections per image.
        3. Fuse each image's detections with ENSEMBLE_METHOD.
        4. Write ``image_name class confidence x1 y1 x2 y2`` lines to
           OUTPUT_TXT, with coordinates rounded to integers.

    Raises:
        ValueError: if ENSEMBLE_METHOD is not "wbf", "nms" or "vote".
        ImportError: if "wbf" is selected and ``ensemble_boxes`` is missing.
    """
    # Fail fast: a typo in the method name or a missing optional package
    # would otherwise only surface after all folds have finished inference.
    if ENSEMBLE_METHOD not in ("wbf", "nms", "vote"):
        raise ValueError(f"未知的 ENSEMBLE_METHOD: {ENSEMBLE_METHOD}")
    if ENSEMBLE_METHOD == "wbf":
        try:
            import ensemble_boxes  # noqa: F401  (availability probe only)
        except ImportError as err:
            raise ImportError(
                "需要先安裝 ensemble-boxes：\n"
                "    pip install ensemble-boxes\n"
            ) from err

    num_folds = len(FOLD_WEIGHTS)
    per_image = {}

    # Count the test images up front; the number is used for progress output.
    IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}
    test_image_paths = [
        p for p in Path(TEST_DIR).iterdir()
        if p.suffix.lower() in IMAGE_EXTS
    ]
    num_images = len(test_image_paths)
    print(f"測試集共有 {num_images} 張影像")

    print("開始載入各 fold 模型並對測試集推論...")

    for fold_idx, wpath in enumerate(FOLD_WEIGHTS):
        print(f"\n========== fold {fold_idx} ==========")
        print(f"載入模型：{wpath}")
        model = YOLO(wpath)
        _collect_fold_detections(model, fold_idx, num_folds, num_images, per_image)

    print("Ensemble 中...")

    rows = []
    for img_name, rec in per_image.items():
        boxes, scores, labels = _fuse_image(rec)

        # Collect submission rows; coordinates are rounded to whole pixels.
        for box, score, cls in zip(boxes, scores, labels):
            x1, y1, x2, y2 = box
            rows.append(
                [
                    img_name,
                    int(cls),
                    float(score),
                    int(round(x1)),
                    int(round(y1)),
                    int(round(x2)),
                    int(round(y2)),
                ]
            )

    _write_submission(rows)

    print(f"Ensemble 結果已輸出至：{OUTPUT_TXT}")

if __name__ == "__main__":
    main()


測試集共有 16620 張影像
開始載入各 fold 模型並對測試集推論...

載入模型：/NFS/celine/aicup/yoloNew/runs/kfold_train/fold02/weights/best.pt
開始對 fold 0 推論...
[fold 0] 已處理 100/16620 張影像
[fold 0] 已處理 200/16620 張影像
[fold 0] 已處理 300/16620 張影像
[fold 0] 已處理 400/16620 張影像
[fold 0] 已處理 500/16620 張影像
[fold 0] 已處理 600/16620 張影像
[fold 0] 已處理 700/16620 張影像
[fold 0] 已處理 800/16620 張影像
[fold 0] 已處理 900/16620 張影像
[fold 0] 已處理 1000/16620 張影像
[fold 0] 已處理 1100/16620 張影像
[fold 0] 已處理 1200/16620 張影像
[fold 0] 已處理 1300/16620 張影像
[fold 0] 已處理 1400/16620 張影像
[fold 0] 已處理 1500/16620 張影像
[fold 0] 已處理 1600/16620 張影像
[fold 0] 已處理 1700/16620 張影像
[fold 0] 已處理 1800/16620 張影像
[fold 0] 已處理 1900/16620 張影像
[fold 0] 已處理 2000/16620 張影像
[fold 0] 已處理 2100/16620 張影像
[fold 0] 已處理 2200/16620 張影像
[fold 0] 已處理 2300/16620 張影像
[fold 0] 已處理 2400/16620 張影像
[fold 0] 已處理 2500/16620 張影像
[fold 0] 已處理 2600/16620 張影像
[fold 0] 已處理 2700/16620 張影像
[fold 0] 已處理 2800/16620 張影像
[fold 0] 已處理 2900/16620 張影像
[fold 0] 已處理 3000/16620 張影像
[fold 0] 已處理 3100/16620 張影像
[fold 0] 已處理