## 환경 세팅 




In [1]:
%xmode Plain

# 1) torchmetrics 최신 + detection 의존성까지 한 번에
!pip install -U "torchmetrics[detection]"

# 2) Windows에서 pycocotools 빌드 문제 피하기용
!pip install pycocotools-windows

# (옵션) 대안 백엔드
# !pip install faster-coco-eval

!pip install -U ultralytics



Exception reporting mode: Plain


ERROR: Could not find a version that satisfies the requirement pycocotools-windows (from versions: none)
ERROR: No matching distribution found for pycocotools-windows




---
## 셀 0) 경로/공통 설정 (그대로 복붙)

In [2]:
# === 공통 설정 ===
from pathlib import Path
import os, time, json, glob
from ultralytics import YOLO

# Paths
DATA_ROOT = r"E:\pytorch_env\YOLOv8x_dataset"         # root folder that contains images/ and labels/
IMG_DIR   = os.path.join(DATA_ROOT, "images")
LBL_DIR   = os.path.join(DATA_ROOT, "labels")
SPLIT_DIR = os.path.join(DATA_ROOT, "_splits")
DATA_YAML = os.path.join(DATA_ROOT, "data.yaml")

PROJECT   = r"E:\pytorch_env\ai05-level1-project\Exp"
SUBMIT_DIR= r"E:\pytorch_env\ai05-level1-project\submission"
TEST_DIR  = r"E:\pytorch_env\ai05-level1-project\test_images"  # adjust to the actual test-image folder

# Create the output folders up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(PROJECT, exist_ok=True)
os.makedirs(SPLIT_DIR, exist_ok=True)
os.makedirs(SUBMIT_DIR, exist_ok=True)

# Shared run settings: passed to training as seed / device / workers, DEVICE also to predict.
SEED=42
DEVICE=0
WORKERS=4


---
## 셀 1) train=val 동일 세트 data.yaml 생성 (과적합 유도 버전)

In [3]:
# === train = val 동일 세트로 구성 (과적합 유도) ===
import glob, yaml
from pathlib import Path

IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

def _stem(p):
    """Return the file name of `p` without its directory and final suffix."""
    return Path(p).stem

# Recursively gather every image below IMG_DIR; the set drops duplicate hits before sorting.
images = sorted({
    hit
    for suffix in IMG_EXTS
    for hit in glob.glob(os.path.join(IMG_DIR, f"**/*{suffix}"), recursive=True)
})
labels = glob.glob(os.path.join(LBL_DIR, "**/*.txt"), recursive=True)
# Stem -> label path; if two label files share a stem, the one listed later wins.
label_map = dict(zip(map(_stem, labels), labels))

# Pair every image with the label file that shares its stem.
pairs = []
miss_lbl = []
for img in images:
    s = _stem(img)
    if s in label_map: pairs.append((img, label_map[s]))
    else: miss_lbl.append(img)

# Detect label files that have no matching image.
# The image stems are collected into a set once, so each membership test is a hash
# lookup instead of a scan over the whole image list.
image_stems = {_stem(i) for i in images}
miss_img = [p for s, p in label_map.items() if s not in image_stems]

print(f"총 이미지: {len(images)} | 총 라벨: {len(labels)} | 페어링 성공: {len(pairs)}")
if miss_lbl: print(f"[경고] 라벨 없는 이미지 {len(miss_lbl)}개 (제외) 예) {miss_lbl[:3]}")
if miss_img: print(f"[경고] 이미지 없는 라벨 {len(miss_img)}개 (제외) 예) {miss_img[:3]}")
assert pairs, "이미지-라벨 페어가 없습니다."

# 클래스 수 추정
def _max_cid(txt):
    m=-1
    with open(txt, "r", encoding="utf-8") as f:
        for ln in f:
            ln=ln.strip()
            if not ln: continue
            try:
                cid = int(float(ln.split()[0]))
                if cid>m: m=cid
            except: pass
    return m

# Highest class id over all paired label files; -1 is the floor when nothing parses.
global_max = max((_max_cid(lbl) for _, lbl in pairs), default=-1)
# Class ids are zero-based, so the class count is the largest id plus one.
nc = global_max + 1
print("추정 nc:", nc)

# Write the split lists (train and val intentionally contain the same images).
train_list = os.path.join(SPLIT_DIR, "train.txt")
val_list   = os.path.join(SPLIT_DIR, "val.txt")
# One forward-slash image path per line; built once and written to both files.
split_listing = "".join(img.replace("\\","/")+"\n" for img,_ in pairs)
for list_path in (train_list, val_list):
    with open(list_path, "w", encoding="utf-8") as fh:
        fh.write(split_listing)

# Class names are placeholders only.
names = [f"class_{i}" for i in range(nc)]

# Key order matters for the emitted YAML because sort_keys=False is used below.
data_cfg = dict(
    train=train_list.replace("\\","/"),
    val=val_list.replace("\\","/"),
    nc=nc,
    names=names,
)
with open(DATA_YAML, "w", encoding="utf-8") as f:
    yaml.safe_dump(data_cfg, f, allow_unicode=True, sort_keys=False)

print("✅ data.yaml 완료:", DATA_YAML)


총 이미지: 1472 | 총 라벨: 1472 | 페어링 성공: 1472
추정 nc: 73
✅ data.yaml 완료: E:\pytorch_env\YOLOv8x_dataset\data.yaml


---

## 셀 2) 2-Stage 학습 (Hard → Easy, yolov8s / Adam, val=False)

In [5]:
# === Hard -> Easy 두 단계 학습 (작은 모델 + Adam, 과적합 유도) ===
def stage1_train(model_ckpt:str, exp_name:str, imgsz:int=640, epochs:int=150, lr0:float=0.003):
    """Run the first ("hard") training stage with augmentation enabled.

    Args:
        model_ckpt: Weights identifier or path handed to ``YOLO(...)``.
        exp_name: Requested run name below ``PROJECT``.
        imgsz: Training image size.
        epochs: Number of training epochs.
        lr0: Initial learning rate.

    Returns:
        str path of the ``last.pt`` checkpoint written by this run.
    """
    m = YOLO(model_ckpt)
    args = dict(
        data=DATA_YAML, imgsz=imgsz, epochs=epochs, batch=32, device=DEVICE,
        optimizer="Adam", lr0=lr0, lrf=0.2, cos_lr=False, warmup_epochs=2.0,
        weight_decay=1e-4,
        mosaic=0.7, mixup=0.2, fliplr=0.5, flipud=0.2,
        hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
        degrees=0.0, translate=0.08, scale=0.5, shear=0.0, perspective=0.0,
        cache="ram", workers=WORKERS, rect=False, seed=SEED,
        val=False, project=PROJECT, name=exp_name, save_period=50, verbose=True
    )
    m.train(**args)
    # The run is started with exist_ok=False, so when PROJECT/exp_name already exists
    # the trainer saves into a suffixed folder. Rebuilding the path from exp_name would
    # then point at the OLD run's weights, so ask the trainer where it actually saved.
    # NOTE(review): relies on the trainer exposing `.last`; the conventional path is the fallback.
    trainer = getattr(m, "trainer", None)
    last_ckpt = getattr(trainer, "last", None)
    if last_ckpt is not None:
        return str(last_ckpt)
    return os.path.join(PROJECT, exp_name, "weights", "last.pt")

def stage2_train(stage1_last:str, exp_name:str, imgsz:int=960, epochs:int=60, lr0:float=0.0012):
    """Run the second ("easy") training stage: warm start with all augmentation off.

    Args:
        stage1_last: Checkpoint path to warm-start from (handed to ``YOLO(...)``).
        exp_name: Requested run name below ``PROJECT``.
        imgsz: Training image size.
        epochs: Number of training epochs.
        lr0: Initial learning rate.

    Returns:
        str path of the ``last.pt`` checkpoint written by this run.
    """
    m = YOLO(stage1_last)
    args = dict(
        data=DATA_YAML, imgsz=imgsz, epochs=epochs, batch=24, device=DEVICE,
        optimizer="Adam", lr0=lr0, lrf=0.1, cos_lr=False, warmup_epochs=1.0,
        weight_decay=0.0,
        mosaic=0.0, mixup=0.0, fliplr=0.0, flipud=0.0,
        hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,
        degrees=0.0, translate=0.0, scale=0.0, shear=0.0, perspective=0.0,
        cache="ram", workers=WORKERS, rect=False, seed=SEED,
        val=False, project=PROJECT, name=exp_name, save_period=20, verbose=True
    )
    m.train(**args)
    # The run is started with exist_ok=False, so when PROJECT/exp_name already exists
    # the trainer saves into a suffixed folder. Rebuilding the path from exp_name would
    # then point at the OLD run's weights, so ask the trainer where it actually saved.
    # NOTE(review): relies on the trainer exposing `.last`; the conventional path is the fallback.
    trainer = getattr(m, "trainer", None)
    last_ckpt = getattr(trainer, "last", None)
    if last_ckpt is not None:
        return str(last_ckpt)
    return os.path.join(PROJECT, exp_name, "weights", "last.pt")

def train_pipeline(model_ckpt:str, tag:str, s1_img=640, s2_img=960,
                   s1_epochs:int=150, s2_epochs:int=60,
                   s1_lr0:float=0.003, s2_lr0:float=0.0012):
    """Train stage 1 then stage 2 and return the stage-2 checkpoint path.

    Args:
        model_ckpt: Starting weights for stage 1.
        tag: Prefix used in both run names.
        s1_img: Stage-1 image size.
        s2_img: Stage-2 image size.
        s1_epochs: Stage-1 epochs (also embedded in the stage-1 run name).
        s2_epochs: Stage-2 epochs (also embedded in the stage-2 run name).
        s1_lr0: Stage-1 initial learning rate.
        s2_lr0: Stage-2 initial learning rate.

    Returns:
        The value returned by ``stage2_train`` (path of the stage-2 checkpoint).
    """
    # One timestamp shared by both run names so the two stages can be matched up.
    ts = time.strftime("%m%d_%H%M")
    # The epoch count in the name is derived from the argument so the two cannot drift apart.
    exp1 = f"{tag}_Hard{s1_epochs}e_adam_{s1_img}_{ts}"
    exp2 = f"{tag}_Easy{s2_epochs}e_adam_{s2_img}_{ts}"
    print(f"[Stage-1] {model_ckpt} -> {exp1}")
    s1_last = stage1_train(model_ckpt, exp1, imgsz=s1_img, epochs=s1_epochs, lr0=s1_lr0)
    print("Stage-1 last:", s1_last)
    print(f"[Stage-2] warm-start -> {exp2}")
    s2_last = stage2_train(s1_last, exp2, imgsz=s2_img, epochs=s2_epochs, lr0=s2_lr0)
    print("Stage-2 last:", s2_last)
    return s2_last


# (옵션) 더 작게: yolov8n
# last_v8n = train_pipeline("yolov8n.pt", tag="v8n")

# (옵션) 해상도 1280 한 번
# last_v8s_hires = train_pipeline("yolov8s.pt", tag="v8s_hires", s2_img=1280)


In [6]:
# Stage-1 training
last_v8s_stage1 = stage1_train("yolov8s.pt", exp_name="stage1_exp1")
# -> result: E:\pytorch_env\ai05-level1-project\Exp\stage1_exp1\weights\last.pt


Ultralytics 8.3.221  Python-3.11.14 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=E:\pytorch_env\YOLOv8x_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=150, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.2, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.003, lrf=0.2, mask_ratio=4, max_det=300, mixup=0.2, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=0.7, multi_scale=False, name=stage1_exp1, nbs=64, nms=False, opset=None, optimize=False, optimizer=Adam, overlap_mask=True, patience

In [7]:
# Stage-2 training (warm start from the Stage-1 checkpoint path)
last_v8s_stage2 = stage2_train(last_v8s_stage1, exp_name="stage2_exp1")
# -> result: E:\...\Exp\stage2_exp1\weights\last.pt


Ultralytics 8.3.221  Python-3.11.14 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=24, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=E:\pytorch_env\YOLOv8x_dataset\data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=60, erasing=0.4, exist_ok=False, fliplr=0.0, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.0, hsv_s=0.0, hsv_v=0.0, imgsz=960, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0012, lrf=0.1, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=E:\pytorch_env\ai05-level1-project\Exp\stage1_exp1\weights\last.pt, momentum=0.937, mosaic=0.0, multi_scale=False, name=stage2_exp1, nbs=64, nms=False, opset=None, optim

---
## 셀 3) 추론(예측) + 제출 CSV 스윕 (TTA on/off × conf × iou)

In [8]:
# === 제출 CSV 생성기: TTA on/off × conf × iou 스윕 ===
import pandas as pd
import numpy as np

def infer_collect(model_path, source, imgsz=960, augment=False, conf=0.01, iou=0.45):
    """Predict on `source` and return one DataFrame row per detected box.

    Boxes are stored as integer-rounded x, y, width, height. The image_id is the
    integer formed by all digits of the image file stem (0 when it has none).
    Images without detections contribute no rows.
    """
    columns = ["image_id","category_id","bbox_x","bbox_y","bbox_w","bbox_h","score"]
    detector = YOLO(model_path)
    results = detector.predict(source=source, imgsz=imgsz, conf=conf, iou=iou,
                               device=DEVICE, augment=augment, verbose=False)
    records = []
    for res in results:
        stem = os.path.splitext(os.path.basename(res.path))[0]
        digits = ''.join(ch for ch in stem if ch.isdigit())
        image_id = int(digits) if digits else 0
        if res.boxes is None or len(res.boxes) == 0:
            continue
        corners = res.boxes.xyxy.cpu().numpy()
        scores = res.boxes.conf.cpu().numpy()
        class_ids = res.boxes.cls.cpu().numpy().astype(int)
        # Convert corner boxes (x1, y1, x2, y2) to rounded (x, y, w, h).
        records.extend(
            [image_id, int(cid),
             int(round(left)), int(round(top)),
             int(round(right - left)), int(round(bottom - top)),
             float(score)]
            for (left, top, right, bottom), score, cid in zip(corners, scores, class_ids)
        )
    return pd.DataFrame(records, columns=columns)

def simple_nms(df, iou_thr=0.5):
    """Greedy non-maximum suppression per (image_id, category_id) group.

    Within each group the highest-scoring box is kept and every remaining box
    whose IoU with it is >= `iou_thr` is dropped; this repeats on what is left.
    An empty frame is returned unchanged.
    """
    if df.empty:
        return df
    survivors = []
    for _, grp in df.groupby(["image_id","category_id"]):
        if len(grp) <= 1:
            survivors.append(grp)
            continue
        xywh = grp[["bbox_x","bbox_y","bbox_w","bbox_h"]].to_numpy().astype(float)
        # Corner form: (x1, y1) is the stored origin, (x2, y2) adds width / height.
        x1, y1 = xywh[:, 0], xywh[:, 1]
        x2, y2 = x1 + xywh[:, 2], y1 + xywh[:, 3]
        areas = (x2 - x1) * (y2 - y1)
        order = grp["score"].to_numpy().argsort()[::-1]
        selected = []
        while order.size > 0:
            top, others = order[0], order[1:]
            selected.append(top)
            if others.size == 0:
                break
            overlap_w = np.maximum(0.0, np.minimum(x2[top], x2[others]) - np.maximum(x1[top], x1[others]))
            overlap_h = np.maximum(0.0, np.minimum(y2[top], y2[others]) - np.maximum(y1[top], y1[others]))
            overlap = overlap_w * overlap_h
            # Clamp the union away from zero so degenerate boxes do not divide by zero.
            ratio = overlap / np.maximum(areas[top] + areas[others] - overlap, 1e-9)
            order = others[ratio < iou_thr]
        survivors.append(grp.iloc[selected])
    return pd.concat(survivors, axis=0)

def export_submissions(model_path, test_dir, base_imgsz=960,
                       conf_list=(0.20,0.25,0.30), iou_list=(0.45,0.50), tta_list=(False,True),
                       nms_iou=0.5):
    """Write one submission CSV per (tta, iou, conf) combination into SUBMIT_DIR.

    Args:
        model_path: Checkpoint used for inference.
        test_dir: Image source passed to `infer_collect`.
        base_imgsz: Inference image size.
        conf_list: Final score thresholds; one CSV per value.
        iou_list: IoU values forwarded to the model's own prediction call.
        tta_list: Whether to run with `augment` on / off.
        nms_iou: IoU threshold of the extra `simple_nms` pass applied to the
            filtered detections (previously fixed at 0.5).
    """
    for tta in tta_list:
        for iou in iou_list:
            # 1) collect at a low confidence -> 2) filter by the final confidence -> 3) NMS -> save
            raw = infer_collect(model_path, test_dir, imgsz=base_imgsz, augment=tta, conf=0.01, iou=iou)
            for c in conf_list:
                filtered = raw[raw["score"]>=c]
                before = len(filtered)
                kept = simple_nms(filtered, iou_thr=nms_iou)
                # sort_values returns a new frame, so `raw` is never mutated by the insert below.
                kept = kept.sort_values(["image_id","score"], ascending=[True, False])
                kept.insert(0, "annotation_id", range(1, len(kept)+1))
                out = os.path.join(SUBMIT_DIR, f"submission_tta{int(tta)}_conf_{c:.2f}_iou_{iou:.2f}.csv")
                kept.to_csv(out, index=False, encoding="utf-8-sig")
                print(f"Saved ({before}->{len(kept)} after NMS):", out)

# === Run: uses a checkpoint path produced by the training cells above ===
# NOTE(review): this selects the Stage-1 checkpoint (last_v8s_stage1) although a Stage-2
# checkpoint (last_v8s_stage2) is also trained above - confirm that this is intended.
MODEL_PATH = last_v8s_stage1  # swap in another checkpoint (e.g. last_v8n) when needed
export_submissions(MODEL_PATH, TEST_DIR, base_imgsz=960,
                   conf_list=(0.20,0.25,0.30), iou_list=(0.45,0.50), tta_list=(False,True))


Saved (3237->3237 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.20_iou_0.45.csv
Saved (3233->3233 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.25_iou_0.45.csv
Saved (3233->3233 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.30_iou_0.45.csv
Saved (3237->3237 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.20_iou_0.50.csv
Saved (3233->3233 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.25_iou_0.50.csv
Saved (3233->3233 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta0_conf_0.30_iou_0.50.csv
Saved (3242->3242 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta1_conf_0.20_iou_0.45.csv
Saved (3239->3239 after NMS): E:\pytorch_env\ai05-level1-project\submission\submission_tta1_conf_0.25_iou_0.45.csv
Saved (3238->3238 after NMS): E:\pytorch_env\ai05-level1-project\submission\subm

In [None]:
# # 로컬 대리 평가: 각 설정(tta/conf/iou)을 val 이미지에 적용해 mAP@[0.75:0.95] 비교
# import os, numpy as np, torch
# from PIL import Image
# from ultralytics import YOLO
# from torchmetrics.detection import MeanAveragePrecision

# DATASET_ROOT = r"E:\pytorch_env\YOLOv8x_dataset"
# VAL_LIST     = os.path.join(DATASET_ROOT, "_splits", "val.txt")
# LABELS_ROOT  = os.path.join(DATASET_ROOT, "labels")
# IMAGES_ROOT  = os.path.join(DATASET_ROOT, "images")
# DEVICE = 0

# def _load_gt_xyxy(lbl_path, w, h):
#     boxes, labels = [], []
#     if not os.path.exists(lbl_path): return boxes, labels
#     with open(lbl_path, "r", encoding="utf-8") as f:
#         for ln in f:
#             p = ln.strip().split()
#             if len(p) < 5: continue
#             cid = int(float(p[0]))
#             xc, yc, ww, hh = map(float, p[1:5])
#             xc*=w; yc*=h; ww*=w; hh*=h
#             x1=xc-ww/2; y1=yc-hh/2; x2=xc+ww/2; y2=yc+hh/2
#             boxes.append([x1,y1,x2,y2]); labels.append(cid)
#     return boxes, labels

# # 수정 포인트만 발췌

# def eval_map(model_path, imgsz=960, use_tta=False, conf=0.25, iou=0.45):
#     model = YOLO(model_path)
#     with open(VAL_LIST, "r", encoding="utf-8") as f:
#         val_imgs = [ln.strip() for ln in f if ln.strip()]

#     # ★ numpy array -> list[float]
#     iou_thrs = [0.75, 0.80, 0.85, 0.90, 0.95]
#     metric = MeanAveragePrecision(
#         iou_type="bbox",
#         box_format="xyxy",          # 명시
#         iou_thresholds=iou_thrs
#     )

#     for img_path in val_imgs:
#         try:
#             im = Image.open(img_path); w,h = im.size; im.close()
#         except:
#             candidate = os.path.join(IMAGES_ROOT, os.path.basename(img_path))
#             im = Image.open(candidate); w,h = im.size; im.close()
#             img_path = candidate

#         preds = model.predict(
#             img_path, imgsz=imgsz, conf=conf, iou=iou,
#             device=DEVICE, augment=use_tta, verbose=False
#         )

#         pred_items = []
#         for r in preds:
#             b = r.boxes
#             if b is None or len(b) == 0:
#                 pred_items.append({
#                     "boxes": torch.zeros((0,4), dtype=torch.float32),
#                     "scores": torch.zeros((0,), dtype=torch.float32),
#                     "labels": torch.zeros((0,), dtype=torch.int64)
#                 })
#                 continue
#             pred_items.append({
#                 "boxes": b.xyxy.cpu().to(torch.float32),
#                 "scores": b.conf.cpu().to(torch.float32),
#                 "labels": b.cls.cpu().to(torch.int64)
#             })

#         stem = os.path.splitext(os.path.basename(img_path))[0]
#         gt_path = os.path.join(LABELS_ROOT, stem + ".txt")
#         gt_boxes, gt_labels = _load_gt_xyxy(gt_path, w, h)

#         if len(gt_boxes) == 0:
#             target = [{
#                 "boxes": torch.zeros((0,4), dtype=torch.float32),
#                 "labels": torch.zeros((0,), dtype=torch.int64)
#             }]
#         else:
#             target = [{
#                 "boxes": torch.tensor(gt_boxes, dtype=torch.float32),
#                 "labels": torch.tensor(gt_labels, dtype=torch.int64)
#             }]

#         # pred_items와 target은 "배치" 개념의 리스트이므로 그대로 update
#         metric.update(pred_items, target)

#     res = metric.compute()
#     return float(res["map"])


# # 예시: 방금 만든 CSV들과 동일한 설정을 val에 대입하여 비교
# MODEL_PATH = r"E:\pytorch_env\ai05-level1-project\Exp\stage1_exp1\weights\last.pt"  # 네 최종 모델 경로로 변경
# for tta in [False, True]:
#     for conf in [0.20, 0.25, 0.30]:
#         for iou in [0.45, 0.50]:
#             m = eval_map(MODEL_PATH, imgsz=960, use_tta=tta, conf=conf, iou=iou)
#             print(f"[VAL mAP] TTA={tta} conf={conf} iou={iou} -> mAP@[.75:.95]={m:.4f}")


[VAL mAP] TTA=False conf=0.2 iou=0.45 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=False conf=0.2 iou=0.5 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=False conf=0.25 iou=0.45 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=False conf=0.25 iou=0.5 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=False conf=0.3 iou=0.45 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=False conf=0.3 iou=0.5 -> mAP@[.75:.95]=0.9759
[VAL mAP] TTA=True conf=0.2 iou=0.45 -> mAP@[.75:.95]=0.9932
[VAL mAP] TTA=True conf=0.2 iou=0.5 -> mAP@[.75:.95]=0.9932
[VAL mAP] TTA=True conf=0.25 iou=0.45 -> mAP@[.75:.95]=0.9932
[VAL mAP] TTA=True conf=0.25 iou=0.5 -> mAP@[.75:.95]=0.9932
[VAL mAP] TTA=True conf=0.3 iou=0.45 -> mAP@[.75:.95]=0.9932
[VAL mAP] TTA=True conf=0.3 iou=0.5 -> mAP@[.75:.95]=0.9932


In [13]:
import os
import glob
import pandas as pd
import numpy as np
import torch
from PIL import Image
from collections import defaultdict
from torchmetrics.detection import MeanAveragePrecision

# =========================
# Path settings
# =========================
BASE_DIR      = r"E:\pytorch_env\ai05-level1-project"
SUBMIT_DIR    = os.path.join(BASE_DIR, "submission")  # folder holding the submission CSVs
DATASET_ROOT  = r"E:\pytorch_env\YOLOv8x_dataset"
VAL_LIST      = os.path.join(DATASET_ROOT, "_splits", "val.txt")
LABELS_ROOT   = os.path.join(DATASET_ROOT, "labels")
IMAGES_ROOT   = os.path.join(DATASET_ROOT, "images")

# (optional) exact mapping file that states the image_id of each validation image
# columns: file_name,image_id  (file_name is the actual file name below images/)
EVAL_META_CSV = os.path.join(BASE_DIR, "eval_meta.csv")  # automatic fallback when the file is missing

# =========================
# 유틸
# =========================
def _load_gt_xyxy(lbl_path, w, h):
    boxes, labels = [], []
    if not os.path.exists(lbl_path):
        return boxes, labels
    with open(lbl_path, "r", encoding="utf-8") as f:
        for ln in f:
            p = ln.strip().split()
            if len(p) < 5:
                continue
            cid = int(float(p[0]))
            xc, yc, ww, hh = map(float, p[1:5])
            xc*=w; yc*=h; ww*=w; hh*=h
            x1 = xc - ww/2; y1 = yc - hh/2
            x2 = xc + ww/2; y2 = yc + hh/2
            boxes.append([x1, y1, x2, y2]); labels.append(cid)
    return boxes, labels

def _build_val_index():
    """Build an index image_id -> ground-truth record for the images listed in VAL_LIST.

    The image_id is taken from EVAL_META_CSV (columns file_name,image_id) when that
    file loads and lists the file name; otherwise it is the integer formed by all
    digits of the file stem. Images without any digit are skipped.

    Returns:
        dict mapping int image_id to a dict with the keys
        "img_path", "size" (w, h), "gt_boxes" and "gt_labels".
    """
    # 1) Collect the validation image paths
    assert os.path.exists(VAL_LIST), f"VAL 리스트가 없습니다: {VAL_LIST}"
    with open(VAL_LIST, "r", encoding="utf-8") as f:
        val_imgs = [ln.strip() for ln in f if ln.strip()]
    # Entries that do not exist as written are retried by file name below IMAGES_ROOT
    fixed = []
    for p in val_imgs:
        if os.path.exists(p):
            fixed.append(p)
        else:
            cand = os.path.join(IMAGES_ROOT, os.path.basename(p))
            if os.path.exists(cand):
                fixed.append(cand)
            else:
                print(f"⚠️ 경고: val 이미지 경로 확인 불가: {p}")
    val_imgs = fixed

    # 2) Load the optional file_name -> image_id mapping
    name2id = None
    if os.path.exists(EVAL_META_CSV):
        try:
            meta = pd.read_csv(EVAL_META_CSV)
            missing = {"file_name", "image_id"} - set(meta.columns)
            if missing:
                # Explicit check (not `assert`) so it still runs under `python -O`
                # and the fallback message names the missing columns.
                raise ValueError(f"eval_meta.csv 컬럼 누락: {sorted(missing)}")
            name2id = dict(zip(meta["file_name"], meta["image_id"]))
            print(f"[매핑] eval_meta.csv 사용: {len(name2id)}개")
        except Exception as e:
            print("⚠️ eval_meta.csv 로드 실패, 파일명 숫자 파싱으로 대체:", e)

    # 3) Build the index
    idx = {}  # image_id(int) -> dict
    for img_path in val_imgs:
        fname = os.path.basename(img_path)
        stem = os.path.splitext(fname)[0]
        # Decide the image_id
        if name2id is not None and fname in name2id:
            image_id = int(name2id[fname])
        else:
            # Fallback: keep only the digits of the file stem
            num = ''.join(filter(str.isdigit, stem))
            image_id = int(num) if num else None
        if image_id is None:
            print(f"⚠️ image_id 추출 실패(스킵): {fname}")
            continue
        if image_id in idx:
            # Two files mapped to the same id: the later one replaces the earlier one,
            # so report it instead of losing an image silently.
            print(f"⚠️ 경고: image_id 중복 — 이전 항목을 덮어씁니다: {image_id} ({fname})")

        # Image size and ground truth
        with Image.open(img_path) as im:
            w, h = im.size
        gt_path = os.path.join(LABELS_ROOT, stem + ".txt")
        gt_boxes, gt_labels = _load_gt_xyxy(gt_path, w, h)

        idx[image_id] = {
            "img_path": img_path,
            "size": (w, h),
            "gt_boxes": gt_boxes,
            "gt_labels": gt_labels
        }
    print(f"[val index] 매핑된 image_id 개수: {len(idx)}")
    return idx

def _csv_to_predictions(df):
    """CSV를 image_id별로 묶어 torchmetrics 입력 포맷으로 변환 (xyxy로 바꿔줌)."""
    # 필요한 컬럼 확인
    need = {"image_id","bbox_x","bbox_y","bbox_w","bbox_h","score","category_id"}
    if not need.issubset(df.columns):
        missing = need - set(df.columns)
        raise ValueError(f"CSV에 필요한 컬럼이 없습니다: {missing}")

    preds_by_id = defaultdict(list)
    for _, row in df.iterrows():
        x, y, w, h = float(row["bbox_x"]), float(row["bbox_y"]), float(row["bbox_w"]), float(row["bbox_h"])
        x2, y2 = x + w, y + h
        preds_by_id[int(row["image_id"])].append((
            [x, y, x2, y2],
            float(row["score"]),
            int(row["category_id"])
        ))
    return preds_by_id

def evaluate_csv_on_val(csv_path, val_index, iou_thresholds=None):
    """Score one submission CSV against the validation ground truth.

    Args:
        csv_path: CSV readable by pandas with the columns `_csv_to_predictions` needs.
        val_index: Mapping image_id -> dict holding at least "gt_boxes" and "gt_labels".
        iou_thresholds: IoU thresholds for the metric; defaults to 0.75 .. 0.95 in
            steps of 0.05.

    Returns:
        dict with float entries "map", "map_50" and "map_75".
        NOTE(review): with the default thresholds 0.50 is not evaluated and map_50
        comes back as -1 (visible in this cell's output); treat it as "not computed".
    """
    if iou_thresholds is None:
        iou_thresholds = [0.75, 0.80, 0.85, 0.90, 0.95]

    df = pd.read_csv(csv_path)
    preds_by_id = _csv_to_predictions(df)

    # Only images present in val_index are scored below. Predictions for any other
    # image_id are ignored, so report a mismatch instead of returning a silent 0 mAP.
    matched = sum(1 for image_id in preds_by_id if image_id in val_index)
    if preds_by_id and matched < len(preds_by_id):
        print(f"⚠️ 경고: 예측 image_id {len(preds_by_id)}개 중 {matched}개만 "
              f"val 인덱스와 일치합니다: {os.path.basename(csv_path)}")

    metric = MeanAveragePrecision(
        iou_type="bbox",
        box_format="xyxy",
        iou_thresholds=iou_thresholds
    )

    # Update image by image; each update receives a one-element prediction / target batch.
    for image_id, info in val_index.items():
        gt_boxes  = info["gt_boxes"]
        gt_labels = info["gt_labels"]

        # Predictions (empty tensors with the right shapes when the image has none)
        p = preds_by_id.get(image_id, [])
        if len(p) == 0:
            pred_item = {
                "boxes":  torch.zeros((0,4), dtype=torch.float32),
                "scores": torch.zeros((0,),  dtype=torch.float32),
                "labels": torch.zeros((0,),  dtype=torch.int64)
            }
        else:
            boxes = torch.tensor([b for (b, s, c) in p], dtype=torch.float32)
            scores= torch.tensor([s for (b, s, c) in p], dtype=torch.float32)
            labels= torch.tensor([c for (b, s, c) in p], dtype=torch.int64)
            pred_item = {"boxes": boxes, "scores": scores, "labels": labels}

        # Ground truth
        if len(gt_boxes) == 0:
            target_item = {
                "boxes":  torch.zeros((0,4), dtype=torch.float32),
                "labels": torch.zeros((0,),  dtype=torch.int64)
            }
        else:
            target_item = {
                "boxes":  torch.tensor(gt_boxes,  dtype=torch.float32),
                "labels": torch.tensor(gt_labels, dtype=torch.int64)
            }

        metric.update([pred_item], [target_item])

    res = metric.compute()
    # Return only the headline numbers
    out = {
        "map": float(res["map"]),
        "map_50": float(res.get("map_50", torch.tensor(0.0))),
        "map_75": float(res.get("map_75", torch.tensor(0.0)))
    }
    return out

# =========================
# Run: evaluate every CSV in the folder
# =========================
# NOTE(review): the submission_*.csv files in SUBMIT_DIR are written by export_submissions
# from TEST_DIR images, while val_index is built from the validation list. Unless the
# image_ids of both sets coincide, the scores below are meaningless (the captured
# output shows mAP 0.0000 for every file) - confirm which CSVs should be scored here.
val_index = _build_val_index()

csv_files = sorted(glob.glob(os.path.join(SUBMIT_DIR, "submission_*.csv")))
assert csv_files, f"평가할 CSV가 없습니다: {SUBMIT_DIR}"

rows = []
for csv_path in csv_files:
    try:
        scores = evaluate_csv_on_val(csv_path, val_index)
        # Count data rows: close the file deterministically and do not count the header line.
        with open(csv_path, "r", encoding="utf-8") as fh:
            num_preds = max(sum(1 for _ in fh) - 1, 0)
        rows.append({
            "csv": os.path.basename(csv_path),
            "mAP@[.75:.95]": scores["map"],
            "mAP@50": scores["map_50"],
            "mAP@75": scores["map_75"],
            "num_preds": num_preds
        })
        print(f"✓ {os.path.basename(csv_path)} -> mAP: {scores['map']:.4f} (50: {scores['map_50']:.4f}, 75: {scores['map_75']:.4f})")
    except Exception as e:
        print(f"✗ {os.path.basename(csv_path)} 평가 실패: {e}")

if rows:
    summary = pd.DataFrame(rows).sort_values(by="mAP@[.75:.95]", ascending=False)
    print("\n=== CSV별 로컬 평가 요약(상위 성능 순) ===")
    print(summary.to_string(index=False))
else:
    # Every CSV failed: sorting an empty frame by a missing column would raise KeyError.
    summary = pd.DataFrame(columns=["csv", "mAP@[.75:.95]", "mAP@50", "mAP@75", "num_preds"])
    print("⚠️ 평가에 성공한 CSV가 없습니다.")


[val index] 매핑된 image_id 개수: 1472
✓ submission_tta0_conf_0.20_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta0_conf_0.20_iou_0.50.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta0_conf_0.25_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta0_conf_0.25_iou_0.50.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta0_conf_0.30_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta0_conf_0.30_iou_0.50.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.20_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.20_iou_0.50.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.25_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.25_iou_0.50.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.30_iou_0.45.csv -> mAP: 0.0000 (50: -1.0000, 75: 0.0000)
✓ submission_tta1_conf_0.30_iou_0.50.csv -> mAP: 0.0000 (50: -1.

---
## 셀 4) (선택) “과적합 상태 체크” 로컬 진단

In [None]:
# train == val here, so this is not a real generalization estimate; it only gives a
# quick check of whether the metrics converge above 0.99.
m = YOLO(MODEL_PATH)
metrics = m.val(model=MODEL_PATH, data=DATA_YAML, imgsz=960, conf=0.001, iou=0.70, plots=False, save_json=False)
try:
    print({k: float(v) for k, v in metrics.results_dict.items()})
except (AttributeError, TypeError, ValueError):
    # Fall back to the raw object when it has no results_dict or a value is not
    # convertible to float; other errors (e.g. KeyboardInterrupt) are not swallowed.
    print(metrics)


---

## 셀 5) 빠른 실험 가이드

In [None]:
# 1) Default (recommended): yolov8s, 640 -> 960
last_v8s = train_pipeline("yolov8s.pt", tag="v8s")
export_submissions(last_v8s, TEST_DIR)

# 2) Smaller model: yolov8n (following the team's tip)
# last_v8n = train_pipeline("yolov8n.pt", tag="v8n")
# export_submissions(last_v8n, TEST_DIR)

# 3) High-resolution (1280) easy stage, when there is enough VRAM headroom
# last_v8s_1280 = train_pipeline("yolov8s.pt", tag="v8s_hires", s2_img=1280)
# export_submissions(last_v8s_1280, TEST_DIR, base_imgsz=1280)
