## 전체 파이프라인
데이터 준비 -> YOLOv2 탐지기 학습(4클래스) -> 탐지 결과로 이미지 크롭 -> 크롭 이미지를 ResNet 분류기로 재분류(보정) -> .pt 모델 저장 + 테스트 2장 결과 저장 -> 추가 이미지 테스트

# 데이터 준비

In [None]:
266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축/01-1.정식개방데이터
    Training
        01.원천데이터
            TS_나무
            TS_남자사람
            TS_여자사람
            TS_집
        02.라벨링데이터
            TL_나무
            TL_남자사람
            TL_여자사람
            TL_집
    Validation
        01.원천데이터
            VS_나무
            VS_남자사람
            VS_여자사람
            VS_집
        02.라벨링데이터
            VL_나무
            VL_남자사람
            VL_여자사람
            VL_집

# 1. YOLOv2 탐지기 학습

In [1]:
# GPU 사용 가능 여부 점검
import torch

# Report the PyTorch build and whether a CUDA device is visible.
print("PyTorch 버전:", torch.__version__)
cuda_ok = torch.cuda.is_available()
print("CUDA 사용 가능 여부:", cuda_ok)

if not cuda_ok:
    print("⚠ GPU(CUDA)를 사용할 수 없습니다. CPU를 사용 중입니다.")
else:
    dev = torch.cuda.current_device()
    print("사용 중인 GPU 개수:", torch.cuda.device_count())
    print("현재 선택된 GPU:", dev)
    print("GPU 이름:", torch.cuda.get_device_name(dev))

    # Smoke-test the GPU with one large matrix product.
    import time

    x = torch.rand((5000, 5000), device='cuda')
    y = torch.rand((5000, 5000), device='cuda')
    # Synchronize before and after so only the matmul itself is timed.
    torch.cuda.synchronize()
    start = time.time()
    z = torch.matmul(x, y)
    torch.cuda.synchronize()
    print("GPU 행렬 곱 소요 시간: {:.4f}초".format(time.time() - start))


PyTorch 버전: 2.5.1+cu121
CUDA 사용 가능 여부: True
사용 중인 GPU 개수: 1
현재 선택된 GPU: 0
GPU 이름: NVIDIA GeForce RTX 4060 Laptop GPU
GPU 행렬 곱 소요 시간: 0.0793초


## 1-1. YOLOv2 학습을 위한 라벨 변환
YOLO 라벨 포맷(txt)은 한 줄에 객체 하나를 `class xc yc w h` 순서로 적으며, 중심 좌표(xc, yc)와 너비·높이(w, h)는 이미지 크기로 나누어 0~1로 정규화한 값이다.
라벨링 데이터(JSON)를 읽고, 같은 파일명(stem)을 가진 원천 데이터 이미지를 찾아 이미지마다 YOLO txt를 만든다.

In [2]:
# JSON → YOLO txt 변환 (집/나무/남자/여자 4클래스), 이미지 링크/복사까지
from pathlib import Path
import json, os, shutil
from tqdm import tqdm

# === Dataset root ===
ROOT = Path("266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축/01-1.정식개방데이터")

# === Fixed class indices (must match between labels and model) ===
CLASSES = {
    "tree": 0,
    "man": 1,
    "woman": 2,
    "house": 3,
}

# === Class key -> (bbox label name inside the JSON, class name used here) ===
# Both person classes share the same whole-body label name.
TARGET_KEYS = {
    "house": ("집전체", "house"),
    "tree": ("나무전체", "tree"),
    "man": ("사람전체", "man"),
    "woman": ("사람전체", "woman"),
}

# === Per-split folder layout ===
# origin: source images (TS_*/VS_* sub-folders)
# label:  annotation JSONs (TL_*/VL_* sub-folders)
# out:    where the YOLO images/ and labels/ folders are written
SPLITS = {
    split: {
        "origin": ROOT / split / "01.원천데이터",
        "label": ROOT / split / "02.라벨링데이터",
        "out": Path(out_dir),
        "origin_sub_prefix": origin_prefix,
        "label_sub_prefix": label_prefix,
    }
    for split, out_dir, origin_prefix, label_prefix in (
        ("Training", "yolo_training", "TS_", "TL_"),
        ("Validation", "yolo_validation", "VS_", "VL_"),
    )
}

# === Korean class names used as folder-name suffixes ===
KO_CLASS = {
    "house": "집",
    "tree": "나무",
    "man": "남자사람",
    "woman": "여자사람",
}

# === Speed / disk options for placing images in the output folder ===
USE_HARDLINK = True   # hard link instead of copy (needs the same filesystem)
USE_SYMLINK = False   # symbolic link (may need extra privileges on Windows)
SKIP_COPY = False     # True: do not link/copy images at all

def make_link_or_copy(src: Path, dst: Path):
    """Place ``src`` at ``dst`` as a hard link, symlink or copy.

    Does nothing when ``SKIP_COPY`` is set or ``dst`` already exists.
    The method follows ``USE_HARDLINK`` / ``USE_SYMLINK``; if the chosen
    method raises, a plain ``shutil.copy2`` is attempted instead.
    """
    if SKIP_COPY or dst.exists():
        return
    dst.parent.mkdir(parents=True, exist_ok=True)

    if USE_HARDLINK:
        place = os.link        # requires src and dst on the same filesystem
    elif USE_SYMLINK:
        place = os.symlink
    else:
        place = shutil.copy2

    try:
        place(src, dst)
    except Exception:
        # Best-effort fallback: a regular copy works across filesystems.
        shutil.copy2(src, dst)

def yolo_line(cls_idx, x, y, w, h, W, H):
    """Format one box as a YOLO label line.

    ``x, y, w, h`` describe the box by its top-left corner and size in
    pixels; ``W, H`` are the image width and height. The returned line is
    ``"class xc yc w h\\n"`` with the centre and size divided by the image
    size and printed with six decimals.
    """
    normalized = (
        (x + w / 2) / W,   # centre x
        (y + h / 2) / H,   # centre y
        w / W,             # width
        h / H,             # height
    )
    return f"{cls_idx} " + " ".join(f"{v:.6f}" for v in normalized) + "\n"

def parse_wh_from_json(meta):
    """Read ``(width, height)`` from ``meta["img_resolution"]``.

    The value is expected to look like ``"1280x1280"``. Using it avoids
    opening the image just to learn its size. Returns ``(None, None)``
    when the field is missing, empty, or cannot be parsed.
    """
    resolution = (meta or {}).get("img_resolution") or ""
    if "x" not in resolution:
        return None, None
    try:
        width_txt, height_txt = resolution.split("x")
        return int(width_txt), int(height_txt)
    except Exception:
        return None, None

def index_origin_images(origin_root: Path):
    """Scan ``origin_root`` recursively once and map file stem -> image path.

    Only ``.jpg`` / ``.jpeg`` / ``.png`` files (case-insensitive) are
    indexed. When a stem appears more than once, the first path seen wins.
    """
    image_exts = {".jpg", ".jpeg", ".png"}
    stem2path = {}
    for candidate in origin_root.rglob("*"):
        if candidate.suffix.lower() not in image_exts:
            continue
        if not candidate.is_file():
            continue
        if candidate.stem not in stem2path:
            stem2path[candidate.stem] = candidate
    return stem2path

# Convert every split: read each annotation JSON, keep the whole-object box
# of its class, and write one YOLO txt per image (plus a link/copy of the
# image) under the split's output folder.
summary = {}

# Korean class name (folder suffix / annotations.class value) -> class key.
KO_TO_CLASS = {ko: name for name, ko in KO_CLASS.items()}

for split_name, cfg in SPLITS.items():
    origin_root = cfg["origin"]
    label_root = cfg["label"]
    out_root = cfg["out"]
    out_img_dir = out_root / "images"
    out_lbl_dir = out_root / "labels"
    out_img_dir.mkdir(parents=True, exist_ok=True)
    out_lbl_dir.mkdir(parents=True, exist_ok=True)

    if not origin_root.exists() or not label_root.exists():
        print(f"[{split_name}] 경로 확인 필요 - origin:{origin_root.exists()} | label:{label_root.exists()}")
        continue

    # 1) Index the source images once (stem -> path).
    stem2path = index_origin_images(origin_root)

    # 2) Collect the annotation JSONs from the per-class label folders.
    label_prefix = cfg["label_sub_prefix"]
    json_files = []
    for cls in ["house", "man", "tree", "woman"]:
        label_dir = label_root / f"{label_prefix}{KO_CLASS[cls]}"
        if label_dir.exists():
            json_files += sorted(label_dir.glob("*.json"))
        else:
            print(f"[{split_name}] 라벨 폴더 없음: {label_dir}")

    # 3) Conversion loop.
    written, skip_no_label, skip_no_img, skip_no_wh = 0, 0, 0, 0
    skip_bad_json = 0

    for js in tqdm(json_files, desc=f"{split_name} JSON→YOLO", unit="file"):
        try:
            data = json.loads(js.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable or malformed JSON (decode errors are ValueErrors).
            skip_bad_json += 1
            continue

        annotations = data.get("annotations") or {}

        # (a) Decide the class from the folder the JSON came from.
        # Folder names carry the split prefix (e.g. "TL_집"), so strip it
        # before the lookup; comparing raw path components against the bare
        # class name can never match.
        folder = js.parent.name
        if folder.startswith(label_prefix):
            folder = folder[len(label_prefix):]
        class_key = KO_TO_CLASS.get(folder)
        if class_key is None:
            # Fallback: use the class recorded inside the annotation.
            class_key = KO_TO_CLASS.get(annotations.get("class", ""))
        if class_key is None:
            continue
        key_label, class_name = TARGET_KEYS[class_key]
        cls_idx = CLASSES[class_name]

        # (b) Keep only the whole-object boxes for this class.
        boxes = [b for b in annotations.get("bbox", []) if b.get("label") == key_label]
        if not boxes:
            skip_no_label += 1
            continue

        # (c) Locate the image that shares the JSON's stem.
        img_path = stem2path.get(js.stem)
        if img_path is None:
            skip_no_img += 1
            continue

        # (d) Image size: prefer the resolution string stored in the JSON.
        W, H = parse_wh_from_json(data.get("meta") or {})
        if not W or not H:
            # No usable resolution: open the image (slower but compatible).
            try:
                from PIL import Image
                with Image.open(img_path) as im:
                    W, H = im.size
            except Exception:
                skip_no_wh += 1
                continue

        # (e) Build the YOLO lines, clamping each box to the image bounds.
        lines = []
        for b in boxes:
            x = float(b["x"]); y = float(b["y"])
            w = float(b["w"]); h = float(b["h"])
            x = max(0, min(x, W - 1))
            y = max(0, min(y, H - 1))
            w = max(1, min(w, W - x))
            h = max(1, min(h, H - y))
            lines.append(yolo_line(cls_idx, x, y, w, h, W, H))
        if not lines:
            skip_no_label += 1
            continue

        # (f) Save: image link/copy plus the label txt.
        out_img = out_img_dir / img_path.name
        out_lbl = out_lbl_dir / (img_path.stem + ".txt")
        make_link_or_copy(img_path, out_img)
        out_lbl.write_text("".join(lines), encoding="utf-8")
        written += 1

    summary[split_name] = dict(
        written=written,
        skip_no_label=skip_no_label,
        skip_no_img=skip_no_img,
        skip_no_wh=skip_no_wh,
        skip_bad_json=skip_bad_json,
    )
    print(f"\n[{split_name}] 변환: {written}개 | 스킵(라벨없음): {skip_no_label} | 스킵(이미지없음): {skip_no_img} | 스킵(해상도확인실패): {skip_no_wh}")
    if skip_bad_json:
        # Previously these files were dropped without any trace.
        print(f"[{split_name}] 스킵(JSON 읽기 실패): {skip_bad_json}")

print("\n== 전체 요약 ==")
for k, v in summary.items():
    print(f"{k}: {v['written']}개 (no_label {v['skip_no_label']}, no_img {v['skip_no_img']}, no_wh {v['skip_no_wh']})")
print("✅ 완료: yolo_training/, yolo_validation/ 에 images/ + labels/ 생성")


Training JSON→YOLO: 100%|██████████| 44800/44800 [08:29<00:00, 87.86file/s] 



[Training] 변환: 44800개 | 스킵(라벨없음): 0 | 스킵(이미지없음): 0 | 스킵(해상도확인실패): 0


Validation JSON→YOLO: 100%|██████████| 5600/5600 [01:05<00:00, 85.15file/s] 


[Validation] 변환: 5600개 | 스킵(라벨없음): 0 | 스킵(이미지없음): 0 | 스킵(해상도확인실패): 0

== 전체 요약 ==
Training: 44800개 (no_label 0, no_img 0, no_wh 0)
Validation: 5600개 (no_label 0, no_img 0, no_wh 0)
✅ 완료: yolo_training/, yolo_validation/ 에 images/ + labels/ 생성





## 1-2. 환경/경로/클래스 설정 + 하이퍼파라미터

In [16]:
# ===== 셀 1: 설정/하이퍼파라미터 =====
import os, math, json, random, time
from pathlib import Path
from typing import List, Tuple

import cv2
import numpy as np
from PIL import Image, ImageDraw

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.ops import nms

# Reproducibility: seed every RNG used by the notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Paths to the converted YOLO data.
TRAIN_ROOT = Path("yolo_training")
VAL_ROOT = Path("yolo_validation")
IMG_DIRNAME = "images"
LBL_DIRNAME = "labels"

# Classes.
CLASS2IDX = {"tree": 0, "man": 1, "woman": 2, "house": 3}
IDX2CLASS = {idx: name for name, idx in CLASS2IDX.items()}
NUM_CLASSES = len(CLASS2IDX)

# Network input size and output grid.
IMG_SIZE = 416
GRID_SIZE = 13
STRIDE = IMG_SIZE // GRID_SIZE   # pixels covered by one grid cell

# Number of anchor boxes per grid cell.
NUM_ANCHORS = 5

# Hyper-parameters.
BATCH_SIZE = 16
EPOCHS = 30              # start with 5-10 for a quick check
LR = 1e-3
WEIGHT_DECAY = 5e-4
WARMUP_EPOCHS = 2
VAL_EVERY = 3            # validation period in epochs (saves time)

LAMBDA_COORD = 5.0
LAMBDA_NOOBJ = 0.5
IGNORE_IOU = 0.5

# Device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

# Speed options.
torch.backends.cudnn.benchmark = True   # faster when the input size is fixed
cv2.setNumThreads(0)                    # keep OpenCV from competing with loader workers


DEVICE: cuda


In [4]:
# ===== 셀 2: Dataset/전처리 + DataLoader =====
from typing import Optional

def letterbox(im: np.ndarray, new_size=416, color=(114,114,114)):
    """Resize ``im`` to fit a ``new_size`` square, keeping its aspect ratio.

    The image is scaled so its longer side equals ``new_size`` and the
    remaining area is filled with ``color``, split evenly on both sides.

    Returns ``(padded_image, scale, left_pad, top_pad)``.
    """
    src_h, src_w = im.shape[:2]
    scale = min(new_size / src_h, new_size / src_w)
    dst_h = int(round(src_h * scale))
    dst_w = int(round(src_w * scale))
    resized = cv2.resize(im, (dst_w, dst_h), interpolation=cv2.INTER_LINEAR)

    # Split the padding; any odd pixel goes to the bottom/right edge.
    pad_h = new_size - dst_h
    pad_w = new_size - dst_w
    top = pad_h // 2
    left = pad_w // 2
    padded = cv2.copyMakeBorder(
        resized, top, pad_h - top, left, pad_w - left,
        cv2.BORDER_CONSTANT, value=color,
    )
    return padded, scale, left, top

def load_labels(txt_path: Path):
    """Read a YOLO label file into a list of ``[cls, xc, yc, w, h]`` rows.

    Lines that do not have exactly five whitespace-separated fields are
    skipped. A missing file yields an empty list.
    """
    if not txt_path.exists():
        return []
    rows = []
    with open(txt_path, "r", encoding="utf-8") as f:
        for raw in f:
            fields = raw.strip().split()
            if len(fields) != 5:
                continue
            cls_txt, *coord_txts = fields
            rows.append([int(cls_txt), *map(float, coord_txts)])
    return rows

# Robust image loader: cv2.imread -> cv2.imdecode -> PIL.
def imread_robust(path: Path) -> Optional[np.ndarray]:
    """Load an image as a BGR array, trying three decoders in turn.

    1. ``cv2.imread`` on the path string.
    2. ``np.fromfile`` + ``cv2.imdecode`` (reads the bytes in Python first).
    3. PIL, converted to RGB and then flipped to BGR.

    Returns ``None`` when every attempt fails.
    """
    img = cv2.imread(str(path))
    if img is not None:
        return img
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
        img = cv2.imdecode(data, cv2.IMREAD_COLOR)
        if img is not None:
            return img
    except Exception:
        pass
    try:
        # The context manager closes the file handle deterministically.
        with Image.open(path) as pil_img:
            rgb = np.array(pil_img.convert("RGB"))
        return rgb[:, :, ::-1].copy()  # RGB -> BGR
    except Exception:
        return None

class YOLODataset(Dataset):
    """Images plus YOLO-format labels read from ``root/images`` and ``root/labels``.

    Each item is ``(file_name, image, labels)``:
      * ``image``  - float32 tensor ``[3, img_size, img_size]``, RGB in 0..1,
        letterboxed to a square.
      * ``labels`` - float32 tensor ``[N, 5]`` of ``(cls, xc, yc, w, h)``,
        normalised to the letterboxed square.

    ``augment`` is stored but no augmentation is applied by this class.
    """

    def __init__(self, root: Path, img_size=416, augment=False):
        self.img_dir = root / IMG_DIRNAME
        self.lbl_dir = root / LBL_DIRNAME
        self.img_paths = sorted([p for p in self.img_dir.glob("*") if p.suffix.lower() in [".jpg",".jpeg",".png"]])
        self.img_size = img_size
        self.augment = augment

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        lbl_path = (self.lbl_dir / img_path.stem).with_suffix(".txt")

        img = imread_robust(img_path)
        if img is None:
            raise FileNotFoundError(f"[imread_robust 실패] {img_path}")

        orig_h, orig_w = img.shape[:2]
        img, scale, padw, padh = letterbox(img, self.img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        labels = load_labels(lbl_path)
        labels = np.array(labels, dtype=np.float32) if labels else np.zeros((0,5), dtype=np.float32)

        if len(labels):
            # The txt labels are normalised to the ORIGINAL image. Letterboxing
            # scales the image and pads it, so map the boxes into the padded
            # square. For square images this leaves the values unchanged.
            labels[:, 1] = (labels[:, 1] * orig_w * scale + padw) / self.img_size
            labels[:, 2] = (labels[:, 2] * orig_h * scale + padh) / self.img_size
            labels[:, 3] = labels[:, 3] * orig_w * scale / self.img_size
            labels[:, 4] = labels[:, 4] * orig_h * scale / self.img_size

        img = torch.from_numpy(img).permute(2,0,1)
        labels = torch.from_numpy(labels)
        return img_path.name, img, labels

def collate_fn(batch):
    """Collate ``(name, image, labels)`` samples into one batch.

    Images are stacked into a single tensor. Names and label tensors stay
    as tuples because the number of boxes differs per image.
    """
    names, img_tensors, label_tensors = zip(*batch)
    stacked = torch.stack(img_tensors, 0)
    return names, stacked, label_tensors

# Datasets for both splits.
train_ds = YOLODataset(TRAIN_ROOT, img_size=IMG_SIZE, augment=True)
val_ds = YOLODataset(VAL_ROOT, img_size=IMG_SIZE, augment=False)

# DataLoader workers: half of the CPU count, clamped to 2..8
# (2-4 is recommended on Windows).
NUM_WORKERS = max(2, min(8, (os.cpu_count() or 8) // 2))

# Options shared by both loaders.
_loader_common = dict(
    batch_size=BATCH_SIZE,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    collate_fn=collate_fn,
)
train_dl = DataLoader(train_ds, shuffle=True, num_workers=NUM_WORKERS, **_loader_common)
val_dl = DataLoader(val_ds, shuffle=False, num_workers=max(2, NUM_WORKERS // 2), **_loader_common)

len(train_ds), len(val_ds)


(44800, 5600)

In [5]:
# ===== 셀 3: 빠른 앵커 계산 (라벨 txt만, 멀티스레드) =====
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

LABELS_DIR = TRAIN_ROOT / LBL_DIRNAME   # yolo_training/labels
IMG_SIZE_  = IMG_SIZE
K          = NUM_ANCHORS
MAX_FILES  = 30000       # cap on sampled label files (speed)
MAX_BOXES  = 300000      # cap on collected boxes (speed / memory)
WORKERS    = min(32, (os.cpu_count() or 8) * 2)

# iterdir() order depends on the filesystem. Sort before shuffling so the
# seeded shuffle picks the same sample of files on every run.
all_txts = sorted(p for p in LABELS_DIR.iterdir() if p.suffix.lower() == ".txt")
random.shuffle(all_txts)
txts = all_txts[:MAX_FILES]

def parse_wh(txt_path: Path):
    """Collect ``(w, h)`` of every box in one label file, in pixels.

    The normalised width/height fields are multiplied by ``IMG_SIZE_``.
    Only lines with five fields and a positive size are kept. Any error
    while reading stops the scan and returns what was gathered so far.
    """
    sizes = []
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            for raw in f:
                fields = raw.strip().split()
                if len(fields) != 5:
                    continue
                box_w = float(fields[3]) * IMG_SIZE_
                box_h = float(fields[4]) * IMG_SIZE_
                if box_w > 0 and box_h > 0:
                    sizes.append((box_w, box_h))
    except Exception:
        pass
    return sizes

# Parse the label files in parallel. Executor.map yields results in
# submission order, so the row order of `wh` (and therefore the seeded
# k-means++ initialisation that indexes into it) is the same on every run.
wh_list = []
with ThreadPoolExecutor(max_workers=WORKERS) as ex:
    for file_boxes in ex.map(parse_wh, txts):
        wh_list.extend(file_boxes)
        if len(wh_list) >= MAX_BOXES:
            break

wh = np.array(wh_list, dtype=np.float32)
print(f"수집한 w,h 개수: {len(wh)} (파일 {len(txts)}/{len(all_txts)})")

def iou_wh(wh1, wh2):
    """Pairwise IoU of boxes described only by ``(w, h)``.

    Every box is treated as if it shared the same corner, so the overlap
    is ``min(w) * min(h)``. ``wh1`` is ``[N, 2]`` and ``wh2`` is ``[M, 2]``;
    the result is ``[N, M]``.
    """
    w_a, h_a = wh1[:, 0:1], wh1[:, 1:2]            # [N, 1]
    w_b, h_b = wh2[None, :, 0], wh2[None, :, 1]    # [1, M]
    overlap = np.minimum(w_a, w_b) * np.minimum(h_a, h_b)
    union = w_a * h_a + w_b * h_b - overlap + 1e-9  # epsilon avoids 0/0
    return overlap / union

def kmeanspp_init(data, k):
    """Pick ``k`` initial centroids from ``data`` with k-means++ seeding.

    The first centroid is drawn uniformly. Each later one is drawn with
    probability proportional to the squared Euclidean distance to the
    nearest centroid chosen so far. Draws use the global ``np.random`` state.

    Returns an array of shape ``[k, data.shape[1]]`` made of rows of ``data``.
    """
    centroids = [data[np.random.randint(len(data))]]
    d2 = None
    for _ in range(1, k):
        # Only the newest centroid can shrink the nearest-centroid distance,
        # so keep a running minimum instead of rescanning every centroid.
        newest = np.sum((data - centroids[-1]) ** 2, axis=1, dtype=np.float64)
        d2 = newest if d2 is None else np.minimum(d2, newest)

        total = d2.sum()
        if total > 0 and np.isfinite(total):
            # Exact normalisation: np.random.choice rejects probabilities
            # that do not sum to 1.
            idx = np.random.choice(len(data), p=d2 / total)
        else:
            # Every point coincides with a centroid: fall back to a uniform pick.
            idx = np.random.randint(len(data))
        centroids.append(data[idx])
    return np.stack(centroids, axis=0)

if len(wh) >= K:
    # k-medians clustering of (w, h) with IoU as the similarity measure.
    centroids = kmeanspp_init(wh, K)
    for _ in range(25):  # shortened iteration budget (usually enough)
        iou = iou_wh(wh, centroids)
        clusters = iou.argmax(axis=1)
        moved = False
        next_centroids = centroids.copy()
        for ki in range(K):
            members = wh[clusters == ki]
            if len(members) == 0:
                # Empty cluster: keep its previous centroid.
                continue
            med = np.median(members, axis=0)
            if np.any(np.abs(med - centroids[ki]) > 1e-3):
                moved = True
            next_centroids[ki] = med
        centroids = next_centroids
        if not moved:
            break
    # Order the anchors from smallest to largest area.
    order = np.argsort(centroids.prod(axis=1))
    anchors = centroids[order]
else:
    print("라벨이 부족해서 기본 앵커 사용")
    anchors = np.array([[12,16],[19,36],[40,28],[36,75],[76,55]], dtype=np.float32)

print("앵커(픽셀, WxH):\n", anchors)

# Tensor used during training.
ANCHORS = torch.tensor(anchors, dtype=torch.float32).to(DEVICE)

# (선택) 캐시
# np.save("anchors.npy", anchors)


수집한 w,h 개수: 30001 (파일 30000/44800)
앵커(픽셀, WxH):
 [[ 87.1    143.9751]
 [117.3249 228.1502]
 [163.1502 280.8   ]
 [219.3751 333.1249]
 [297.7    380.9   ]]


In [6]:
# ===== 셀 4: YOLOv2 간단 모델 =====
def conv_bn_lrelu(c_in, c_out, k=3, s=1, p=1):
    """Return a Conv2d (no bias) -> BatchNorm2d -> LeakyReLU(0.1) block.

    ``k``, ``s`` and ``p`` are the convolution's kernel size, stride and
    padding.
    """
    conv = nn.Conv2d(c_in, c_out, kernel_size=k, stride=s, padding=p, bias=False)
    norm = nn.BatchNorm2d(c_out)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, norm, act)

class YOLOv2Tiny(nn.Module):
    """Small YOLOv2-style detector: a conv backbone plus a 1x1 prediction head.

    The backbone contains five 2x2 max-pools, so the spatial size shrinks
    by a factor of 32 (416 -> 13). The head emits
    ``num_anchors * (5 + num_classes)`` channels per grid cell.
    """

    def __init__(self, num_classes=4, num_anchors=5):
        super().__init__()
        widths = (32, 64, 128, 256, 512, 1024)

        # Stem: two conv + pool stages (416 -> 208 -> 104).
        layers = [
            conv_bn_lrelu(3, widths[0], 3, 1, 1),
            nn.MaxPool2d(2, 2),
            conv_bn_lrelu(widths[0], widths[1], 3, 1, 1),
            nn.MaxPool2d(2, 2),
        ]
        # Four 3x3 -> 1x1 bottleneck -> 3x3 stages; every stage except the
        # last is followed by a pool (104 -> 52 -> 26 -> 13).
        stages = list(zip(widths[1:-1], widths[2:]))
        for stage_idx, (narrow, wide) in enumerate(stages):
            layers.append(conv_bn_lrelu(narrow, wide, 3, 1, 1))
            layers.append(conv_bn_lrelu(wide, narrow, 1, 1, 0))
            layers.append(conv_bn_lrelu(narrow, wide, 3, 1, 1))
            if stage_idx < len(stages) - 1:
                layers.append(nn.MaxPool2d(2, 2))
        self.layer1 = nn.Sequential(*layers)

        # Per-cell predictions: A anchors x (tx, ty, tw, th, obj, classes).
        self.head = nn.Conv2d(widths[-1], num_anchors * (5 + num_classes), 1, 1, 0)

        self.num_classes = num_classes
        self.num_anchors = num_anchors

    def forward(self, x):
        """Return raw predictions of shape ``[B, A*(5+C), H/32, W/32]``."""
        features = self.layer1(x)
        return self.head(features)

# Build the detector on the target device and show its size in millions of parameters.
model = YOLOv2Tiny(num_classes=NUM_CLASSES, num_anchors=NUM_ANCHORS)
model = model.to(DEVICE)
sum(map(torch.numel, model.parameters())) / 1e6, "M params"


(13.305293, 'M params')

In [7]:
# ===== 셀 5: 타깃 할당 & 손실 (with logits) =====
def build_targets(labels_list, anchors, S=13, num_classes=4):
    """Build the dense YOLOv2 training target on ``DEVICE``.

    Args:
        labels_list: one tensor ``[N, 5]`` per image with rows
            ``(cls, xc, yc, w, h)`` normalised to 0..1.
        anchors: tensor ``[A, 2]`` of anchor ``(w, h)`` in pixels.
        S: grid size.
        num_classes: number of classes.

    Returns:
        Tensor ``[B, A, S, S, 5 + num_classes]`` holding
        ``(tx, ty, tw, th, objectness, one-hot class)`` in the cell and
        anchor each box is assigned to, and zeros elsewhere.
    """
    B = len(labels_list)
    A = anchors.size(0)
    target = torch.zeros(B, A, S, S, 5+num_classes, device=DEVICE)
    for b_idx, labels in enumerate(labels_list):
        if labels is None or len(labels)==0:
            continue
        labels = labels.to(DEVICE).clone()
        labels[:,1:] *= IMG_SIZE   # normalised -> pixels
        for cls, xc, yc, bw, bh in labels:
            # Grid cell containing the box centre, clamped to the grid.
            gi = int(xc // STRIDE); gj = int(yc // STRIDE)
            gi = min(max(gi,0), S-1); gj = min(max(gj,0), S-1)

            # Pick the anchor with the best width/height-only IoU.
            box = torch.tensor([bw, bh], device=DEVICE)[None,:]
            inter = torch.min(box[:,0], anchors[:,0]) * torch.min(box[:,1], anchors[:,1])
            area1 = box[:,0]*box[:,1]; area2 = anchors[:,0]*anchors[:,1]
            iou = inter / (area1 + area2 - inter + 1e-16)
            a = torch.argmax(iou).item()

            # Offsets inside the cell and log-space size relative to the anchor.
            tx = (xc / STRIDE) - gi
            ty = (yc / STRIDE) - gj
            tw = torch.log(bw / anchors[a,0] + 1e-16)
            th = torch.log(bh / anchors[a,1] + 1e-16)
            target[b_idx, a, gj, gi, 0] = tx
            target[b_idx, a, gj, gi, 1] = ty
            target[b_idx, a, gj, gi, 2] = tw
            target[b_idx, a, gj, gi, 3] = th
            target[b_idx, a, gj, gi, 4] = 1.0

            # Same guard as build_targets_cpu: an out-of-range class id must
            # not index outside the class slots (it would crash or overwrite
            # the box fields).
            cls_idx = int(cls.item())
            if 0 <= cls_idx < num_classes:
                target[b_idx, a, gj, gi, 5 + cls_idx] = 1.0
    return target


class YOLOv2Loss(nn.Module):
    """YOLOv2-style loss on raw head outputs (logits).

    Terms, all summed and then divided by the batch size:
      * coord  - MSE on sigmoid(x, y) and raw log-space (w, h) in object
        cells, scaled by ``lambda_coord``.
      * cls    - BCE-with-logits on the class logits in object cells.
      * obj    - BCE-with-logits on objectness in object cells.
      * noobj  - BCE-with-logits on objectness in background cells, scaled
        by ``lambda_noobj``. Background predictions whose best IoU with any
        ground-truth box of the image exceeds ``ignore_iou`` are excluded.

    ``anchors`` is a tensor ``[A, 2]`` of ``(w, h)`` in pixels and must live
    on the same device as the predictions.
    """

    def __init__(self, anchors, num_classes=4, lambda_coord=5.0, lambda_noobj=0.5, ignore_iou=0.5):
        super().__init__()
        self.anchors = anchors
        self.num_classes = num_classes
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.ignore_iou = ignore_iou

    @staticmethod
    def _to_xyxy(cx, cy, w, h):
        """Convert centre/size boxes to corner form ``(x1, y1, x2, y2)``."""
        return cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2

    def _ignore_mask(self, px, py, pw, ph, tx, ty, tw, th, obj_mask):
        """Mark predictions that overlap a real ground-truth box.

        Every prediction is decoded to pixels and compared with all
        ground-truth boxes of the same image (the cells where ``obj_mask``
        is set). A prediction is flagged when its best IoU exceeds
        ``self.ignore_iou``. Cells without an object hold all-zero targets,
        so they are never decoded as ground truth.

        Returns a bool tensor shaped like ``obj_mask``.
        """
        B, A, S, _ = obj_mask.shape
        device = obj_mask.device
        cell_y, cell_x = torch.meshgrid(
            torch.arange(S, device=device), torch.arange(S, device=device), indexing='ij')
        anchor_w = self.anchors[:, 0].view(A, 1, 1)
        anchor_h = self.anchors[:, 1].view(A, 1, 1)

        ignore_mask = torch.zeros_like(obj_mask)
        with torch.no_grad():  # the mask only selects cells; it carries no gradient
            p_x1, p_y1, p_x2, p_y2 = self._to_xyxy(
                (px + cell_x) * STRIDE, (py + cell_y) * STRIDE,
                torch.exp(pw) * anchor_w, torch.exp(ph) * anchor_h)
            g_x1, g_y1, g_x2, g_y2 = self._to_xyxy(
                (tx + cell_x) * STRIDE, (ty + cell_y) * STRIDE,
                torch.exp(tw) * anchor_w, torch.exp(th) * anchor_h)
            area_p = (p_x2 - p_x1) * (p_y2 - p_y1)

            for b in range(B):
                cells = obj_mask[b]
                if not cells.any():
                    continue
                # Real ground-truth boxes of this image, each shaped [G].
                bx1, by1 = g_x1[b][cells], g_y1[b][cells]
                bx2, by2 = g_x2[b][cells], g_y2[b][cells]
                area_g = (bx2 - bx1) * (by2 - by1)

                # Broadcast predictions [A,S,S,1] against ground truth [G].
                inter_w = (torch.minimum(p_x2[b].unsqueeze(-1), bx2)
                           - torch.maximum(p_x1[b].unsqueeze(-1), bx1)).clamp(min=0)
                inter_h = (torch.minimum(p_y2[b].unsqueeze(-1), by2)
                           - torch.maximum(p_y1[b].unsqueeze(-1), by1)).clamp(min=0)
                inter = inter_w * inter_h
                iou = inter / (area_p[b].unsqueeze(-1) + area_g - inter + 1e-16)
                ignore_mask[b] = iou.amax(dim=-1) > self.ignore_iou
        return ignore_mask

    def forward(self, pred, target):
        """Compute the loss.

        Args:
            pred: raw head output ``[B, A*(5+C), S, S]``.
            target: dense target ``[B, A, S, S, 5+C]``.

        Returns:
            ``(loss, stats)`` - the scalar loss tensor and a dict of Python
            floats with the per-term values (already divided by batch size).
        """
        B, _, S, _ = pred.shape
        A = self.anchors.size(0)
        C = self.num_classes
        device = pred.device

        # [B, A*(5+C), S, S] -> [B, A, S, S, 5+C]
        pred = pred.reshape(B, A, 5 + C, S, S).permute(0, 1, 3, 4, 2)

        # Centre offsets go through a sigmoid; width/height stay in log space.
        px = torch.sigmoid(pred[..., 0])
        py = torch.sigmoid(pred[..., 1])
        pw = pred[..., 2]
        ph = pred[..., 3]

        # Objectness and class scores stay as logits for BCE-with-logits.
        lo = pred[..., 4]
        pcls_logits = pred[..., 5:]

        # Targets.
        tx, ty, tw, th = target[..., 0], target[..., 1], target[..., 2], target[..., 3]
        tobj, tcls = target[..., 4], target[..., 5:]
        obj_mask = tobj.bool()
        has_obj = bool(obj_mask.any())
        zero = torch.tensor(0., device=device)

        if has_obj:
            loss_xywh = (F.mse_loss(px[obj_mask], tx[obj_mask], reduction='sum')
                         + F.mse_loss(py[obj_mask], ty[obj_mask], reduction='sum')
                         + F.mse_loss(pw[obj_mask], tw[obj_mask], reduction='sum')
                         + F.mse_loss(ph[obj_mask], th[obj_mask], reduction='sum'))
            loss_cls = F.binary_cross_entropy_with_logits(
                pcls_logits[obj_mask], tcls[obj_mask], reduction='sum')
            loss_obj = F.binary_cross_entropy_with_logits(
                lo[obj_mask], tobj[obj_mask], reduction='sum')
        else:
            loss_xywh = zero
            loss_cls = zero
            loss_obj = zero
        loss_coord = self.lambda_coord * loss_xywh

        # Background cells that already overlap a real object are not penalised.
        ignore_mask = self._ignore_mask(px, py, pw, ph, tx, ty, tw, th, obj_mask)
        noobj_mask = (~obj_mask) & (~ignore_mask)
        has_noobj = bool(noobj_mask.any())
        if has_noobj:
            loss_noobj = self.lambda_noobj * F.binary_cross_entropy_with_logits(
                lo[noobj_mask], tobj[noobj_mask], reduction='sum')
        else:
            loss_noobj = zero

        loss = (loss_coord + loss_cls + loss_obj + loss_noobj) / max(1, B)
        stats = dict(loss=loss.item(),
                     coord=loss_coord.item() / max(1, B),
                     cls=loss_cls.item() / max(1, B) if has_obj else 0.0,
                     obj=loss_obj.item() / max(1, B) if has_obj else 0.0,
                     noobj=loss_noobj.item() / max(1, B) if has_noobj else 0.0)
        return loss, stats


In [8]:
# ===== 샘플 3개만 강제 로드해보기 (Dataset만 테스트) =====
import time, traceback

def probe_sample(ds, idx):
    """Load ``ds[idx]`` once and print its name, shapes and load time.

    The fetch happens outside the ``try`` on purpose: if ``__getitem__``
    hangs or raises, the problem is inside the dataset itself. Only
    failures while unpacking or describing the sample are caught and
    printed.
    """
    print(f"\n[PROBE] idx={idx}")
    started = time.time()
    sample = ds[idx]
    elapsed = time.time() - started
    try:
        name, img, labels = sample
        print(f"  name={name} | img.shape={getattr(img,'shape',None)} | labels_type={type(labels)}")
        if hasattr(labels, 'shape'):
            print(f"  labels.shape={labels.shape}")
        elif isinstance(labels, (list, tuple)):
            print(f"  labels_len={len(labels)} (first={labels[0].shape if len(labels)>0 and hasattr(labels[0],'shape') else type(labels[0])})")
    except Exception as e:
        print("  unpack 실패:", e)
    print(f"  load time: {elapsed:.3f}s")

# Probe the first two samples and the middle one (train_ds must already exist).
for i in (0, 1, len(train_ds) // 2):
    try:
        probe_sample(train_ds, i)
    except Exception as e:
        print("[ERROR in sample]", e)
        traceback.print_exc()



[PROBE] idx=0
  name=나무_10_남_00013.jpg | img.shape=torch.Size([3, 416, 416]) | labels_type=<class 'torch.Tensor'>
  labels.shape=torch.Size([1, 5])
  load time: 0.013s

[PROBE] idx=1
  name=나무_10_남_00022.jpg | img.shape=torch.Size([3, 416, 416]) | labels_type=<class 'torch.Tensor'>
  labels.shape=torch.Size([1, 5])
  load time: 0.011s

[PROBE] idx=22400
  name=여자사람_10_남_00010.jpg | img.shape=torch.Size([3, 416, 416]) | labels_type=<class 'torch.Tensor'>
  labels.shape=torch.Size([1, 5])
  load time: 0.017s


디버깅

In [9]:
# ===== 안전 collate_fn =====
from torch.utils.data import DataLoader

def yolo_collate(batch):
    """Collate ``(name, image, labels)`` samples, dropping ``None`` entries.

    Returns ``(names, images, labels_list)``: a list of names, one stacked
    image tensor, and a list of per-image label tensors.
    """
    kept = [item for item in batch if item is not None]
    names = [name for name, _, _ in kept]
    imgs = [image for _, image, _ in kept]
    labels_list = [label for _, _, label in kept]
    return names, torch.stack(imgs, dim=0), labels_list

# Pin host memory only when batches are going to a CUDA device.
DL_PIN = (DEVICE == 'cuda')

# Replace the earlier loaders with single-process ones (num_workers=0).
_safe_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=0,
    pin_memory=DL_PIN,
    collate_fn=yolo_collate,
    persistent_workers=False,
    timeout=0,
)
train_dl = DataLoader(train_ds, shuffle=True, **_safe_loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **_safe_loader_kwargs)

# Assemble one batch right away to confirm the loader works.
b = next(iter(train_dl))
print("[BATCH TEST] OK:", len(b[0]), b[1].shape, type(b[2]), len(b[2]))


[BATCH TEST] OK: 16 torch.Size([16, 3, 416, 416]) <class 'list'> 16


In [10]:
# ===== 안전 collate_fn =====
from torch.utils.data import DataLoader

def yolo_collate(batch):
    """Batch ``(name, image, labels)`` samples while skipping ``None`` items.

    Images are stacked along a new first dimension; names and label
    tensors are returned as plain lists.
    """
    names, imgs, labels_list = [], [], []
    for sample in filter(lambda s: s is not None, batch):
        sample_name, sample_img, sample_labels = sample
        names.append(sample_name)
        imgs.append(sample_img)
        labels_list.append(sample_labels)
    stacked = torch.stack(imgs, dim=0)
    return names, stacked, labels_list

# Pinned memory is only useful when copying batches to a CUDA device.
DL_PIN = (DEVICE == 'cuda')


def _make_safe_loader(dataset, shuffle):
    """Single-process DataLoader (num_workers=0) using yolo_collate."""
    return DataLoader(
        dataset, batch_size=BATCH_SIZE, shuffle=shuffle,
        num_workers=0, pin_memory=DL_PIN,
        collate_fn=yolo_collate, persistent_workers=False, timeout=0,
    )


# These loaders replace the ones created earlier.
train_dl = _make_safe_loader(train_ds, True)
val_dl = _make_safe_loader(val_ds, False)

# Pull one batch immediately to verify batch assembly.
b = next(iter(train_dl))
print("[BATCH TEST] OK:", len(b[0]), b[1].shape, type(b[2]), len(b[2]))


[BATCH TEST] OK: 16 torch.Size([16, 3, 416, 416]) <class 'list'> 16


In [11]:
# ===== 셀 5-CPU: 타깃 생성 (CPU) =====
import math
import torch

# Keep one CPU copy of the anchors so target building never touches the GPU.
ANCHORS_CPU = ANCHORS.detach().cpu()

def build_targets_cpu(labels_list, anchors_cpu, S=13, num_classes=4):
    """Build the dense YOLOv2 training target entirely on the CPU.

    Args:
        labels_list: one tensor ``[N, 5]`` per image with rows
            ``(cls, xc, yc, w, h)``, all normalised to 0..1.
        anchors_cpu: tensor ``[A, 2]`` of anchor ``(w, h)`` in pixels.
        S: grid size.
        num_classes: number of classes.

    Returns:
        CPU tensor ``[B, A, S, S, 5 + num_classes]``.
    """
    batch_size = len(labels_list)
    num_anchors = anchors_cpu.size(0)
    target = torch.zeros(batch_size, num_anchors, S, S, 5 + num_classes)  # created on CPU

    anchor_w = anchors_cpu[:, 0]  # (A,)
    anchor_h = anchors_cpu[:, 1]  # (A,)
    anchor_area = anchor_w * anchor_h

    for b_idx, labels in enumerate(labels_list):
        if labels is None or len(labels) == 0:
            continue

        # Force CPU and convert the normalised coordinates to pixels.
        boxes = labels.detach().cpu().clone()
        boxes[:, 1:] *= IMG_SIZE

        for row in boxes:
            cls = int(row[0].item())
            xc, yc, bw, bh = (float(v) for v in row[1:].tolist())

            # Grid cell containing the box centre, clamped to the grid.
            gi = min(max(int(xc // STRIDE), 0), S - 1)
            gj = min(max(int(yc // STRIDE), 0), S - 1)

            # Choose the anchor by width/height-only IoU; the centre is used
            # only for the cell assignment.
            box_w = torch.tensor(bw)
            box_h = torch.tensor(bh)
            inter = torch.minimum(box_w, anchor_w) * torch.minimum(box_h, anchor_h)
            iou = inter / (box_w * box_h + anchor_area - inter + 1e-16)
            a = int(torch.argmax(iou))

            # Offsets inside the cell and log-space size relative to the anchor.
            cell = target[b_idx, a, gj, gi]
            cell[0] = (xc / STRIDE) - gi
            cell[1] = (yc / STRIDE) - gj
            cell[2] = math.log(bw / float(anchors_cpu[a, 0]) + 1e-16)
            cell[3] = math.log(bh / float(anchors_cpu[a, 1]) + 1e-16)
            cell[4] = 1.0
            if 0 <= cls < num_classes:
                cell[5 + cls] = 1.0

    return target

# (Optional) Guard against mistakes: route any remaining build_targets call
# to the CPU implementation.
def build_targets(labels_list, anchors, S=13, num_classes=4):
    """Drop-in replacement for the GPU ``build_targets``.

    ``anchors`` is accepted for signature compatibility but ignored; the
    cached ``ANCHORS_CPU`` copy is always used. ``S`` and ``num_classes``
    keep the defaults of the original function so two-argument calls
    still work.
    """
    return build_targets_cpu(labels_list, ANCHORS_CPU, S=S, num_classes=num_classes)


In [12]:
# Smoke test: fetch one batch, move it to the device and build its targets.
names, imgs, labels_list = next(iter(train_dl))
imgs = imgs.to(DEVICE, non_blocking=True)
target = build_targets_cpu(labels_list, ANCHORS_CPU, S=GRID_SIZE, num_classes=NUM_CLASSES)
target = target.to(DEVICE, non_blocking=True)


In [13]:
import time
from torch import amp

scaler = amp.GradScaler(enabled=(DEVICE=='cuda'))

def train_one_epoch(model, loader, optimizer):
    """Train for one epoch and print a per-stage timing breakdown for the first batches.

    Args:
        model: network being trained; called as ``model(imgs)``.
        loader: iterable yielding ``(names, imgs, labels_list)`` batches.
        optimizer: optimizer stepped once per batch through the module-level ``scaler``.

    Returns:
        dict with the per-batch mean of "loss", "coord", "cls", "obj" and
        "noobj", plus the number of batches under "n".
    """
    model.train()
    metric_keys = ("loss","coord","cls","obj","noobj")
    meters = {"loss":0.0,"coord":0.0,"cls":0.0,"obj":0.0,"noobj":0.0,"n":0}
    # Copy the anchors to the CPU once; doing it inside the loop repeats the
    # same transfer on every batch.
    anchors_cpu = ANCHORS.detach().cpu()
    t0 = time.time()
    for ib, (names, imgs, labels_list) in enumerate(tqdm(loader, desc="Train", leave=False)):
        t_a = time.time()
        imgs = imgs.to(DEVICE, non_blocking=True)
        target = build_targets_cpu(labels_list, anchors_cpu, S=GRID_SIZE, num_classes=NUM_CLASSES)\
                    .to(DEVICE, non_blocking=True)
        t_b = time.time()

        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
            pred = model(imgs)
            # Synchronize so the timestamp reflects the finished forward pass
            # rather than just the kernel launches.
            if DEVICE=='cuda': torch.cuda.synchronize()
            t_c = time.time()

            loss, stats = criterion(pred, target)
            if DEVICE=='cuda': torch.cuda.synchronize()
            t_d = time.time()

        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        if DEVICE=='cuda': torch.cuda.synchronize()
        t_e = time.time()

        # Per-batch timing breakdown, printed for the first five batches only.
        if ib < 5:
            print(f"[TIMING b{ib}] H2D+build_targets={(t_b-t_a):.3f}s | forward={(t_c-t_b):.3f}s | loss={(t_d-t_c):.3f}s | step={(t_e-t_d):.3f}s")

        for k in metric_keys:
            # float() keeps plain numbers in the meters; if stats holds tensors,
            # summing them directly would keep them (and any autograd graph
            # attached to them) alive for the whole epoch.
            meters[k] += float(stats[k])
        meters["n"] += 1

    for k in metric_keys:
        meters[k] /= max(1, meters["n"])
    print(f"[EPOCH] total {(time.time()-t0):.1f}s")
    return meters


In [None]:
# ===== 셀 6: 학습 루프 & 저장 (AMP 최신화, dtype/device 정렬) =====
import time
import torch
from tqdm import tqdm
from torch import amp

# ----- Loss function -----
# The criterion receives the anchors already moved to DEVICE.
criterion = YOLOv2Loss(ANCHORS.to(DEVICE), num_classes=NUM_CLASSES)

# ----- Model / optimizer / scheduler / AMP -----
# Move the model to DEVICE and keep its parameters in float32.
model = model.to(DEVICE).float()

def build_optimizer(model, lr=1e-3, wd=5e-4):
    """Create an Adam optimizer over all parameters of ``model``.

    ``lr`` is the learning rate; ``wd`` is handed to Adam as ``weight_decay``.
    """
    params = model.parameters()
    adam = torch.optim.Adam(params, lr=lr, weight_decay=wd)
    return adam

# Optimizer from build_optimizer, with a cosine-annealing schedule whose
# period (T_max) is the total number of epochs.
optimizer = build_optimizer(model, LR, WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# AMP gradient scaler (active only on CUDA)
scaler = amp.GradScaler(enabled=(DEVICE == 'cuda'))

# ----- 타깃 생성 헬퍼 (CPU에서 만들고 GPU로 한 번만 복사) -----
def make_target(labels_list):
    """Build the target tensor on the CPU and move it to DEVICE as float32.

    Uses the module-level ANCHORS_CPU when it is defined; otherwise falls back
    to a detached CPU copy of ANCHORS.
    """
    if 'ANCHORS_CPU' in globals():
        cpu_anchors = ANCHORS_CPU
    else:
        cpu_anchors = ANCHORS.detach().cpu()
    built = build_targets_cpu(labels_list, cpu_anchors, S=GRID_SIZE, num_classes=NUM_CLASSES)
    return built.to(DEVICE, dtype=torch.float32, non_blocking=True)

# ----- 한 epoch 학습 -----
def train_one_epoch(model, loader, optimizer):
    """Run one optimization pass over ``loader`` and return averaged loss terms.

    Each batch is moved to DEVICE, its targets are built with make_target, and
    the forward pass and loss run under autocast (enabled only when
    DEVICE == 'cuda'). Gradients go through the module-level ``scaler``.

    Returns:
        dict with the per-batch mean of "loss", "coord", "cls", "obj" and
        "noobj", plus the number of batches under "n".
    """
    metric_keys = ("loss", "coord", "cls", "obj", "noobj")
    totals = {key: 0.0 for key in metric_keys}
    n_batches = 0
    use_amp = (DEVICE == 'cuda')

    model.train()
    for _names, batch_imgs, batch_labels in tqdm(loader, desc="Train", leave=False):
        batch_imgs = batch_imgs.to(DEVICE, non_blocking=True)
        batch_target = make_target(batch_labels)

        optimizer.zero_grad(set_to_none=True)
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
            pred = model(batch_imgs)
            loss, stats = criterion(pred, batch_target)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        for key in metric_keys:
            totals[key] += float(stats[key])
        n_batches += 1

    # Guard against an empty loader when averaging.
    denom = max(1, n_batches)
    meters = {key: totals[key] / denom for key in metric_keys}
    meters["n"] = n_batches
    return meters

# ----- 검증 -----
@torch.no_grad()
def validate(model, loader):
    """Evaluate ``model`` on ``loader`` and return averaged loss terms.

    Runs with gradients disabled and the model in eval mode. The forward pass
    and loss run under autocast (enabled only when DEVICE == 'cuda').

    Returns:
        dict with the per-batch mean of "loss", "coord", "cls", "obj" and
        "noobj", plus the number of batches under "n".
    """
    metric_keys = ("loss", "coord", "cls", "obj", "noobj")
    totals = {key: 0.0 for key in metric_keys}
    n_batches = 0
    use_amp = (DEVICE == 'cuda')

    model.eval()
    for _names, batch_imgs, batch_labels in tqdm(loader, desc="Val", leave=False):
        batch_imgs = batch_imgs.to(DEVICE, non_blocking=True)
        batch_target = make_target(batch_labels)
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
            pred = model(batch_imgs)
            loss, stats = criterion(pred, batch_target)
        for key in metric_keys:
            totals[key] += float(stats[key])
        n_batches += 1

    # Guard against an empty loader when averaging.
    denom = max(1, n_batches)
    meters = {key: totals[key] / denom for key in metric_keys}
    meters["n"] = n_batches
    return meters

# ----- Training loop -----
# BEST tracks the lowest validation loss seen so far; a checkpoint is written
# to SAVE_PATH only when a validation run improves on it.
BEST = float('inf')
SAVE_PATH = "yolov2_4cls.pt"

for ep in range(1, EPOCHS+1):
    tr = train_one_epoch(model, train_dl, optimizer)
    # Validate every VAL_EVERY epochs and always on the final epoch.
    if ep % VAL_EVERY == 0 or ep == EPOCHS:
        va = validate(model, val_dl)
        if va["loss"] < BEST:
            BEST = va["loss"]
            # The checkpoint bundles the weights with the anchors (as a NumPy
            # array), the class mapping and the input size.
            torch.save({
                "model": model.state_dict(),
                "anchors": ANCHORS.detach().cpu().numpy(),
                "classes": IDX2CLASS,
                "img_size": IMG_SIZE
            }, SAVE_PATH)
            print(f"[E{ep:02d}] Train {tr['loss']:.4f} | Val {va['loss']:.4f}  ↳ ✅ Saved {SAVE_PATH}")
        else:
            print(f"[E{ep:02d}] Train {tr['loss']:.4f} | Val {va['loss']:.4f}")
    else:
        # Epochs without validation print the individual training loss terms.
        print(f"[E{ep:02d}] Train {tr['loss']:.4f} (coord {tr['coord']:.3f} | cls {tr['cls']:.3f} | obj {tr['obj']:.3f} | noobj {tr['noobj']:.3f})")
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()


                                                         

KeyboardInterrupt: 

: 

In [15]:
# ===== 셀 7 (강화판): 경로 자동탐색 + 안전 읽기/쓰기 + 디버그 로그 =====
import os, math
from pathlib import Path
import torch, torchvision
import cv2, numpy as np
from torch import amp

# Checkpoint to load and output folders for annotated images and crops.
CKPT_PATH = "yolov2_4cls.pt"
OUT_DIR   = Path("results_detect")
VIS_DIR   = OUT_DIR / "vis"
CROP_DIR  = OUT_DIR / "crops"
for d in [VIS_DIR, CROP_DIR]: d.mkdir(parents=True, exist_ok=True)

# 1) Test file names (each file name is searched for automatically inside the project)
WANTED_BASENAMES = [
    "집_13_여_09223.jpg",
    "여자사람_11_여_05400.jpg",
    "남자사람_11_남_07042.jpg",
    "나무_8_여_01105.jpg",
]

# 2) Candidate start folders to search first (add your dataset root here if needed)
START_DIRS = [
    ".", "..", "../..",
    # Example: if you know the real data folder, uncomment the line below and edit the path
    # r"C:\team_project\second_project\266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축\01-1.정식개방데이터",
]

# NOTE(review): presumably turns off OpenCV's internal threading - confirm
# against the OpenCV documentation for setNumThreads.
cv2.setNumThreads(0)

def safe_imread(path_str):
    """Read an image as a color array, trying two loading strategies.

    The file is first read as raw bytes with np.fromfile and decoded with
    cv2.imdecode. If that raises or yields None, cv2.imread is tried on the
    same normalized path.

    Raises:
        FileNotFoundError: if both strategies fail to produce an image.
    """
    norm_path = os.path.normpath(path_str)
    try:
        raw = np.fromfile(norm_path, dtype=np.uint8)
        decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if decoded is not None:
            return decoded
        raise ValueError("imdecode returned None")
    except Exception:
        # Deliberate best-effort: any failure above falls through to cv2.imread.
        fallback = cv2.imread(norm_path, cv2.IMREAD_COLOR)
        if fallback is not None:
            return fallback
        raise FileNotFoundError(f"이미지 읽기 실패: {norm_path}")

def safe_imwrite(path_str, img_bgr, ext=".jpg", params=None):
    """Encode ``img_bgr`` in memory and write the encoded bytes to ``path_str``.

    Encoding with cv2.imencode and writing the buffer with ``tofile`` is
    intended to keep saving reliable for paths containing Korean characters
    or spaces.

    Args:
        path_str: destination path; normalized with os.path.normpath.
        img_bgr: image array to encode.
        ext: file extension passed to cv2.imencode to select the format.
        params: optional encoder parameters; an empty list is used when falsy.

    Raises:
        IOError: if cv2.imencode reports failure.
    """
    target_path = os.path.normpath(path_str)
    encode_params = params if params else []
    success, encoded = cv2.imencode(ext, img_bgr, encode_params)
    if success:
        encoded.tofile(target_path)
        return
    raise IOError("imencode failed")

def find_file_anywhere(basename, start_dirs):
    """Recursively search the start directories for a file named ``basename``.

    Start directories are tried in the order given. Within one directory tree
    a file whose name equals ``basename`` exactly wins; otherwise the first
    file whose name matches case-insensitively is used. Only regular files are
    returned, and ``basename`` is compared literally (glob metacharacters such
    as ``[`` have no special meaning).

    Args:
        basename: file name to look for (no directory part).
        start_dirs: iterable of directory paths to search.

    Returns:
        The resolved path of the match as a string, or None if nothing matched.
    """
    lower = basename.lower()
    for sd in start_dirs:
        root = Path(sd).resolve()
        if not root.is_dir():
            continue
        fallback = None
        # Single traversal: return on the first exact hit, and remember the
        # first case-insensitive hit in case no exact one exists in this tree.
        for p in root.rglob("*"):
            name = p.name
            if name == basename:
                if p.is_file():
                    return str(p)
            elif fallback is None and name.lower() == lower and p.is_file():
                fallback = p
        if fallback is not None:
            return str(fallback)
    return None

# ----- Decode -----
def yolo_decode(pred, anchors, S=13, stride=32, num_classes=4, conf_thr=0.9, nms_iou=0.18):
    """Decode raw detection-head output into per-image boxes.

    Args:
        pred: 4-D head output with batch size B first; it is reshaped to
            (B, A, 5 + num_classes, S, S).
        anchors: tensor with A rows of (width, height); boxes come out in the
            same units as ``stride`` and these anchors.
        S: grid size along each spatial axis.
        stride: size of one grid cell in output coordinates.
        num_classes: number of class scores per anchor.
        conf_thr: minimum (objectness * best class score) to keep a box.
        nms_iou: IoU threshold passed to torchvision.ops.nms. NMS is applied
            across all classes together.

    Returns:
        A list of B float32 tensors of shape (N, 6) whose columns are
        x1, y1, x2, y2, score, class index. N is 0 when nothing passes
        ``conf_thr``.
    """
    device = pred.device
    B, _, _, _ = pred.shape
    A = anchors.size(0); C = num_classes
    # Always decode in float32: a prediction produced under autocast can be
    # float16, where exp() overflows easily and box coordinates lose precision.
    # This also makes non-empty results match the float32 empty result.
    p = pred.float().reshape(B, A, 5+C, S, S).permute(0,1,3,4,2).contiguous()
    anchors = anchors.to(device=device, dtype=torch.float32)

    tx = torch.sigmoid(p[...,0]); ty = torch.sigmoid(p[...,1])
    tw = p[...,2];               th = p[...,3]
    tobj = torch.sigmoid(p[...,4])
    tcls = torch.sigmoid(p[...,5:])

    # Integer cell indices, broadcast over batch and anchor dimensions.
    gy, gx = torch.meshgrid(torch.arange(S, device=device),
                            torch.arange(S, device=device), indexing='ij')
    gx = gx[None,None]; gy = gy[None,None]
    aw = anchors[:,0].view(1,A,1,1)
    ah = anchors[:,1].view(1,A,1,1)

    # Box centers from cell offset + cell index; sizes from anchor * exp(t).
    cx = (tx + gx) * stride
    cy = (ty + gy) * stride
    w  = torch.exp(tw) * aw
    h  = torch.exp(th) * ah

    x1 = cx - w/2; y1 = cy - h/2
    x2 = cx + w/2; y2 = cy + h/2

    outs = []
    for b in range(B):
        scores, labels = tcls[b].max(dim=-1)
        conf = tobj[b] * scores
        m = conf > conf_thr
        if not m.any():
            outs.append(torch.zeros(0,6, device=device)); continue
        boxes = torch.stack([x1[b][m], y1[b][m], x2[b][m], y2[b][m]], dim=1)
        sc    = conf[m]
        lb    = labels[m].float()
        keep  = torchvision.ops.nms(boxes, sc, nms_iou)
        outs.append(torch.cat([boxes[keep], sc[keep,None], lb[keep,None]], dim=1))
    return outs

def preprocess_img(img_bgr, img_size=IMG_SIZE):
    """Convert a BGR image array into a float CHW tensor scaled to [0, 1].

    The image is resized to ``img_size`` x ``img_size`` (bilinear, no aspect
    ratio preservation) unless it already has that size, converted from BGR
    to RGB, reordered from HWC to CHW and divided by 255.
    """
    wanted = (img_size, img_size)
    if tuple(img_bgr.shape[:2]) != wanted:
        img_bgr = cv2.resize(img_bgr, wanted, interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    chw = torch.from_numpy(rgb).permute(2, 0, 1)
    return chw.float() / 255.0

def draw_and_save(img_bgr, det, vis_path, crop_dir, base_name):
    """Draw detections on a copy of the image and save every detection as a crop.

    Args:
        img_bgr: image array indexed as [row, column, ...]. Crops are cut from
            this unannotated array, so they contain no drawn boxes or text.
        det: tensor whose rows unpack to (x1, y1, x2, y2, score, cls), in the
            pixel coordinates of ``img_bgr``.
        vis_path: output path for the annotated image.
        crop_dir: directory for crop files; must support the ``/`` operator.
        base_name: prefix used in the crop file names.

    Returns:
        The number of crop files written.
    """
    im  = img_bgr.copy()
    H,W = im.shape[:2]
    COLORS = [(255,0,0),(0,255,0),(0,0,255),(255,128,0),(0,255,255),(255,0,255)]
    n_saved = 0
    for i, (x1,y1,x2,y2,score,cls) in enumerate(det.cpu().numpy()):
        # Top-left corner, clamped to a valid pixel index.
        x1 = int(max(0, min(W-1, x1))); y1 = int(max(0, min(H-1, y1)))
        # Slice ends are exclusive, so they may reach W / H. Clamping them to
        # W-1 / H-1 would drop the last column/row of boxes touching the edge.
        crop_x2 = int(max(0, min(W, x2))); crop_y2 = int(max(0, min(H, y2)))
        # Drawing uses inclusive pixel coordinates, so stay inside the image.
        x2 = min(W-1, crop_x2); y2 = min(H-1, crop_y2)
        c  = int(cls)
        name = IDX2CLASS.get(c, str(c))
        color = COLORS[c % len(COLORS)]
        cv2.rectangle(im, (x1,y1), (x2,y2), color, 2)
        cv2.putText(im, f"{name}:{score:.2f}", (x1, max(0,y1-7)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2, cv2.LINE_AA)
        crop = img_bgr[y1:crop_y2, x1:crop_x2]
        if crop.size > 0:
            crop_name = f"{base_name}_det{i}_{name}.jpg"
            safe_imwrite(str(crop_dir / crop_name), crop)
            n_saved += 1
    # Save the visualization even when there are no boxes (the unchanged image).
    safe_imwrite(str(vis_path), im)
    return n_saved

# ----- Load checkpoint -----
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects,
# so only load checkpoints you trust. The checkpoint written by the training
# cell stores the anchors as a NumPy array.
ckpt = torch.load(CKPT_PATH, map_location=DEVICE, weights_only=False)
model.load_state_dict(ckpt["model"])
model.eval()

# ----- Locate files -----
# Resolve every wanted file name to a real path; names that are not found are
# reported and skipped.
resolved = []
print("[INFO] 현재 작업 폴더:", Path(".").resolve())
for bn in WANTED_BASENAMES:
    fp = find_file_anywhere(bn, START_DIRS)
    if fp is None:
        print(f"[WARN] 찾지 못함 → {bn}")
    else:
        print(f"[OK] 발견 → {fp}")
        resolved.append(fp)

if not resolved:
    print("[ERROR] 테스트 이미지를 아무 곳에서도 찾지 못했습니다. START_DIRS에 실제 데이터셋 최상위 경로를 추가해 주세요.")
else:
    with torch.no_grad():
        for p in resolved:
            # An unreadable image is reported and skipped instead of aborting the run.
            try:
                img0 = safe_imread(p)
            except Exception as e:
                print(f"[WARN] 이미지 읽기 실패: {p} | {e}")
                continue

            inp = preprocess_img(img0, IMG_SIZE).unsqueeze(0).to(DEVICE, non_blocking=True)
            # NOTE(review): with autocast enabled, pred is presumably float16 - confirm.
            with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
                pred = model(inp)

            dets = yolo_decode(pred, ANCHORS.to(DEVICE), S=GRID_SIZE, stride=STRIDE, num_classes=NUM_CLASSES)
            det  = dets[0]

            # Scale the boxes from the IMG_SIZE x IMG_SIZE network input back
            # to the original image resolution.
            H0, W0 = img0.shape[:2]
            sx, sy = W0 / IMG_SIZE, H0 / IMG_SIZE
            det_scaled = det.clone()
            det_scaled[:, [0, 2]] *= sx
            det_scaled[:, [1, 3]] *= sy
            det = det_scaled

            base = Path(p).stem
            vis_path = VIS_DIR / f"{base}_vis.jpg"
            n_crops = draw_and_save(img0, det, vis_path, CROP_DIR, base)
            print(f"[SAVE] {base}: boxes={len(det)} | crops_saved={n_crops} | vis='{vis_path.name}'")


[INFO] 현재 작업 폴더: C:\team_project\second_project
[OK] 발견 → C:\team_project\second_project\266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축\01-1.정식개방데이터\Validation\01.원천데이터\VS_집\집_13_여_09223.jpg
[OK] 발견 → C:\team_project\second_project\266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축\01-1.정식개방데이터\Validation\01.원천데이터\VS_여자사람\여자사람_11_여_05400.jpg
[OK] 발견 → C:\team_project\second_project\266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축\01-1.정식개방데이터\Validation\01.원천데이터\VS_남자사람\남자사람_11_남_07042.jpg
[OK] 발견 → C:\team_project\second_project\266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축\01-1.정식개방데이터\Validation\01.원천데이터\VS_나무\나무_8_여_01105.jpg
[SAVE] 집_13_여_09223: boxes=2 | crops_saved=2 | vis='집_13_여_09223_vis.jpg'
[SAVE] 여자사람_11_여_05400: boxes=2 | crops_saved=2 | vis='여자사람_11_여_05400_vis.jpg'
[SAVE] 남자사람_11_남_07042: boxes=1 | crops_saved=1 | vis='남자사람_11_남_07042_vis.jpg'
[SAVE] 나무_8_여_01105: boxes=2 | crops_saved=2 | vis='나무_8_여_01105_vis.jpg'
