## 전체 파이프라인
데이터 준비 -> YOLOv2 탐지기 학습(4클래스) -> 탐지 결과로 이미지 크롭 -> 크롭 이미지를 ResNet 분류기로 재분류(보정) -> .pt 모델 저장 + 테스트 2장 결과 저장 -> 추가 이미지 테스트

# 데이터 준비

In [None]:
266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축/01-1.정식개방데이터
    Training
        01.원천데이터
            TS_나무
            TS_남자사람
            TS_여자사람
            TS_집
        02.라벨링데이터
            TL_나무
            TL_남자사람
            TL_여자사람
            TL_집
    Validation
        01.원천데이터
            VS_나무
            VS_남자사람
            VS_여자사람
            VS_집
        02.라벨링데이터
            VL_나무
            VL_남자사람
            VL_여자사람
            VL_집

# 1. YOLOv2 탐지기 학습

In [1]:
# GPU 사용 가능 여부 점검
import torch

print("PyTorch 버전:", torch.__version__)
print("CUDA 사용 가능 여부:", torch.cuda.is_available())

if torch.cuda.is_available():
    print("사용 중인 GPU 개수:", torch.cuda.device_count())
    print("현재 선택된 GPU:", torch.cuda.current_device())
    print("GPU 이름:", torch.cuda.get_device_name(torch.cuda.current_device()))

    # 간단한 연산으로 GPU 동작 테스트
    x = torch.rand((5000, 5000), device='cuda')
    y = torch.rand((5000, 5000), device='cuda')
    torch.cuda.synchronize()
    import time
    start = time.time()
    z = torch.matmul(x, y)
    torch.cuda.synchronize()
    print("GPU 행렬 곱 소요 시간: {:.4f}초".format(time.time() - start))
else:
    print("⚠ GPU(CUDA)를 사용할 수 없습니다. CPU를 사용 중입니다.")


PyTorch 버전: 2.5.1+cu121
CUDA 사용 가능 여부: True
사용 중인 GPU 개수: 1
현재 선택된 GPU: 0
GPU 이름: NVIDIA GeForce RTX 4060 Laptop GPU
GPU 행렬 곱 소요 시간: 0.1362초


## 1-1. YOLOv2 학습을 위한 라벨 변환
YOLOv2 포맷(txt)는 이미지 크기로 정규화한 객체의 중심 형식이다.
라벨링 데이터를 읽어, 원천 데이터의 이미지를 찾아 YOLO txt를 만든다

In [5]:
# JSON → YOLO txt 변환 (집/나무/남자/여자 4클래스), 이미지 링크/복사까지
from pathlib import Path
import json, os, shutil
from tqdm import tqdm

# === 경로 루트 ===
ROOT = Path("266.AI 기반 아동 미술심리 진단을 위한 그림 데이터 구축/01-1.정식개방데이터")

# === 고정 클래스 인덱스(모델/라벨 일치용) ===
CLASSES = {"tree":0, "man":1, "woman":2, "house":3}

# === JSON 내부 라벨명 → 우리가 쓸 대상 라벨 키 ===
TARGET_KEYS = {
    "house":  ("집전체",   "house"),
    "tree":   ("나무전체", "tree"),
    "man":    ("사람전체", "man"),
    "woman":  ("사람전체", "woman"),
}

# === 스플릿별 실제 폴더 위치 매핑 ===
SPLITS = {
    "Training": {
        "origin": ROOT / "Training" / "01.원천데이터",         # TS_나무, TS_남자사람, TS_여자사람, TS_집
        "label":  ROOT / "Training" / "02.라벨링데이터",        # TL_나무, TL_남자사람, TL_여자사람, TL_집
        "out":    Path("yolo_training"),
        "origin_sub_prefix": "TS_",   # 원천데이터 하위 폴더 접두사
        "label_sub_prefix":  "TL_",   # 라벨링데이터 하위 폴더 접두사
    },
    "Validation": {
        "origin": ROOT / "Validation" / "01.원천데이터",       # VS_나무, VS_남자사람, VS_여자사람, VS_집
        "label":  ROOT / "Validation" / "02.라벨링데이터",      # VL_나무, VL_남자사람, VL_여자사람, VL_집
        "out":    Path("yolo_validation"),
        "origin_sub_prefix": "VS_",
        "label_sub_prefix":  "VL_",
    }
}

# === 한국어 클래스명 매핑(폴더명용) ===
KO_CLASS = {"house":"집", "tree":"나무", "man":"남자사람", "woman":"여자사람"}

# === 속도/용량 옵션: 복사 대신 하드링크(같은 드라이브일 때 매우 빠름) ===
USE_HARDLINK = True   # 같은 파일시스템이면 권장
USE_SYMLINK  = False  # Windows에선 권한 필요할 수 있음
SKIP_COPY    = False  # True면 이미지 링크/복사 생략(원본만 사용하고 싶을 때)

def make_link_or_copy(src: Path, dst: Path):
    if SKIP_COPY or dst.exists():
        return
    dst.parent.mkdir(parents=True, exist_ok=True)
    try:
        if USE_HARDLINK:
            os.link(src, dst)     # 같은 드라이브/파티션이어야 함
        elif USE_SYMLINK:
            os.symlink(src, dst)
        else:
            shutil.copy2(src, dst)
    except Exception:
        # 실패 시 안전하게 복사
        shutil.copy2(src, dst)

def yolo_line(cls_idx, x, y, w, h, W, H):
    # YOLO 포맷: class xc yc w h (0~1 정규화)
    xc = (x + w/2) / W
    yc = (y + h/2) / H
    nw = w / W
    nh = h / H
    return f"{cls_idx} {xc:.6f} {yc:.6f} {nw:.6f} {nh:.6f}\n"

def parse_wh_from_json(meta):
    # "1280x1280" 같은 문자열을 우선 신뢰(이미지 열지 않아도 됨 → 매우 빠름)
    res = (meta or {}).get("img_resolution") or ""
    if "x" in res:
        try:
            w, h = res.split("x")
            return int(w), int(h)
        except Exception:
            return None, None
    return None, None

def index_origin_images(origin_root: Path):
    # 원천데이터 전체를 1회 스캔해 stem → 경로 인덱스 생성 (확장자 혼용 대비)
    stem2path = {}
    for p in origin_root.rglob("*"):
        if p.is_file() and p.suffix.lower() in {".jpg",".jpeg",".png"}:
            # 동일 stem이 여러 번 나오면 첫 번째만 사용(일반적으로 중복 없음)
            stem2path.setdefault(p.stem, p)
    return stem2path

summary = {}
for split_name, cfg in SPLITS.items():
    origin_root = cfg["origin"]
    label_root  = cfg["label"]
    out_root    = cfg["out"]
    out_img_dir = out_root / "images"
    out_lbl_dir = out_root / "labels"
    out_img_dir.mkdir(parents=True, exist_ok=True)
    out_lbl_dir.mkdir(parents=True, exist_ok=True)

    if not origin_root.exists() or not label_root.exists():
        print(f"[{split_name}] 경로 확인 필요 - origin:{origin_root.exists()} | label:{label_root.exists()}")
        continue

    # 1) 원천데이터 인덱스(한 번만 스캔)
    stem2path = index_origin_images(origin_root)

    # 2) 라벨 JSON 경로 모으기
    json_files = []
    for cls in ["house","man","tree","woman"]:
        ko = KO_CLASS[cls]
        # 스플릿에 맞는 접두사 폴더(TL_/VL_) 찾기
        label_dir = label_root / f"{cfg['label_sub_prefix']}{ko}"
        if label_dir.exists():
            json_files += sorted(label_dir.glob("*.json"))
        else:
            print(f"[{split_name}] 라벨 폴더 없음: {label_dir}")

    # 3) 변환 루프
    written, skip_no_label, skip_no_img, skip_no_wh = 0, 0, 0, 0

    for js in tqdm(json_files, desc=f"{split_name} JSON→YOLO", unit="file"):
        try:
            data = json.loads(js.read_text(encoding="utf-8"))
        except Exception:
            continue

        # (a) 대상 클래스/라벨 키 결정
        # 현재 json이 어느 폴더(집/나무/남자/여자)에서 왔는지로 클래스 판정
        if "집" in js.parts:
            key_label, class_name = TARGET_KEYS["house"]
        elif "나무" in js.parts:
            key_label, class_name = TARGET_KEYS["tree"]
        elif "남자사람" in js.parts:
            key_label, class_name = TARGET_KEYS["man"]
        elif "여자사람" in js.parts:
            key_label, class_name = TARGET_KEYS["woman"]
        else:
            # 혹시 모를 예외: 파일명에 한글 클래스가 안 보이면 meta.class 참고
            meta_cls = (data.get("annotations") or {}).get("class", "")
            if meta_cls == "집":
                key_label, class_name = TARGET_KEYS["house"]
            elif meta_cls == "나무":
                key_label, class_name = TARGET_KEYS["tree"]
            elif meta_cls == "남자사람":
                key_label, class_name = TARGET_KEYS["man"]
            elif meta_cls == "여자사람":
                key_label, class_name = TARGET_KEYS["woman"]
            else:
                continue
        cls_idx = CLASSES[class_name]

        # (b) 타깃 박스(집전체/나무전체/사람전체)만 추출
        boxes = []
        for b in (data.get("annotations") or {}).get("bbox", []):
            if b.get("label") == key_label:
                boxes.append(b)
        if not boxes:
            skip_no_label += 1
            continue

        # (c) 이미지 찾기: stem 기반(라벨 파일명과 동일 stem)
        stem = js.stem
        img_path = stem2path.get(stem)
        if img_path is None:
            skip_no_img += 1
            continue

        # (d) 이미지 크기: JSON의 img_resolution 사용(빠름)
        W, H = parse_wh_from_json(data.get("meta") or {})
        if not W or not H:
            # 해상도 정보가 없으면 이미지 열어 크기 확인(느리지만 호환)
            try:
                from PIL import Image
                with Image.open(img_path) as im:
                    W, H = im.size
            except Exception:
                skip_no_wh += 1
                continue

        # (e) YOLO 라벨 작성(경계 클램프)
        lines = []
        for b in boxes:
            x = float(b["x"]); y = float(b["y"])
            w = float(b["w"]); h = float(b["h"])
            x = max(0, min(x, W-1))
            y = max(0, min(y, H-1))
            w = max(1, min(w, W - x))
            h = max(1, min(h, H - y))
            lines.append(yolo_line(cls_idx, x, y, w, h, W, H))
        if not lines:
            skip_no_label += 1
            continue

        # (f) 저장: 이미지 링크/복사 + 라벨 txt
        out_img = out_img_dir / img_path.name
        out_lbl = out_lbl_dir / (img_path.stem + ".txt")
        make_link_or_copy(img_path, out_img)
        out_lbl.write_text("".join(lines), encoding="utf-8")
        written += 1

    summary[split_name] = dict(
        written=written,
        skip_no_label=skip_no_label,
        skip_no_img=skip_no_img,
        skip_no_wh=skip_no_wh
    )
    print(f"\n[{split_name}] 변환: {written}개 | 스킵(라벨없음): {skip_no_label} | 스킵(이미지없음): {skip_no_img} | 스킵(해상도확인실패): {skip_no_wh}")

print("\n== 전체 요약 ==")
for k, v in summary.items():
    print(f"{k}: {v['written']}개 (no_label {v['skip_no_label']}, no_img {v['skip_no_img']}, no_wh {v['skip_no_wh']})")
print("✅ 완료: yolo_training/, yolo_validation/ 에 images/ + labels/ 생성")


Training JSON→YOLO: 100%|██████████| 44800/44800 [00:55<00:00, 802.55file/s]



[Training] 변환: 44800개 | 스킵(라벨없음): 0 | 스킵(이미지없음): 0 | 스킵(해상도확인실패): 0


Validation JSON→YOLO: 100%|██████████| 5600/5600 [00:06<00:00, 827.99file/s]


[Validation] 변환: 5600개 | 스킵(라벨없음): 0 | 스킵(이미지없음): 0 | 스킵(해상도확인실패): 0

== 전체 요약 ==
Training: 44800개 (no_label 0, no_img 0, no_wh 0)
Validation: 5600개 (no_label 0, no_img 0, no_wh 0)
✅ 완료: yolo_training/, yolo_validation/ 에 images/ + labels/ 생성





## 1-2. 환경/경로/클래스 설정 + 하이퍼파라미터

In [2]:
# ===== 셀 1: 설정/하이퍼파라미터 =====
import os, math, json, random, time
from pathlib import Path
from typing import List, Tuple

import cv2
import numpy as np
from PIL import Image, ImageDraw

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.ops import nms

# 재현성
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)

# 경로 (변환된 YOLO 데이터)
TRAIN_ROOT = Path("yolo_training")
VAL_ROOT   = Path("yolo_validation")
IMG_DIRNAME = "images"
LBL_DIRNAME = "labels"

# 클래스
CLASS2IDX = {"tree":0, "man":1, "woman":2, "house":3}
IDX2CLASS = {v:k for k,v in CLASS2IDX.items()}
NUM_CLASSES = len(CLASS2IDX)

# 입력/그리드
IMG_SIZE  = 416
GRID_SIZE = 13
STRIDE    = IMG_SIZE // GRID_SIZE

# 앵커 개수
NUM_ANCHORS = 5

# 하이퍼파라미터
BATCH_SIZE    = 16
EPOCHS        = 5            # 빠르게 확인하려면 5~10으로 시작
LR            = 1e-3
WEIGHT_DECAY  = 5e-4
WARMUP_EPOCHS = 2
VAL_EVERY     = 3             # 검증 주기(에폭 단위) – 시간 절약용

LAMBDA_COORD  = 5.0
LAMBDA_NOOBJ  = 0.5
IGNORE_IOU    = 0.5

# 디바이스
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("DEVICE:", DEVICE)

# 가속 옵션
torch.backends.cudnn.benchmark = True   # 입력 크기 고정 시 속도↑
cv2.setNumThreads(0)                    # OpenCV 스레드 줄여 워커와 경쟁↓


DEVICE: cuda


In [3]:
# ===== 셀 2: Dataset/전처리 + DataLoader =====
from typing import Optional

def letterbox(im: np.ndarray, new_size=416, color=(114,114,114)):
    h, w = im.shape[:2]
    scale = min(new_size / h, new_size / w)
    nh, nw = int(round(h * scale)), int(round(w * scale))
    im_resized = cv2.resize(im, (nw, nh), interpolation=cv2.INTER_LINEAR)
    top = (new_size - nh) // 2
    bottom = new_size - nh - top
    left = (new_size - nw) // 2
    right = new_size - nw - left
    im_padded = cv2.copyMakeBorder(im_resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im_padded, scale, left, top

def load_labels(txt_path: Path):
    boxes = []
    if not txt_path.exists():
        return boxes
    with open(txt_path, "r", encoding="utf-8") as f:
        for line in f:
            ss = line.strip().split()
            if len(ss) != 5:
                continue
            cls = int(ss[0]); xc = float(ss[1]); yc = float(ss[2]); w = float(ss[3]); h = float(ss[4])
            boxes.append([cls, xc, yc, w, h])
    return boxes

# 견고한 이미지 로더: cv2 → imdecode → PIL
def imread_robust(path: Path) -> Optional[np.ndarray]:
    img = cv2.imread(str(path))
    if img is not None:
        return img
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
        img  = cv2.imdecode(data, cv2.IMREAD_COLOR)
        if img is not None:
            return img
    except Exception:
        pass
    try:
        img = Image.open(path).convert("RGB")
        img = np.array(img)[:, :, ::-1].copy()  # RGB→BGR
        return img
    except Exception:
        return None

class YOLODataset(Dataset):
    def __init__(self, root: Path, img_size=416, augment=False):
        self.img_dir = root / IMG_DIRNAME
        self.lbl_dir = root / LBL_DIRNAME
        self.img_paths = sorted([p for p in self.img_dir.glob("*") if p.suffix.lower() in [".jpg",".jpeg",".png"]])
        self.img_size = img_size
        self.augment = augment

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        lbl_path = (self.lbl_dir / img_path.stem).with_suffix(".txt")

        img = imread_robust(img_path)
        if img is None:
            raise FileNotFoundError(f"[imread_robust 실패] {img_path}")

        img, scale, padw, padh = letterbox(img, self.img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        labels = load_labels(lbl_path)
        labels = np.array(labels, dtype=np.float32) if labels else np.zeros((0,5), dtype=np.float32)

        img = torch.from_numpy(img).permute(2,0,1)
        labels = torch.from_numpy(labels)
        return img_path.name, img, labels

def collate_fn(batch):
    names, imgs, labels = zip(*batch)
    imgs = torch.stack(imgs, 0)
    return names, imgs, labels

train_ds = YOLODataset(TRAIN_ROOT, img_size=IMG_SIZE, augment=True)
val_ds   = YOLODataset(VAL_ROOT,   img_size=IMG_SIZE, augment=False)

# DataLoader – 윈도우면 2~4 권장
NUM_WORKERS = max(2, min(8, (os.cpu_count() or 8) // 2))
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                      num_workers=NUM_WORKERS, pin_memory=True,
                      persistent_workers=True, prefetch_factor=2,
                      collate_fn=collate_fn)
val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=max(2, NUM_WORKERS//2), pin_memory=True,
                      persistent_workers=True, prefetch_factor=2,
                      collate_fn=collate_fn)

len(train_ds), len(val_ds)


(44800, 5600)

In [4]:
# ===== 셀 3: 빠른 앵커 계산 (라벨 txt만, 멀티스레드) =====
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

LABELS_DIR = TRAIN_ROOT / LBL_DIRNAME   # yolo_training/labels
IMG_SIZE_  = IMG_SIZE
K          = NUM_ANCHORS
MAX_FILES  = 30000       # 파일 샘플 상한(속도용)
MAX_BOXES  = 300000      # 전체 박스 상한(속도/메모리용)
WORKERS    = min(32, (os.cpu_count() or 8) * 2)

all_txts = [p for p in LABELS_DIR.iterdir() if p.suffix.lower()==".txt"]
random.shuffle(all_txts)
txts = all_txts[:MAX_FILES]

def parse_wh(txt_path: Path):
    wh_local = []
    try:
        with open(txt_path, "r", encoding="utf-8") as f:
            for line in f:
                ss = line.strip().split()
                if len(ss)==5:
                    w = float(ss[3]) * IMG_SIZE_
                    h = float(ss[4]) * IMG_SIZE_
                    if w>0 and h>0:
                        wh_local.append((w,h))
    except Exception:
        pass
    return wh_local

wh_list = []
with ThreadPoolExecutor(max_workers=WORKERS) as ex:
    futures = [ex.submit(parse_wh, p) for p in txts]
    for fu in as_completed(futures):
        wh_list.extend(fu.result())
        if len(wh_list) >= MAX_BOXES:
            break

wh = np.array(wh_list, dtype=np.float32)
print(f"수집한 w,h 개수: {len(wh)} (파일 {len(txts)}/{len(all_txts)})")

def iou_wh(wh1, wh2):
    w1, h1 = wh1[:,0][:,None], wh1[:,1][:,None]
    w2, h2 = wh2[:,0][None,:], wh2[:,1][None,:]
    inter  = np.minimum(w1, w2) * np.minimum(h1, h2)
    area1  = w1*h1; area2 = w2*h2
    return inter / (area1 + area2 - inter + 1e-9)

def kmeanspp_init(data, k):
    centroids = [data[np.random.randint(len(data))]]
    for _ in range(1, k):
        d2 = np.min([np.sum((data - c)**2, axis=1) for c in centroids], axis=0)
        probs = d2 / (d2.sum() + 1e-9)
        idx = np.random.choice(len(data), p=probs)
        centroids.append(data[idx])
    return np.stack(centroids, axis=0)

if len(wh) < K:
    print("라벨이 부족해서 기본 앵커 사용")
    anchors = np.array([[12,16],[19,36],[40,28],[36,75],[76,55]], dtype=np.float32)
else:
    centroids = kmeanspp_init(wh, K)
    for _ in range(25):  # 이터레이션 단축(보통 충분)
        iou = iou_wh(wh, centroids)
        clusters = np.argmax(iou, axis=1)
        new_centroids = []
        changed = False
        for ki in range(K):
            pts = wh[clusters==ki]
            if len(pts)==0:
                new_centroids.append(centroids[ki])
            else:
                med = np.median(pts, axis=0)
                new_centroids.append(med)
                if np.any(np.abs(med - centroids[ki]) > 1e-3):
                    changed = True
        centroids = np.array(new_centroids)
        if not changed:
            break
    order = np.argsort(centroids.prod(axis=1))
    anchors = centroids[order]

print("앵커(픽셀, WxH):\n", anchors)

# 학습에서 사용할 텐서
ANCHORS = torch.tensor(anchors, dtype=torch.float32).to(DEVICE)

# (선택) 캐시
# np.save("anchors.npy", anchors)


수집한 w,h 개수: 30001 (파일 30000/44800)
앵커(픽셀, WxH):
 [[ 87.4249  153.4    ]
 [117.      257.72488]
 [159.24979 198.24979]
 [180.04979 313.6249 ]
 [268.1249  368.22488]]


In [5]:
# ===== 셀 4: YOLOv2 간단 모델 =====
def conv_bn_lrelu(c_in, c_out, k=3, s=1, p=1):
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, k, s, p, bias=False),
        nn.BatchNorm2d(c_out),
        nn.LeakyReLU(0.1, inplace=True),
    )

class YOLOv2Tiny(nn.Module):
    def __init__(self, num_classes=4, num_anchors=5):
        super().__init__()
        c = [32, 64, 128, 256, 512, 1024]
        self.layer1 = nn.Sequential(
            conv_bn_lrelu(3, c[0], 3,1,1),
            nn.MaxPool2d(2,2),   # 208
            conv_bn_lrelu(c[0], c[1], 3,1,1),
            nn.MaxPool2d(2,2),   # 104
            conv_bn_lrelu(c[1], c[2], 3,1,1),
            conv_bn_lrelu(c[2], c[1], 1,1,0),
            conv_bn_lrelu(c[1], c[2], 3,1,1),
            nn.MaxPool2d(2,2),   # 52
            conv_bn_lrelu(c[2], c[3], 3,1,1),
            conv_bn_lrelu(c[3], c[2], 1,1,0),
            conv_bn_lrelu(c[2], c[3], 3,1,1),
            nn.MaxPool2d(2,2),   # 26
            conv_bn_lrelu(c[3], c[4], 3,1,1),
            conv_bn_lrelu(c[4], c[3], 1,1,0),
            conv_bn_lrelu(c[3], c[4], 3,1,1),
            nn.MaxPool2d(2,2),   # 13
            conv_bn_lrelu(c[4], c[5], 3,1,1),
            conv_bn_lrelu(c[5], c[4], 1,1,0),
            conv_bn_lrelu(c[4], c[5], 3,1,1),
        )
        out_ch = num_anchors * (5 + num_classes)
        self.head = nn.Conv2d(c[5], out_ch, 1,1,0)

        self.num_classes = num_classes
        self.num_anchors = num_anchors

    def forward(self, x):
        x = self.layer1(x)
        x = self.head(x)  # [B, A*(5+C), 13, 13]
        return x

model = YOLOv2Tiny(num_classes=NUM_CLASSES, num_anchors=NUM_ANCHORS).to(DEVICE)
sum(p.numel() for p in model.parameters())/1e6, "M params"


(13.305293, 'M params')

In [13]:
# ===== 셀 5: 타깃 할당 & 손실 (with logits) =====
def build_targets(labels_list, anchors, S=13, num_classes=4):
    B = len(labels_list)
    A = anchors.size(0)
    target = torch.zeros(B, A, S, S, 5+num_classes, device=DEVICE)
    for b_idx, labels in enumerate(labels_list):
        if labels is None or len(labels)==0:
            continue
        labels = labels.to(DEVICE).clone()
        labels[:,1:] *= torch.tensor([IMG_SIZE, IMG_SIZE, IMG_SIZE, IMG_SIZE], device=DEVICE)
        for cls, xc, yc, bw, bh in labels:
            gi = int(xc // STRIDE); gj = int(yc // STRIDE)
            gi = min(max(gi,0), S-1); gj = min(max(gj,0), S-1)
            box = torch.tensor([bw, bh], device=DEVICE)[None,:]
            inter = torch.min(box[:,0], anchors[:,0]) * torch.min(box[:,1], anchors[:,1])
            area1 = box[:,0]*box[:,1]; area2 = anchors[:,0]*anchors[:,1]
            iou = inter / (area1 + area2 - inter + 1e-16)
            a = torch.argmax(iou).item()
            tx = (xc / STRIDE) - gi
            ty = (yc / STRIDE) - gj
            tw = torch.log(bw / anchors[a,0] + 1e-16)
            th = torch.log(bh / anchors[a,1] + 1e-16)
            target[b_idx, a, gj, gi, 0] = tx
            target[b_idx, a, gj, gi, 1] = ty
            target[b_idx, a, gj, gi, 2] = tw
            target[b_idx, a, gj, gi, 3] = th
            target[b_idx, a, gj, gi, 4] = 1.0
            target[b_idx, a, gj, gi, 5 + int(cls.item())] = 1.0
    return target

class YOLOv2Loss(nn.Module):
    def __init__(self, anchors, num_classes=4, lambda_coord=5.0, lambda_noobj=0.5, ignore_iou=0.5):
        super().__init__()
        self.anchors = anchors
        self.num_classes = num_classes
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.ignore_iou = ignore_iou

    def forward(self, pred, target):
        B, _, S, _ = pred.shape
        A = self.anchors.size(0); C = self.num_classes

        # [B,A,S,S,5+C]
        pred = pred.reshape(B, A, 5+C, S, S).permute(0,1,3,4,2)

        # 좌표: px,py는 sigmoid, pw,ph는 log-스페이스
        px = torch.sigmoid(pred[...,0]); py = torch.sigmoid(pred[...,1])
        pw = pred[...,2];               ph = pred[...,3]

        # objectness/cls는 "로짓" 그대로 두고 BCEWithLogits 사용
        lo          = pred[...,4]        # objectness logits
        pcls_logits = pred[...,5:]       # class logits

        # 타깃
        tx,ty,tw,th = target[...,0],target[...,1],target[...,2],target[...,3]
        tobj, tcls  = target[...,4], target[...,5:]
        obj_mask = tobj.bool()

        # coord loss
        loss_x = F.mse_loss(px[obj_mask], tx[obj_mask], reduction='sum') if obj_mask.any() else torch.tensor(0., device=pred.device)
        loss_y = F.mse_loss(py[obj_mask], ty[obj_mask], reduction='sum') if obj_mask.any() else torch.tensor(0., device=pred.device)
        loss_w = F.mse_loss(pw[obj_mask], tw[obj_mask], reduction='sum') if obj_mask.any() else torch.tensor(0., device=pred.device)
        loss_h = F.mse_loss(ph[obj_mask], th[obj_mask], reduction='sum') if obj_mask.any() else torch.tensor(0., device=pred.device)
        loss_coord = self.lambda_coord * (loss_x + loss_y + loss_w + loss_h)

        # cls loss (with logits)
        loss_cls = F.binary_cross_entropy_with_logits(pcls_logits[obj_mask], tcls[obj_mask], reduction='sum') \
                   if obj_mask.any() else torch.tensor(0., device=pred.device)

        # ignore_iou 계산용 보조
        grid_y, grid_x = torch.meshgrid(torch.arange(S, device=pred.device), torch.arange(S, device=pred.device), indexing='ij')
        grid_x = grid_x[None,None,:,:]; grid_y = grid_y[None,None,:,:]
        ax = self.anchors[:,0].view(1,A,1,1); ay = self.anchors[:,1].view(1,A,1,1)
        bx = (px + grid_x) * STRIDE
        by = (py + grid_y) * STRIDE
        bw = torch.exp(pw) * ax
        bh = torch.exp(ph) * ay
        def to_xyxy(cx,cy,w,h):
            x1 = cx - w/2; y1 = cy - h/2
            x2 = cx + w/2; y2 = cy + h/2
            return x1,y1,x2,y2
        px1,py1,px2,py2 = to_xyxy(bx,by,bw,bh)
        ignore_mask = torch.zeros_like(tobj, dtype=torch.bool)
        for b in range(B):
            if not obj_mask[b].any():
                continue
            gtx = (tx[b] + grid_x) * STRIDE
            gty = (ty[b] + grid_y) * STRIDE
            gtw = torch.exp(tw[b]) * ax
            gth = torch.exp(th[b]) * ay
            gx1,gy1,gx2,gy2 = to_xyxy(gtx,gty,gtw,gth)
            inter_x1 = torch.maximum(px1[b], gx1)
            inter_y1 = torch.maximum(py1[b], gy1)
            inter_x2 = torch.minimum(px2[b], gx2)
            inter_y2 = torch.minimum(py2[b], gy2)
            inter_w = torch.clamp(inter_x2 - inter_x1, min=0)
            inter_h = torch.clamp(inter_y2 - inter_y1, min=0)
            inter = inter_w * inter_h
            area_p = (px2[b]-px1[b]) * (py2[b]-py1[b])
            area_g = (gx2-gx1) * (gy2-gy1)
            iou = inter / (area_p + area_g - inter + 1e-16)
            ignore_mask[b] = iou.detach() > self.ignore_iou

        # obj/noobj (with logits)
        loss_obj   = F.binary_cross_entropy_with_logits(lo[obj_mask], tobj[obj_mask], reduction='sum') \
                     if obj_mask.any() else torch.tensor(0., device=pred.device)
        noobj_mask = (~obj_mask) & (~ignore_mask)
        loss_noobj = self.lambda_noobj * F.binary_cross_entropy_with_logits(lo[noobj_mask], tobj[noobj_mask], reduction='sum') \
                     if noobj_mask.any() else torch.tensor(0., device=pred.device)

        loss = (loss_coord + loss_cls + loss_obj + loss_noobj) / max(1,B)
        stats = dict(loss=loss.item(),
                     coord=loss_coord.item()/max(1,B),
                     cls=loss_cls.item()/max(1,B) if obj_mask.any() else 0.0,
                     obj=loss_obj.item()/max(1,B) if obj_mask.any() else 0.0,
                     noobj=loss_noobj.item()/max(1,B) if noobj_mask.any() else 0.0)
        return loss, stats


In [14]:
try: del criterion
except: pass

criterion = YOLOv2Loss(ANCHORS, num_classes=NUM_CLASSES,
                       lambda_coord=LAMBDA_COORD, lambda_noobj=LAMBDA_NOOBJ,
                       ignore_iou=IGNORE_IOU)


In [15]:
from torch import amp

names, imgs, labels_list = next(iter(train_dl))
imgs = imgs.to(DEVICE)
target = build_targets(labels_list, ANCHORS, S=GRID_SIZE, num_classes=NUM_CLASSES)

with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
    pred = model(imgs)
    loss, stats = criterion(pred, target)
print("✅ smoke test OK | loss:", float(loss))


✅ smoke test OK | loss: 105.512939453125


In [16]:
# ===== 셀 6: 학습 루프 & 저장 (AMP 최신화, dtype/device 정렬) =====
from tqdm import tqdm
from torch import amp

# 혹시 모를 dtype 꼬임 방지: 모델을 항상 fp32로 고정해 GPU에 올려 둠
model = model.to(DEVICE).float()

def build_optimizer(model, lr=1e-3, wd=5e-4):
    return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

optimizer = build_optimizer(model, LR, WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# AMP 스케일러 (CUDA일 때만 의미 있음)
scaler = amp.GradScaler('cuda' if DEVICE == 'cuda' else 'cpu')

def train_one_epoch(model, loader, optimizer):
    model.train()
    meters = {"loss":0,"coord":0,"cls":0,"obj":0,"noobj":0,"n":0}
    for names, imgs, labels_list in tqdm(loader, desc="Train", leave=False):
        # 입력을 반드시 디바이스로 (fp32 유지)
        imgs = imgs.to(DEVICE, non_blocking=True)

        # 타깃 생성은 내부에서 DEVICE 맞춰짐
        target = build_targets(labels_list, ANCHORS, S=GRID_SIZE, num_classes=NUM_CLASSES)

        optimizer.zero_grad(set_to_none=True)
        # CUDA에서만 autocast 활성화 (dtype=fp16)
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
            pred = model(imgs)
            loss, stats = criterion(pred, target)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        for k in ["loss","coord","cls","obj","noobj"]:
            meters[k] += stats[k]
        meters["n"] += 1
    for k in ["loss","coord","cls","obj","noobj"]:
        meters[k] = meters[k] / max(1, meters["n"])
    return meters

@torch.no_grad()
def validate(model, loader):
    model.eval()
    meters = {"loss":0,"coord":0,"cls":0,"obj":0,"noobj":0,"n":0}
    for names, imgs, labels_list in tqdm(loader, desc="Val", leave=False):
        imgs = imgs.to(DEVICE, non_blocking=True)
        target = build_targets(labels_list, ANCHORS, S=GRID_SIZE, num_classes=NUM_CLASSES)
        # 평가도 autocast로 빠르게 (CUDA일 때만)
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
            pred = model(imgs)
            loss, stats = criterion(pred, target)
        for k in ["loss","coord","cls","obj","noobj"]:
            meters[k] += stats[k]
        meters["n"] += 1
    for k in ["loss","coord","cls","obj","noobj"]:
        meters[k] = meters[k] / max(1, meters["n"])
    return meters

BEST = 1e9
SAVE_PATH = "yolov2_4cls.pt"

for ep in range(1, EPOCHS+1):
    tr = train_one_epoch(model, train_dl, optimizer)
    if ep % VAL_EVERY == 0 or ep == EPOCHS:
        va = validate(model, val_dl)
        if va["loss"] < BEST:
            BEST = va["loss"]
            torch.save({"model": model.state_dict(),
                        "anchors": ANCHORS.detach().cpu().numpy(),
                        "classes": IDX2CLASS,
                        "img_size": IMG_SIZE}, SAVE_PATH)
            print(f"[E{ep:02d}] Train {tr['loss']:.4f} | Val {va['loss']:.4f}  ↳ ✅ Saved {SAVE_PATH}")
    else:
        print(f"[E{ep:02d}] Train {tr['loss']:.4f} (coord {tr['coord']:.3f} | cls {tr['cls']:.3f} | obj {tr['obj']:.3f} | noobj {tr['noobj']:.3f})")
    scheduler.step()


                                                          

[E01] Train 3.2194 (coord 0.928 | cls 0.972 | obj 0.495 | noobj 0.824)


                                                          

[E02] Train 1.4309 (coord 0.622 | cls 0.466 | obj 0.203 | noobj 0.140)


                                                          

[E03] Train 1.0866 | Val 1.1735  ↳ ✅ Saved yolov2_4cls.pt


                                                          

[E04] Train 0.8449 (coord 0.412 | cls 0.274 | obj 0.091 | noobj 0.068)


                                                          

[E05] Train 0.6425 | Val 0.7157  ↳ ✅ Saved yolov2_4cls.pt




In [38]:
# ===== 셀 7 (최종): 견고한 로더 + with-logits 디코딩 + 클래스별 NMS + 유니코드 안전 저장 =====
from torch import amp
from pathlib import Path
import cv2, torch
import numpy as np
from torchvision.ops import nms

# 간이 imread_robust (셀 2에 이미 동일 함수가 있으면 생략 가능)
def imread_robust(path: Path):
    img = cv2.imread(str(path))
    if img is not None:
        return img
    # 유니코드 경로 대응
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
        img  = cv2.imdecode(data, cv2.IMREAD_COLOR)
        if img is not None:
            return img
    except Exception:
        pass
    # PIL fallback
    try:
        from PIL import Image
        img = Image.open(path).convert("RGB")
        return np.array(img)[:, :, ::-1].copy()  # RGB->BGR
    except Exception:
        return None

# 유니코드(한글) 경로에서도 안전한 저장
def imwrite_unicode(path: Path, img) -> bool:
    path = str(path)
    ext = Path(path).suffix or ".jpg"
    ok, buf = cv2.imencode(ext, img)
    if not ok:
        return False
    try:
        buf.tofile(path)
        return True
    except Exception:
        return False

@torch.no_grad()
def yolo_decode(
    pred,
    conf_thres=0.95,
    nms_thres=0.05,
    max_det=300,
    per_cell_top1=True,        # ★ 같은 셀에서는 앵커 1개만 허용
    min_area_ratio=0.06,       # ★ 너무 작은 박스 제거 (416^2 대비 비율)
    single_instance_per_class=True  # ★ 이미지당 클래스별 1개만 남기기 옵션
):
    """
    pred: [B, A*(5+C), S, S]
    return: list of tensors, each [N,6] = [x1,y1,x2,y2,score,cls]
    """
    B, _, S, _ = pred.shape
    A, C = NUM_ANCHORS, NUM_CLASSES
    img_area = (IMG_SIZE * IMG_SIZE)
    min_area = img_area * float(min_area_ratio)

    # [B, A, S, S, 5+C]
    pred = pred.reshape(B, A, 5 + C, S, S).permute(0, 1, 3, 4, 2).contiguous()

    # grid / anchors
    gy, gx = torch.meshgrid(
        torch.arange(S, device=pred.device),
        torch.arange(S, device=pred.device),
        indexing='ij'
    )
    ax = ANCHORS[:, 0].view(A, 1, 1)
    ay = ANCHORS[:, 1].view(A, 1, 1)

    outs = []
    for b in range(B):
        p = pred[b]  # [A,S,S,5+C]

        # coords / logits
        px = torch.sigmoid(p[..., 0])  # [A,S,S]
        py = torch.sigmoid(p[..., 1])
        pw = p[..., 2]
        ph = p[..., 3]
        lo = p[..., 4]                 # objectness logits
        po = torch.sigmoid(lo)         # objectness prob
        pcls = torch.softmax(p[..., 5:], -1)  # [A,S,S,C]

        # absolute boxes (416 space)
        bx = (px + gx) * STRIDE
        by = (py + gy) * STRIDE
        bw = torch.exp(pw) * ax
        bh = torch.exp(ph) * ay

        x1 = bx - bw / 2
        y1 = by - bh / 2
        x2 = bx + bw / 2
        y2 = by + bh / 2

        # scores
        cls_scores, cls_idx = torch.max(pcls, dim=-1, keepdim=True)  # [A,S,S,1]
        conf = (po.unsqueeze(-1) * cls_scores).squeeze(-1)           # [A,S,S]

        # --- (1) per‑cell top‑1 suppression across anchors ---
        if per_cell_top1:
            # 각 (S,S) 위치에서 A개 앵커 중 최고 앵커만 True
            best_a = torch.argmax(conf, dim=0)            # [S,S]
            keep_anchor = torch.zeros_like(conf, dtype=torch.bool)  # [A,S,S]
            for a in range(A):
                keep_anchor[a] = (best_a == a)
            conf = torch.where(keep_anchor, conf, torch.zeros_like(conf))

        # threshold
        mask = conf > conf_thres
        if not mask.any():
            outs.append(torch.zeros((0, 6), device=pred.device))
            continue

        # gather
        xs = x1[mask].float(); ys = y1[mask].float()
        xe = x2[mask].float(); ye = y2[mask].float()
        sc = conf[mask].float()
        cl = cls_idx.squeeze(-1)[mask].float()

        # --- (2) too-small box filter (remove leaf/root fragments) ---
        w = (xe - xs).clamp(min=0)
        h = (ye - ys).clamp(min=0)
        areas = w * h
        big = areas >= min_area
        if big.sum() == 0:
            outs.append(torch.zeros((0, 6), device=pred.device))
            continue
        xs, ys, xe, ye, sc, cl = xs[big], ys[big], xe[big], ye[big], sc[big], cl[big]

        boxes = torch.stack([xs, ys, xe, ye], dim=1)  # [N,4]

        # --- (3) class-wise NMS ---
        det_list = []
        classes = cl.long()
        for cval in torch.unique(classes):
            idx = (classes == cval)
            if idx.sum() == 0:
                continue
            keep = nms(boxes[idx], sc[idx], nms_thres)
            det_c = torch.cat([boxes[idx][keep], sc[idx][keep][:, None], cl[idx][keep][:, None]], dim=1)
            # optional: class-wise top-1
            if single_instance_per_class and det_c.size(0) > 0:
                best = torch.argmax(det_c[:, 4])
                det_c = det_c[best:best+1]
            det_list.append(det_c)

        det = torch.cat(det_list, dim=0) if det_list else torch.zeros((0, 6), device=pred.device)

        # max_det
        if det.numel():
            order = torch.argsort(det[:, 4], descending=True)
            det = det[order][:max_det]

        outs.append(det)
    return outs


@torch.no_grad()
def infer_and_draw(model, img_paths: list[Path], out_dir="runs_yolo2_vis",
                   conf=0.50, nms_thr=0.30, to_orig=True, max_det=300):
    """
    to_orig=True: 레터박스 → 원본 좌표로 역변환하여 원본 이미지 위에 그림(권장)
    """
    model.eval()
    out_dir = Path(out_dir); out_dir.mkdir(parents=True, exist_ok=True)

    for p in img_paths:
        orig = imread_robust(p)
        if orig is None:
            print(f"[skip] 이미지 로드 실패: {p}")
            continue

        # 416 레터박스
        img, scale, padw, padh = letterbox(orig, IMG_SIZE)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        t = torch.from_numpy(img_rgb).permute(2, 0, 1)[None].to(DEVICE)

        # CUDA일 때만 AMP 켬
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
            pred = model(t)

        dets = yolo_decode(pred, conf_thres=conf, nms_thres=nms_thr, max_det=max_det)[0].detach().cpu().numpy()

        # 시각화: 원본 좌표로 역변환해서 그리기
        vis = orig.copy()
        for x1, y1, x2, y2, score, cls in dets:
            if to_orig:
                x1 = int(max(0, min(vis.shape[1]-1, (x1 - padw) / scale)))
                y1 = int(max(0, min(vis.shape[0]-1, (y1 - padh) / scale)))
                x2 = int(max(0, min(vis.shape[1],     (x2 - padw) / scale)))
                y2 = int(max(0, min(vis.shape[0],     (y2 - padh) / scale)))
            else:
                x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])

            if x2 <= x1 or y2 <= y1:
                continue
            color = (0, 255, 0)
            cv2.rectangle(vis, (x1, y1), (x2, y2), color, 2)
            cv2.putText(vis, f"{IDX2CLASS[int(cls)]}:{score:.2f}",
                        (x1, max(0, y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1, cv2.LINE_AA)

        out_path = out_dir / f"{p.stem}_pred.jpg"
        ok = imwrite_unicode(out_path, vis)
        print(("saved:" if ok else "[FAIL SAVE]"), out_path)

# 샘플 2장 시각화 (중복 박스 억제를 위해 임계치/스레시홀드 보수적으로)
sample_imgs = sorted((TRAIN_ROOT / IMG_DIRNAME).glob("*.jpg"))[:2]
infer_and_draw(model, sample_imgs, out_dir="runs_yolo2_vis", conf=0.50, nms_thr=0.30, to_orig=True, max_det=300)


saved: runs_yolo2_vis\나무_10_남_00013_pred.jpg
saved: runs_yolo2_vis\나무_10_남_00022_pred.jpg


In [35]:
# ===== 셀 8: 검출 박스 크롭 저장 (ResNet 단계 대비, 원본좌표 역-레터박스 지원) =====
from torch import amp
from pathlib import Path
import cv2, torch
import numpy as np

# 셀 7에서 이미 정의했다면 중복 정의 생략 가능
def imread_robust(path: Path):
    img = cv2.imread(str(path))
    if img is not None:
        return img
    try:
        data = np.fromfile(str(path), dtype=np.uint8)     # 유니코드 경로 대응
        img  = cv2.imdecode(data, cv2.IMREAD_COLOR)
        if img is not None:
            return img
    except Exception:
        pass
    try:
        from PIL import Image
        img = Image.open(path).convert("RGB")
        return np.array(img)[:, :, ::-1].copy()           # RGB->BGR
    except Exception:
        return None

def imwrite_unicode(path: Path, img) -> bool:
    path = str(path)
    ext = Path(path).suffix or ".jpg"
    ok, buf = cv2.imencode(ext, img)
    if not ok:
        return False
    try:
        buf.tofile(path)
        return True
    except Exception:
        return False

@torch.no_grad()
def save_crops_for_resnet(
    model,
    img_paths: list[Path],
    out_root="crops",
    conf=0.25,
    nms_thr=0.45,
    size=224,
    space='orig'  # 'orig' = 원본 이미지 좌표로 자르기, 'lbox' = 416 레터박스 좌표로 자르기
):
    """
    각 이미지에서 탐지 박스를 크롭하여 클래스별 폴더에 저장.
    파일명: <원본stem>_<i>_<cls>_<score>.jpg
    """
    assert space in ('orig', 'lbox')
    model.eval()
    out_root = Path(out_root)

    # 클래스 폴더 생성
    for c in IDX2CLASS.values():
        (out_root / c).mkdir(parents=True, exist_ok=True)

    saved = 0
    for p in img_paths:
        orig = imread_robust(p)
        if orig is None:
            print(f"[skip] 이미지 로드 실패: {p}")
            continue

        # 416 레터박스
        img, scale, padw, padh = letterbox(orig, IMG_SIZE)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        t = torch.from_numpy(img_rgb).permute(2, 0, 1)[None].to(DEVICE)

        # CUDA일 때만 AMP 켬
        with amp.autocast(device_type='cuda', dtype=torch.float16, enabled=(DEVICE=='cuda')):
            pred = model(t)

        dets = yolo_decode(pred, conf_thres=conf, nms_thres=nms_thr)[0].detach().cpu().numpy()

        for i, (x1, y1, x2, y2, score, cls) in enumerate(dets):
            cls = int(cls)
            x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)

            if space == 'orig':
                # 레터박스 → 원본 좌표 역변환
                x1o = int(max(0, min(orig.shape[1]-1, (x1 - padw) / scale)))
                y1o = int(max(0, min(orig.shape[0]-1, (y1 - padh) / scale)))
                x2o = int(max(0, min(orig.shape[1],     (x2 - padw) / scale)))
                y2o = int(max(0, min(orig.shape[0],     (y2 - padh) / scale)))
                if x2o <= x1o or y2o <= y1o:
                    continue
                crop = orig[y1o:y2o, x1o:x2o].copy()
            else:
                # 레터박스 공간에서 바로 크롭
                xi1, yi1, xi2, yi2 = map(int, [x1, y1, x2, y2])
                xi1 = max(0, min(xi1, img.shape[1]-1))
                yi1 = max(0, min(yi1, img.shape[0]-1))
                xi2 = max(0, min(xi2, img.shape[1]))
                yi2 = max(0, min(yi2, img.shape[0]))
                if xi2 <= xi1 or yi2 <= yi1:
                    continue
                crop = img[yi1:yi2, xi1:xi2].copy()

            if crop.size == 0:
                continue

            crop = cv2.resize(crop, (size, size), interpolation=cv2.INTER_LINEAR)
            out_path = out_root / IDX2CLASS[cls] / f"{p.stem}_{i}_{IDX2CLASS[cls]}_{score:.2f}.jpg"
            ok = imwrite_unicode(out_path, crop)
            if not ok:
                print("[FAIL SAVE]", out_path)
            else:
                saved += 1

    print(f"✅ 크롭 저장 완료: {out_root}/(tree|man|woman|house) | 저장 {saved}개 | 모드={space}")

# 예시: 학습셋 앞 200장으로 크롭 생성 (원본 좌표로 자르기)
sample_imgs_more = sorted((TRAIN_ROOT / IMG_DIRNAME).glob("*.jpg"))[:200]
save_crops_for_resnet(model, sample_imgs_more, out_root="crops", conf=0.25, nms_thr=0.45, size=224, space='orig')


✅ 크롭 저장 완료: crops/(tree|man|woman|house) | 저장 771개 | 모드=orig
