<a href="https://colab.research.google.com/github/ilunga-k/AI-study/blob/main/YOLO_HAND.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 충돌 가능 패키지 정리 → 호환 버전 설치 → 자동 재시작
!pip -q uninstall -y numpy opencv-python opencv-contrib-python opencv-python-headless ultralytics albumentations albucore thinc spacy fastai
!pip -q install numpy==1.26.4 ultralytics==8.3.0 opencv-python-headless==4.11.0.86

# W&B 프롬프트 방지
%env WANDB_DISABLED=true

import os; os.kill(os.getpid(), 9)  # 자동 재시작


[0m

In [None]:
import numpy as np, cv2, ultralytics, os, glob, zipfile, shutil
import io
print("NumPy:", np.__version__)       # 1.26.4 recommended
print("OpenCV:", cv2.__version__)     # 4.11.x
print("Ultralytics:", ultralytics.__version__)  # 8.3.0

BASE = "/content/hand_dataset"  # working root for the dataset

# Upload the dataset zip (this step can be skipped if it is already extracted).
from google.colab import files
up = files.upload()  # choose hand_dataset.zip
# A cancelled upload returns an empty dict; fail with a clear message instead of
# an IndexError on the first key.
assert up, "업로드된 파일이 없어. zip 파일을 선택해줘."

# Extract straight from the uploaded bytes. Colab may store a re-uploaded file
# under a renamed copy (e.g. "hand_dataset (2).zip"), so rebuilding a path from
# the dict key can point at an older upload; the in-memory bytes are always the
# file that was just selected.
zip_name, zip_bytes = next(iter(up.items()))
assert zipfile.is_zipfile(io.BytesIO(zip_bytes)), f"{zip_name}: zip 파일이 아니야."
with zipfile.ZipFile(io.BytesIO(zip_bytes), 'r') as zf:
    zf.extractall("/content")

# Auto-detect the dataset root: a folder holding train/ plus val/ (or the
# misspelled vaild/), each with images/ and labels/ sub-folders.
def looks_like_root(p):
    """Return True when `p` contains the expected train and validation folders.

    Required: `train/images` and `train/labels`. The validation images and
    labels may each live under either `val/` or the misspelled `vaild/`.
    """
    def has(rel):
        return os.path.isdir(f"{p}/{rel}")

    if not (has("train/images") and has("train/labels")):
        return False
    if not (has("val/images") or has("vaild/images")):
        return False
    return has("val/labels") or has("vaild/labels")
# Candidates are the directories directly under /content; the shortest path
# that passes looks_like_root wins.
cands = list(filter(os.path.isdir, glob.glob("/content/*")))
root = next(filter(looks_like_root, sorted(cands, key=len)), None)
assert root, "데이터 루트를 찾지 못했어. zip 내부 구조를 확인해줘."

# Move the detected folder to the standard location, replacing any previous copy.
if os.path.abspath(BASE) != os.path.abspath(root):
    if os.path.exists(BASE):
        shutil.rmtree(BASE)
    shutil.move(root, BASE)

# Fix the misspelled folder name: vaild -> val (only when val does not exist yet).
if not os.path.exists(f"{BASE}/val") and os.path.exists(f"{BASE}/vaild"):
    shutil.move(f"{BASE}/vaild", f"{BASE}/val")

# Final structure check: both splits need images/ and labels/.
for sp in ["train","val"]:
    assert all(
        os.path.isdir(f"{BASE}/{sp}/{sub}") for sub in ("images", "labels")
    ), f"{sp} 폴더 구조 오류"
print("구조 OK:", BASE)


NumPy: 1.26.4
OpenCV: 4.11.0
Ultralytics: 8.3.0


Saving hand_dataset.zip to hand_dataset (2).zip
구조 OK: /content/hand_dataset


In [None]:
from ultralytics import YOLO
import json, math

# Detector and thresholds used by gen_pseudo to pseudo-label bag classes.
coco = YOLO("yolov8n.pt")   # COCO-pretrained weights
# Detections are kept only when their (lower-cased) class name is in this set.
TARGET_NAMES = {"backpack","handbag","suitcase"}
# Passed as `conf` when running the detector.
CONF_THR = 0.35
AREA_MIN_RATIO = 0.02   # drop bags that are too small (box area relative to the frame)

def xyxy_to_yolo(x1,y1,x2,y2,W,H):
    """Convert a corner-format box to normalised YOLO (cx, cy, w, h).

    (x1, y1) and (x2, y2) are opposite corners in pixels; W and H are the
    image width and height used to normalise each component.
    """
    center_x = (x1 + x2) / 2 / W
    center_y = (y1 + y2) / 2 / H
    box_w = (x2 - x1) / W
    box_h = (y2 - y1) / H
    return center_x, center_y, box_w, box_h

def image_list(img_dir):
    """Return the sorted paths of the image files directly inside `img_dir`.

    A file counts as an image when its extension, compared case-insensitively,
    is .jpg, .jpeg, .png or .bmp. Matching on the lower-cased extension (rather
    than a fixed list of glob patterns) also picks up mixed-case names such as
    "a.Jpg", never lists a file twice on case-insensitive filesystems, and is
    unaffected by glob metacharacters in `img_dir`.

    Hidden files (leading ".") are skipped, as the previous `*.ext` glob
    patterns did. A missing directory yields an empty list.
    """
    allowed = {".jpg", ".jpeg", ".png", ".bmp"}
    if not os.path.isdir(img_dir):
        return []
    return sorted(
        os.path.join(img_dir, name)
        for name in os.listdir(img_dir)
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in allowed
    )

def _original_hand_lines(lbl_path, meta_path):
    """Return the hand label lines for one image as they were before any merge.

    On the first run there is no JSON sidecar and the label file is read as-is.
    On later runs the label file may already have been rewritten by the merge
    step (hand lines followed by appended bag lines). Reading that back as
    "hand" labels would later relabel the bag boxes as hands, so when the
    current file still starts with the coordinates stored in the sidecar, the
    stored lines are returned instead. If the coordinates differ, the label file
    is assumed to have been replaced and is read fresh.

    NOTE(review): when the sidecar stored no hand lines, any current content is
    treated as merge output and ignored.
    """
    current = []
    if os.path.exists(lbl_path):
        with open(lbl_path, "r") as f:
            current = [ln.strip() for ln in f if ln.strip()]
    if not os.path.exists(meta_path):
        return current
    try:
        with open(meta_path, "r") as f:
            saved = json.load(f).get("hand_lines", [])
    except (OSError, ValueError, AttributeError):
        # Unreadable or malformed sidecar: fall back to the label file.
        return current

    def coords(ln):
        return ln.split()[1:5]

    looks_merged = len(current) >= len(saved) and all(
        coords(a) == coords(b) for a, b in zip(saved, current)
    )
    return saved if looks_merged else current


def gen_pseudo(split):
    """Write a JSON sidecar per image with hand labels and bag pseudo-labels.

    For every image in `{BASE}/{split}/images` the COCO model is run and the
    detections whose class is in TARGET_NAMES and whose box covers at least
    AREA_MIN_RATIO of the frame are kept. The sidecar
    `{BASE}/{split}/labels/<stem>.json` stores:
      - "hand_lines": the existing label lines for the image (empty if none)
      - "items": one dict per kept detection (name, cx, cy, w, h, conf)

    Prints how many sidecars were written and how many contain a bag.
    """
    img_dir = f"{BASE}/{split}/images"
    lbl_dir = f"{BASE}/{split}/labels"
    made, found = 0, 0
    for imgp in image_list(img_dir):
        r = coco(imgp, conf=CONF_THR, iou=0.45, verbose=False)[0]
        H, W = r.orig_shape
        items = []
        if r.boxes is not None and len(r.boxes) > 0:
            for b, cls, conf in zip(r.boxes.xyxy.cpu().numpy(),
                                    r.boxes.cls.cpu().numpy(),
                                    r.boxes.conf.cpu().numpy()):
                name = r.names[int(cls)].lower()
                if name in TARGET_NAMES:
                    # Convert NumPy scalars to plain floats: json.dump cannot
                    # serialise np.float32, so the first detected bag would
                    # otherwise abort the run with a TypeError.
                    x1, y1, x2, y2 = (float(v) for v in b)
                    if (x2-x1)*(y2-y1)/(W*H) >= AREA_MIN_RATIO:
                        cx,cy,w,h = xyxy_to_yolo(x1,y1,x2,y2,W,H)
                        items.append({"name":name,"cx":cx,"cy":cy,"w":w,"h":h,"conf":float(conf)})
        # Build label/sidecar paths from the file stem rather than global
        # str.replace, which would also rewrite "/images/" or ".txt" occurring
        # elsewhere in the path.
        stem = os.path.splitext(os.path.basename(imgp))[0]
        lbl_path = os.path.join(lbl_dir, stem + ".txt")
        meta_path = os.path.join(lbl_dir, stem + ".json")
        # Existing hand labels (empty list when there are none).
        hand_lines = _original_hand_lines(lbl_path, meta_path)
        # Save the intermediate JSON sidecar.
        with open(meta_path,"w") as f:
            json.dump({"hand_lines":hand_lines,"items":items}, f)
        made += 1; found += int(len(items)>0)
    print(f"[{split}] JSON {made}개, 가방 감지 포함 {found}개")

# Generate the JSON sidecars for the training split first, then validation.
for sp in ("train", "val"):
    gen_pseudo(split=sp)
print("의사라벨(JSON) 생성 완료")


[train] JSON 491개, 가방 감지 포함 0개
[val] JSON 58개, 가방 감지 포함 0개
의사라벨(JSON) 생성 완료


In [None]:
import glob, json

# Class name -> index used in the merged YOLO label files.
CLASS_TO_IDX = {"hand":0, "backpack":1, "handbag":2, "suitcase":3}

def merge_labels(split):
    """Rewrite each label .txt from its JSON sidecar (hand lines + bag items).

    For every `{BASE}/{split}/labels/*.json`:
      - each entry of "hand_lines" is re-emitted with its class forced to the
        "hand" index, keeping the four coordinate tokens verbatim;
      - each entry of "items" is appended with its class index from
        CLASS_TO_IDX and coordinates formatted to 6 decimals.
    The result overwrites the .txt next to the sidecar.

    Label lines with fewer than five tokens are skipped and counted, not raised.

    Raises:
        KeyError: if an item name is not in CLASS_TO_IDX.
    """
    metas = sorted(glob.glob(f"{BASE}/{split}/labels/*.json"))
    out_cnt = 0
    skipped = 0
    for mp in metas:
        # Context manager so the handle is closed deterministically.
        with open(mp) as f:
            meta = json.load(f)
        lines = []
        # Hand labels -> class index forced to "hand".
        for ln in meta.get("hand_lines", []):
            ps = ln.split()
            if len(ps) < 5:
                # Malformed line: unpacking five values would raise ValueError
                # and abort the whole merge.
                skipped += 1
                continue
            cx, cy, w, h = ps[1:5]
            lines.append(f"{CLASS_TO_IDX['hand']} {cx} {cy} {w} {h}")
        # Append the bag pseudo-labels.
        for it in meta.get("items", []):
            idx = CLASS_TO_IDX[it["name"]]
            lines.append(f"{idx} {it['cx']:.6f} {it['cy']:.6f} {it['w']:.6f} {it['h']:.6f}")
        # Swap only the final extension; str.replace would rewrite every
        # ".json" occurring anywhere in the path.
        out_txt = os.path.splitext(mp)[0] + ".txt"
        with open(out_txt,"w") as f: f.write("\n".join(lines))
        out_cnt += 1
    print(f"[{split}] 최종 txt {out_cnt}개 생성/갱신")
    if skipped:
        print(f"[{split}] 형식 오류 라벨 {skipped}줄 건너뜀")

for sp in ["train","val"]:
    merge_labels(sp)
print("멀티클래스(txt) 병합 완료")

# Write the multi-class data.yaml.
# NOTE: this writes to {BASE}/data.yaml, so a data.yaml shipped inside the
# dataset at that path is overwritten.
# Class names are derived from CLASS_TO_IDX so the yaml cannot drift from the
# indices written into the label files.
class_names = sorted(CLASS_TO_IDX, key=CLASS_TO_IDX.get)
assert [CLASS_TO_IDX[n] for n in class_names] == list(range(len(class_names))), \
    "CLASS_TO_IDX 인덱스는 0부터 연속이어야 해."
yaml_text = f"""\
path: {BASE}
train: train/images
val: val/images
nc: {len(class_names)}
names: [{', '.join(class_names)}]
"""
# Use context managers so both handles are closed deterministically.
with open(f"{BASE}/data.yaml","w") as f:
    f.write(yaml_text)
with open(f"{BASE}/data.yaml") as f:
    print(f.read())


[train] 최종 txt 491개 생성/갱신
[val] 최종 txt 58개 생성/갱신
멀티클래스(txt) 병합 완료
path: /content/hand_dataset
train: train/images
val: val/images
nc: 4
names: [hand, backpack, handbag, suitcase]



In [None]:
# 1단계: 백본 다수 freeze(의사라벨 불확실성 완화)
!yolo detect train \
  model=yolov8n.pt \
  data={BASE}/data.yaml \
  epochs=20 imgsz=640 batch=8 \
  lr0=0.002 weight_decay=0.0005 \
  freeze=10 \
  mosaic=0.8 mixup=0.05 degrees=5 translate=0.10 scale=0.5 shear=2 \
  cache=True

# 2단계: unfreeze 후 미세조정
!yolo detect train \
  model=runs/detect/train/weights/best.pt \
  data={BASE}/data.yaml \
  epochs=40 imgsz=640 batch=8 \
  lr0=0.001 weight_decay=0.0005 \
  mosaic=0.6 mixup=0.0 degrees=5 translate=0.10 scale=0.5 shear=2 \
  cache=True


New https://pypi.org/project/ultralytics/8.3.176 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.0 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/hand_dataset/data.yaml, epochs=20, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=True, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=10, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fals

In [None]:
# 검증
!yolo detect val model=runs/detect/train2/weights/best.pt data={BASE}/data.yaml imgsz=640

# 샘플 시각화(선택)
!yolo detect predict model=runs/detect/train2/weights/best.pt source={BASE}/val/images conf=0.25 iou=0.45 save=True

# 클래스 확인
from ultralytics import YOLO
m = YOLO("runs/detect/train2/weights/best.pt")
print("model.names:", m.names)  # {0:'hand',1:'backpack',2:'handbag',3:'suitcase'} 기대

# 다운로드
from google.colab import files
files.download("runs/detect/train2/weights/best.pt")


Ultralytics 8.3.0 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 186 layers, 2,685,148 parameters, 0 gradients, 6.8 GFLOPs
[34m[1mval: [0mScanning /content/hand_dataset/val/labels.cache... 58 images, 0 backgrounds, 0 corrupt: 100% 58/58 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% 4/4 [00:01<00:00,  2.42it/s]
                   all         58        179      0.967      0.961      0.991      0.768
                  hand         58        179      0.967      0.961      0.991      0.768
Speed: 1.7ms preprocess, 8.1ms inference, 0.0ms loss, 5.1ms postprocess per image
Results saved to [1mruns/detect/val2[0m
💡 Learn more at https://docs.ultralytics.com/modes/val
Ultralytics 8.3.0 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 186 layers, 2,685,148 parameters, 0 gradients, 6.8 GFLOPs

image 1/58 /content/hand_dataset/val/images/CARDS_COURTYARD_

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>