# 1. 라이브러리 설치 및 import

## 1.1. 라이브러리 설치

In [None]:
!pip install mediapipe    # Install MediaPipe (used below for the hand-landmark overlay)



## 1.2. 라이브러리 import

In [None]:
import mediapipe as mp                                  # 손 랜드마크 오버레이 목적
import os
import pathlib
import time
from tqdm import tqdm                                   # 진행률 표시
import cv2                                              # OpenCV: 이미지 처리
import torch
import torchvision as tv                                # PyTorch 비전 관련 기능
from torch import nn
from torchvision import transforms as T                 # 이미지 전처리
from torch.utils.data import DataLoader, random_split
from torch.amp import autocast, GradScaler              # Mixed Precision 학습
import numpy as np
from PIL import Image                                   # 이미지 입출력
from google.colab import drive                          # Google Drive 마운트

# 2. Google Drive 마운트 및 데이터를 /tmp로 복사

In [None]:
drive.mount('/content/drive')
!tar -C /content/drive/MyDrive -cf - asl_alphabet_train | tar -C /tmp -xvf -

스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.
asl_alphabet_train/nothing/nothing1751.jpg
tar: asl_alphabet_train/nothing/nothing1751.jpg: file changed as we read it
asl_alphabet_train/nothing/nothing2802.jpg
tar: asl_alphabet_train/nothing/nothing2802.jpg: file changed as we read it
asl_alphabet_train/nothing/nothing1260.jpg
tar: asl_alphabet_train/nothing/nothing1260.jpg: file changed as we read it
asl_alphabet_train/nothing/nothing1906.jpg
tar: asl_alphabet_train/nothing/nothing1906.jpg: file changed as we read it
tar: asl_alphabet_train/nothing/nothing1156.jpg
asl_alphabet_train/nothing/nothing1156.jpg: file changed as we read it
tar: asl_alphabet_train/nothing/nothing1126.jpg: file changed as we read it
asl_alphabet_train/nothing/nothing1126.jpg
asl_alphabet_train/nothing/nothing2292.jpg
tar: asl_alphabet_train/nothing/nothing2292.jpg: file changed as we read it
asl_alphabet_train/nothing/nothing2549.jpg
tar: asl_alphabet_train/nothing/nothing2549.jpg: file changed as we read i

# 3. 데이터 증강 및 랜드마크 오버레이(overlay)

## 3.1. 원본 데이터(src) 경로와 증강/오버레이 결과 저장 경로 설정

In [None]:
# Number of augmented copies generated for each source image.
NUM_AUG = 3

# Dataset locations on the local disk.
SRC_ROOT = '/tmp/asl_alphabet_train'                 # raw input images
AUG_ROOT = '/tmp/asl_alphabet_augmented'             # augmentation output root
DST_ROOT = '/tmp/asl_alphabet_augmented_overlayed'   # augmented + landmark-overlayed output root

## 3.2. 증강(transform) 정의: 랜덤 수평 뒤집기, 회전, 색상 변경

In [None]:
# Random augmentation pipeline applied to PIL images:
# horizontal flip, rotation (degrees=15), then colour jitter.
_aug_steps = [
    T.RandomHorizontalFlip(),
    T.RandomRotation(degrees=15),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
aug_transform = T.Compose(_aug_steps)

## 3.3. 출력 디렉터리(AUG_ROOT, DST_ROOT) 아래에 클래스별 폴더 생성

In [None]:
# Mirror every entry found in SRC_ROOT as a sub-folder under both output roots.
class_names = os.listdir(SRC_ROOT)
for out_root in (AUG_ROOT, DST_ROOT):
    for class_name in class_names:
        target_dir = os.path.join(out_root, class_name)
        os.makedirs(target_dir, exist_ok=True)   # no error if it already exists

## 3.4. Mediapipe Hands 설정: 최대 1 개 손 검출, 신뢰도 기준 설정

In [None]:
# MediaPipe Hands detector used to find the hand landmarks that get drawn onto each image.
# static_image_mode=True: the inputs are independent still images, not consecutive video
# frames. With the default (False) MediaPipe treats the inputs as a video stream and may
# reuse the hand location tracked in the previous, unrelated image instead of running
# detection again.
hands = mp.solutions.hands.Hands(
    static_image_mode=True,
    max_num_hands=1,                  # detect at most one hand per image
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,      # kept for parity; per MediaPipe docs it is ignored in static mode
)
drawing = mp.solutions.drawing_utils    # helper used to draw landmarks and connections

## 3.5. 증강 → 오버레이

In [None]:
# For every source image: build the original plus NUM_AUG random augmentations, draw the
# detected hand landmarks on each variant, and write the results under DST_ROOT/<class>/.
# NOTE: only DST_ROOT is written by this loop; nothing is stored under AUG_ROOT.
try:
    for cls in os.listdir(SRC_ROOT):
        src_cls = os.path.join(SRC_ROOT, cls)
        if not os.path.isdir(src_cls):
            # Skip stray files sitting next to the class folders.
            continue
        dst_cls = os.path.join(DST_ROOT, cls)
        os.makedirs(dst_cls, exist_ok=True)

        for fn in tqdm(os.listdir(src_cls), desc=f"Aug+Overlay {cls}"):
            src_path = os.path.join(src_cls, fn)
            # Split once so the "_aug<i>" suffix is inserted before the extension whatever it
            # is. str.replace('.jpg', ...) left other extensions (.JPG, .png, ...) untouched,
            # which made every variant overwrite the same output file.
            stem, ext = os.path.splitext(fn)

            # Read as a PIL image; the context manager closes the underlying file handle.
            with Image.open(src_path) as im:
                img_pil = im.convert('RGB')

            # The original image followed by NUM_AUG augmented copies.
            variants = [img_pil] + [aug_transform(img_pil) for _ in range(NUM_AUG)]

            for i, img_aug in enumerate(variants):
                rgb = np.array(img_aug)
                # MediaPipe takes RGB input, so the PIL pixels are passed as they are.
                res = hands.process(rgb)
                # OpenCV drawing and writing work on BGR.
                cv_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
                # When a hand is found, overlay its landmarks and connections.
                if res.multi_hand_landmarks:
                    drawing.draw_landmarks(
                        cv_img,
                        res.multi_hand_landmarks[0],
                        mp.solutions.hands.HAND_CONNECTIONS,
                    )
                # Output naming: original -> _aug0, augmented copies -> _aug1.._aug<NUM_AUG>.
                out_name = f'{stem}_aug{i}{ext}'
                cv2.imwrite(os.path.join(dst_cls, out_name), cv_img)
finally:
    # Release MediaPipe resources even if the loop stops on an error.
    hands.close()

Aug+Overlay R: 100%|██████████| 3000/3000 [04:11<00:00, 11.94it/s]
Aug+Overlay U: 100%|██████████| 3000/3000 [04:13<00:00, 11.84it/s]
Aug+Overlay B: 100%|██████████| 3000/3000 [04:05<00:00, 12.20it/s]
Aug+Overlay K: 100%|██████████| 3000/3000 [04:02<00:00, 12.36it/s]
Aug+Overlay G: 100%|██████████| 3000/3000 [04:15<00:00, 11.72it/s]
Aug+Overlay V: 100%|██████████| 3000/3000 [04:09<00:00, 12.02it/s]
Aug+Overlay I: 100%|██████████| 3000/3000 [04:08<00:00, 12.08it/s]
Aug+Overlay N: 100%|██████████| 3000/3000 [04:26<00:00, 11.26it/s]
Aug+Overlay J: 100%|██████████| 3000/3000 [04:08<00:00, 12.07it/s]
Aug+Overlay X: 100%|██████████| 3000/3000 [04:08<00:00, 12.07it/s]
Aug+Overlay D: 100%|██████████| 3000/3000 [03:47<00:00, 13.19it/s]
Aug+Overlay Y: 100%|██████████| 3000/3000 [04:02<00:00, 12.35it/s]
Aug+Overlay F: 100%|██████████| 3000/3000 [03:52<00:00, 12.88it/s]
Aug+Overlay L: 100%|██████████| 3000/3000 [03:52<00:00, 12.91it/s]
Aug+Overlay Z: 100%|██████████| 3000/3000 [04:14<00:00, 11.78i

# 4. train/val split 및 DataLoader 준비

## 4.1. 전처리 완료된 이미지 폴더를 pathlib로 지정

In [None]:
# Per-channel statistics handed to T.Normalize further down.
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

# Folder holding the augmented + overlayed images, as a pathlib.Path.
DATA_ROOT = pathlib.Path(DST_ROOT)

## 4.2. 전체 데이터셋(ImageFolder) 로드 (transform = None)

In [None]:
# Index every image under DATA_ROOT with ImageFolder; no transform is attached at this point.
full_ds = tv.datasets.ImageFolder(root=DATA_ROOT, transform=None)

## 4.3. Train/Validation을 비율 80:20으로 분할, 시드 고정

In [None]:
# 80:20 train/validation split with a fixed seed, done per SOURCE IMAGE rather than per file.
# Every source image is stored as several files named "<stem>_aug<i>.<ext>". Splitting the
# files at random would put variants of one image on both sides of the split and leak
# training data into validation, so all variants of an image are kept together.
groups = {}
for idx, (path, label) in enumerate(full_ds.samples):
    stem = pathlib.Path(path).stem
    base, sep, suffix = stem.rpartition('_aug')
    # Files that do not follow the "_aug<i>" naming form a group of their own.
    source_id = base if sep and suffix.isdigit() else stem
    groups.setdefault((label, source_id), []).append(idx)

group_keys = list(groups)
order = torch.randperm(
    len(group_keys),
    generator=torch.Generator().manual_seed(42),   # fixed seed -> reproducible split
).tolist()
n_train_groups = int(0.8 * len(group_keys))

train_idx = [i for g in order[:n_train_groups] for i in groups[group_keys[g]]]
val_idx = [i for g in order[n_train_groups:] for i in groups[group_keys[g]]]

n_total = len(full_ds)
n_train = len(train_idx)
n_val   = len(val_idx)
# Subset is the same type random_split returns, so downstream code is unaffected.
train_ds = torch.utils.data.Subset(full_ds, train_idx)
val_ds   = torch.utils.data.Subset(full_ds, val_idx)

## 4.4. train/val 전용 transform 할당

In [None]:
# Preprocessing applied to every sample: resize to the 224x224 model input, convert to a
# tensor, then normalise with mean/std.
shared_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean, std),
])
# NOTE: both splits are views over one underlying dataset object, so `.dataset.transform`
# is a single shared attribute and the later assignment is the one that takes effect.
# The same pipeline is therefore assigned to both.
train_ds.dataset.transform = shared_transform
val_ds.dataset.transform = shared_transform

## 4.5. DataLoader 생성: 배치, 셔플, 병렬 처리 등 설정

In [None]:
# Worker processes for data loading: at most 8, but never more than the CPUs the machine
# reports. A fixed 8 oversubscribes small VMs and PyTorch warns about it.
num_workers = min(8, os.cpu_count() or 1)

# Training loader: shuffled, and the last incomplete batch is dropped.
train_loader = DataLoader(
    train_ds, batch_size=128, shuffle=True,
    num_workers=num_workers, pin_memory=True, drop_last=True
)
# Validation loader: fixed order, every sample is kept.
val_loader = DataLoader(
    val_ds, batch_size=128, shuffle=False,
    num_workers=num_workers, pin_memory=True
)

print(f"▶ train samples: {len(train_ds)}, val samples: {len(val_ds)}")

▶ train samples: 278400, val samples: 69600


# 5. 모델 학습

## 5.1. GPU 사용 가능 시 CUDA, 아니면 CPU

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

## 5.2. torchvision 기본 모델(MobileNetV2) 로드, 마지막 FC 레이어 클래스 수에 맞게 교체

In [None]:
# MobileNetV2 with the IMAGENET1K_V1 weights; the final linear layer is replaced so the
# network emits one logit per dataset class.
model = tv.models.mobilenet_v2(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Linear(
    in_features=model.last_channel,
    out_features=len(full_ds.classes),
)
# Module.to() moves the parameters and returns the module itself.
model = model.to(device)

## 5.3. PyTorch 2.0 컴파일 기능을 사용하여 속도 최적화(CUDA일 때)

In [None]:
# Wrap the model with torch.compile only when running on CUDA; on CPU it is left as is.
model = torch.compile(model) if device.type == 'cuda' else model

## 5.4. 손실 함수, optimizer, scaler 설정

In [None]:
# Number of passes over the training set.
EPOCHS    = 10
# Classification loss.
criterion = nn.CrossEntropyLoss()
# Gradient scaler used together with autocast for mixed-precision training.
scaler    = GradScaler()
# AdamW over all model parameters.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=1e-4)

## 5.5. Epoch 별 학습 및 검증 루프

In [None]:
# Train for EPOCHS epochs; after each epoch evaluate on the validation loader and print the
# train loss, validation loss, validation accuracy and elapsed time.

# Mixed precision is only used on CUDA. Autocast targets the device actually selected
# (the notebook falls back to CPU) instead of a hard-coded 'cuda'.
use_amp = device.type == 'cuda'

for epoch in range(1, EPOCHS + 1):
    # --- Training ---
    model.train()
    running_loss = 0.0
    n_seen = 0                      # samples actually processed in this epoch
    t0 = time.time()
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [Train]"):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        optimizer.zero_grad()
        with autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images)
            loss    = criterion(outputs, labels)
        # Scaled backward pass and optimizer step (GradScaler handles the loss scaling).
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size
    # Average over the samples that were really seen: the train loader drops its last
    # incomplete batch, so len(train_ds) can overstate the count.
    avg_loss = running_loss / max(n_seen, 1)

    # --- Validation ---
    model.eval()
    val_loss = 0.0
    correct  = 0
    n_val_seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = images.size(0)
            val_loss += criterion(outputs, labels).item() * batch_size
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            n_val_seen += batch_size
    # max(..., 1) avoids a ZeroDivisionError on an empty validation set.
    avg_val_loss = val_loss / max(n_val_seen, 1)
    val_acc = correct / max(n_val_seen, 1)

    # Elapsed time covers both the training and the validation pass of this epoch.
    t1 = time.time()
    print(f"Epoch {epoch} ▶ "
          f"Train Loss: {avg_loss:.4f} | "
          f"Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f} | "
          f"Time: {t1-t0:.1f}s")

Using device: cuda


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 101MB/s] 
Epoch 1/10 [Train]: 100%|██████████| 2175/2175 [03:22<00:00, 10.74it/s]


Epoch 1 ▶ Train Loss: 0.0593 | Val Loss: 0.0083 | Val Acc: 0.9974 | Time: 272.0s


Epoch 2/10 [Train]: 100%|██████████| 2175/2175 [02:12<00:00, 16.45it/s]


Epoch 2 ▶ Train Loss: 0.0093 | Val Loss: 0.0085 | Val Acc: 0.9974 | Time: 164.6s


Epoch 3/10 [Train]: 100%|██████████| 2175/2175 [02:11<00:00, 16.49it/s]


Epoch 3 ▶ Train Loss: 0.0077 | Val Loss: 0.0032 | Val Acc: 0.9990 | Time: 164.1s


Epoch 4/10 [Train]: 100%|██████████| 2175/2175 [02:17<00:00, 15.78it/s]


Epoch 4 ▶ Train Loss: 0.0058 | Val Loss: 0.0039 | Val Acc: 0.9988 | Time: 170.2s


Epoch 5/10 [Train]: 100%|██████████| 2175/2175 [02:11<00:00, 16.48it/s]


Epoch 5 ▶ Train Loss: 0.0055 | Val Loss: 0.0032 | Val Acc: 0.9991 | Time: 164.5s


Epoch 6/10 [Train]: 100%|██████████| 2175/2175 [02:12<00:00, 16.41it/s]


Epoch 6 ▶ Train Loss: 0.0053 | Val Loss: 0.0087 | Val Acc: 0.9970 | Time: 164.5s


Epoch 7/10 [Train]: 100%|██████████| 2175/2175 [02:18<00:00, 15.71it/s]


Epoch 7 ▶ Train Loss: 0.0035 | Val Loss: 0.0020 | Val Acc: 0.9994 | Time: 170.5s


Epoch 8/10 [Train]: 100%|██████████| 2175/2175 [02:11<00:00, 16.56it/s]


Epoch 8 ▶ Train Loss: 0.0035 | Val Loss: 0.0054 | Val Acc: 0.9983 | Time: 163.7s


Epoch 9/10 [Train]: 100%|██████████| 2175/2175 [02:11<00:00, 16.53it/s]


Epoch 9 ▶ Train Loss: 0.0034 | Val Loss: 0.0036 | Val Acc: 0.9989 | Time: 163.6s


Epoch 10/10 [Train]: 100%|██████████| 2175/2175 [02:11<00:00, 16.50it/s]


Epoch 10 ▶ Train Loss: 0.0033 | Val Loss: 0.0026 | Val Acc: 0.9991 | Time: 165.4s


# 6. 모델 저장

In [None]:
# Persist the trained weights to Google Drive.
SAVE_PATH = '/content/drive/MyDrive/asl_model_local.pth'
# torch.compile wraps the network, and the wrapper's state_dict keys carry an "_orig_mod."
# prefix that a plain MobileNetV2 cannot load. Save the weights of the underlying module
# instead; when the model was never compiled, it is used as is.
model_to_save = getattr(model, '_orig_mod', model)
torch.save(model_to_save.state_dict(), SAVE_PATH)
print(f"📝 Saved model to {SAVE_PATH}")

📝 Saved model to /content/drive/MyDrive/asl_model_local.pth
