### VGG16 모델 학습

In [2]:
# Model download: replace the factory that ssl uses for default HTTPS contexts
# with the unverified one, so HTTPS requests made after this point skip
# certificate verification (presumably to let the pretrained-weights download
# succeed on this machine - confirm).
# NOTE(review): this is a process-wide change and removes protection against
# man-in-the-middle attacks; prefer fixing the local CA bundle if possible.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [1]:
import os
import torch

from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold

In [2]:
# 1) Data preparation: merge the train and val splits, build the test loader
def prepare_datasets(base_dir,
                     train_txt="one-indexed-files-notrash_train.txt",
                     val_txt="one-indexed-files-notrash_val.txt",
                     test_txt="one-indexed-files-notrash_test.txt",
                     img_subdir="Garbage classification",
                     img_size=(224,224),
                     batch_size=16):
    """Build the merged train+val subset and the data loaders.

    Images are read with ``ImageFolder`` from ``base_dir/img_subdir``. Each
    split file lives in ``base_dir`` and is read line by line; only the first
    whitespace-separated token of a line (the image file name) is used, and
    it is matched against the base names of the ImageFolder samples.

    Args:
        base_dir: Directory containing the split files and the image folder.
        train_txt: File name of the training split list.
        val_txt: File name of the validation split list.
        test_txt: File name of the test split list.
        img_subdir: Sub-directory of ``base_dir`` passed to ``ImageFolder``.
        img_size: Target size given to ``transforms.Resize``.
        batch_size: Batch size of both returned loaders.

    Returns:
        ``(train_val_ds, train_val_loader, test_loader, class_names)`` where
        ``train_val_ds`` is a ``Subset`` holding train followed by val
        indices, ``train_val_loader`` shuffles it, ``test_loader`` iterates
        the test subset in order, and ``class_names`` is
        ``ImageFolder.classes``.

    Raises:
        KeyError: If a split file lists a file name that is not among the
            ImageFolder samples.
    """
    img_dir = os.path.join(base_dir, img_subdir)
    # Only resize + tensor conversion is applied.
    # NOTE(review): no mean/std normalization is performed here - confirm
    # this is intended for the pretrained backbone used downstream.
    transform = transforms.Compose([transforms.Resize(img_size), transforms.ToTensor()])
    full_ds = datasets.ImageFolder(img_dir, transform=transform)
    class_names = full_ds.classes

    # Map image base name -> position in full_ds.samples.
    # Base names are assumed unique across class folders; a duplicate would
    # silently keep the last occurrence.
    name_to_idx = {os.path.basename(p): idx for idx,(p,_) in enumerate(full_ds.samples)}

    def _load_idxs(txt):
        """Return the sample indices for the file names listed in ``txt``."""
        path = os.path.join(base_dir, txt)
        idxs = []
        with open(path) as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    # Blank line (e.g. trailing newline): nothing to look up.
                    continue
                fname = tokens[0]
                try:
                    idxs.append(name_to_idx[fname])
                except KeyError:
                    raise KeyError(
                        f"{fname!r} listed in {path} was not found under {img_dir}"
                    ) from None
        return idxs

    train_idxs = _load_idxs(train_txt)
    val_idxs   = _load_idxs(val_txt)
    test_idxs  = _load_idxs(test_txt)

    # Merge train + val; fold splitting happens later on this merged subset.
    train_val_idxs = train_idxs + val_idxs
    train_val_ds = Subset(full_ds, train_val_idxs)
    test_ds      = Subset(full_ds, test_idxs)

    train_val_loader = DataLoader(train_val_ds, batch_size=batch_size, shuffle=True)
    test_loader      = DataLoader(test_ds,      batch_size=batch_size, shuffle=False)

    print("클래스:", class_names)
    print("Train+Val 샘플 수:", len(train_val_ds))
    print("Test 샘플 수:",  len(test_loader.dataset))

    return train_val_ds, train_val_loader, test_loader, class_names

In [18]:
def _build_frozen_vgg16(num_classes, lr, num_epochs, device):
    """Create a pretrained VGG16 with a new output layer and frozen features.

    Returns ``(model, criterion, optimizer, scheduler)``: the model moved to
    ``device``, a cross-entropy loss, an Adam optimizer over the parameters
    that are still trainable, and a StepLR scheduler that multiplies the
    learning rate by 0.1 every ``max(1, num_epochs // 2)`` epochs.
    """
    model = models.vgg16(pretrained=True)
    model.classifier[-1] = torch.nn.Linear(4096, num_classes)
    model.to(device)
    # Freeze the convolutional feature extractor; only the classifier trains.
    for p in model.features.parameters():
        p.requires_grad = False

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=max(1, num_epochs // 2), gamma=0.1)
    return model, criterion, optimizer, scheduler


def _unfreeze_features(model, optimizer):
    """Make the feature extractor trainable and register it with the optimizer."""
    for p in model.features.parameters():
        p.requires_grad = True
    # NOTE(review): the group is added without an explicit lr, so it appears
    # to start from the optimizer's default lr rather than a scheduler-decayed
    # one - confirm if freeze_epochs can reach the scheduler step size.
    optimizer.add_param_group({'params': model.features.parameters()})


def _train_one_epoch(model, loader, criterion, optimizer, scheduler, device):
    """Run one training epoch, step the scheduler, return the summed sample loss."""
    model.train()
    running_loss = 0.0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        loss = criterion(model(imgs), lbls)
        loss.backward()
        optimizer.step()
        # loss is a batch mean; weight by batch size to get a per-sample sum.
        running_loss += loss.item() * imgs.size(0)
    scheduler.step()
    return running_loss


def _evaluate(model, loader, device):
    """Return ``(accuracy, macro_avg_dict)`` of ``model`` over ``loader``."""
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, lbls in loader:
            imgs = imgs.to(device)
            out = model(imgs).argmax(dim=1).cpu().numpy()
            preds.extend(out)
            targets.extend(lbls.numpy())

    report = classification_report(targets, preds, output_dict=True, zero_division=0)
    acc = np.mean(np.array(preds) == np.array(targets))
    return acc, report["macro avg"]


def kfold_train_and_evaluate(train_val_ds,
                             test_loader,
                             class_names,
                             tuning_index,
                             lr,
                             batch_size,
                             num_epochs,
                             k=5,
                             freeze_epochs=0,
                             device=None):
    """Run stratified K-fold cross-validation, test each fold, save a final model.

    For every fold a fresh pretrained VGG16 is trained for ``num_epochs``
    epochs, validated after each epoch, and evaluated once on ``test_loader``
    after the last epoch. Afterwards a new model is trained on the whole
    train+val subset and its ``state_dict`` is saved. Metrics and the model
    are written to the ``tuning_reports`` directory.

    Args:
        train_val_ds: ``Subset`` holding the merged train and validation
            samples; its ``dataset`` must expose ``samples`` with labels.
        test_loader: Loader for the final test data.
        class_names: List of class names; its length sets the output size.
        tuning_index: Identifier of this tuning run (used in file names).
        lr: Learning rate for Adam.
        batch_size: Batch size for the training/validation loaders.
        num_epochs: Number of epochs per fold and for the final training.
        k: Number of cross-validation folds.
        freeze_epochs: Number of epochs the feature extractor stays frozen;
            it is unfrozen at the start of epoch ``freeze_epochs + 1``.
        device: Torch device; CUDA is used when available if ``None``.

    Returns:
        ``(df_cv, df_test)``: a DataFrame with one row per fold and epoch of
        validation metrics, and a DataFrame with one row per fold of test
        metrics.
    """
    # 1) Reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    # 2) Device selection
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 3) Underlying dataset plus the train+val indices and their labels
    full_dataset = train_val_ds.dataset
    all_idxs     = train_val_ds.indices
    all_labels   = [full_dataset.samples[i][1] for i in all_idxs]

    # 4) Stratified fold generator
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

    cv_epoch_history = []
    test_fold_history = []

    # --- K-Fold Cross-Validation ---
    for fold, (tr_idx, val_idx) in enumerate(
            skf.split(np.zeros(len(all_labels)), all_labels), start=1):
        # Fold positions index into all_idxs, which in turn index full_dataset.
        tr_sub = Subset(full_dataset, [all_idxs[i] for i in tr_idx])
        va_sub = Subset(full_dataset, [all_idxs[i] for i in val_idx])
        tr_loader = DataLoader(tr_sub, batch_size=batch_size, shuffle=True)
        va_loader = DataLoader(va_sub, batch_size=batch_size, shuffle=False)

        model, criterion, optimizer, scheduler = _build_frozen_vgg16(
            len(class_names), lr, num_epochs, device)

        for epoch in range(1, num_epochs + 1):
            if epoch == freeze_epochs + 1:
                _unfreeze_features(model, optimizer)

            running_loss = _train_one_epoch(
                model, tr_loader, criterion, optimizer, scheduler, device)
            acc, macro = _evaluate(model, va_loader, device)

            cv_epoch_history.append({
                "tuning_index": tuning_index,
                "fold":         fold,
                "epoch":        epoch,
                "val_acc":      acc,
                "val_prec":     macro["precision"],
                "val_recall":   macro["recall"],
                "val_f1":       macro["f1-score"]
            })

            print(f"[Fold {fold}] Epoch {epoch}/{num_epochs}  "
                  f"Loss: {running_loss/len(tr_sub):.4f}  "
                  f"Val Acc: {acc:.4f}  "
                  f"Val F1: {macro['f1-score']:.4f}")

        # --- Test evaluation with this fold's final-epoch model ---
        test_acc, test_macro = _evaluate(model, test_loader, device)

        test_fold_history.append({
            "tuning_index": tuning_index,
            "fold":         fold,
            "test_acc":     test_acc,
            "test_prec":    test_macro["precision"],
            "test_recall":  test_macro["recall"],
            "test_f1":      test_macro["f1-score"]
        })

        print(f"▶ [Fold {fold}] Test Acc: {test_acc:.4f}  "
              f"Test F1: {test_macro['f1-score']:.4f}")

        # Drop this fold's objects before the next model is allocated.
        del model, optimizer, scheduler
        torch.cuda.empty_cache()

    # Output directory for the reports
    os.makedirs("tuning_reports", exist_ok=True)

    # Save CV metrics
    df_cv = pd.DataFrame(cv_epoch_history)
    cv_path = f"tuning_reports/tuning_{tuning_index}_lr{lr}_bs{batch_size}_cv.csv"
    df_cv.to_csv(cv_path, index=False)
    print(f"✅ CV metrics saved to: {cv_path}")

    # Save test metrics
    df_test = pd.DataFrame(test_fold_history)
    test_path = f"tuning_reports/tuning_{tuning_index}_lr{lr}_bs{batch_size}_test.csv"
    df_test.to_csv(test_path, index=False)
    print(f"✅ Test metrics saved to: {test_path}")

    # --- Retrain a final model on the whole train+val subset and save it ---
    # Use train_val_ds, not its underlying dataset: the underlying ImageFolder
    # also contains the test images, which must stay out of training.
    full_loader = DataLoader(train_val_ds, batch_size=batch_size, shuffle=True)
    model, criterion, optimizer, scheduler = _build_frozen_vgg16(
        len(class_names), lr, num_epochs, device)

    for epoch in range(1, num_epochs + 1):
        if epoch == freeze_epochs + 1:
            _unfreeze_features(model, optimizer)
        _train_one_epoch(model, full_loader, criterion, optimizer, scheduler, device)

    final_path = f"tuning_reports/tuning_{tuning_index}_lr{lr}_bs{batch_size}_final_model.pth"
    torch.save(model.state_dict(), final_path)
    print(f"✅ Final model saved to: {final_path}")


    return df_cv, df_test


### 코드실행

In [None]:
# 1) Data preparation
base_dir = "./archive/Garbage classification"
train_val_ds, train_val_loader, test_loader, classes = prepare_datasets(
    base_dir,
    test_txt="one-indexed-files-notrash_test.txt",
    batch_size=16
)
# 2-1) K-fold CV and test evaluation.
# kfold_train_and_evaluate returns exactly two DataFrames (per-epoch CV
# metrics and per-fold test metrics), so unpack two values; unpacking three
# raises ValueError after the whole training run has finished.
cv_df, test_metrics = kfold_train_and_evaluate(
    train_val_ds, test_loader, classes,
    tuning_index=1, lr=0.001, batch_size=16, num_epochs=10, freeze_epochs=3
)
print("=== CV Results ===\n", cv_df)
print("=== Test Metrics ===\n", test_metrics)

In [None]:
# Run 2: lr=0.001, batch size 32, feature extractor frozen for the first 3 epochs.
# Step 1 - rebuild the merged train+val subset and the test loader.
base_dir = "./archive/Garbage classification"
train_val_ds, train_val_loader, test_loader, classes = prepare_datasets(
    base_dir, test_txt="one-indexed-files-notrash_test.txt", batch_size=32)

# Step 2 - K-fold cross-validation plus per-fold test evaluation.
df_cv_2, df_test_2 = kfold_train_and_evaluate(
    train_val_ds=train_val_ds,
    test_loader=test_loader,
    class_names=classes,
    tuning_index=2,
    lr=0.001,
    batch_size=32,
    num_epochs=10,
    freeze_epochs=3,
)
print("=== CV Results ===\n", df_cv_2)
print("=== Test Metrics ===\n", df_test_2)


In [None]:
# Run 3: lr=0.001, batch size 64, feature extractor frozen for the first 3 epochs.
# Step 1 - rebuild the merged train+val subset and the test loader.
base_dir = "./archive/Garbage classification"
train_val_ds, train_val_loader, test_loader, classes = prepare_datasets(
    base_dir, test_txt="one-indexed-files-notrash_test.txt", batch_size=64)

# Step 2 - K-fold cross-validation plus per-fold test evaluation.
df_cv_3, df_test_3 = kfold_train_and_evaluate(
    train_val_ds=train_val_ds,
    test_loader=test_loader,
    class_names=classes,
    tuning_index=3,
    lr=0.001,
    batch_size=64,
    num_epochs=10,
    freeze_epochs=3,
)
print("=== CV Results ===\n", df_cv_3)
print("=== Test Metrics ===\n", df_test_3)

In [None]:
# Run 4: lr=0.005, batch size 16.
# This cell does not call prepare_datasets; it relies on train_val_ds,
# test_loader and classes already defined by an earlier cell.
# freeze_epochs is not passed, so the function's default is used.
df_cv_4, df_test_4 = kfold_train_and_evaluate(
    train_val_ds=train_val_ds,
    test_loader=test_loader,
    class_names=classes,
    tuning_index=4,
    lr=0.005,
    batch_size=16,
    num_epochs=10,
)
print("=== CV Results ===\n", df_cv_4)
print("=== Test Metrics ===\n", df_test_4)

In [None]:
# Run 5: lr=0.005, batch size 32; freeze_epochs is left at the function default.
# Step 1 - rebuild the merged train+val subset and the test loader.
base_dir = "./archive/Garbage classification"
train_val_ds, train_val_loader, test_loader, classes = prepare_datasets(
    base_dir, test_txt="one-indexed-files-notrash_test.txt", batch_size=32)

# Step 2 - K-fold cross-validation plus per-fold test evaluation.
df_cv_5, df_test_5 = kfold_train_and_evaluate(
    train_val_ds=train_val_ds,
    test_loader=test_loader,
    class_names=classes,
    tuning_index=5,
    lr=0.005,
    batch_size=32,
    num_epochs=10,
)
print("=== CV Results ===\n", df_cv_5)
print("=== Test Metrics ===\n", df_test_5)

In [None]:
# Run 6: lr=0.005, batch size 64; freeze_epochs is left at the function default.
# Step 1 - rebuild the merged train+val subset and the test loader.
base_dir = "./archive/Garbage classification"
train_val_ds, train_val_loader, test_loader, classes = prepare_datasets(
    base_dir, test_txt="one-indexed-files-notrash_test.txt", batch_size=64)

# Step 2 - K-fold cross-validation plus per-fold test evaluation.
df_cv_6, df_test_6 = kfold_train_and_evaluate(
    train_val_ds=train_val_ds,
    test_loader=test_loader,
    class_names=classes,
    tuning_index=6,
    lr=0.005,
    batch_size=64,
    num_epochs=10,
)
print("=== CV Results ===\n", df_cv_6)
print("=== Test Metrics ===\n", df_test_6)