In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import yaml
import os
import sys
import time # 시간 측정을 위해 import

# -----------------------------------
# 1. Load configuration and derive run settings
# -----------------------------------

# Single source of truth for the config file name, so the error message below
# always names the file that was actually opened.
CONFIG_PATH = "dino_train.yaml"

# Every key read from the config further down; checked up front so that a
# partial config is reported in one message instead of one KeyError at a time.
REQUIRED_CONFIG_KEYS = (
    'data_path', 'model_repo', 'model_name', 'num_classes', 'freeze_backbone',
    'epochs', 'batch_size', 'learning_rate', 'device',
)

print("1. Loading configuration and preparing dataset...")
try:
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
except FileNotFoundError:
    print(f"ERROR: {CONFIG_PATH} 파일을 찾을 수 없습니다. 파일을 생성해주세요.")
    sys.exit(1)

# yaml.safe_load returns None for an empty file and may return a non-mapping
# for malformed content; treat both as "all keys missing".
if not isinstance(config, dict):
    config = {}
missing_keys = [key for key in REQUIRED_CONFIG_KEYS if key not in config]
if missing_keys:
    raise KeyError(f"{CONFIG_PATH} is missing required keys: {missing_keys}")

DATA_PATH = config['data_path']
MODEL_REPO = config['model_repo']
MODEL_NAME = config['model_name']
NUM_CLASSES = config['num_classes']
FREEZE_BACKBONE = config['freeze_backbone']
EPOCHS = config['epochs']
BATCH_SIZE = config['batch_size']
LEARNING_RATE = config['learning_rate']
# Use the configured device only when CUDA is available; otherwise fall back to CPU.
DEVICE = torch.device(config['device'] if torch.cuda.is_available() else "cpu")
print(f"--> Using device: {DEVICE}")

1. Loading configuration and preparing dataset...
--> Using device: cuda


In [2]:
# --- Preprocessing pipelines and data loaders ---

# Per-channel mean/std shared by both pipelines (the commonly used ImageNet values).
channel_stats = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop + horizontal flip for augmentation.
train_pipeline = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(**channel_stats),
])

# Validation pipeline: deterministic resize + centre crop.
validation_pipeline = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(**channel_stats),
])

data_transforms = {'train': train_pipeline, 'validation': validation_pipeline}

# One ImageFolder per split; each split lives in its own sub-directory of DATA_PATH.
train_root = os.path.join(DATA_PATH, 'train')
validation_root = os.path.join(DATA_PATH, 'validation')
train_dataset = datasets.ImageFolder(train_root, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(validation_root, transform=data_transforms['validation'])

# Only the training loader shuffles; everything else is shared between the two.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

print(f"--> Found {len(train_dataset)} training images and {len(val_dataset)} validation images.")
print(f"--> Classes: {train_dataset.classes}")

--> Found 28328 training images and 5665 validation images.
--> Classes: ['downy', 'healthy', 'powdery']


In [3]:
# -----------------------------------
# 2. Define the DINOv2 model (loaded through torch.hub)
# -----------------------------------

print(f"\n2. Loading official DINOv2 model '{MODEL_NAME}' from torch.hub...")
try:
    model = torch.hub.load(MODEL_REPO, MODEL_NAME, pretrained=True)

    # Freeze BEFORE attaching the new head, so the head created below keeps
    # requires_grad=True and is the only thing the optimizer will update.
    if FREEZE_BACKBONE:
        for param in model.parameters():
            param.requires_grad = False
        print("--> Backbone is frozen. Only the classifier head will be trained.")

    # Read the feature width from the backbone instead of hard-coding it:
    # MODEL_NAME comes from the config, and only the ViT-Small variant is
    # 384-wide. Falls back to 384 if the loaded model does not expose
    # `embed_dim` (NOTE(review): confirm the attribute name for non-DINOv2 repos).
    num_features = getattr(model, "embed_dim", 384)
    # Replace the model's existing head with our classification layer.
    model.head = nn.Linear(num_features, NUM_CLASSES)

    model = model.to(DEVICE)
    print("--> Model loaded and classifier head replaced successfully.")

except Exception as e:
    # Report the failure and stop the run; nothing downstream works without a model.
    print(f"\nERROR: An unexpected error occurred during model setup.")
    print(f"--> Original Error: {e}")
    sys.exit(1)


2. Loading official DINOv2 model 'dinov2_vits14' from torch.hub...


Using cache found in C:\Users\51100/.cache\torch\hub\facebookresearch_dinov2_main


Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to C:\Users\51100/.cache\torch\hub\checkpoints\dinov2_vits14_pretrain.pth


100%|██████████████████████████████████████████████████████████████████████████████| 84.2M/84.2M [00:00<00:00, 103MB/s]


--> Backbone is frozen. Only the classifier head will be trained.
--> Model loaded and classifier head replaced successfully.


In [4]:
# ----------------------------------
# 3. Training
# ----------------------------------

print("\n3. Starting the training process...")

# Loss function and optimizer. Only parameters that still require gradients
# are handed to Adam, so frozen parameters are never updated.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LEARNING_RATE)

# Number of batches per epoch (used for the progress gauge).
total_batches = len(train_loader)
# Redraw the gauge roughly every 5% of an epoch; max(1, ...) keeps the modulo
# below valid when an epoch has fewer than 20 batches.
gauge_step = max(1, total_batches // 20)

# Training loop
for epoch in range(EPOCHS):
    # Monotonic clock: elapsed time is unaffected by system clock adjustments.
    epoch_start_time = time.perf_counter()

    # --- Training phase ---
    model.train()
    running_loss = 0.0
    running_corrects = 0  # plain Python int; counts are pulled off the device per batch

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit along the class dimension.
        preds = outputs.argmax(dim=1)

        # criterion returns the batch mean, so weight by batch size to get a
        # per-sample sum that is later divided by the dataset size.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += (preds == labels).sum().item()

        # --- Progress gauge ---
        # '\r' rewrites the same line; only redraw every gauge_step batches
        # (and on the final batch, so the gauge always ends at 100%).
        batches_done = batch_idx + 1
        if batches_done % gauge_step == 0 or batches_done == total_batches:
            progress = batches_done / total_batches
            gauge_bar = '=' * int(progress * 20)
            sys.stdout.write(f"\rEpoch {epoch+1:02d}/{EPOCHS} | Train | [{'%-20s' % gauge_bar}] {progress:.0%}")
            sys.stdout.flush()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects / len(train_dataset)

    # Terminate the gauge line before printing anything else.
    print()

    # --- Validation phase ---
    model.eval()
    val_loss = 0.0
    val_corrects = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            preds = outputs.argmax(dim=1)
            val_loss += loss.item() * inputs.size(0)
            val_corrects += (preds == labels).sum().item()

    val_epoch_loss = val_loss / len(val_dataset)
    val_epoch_acc = val_corrects / len(val_dataset)

    # --- Epoch timing ---
    epoch_end_time = time.perf_counter()
    epoch_duration = epoch_end_time - epoch_start_time
    # Split into minutes and seconds for display.
    epoch_mins, epoch_secs = divmod(epoch_duration, 60)

    # Per-epoch summary line.
    print(
        f"Epoch {epoch+1:02d}/{EPOCHS} | "
        f"Time: {int(epoch_mins):02d}:{int(epoch_secs):02d} | "
        f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} | "
        f"Val Loss: {val_epoch_loss:.4f} Acc: {val_epoch_acc:.4f}"
    )

print("\nTraining complete!")
# Persist the weights as they stand after the final epoch.
torch.save(model.state_dict(), 'dinov2_hub_finetuned_model.pth')
print("Model saved to dinov2_hub_finetuned_model.pth")


3. Starting the training process...
Epoch 01/20 | Time: 14:31 | Train Loss: 0.1706 Acc: 0.9430 | Val Loss: 0.0715 Acc: 0.9763
Epoch 02/20 | Time: 18:38 | Train Loss: 0.0736 Acc: 0.9781 | Val Loss: 0.0510 Acc: 0.9818
Epoch 03/20 | Time: 23:34 | Train Loss: 0.0597 Acc: 0.9821 | Val Loss: 0.0425 Acc: 0.9857
Epoch 04/20 | Time: 14:20 | Train Loss: 0.0542 Acc: 0.9829 | Val Loss: 0.0442 Acc: 0.9843
Epoch 05/20 | Time: 14:30 | Train Loss: 0.0504 Acc: 0.9843 | Val Loss: 0.0323 Acc: 0.9896
Epoch 06/20 | Time: 14:27 | Train Loss: 0.0485 Acc: 0.9837 | Val Loss: 0.0364 Acc: 0.9875
Epoch 07/20 | Time: 14:22 | Train Loss: 0.0477 Acc: 0.9847 | Val Loss: 0.0310 Acc: 0.9910
Epoch 08/20 | Time: 14:30 | Train Loss: 0.0436 Acc: 0.9857 | Val Loss: 0.0297 Acc: 0.9901
Epoch 09/20 | Time: 14:21 | Train Loss: 0.0411 Acc: 0.9864 | Val Loss: 0.0266 Acc: 0.9922
Epoch 10/20 | Time: 14:27 | Train Loss: 0.0413 Acc: 0.9870 | Val Loss: 0.0242 Acc: 0.9921
Epoch 11/20 | Time: 14:16 | Train Loss: 0.0382 Acc: 0.9881 | Va