In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import yaml
import os
import sys
import time
import json 

# -----------------------------------
# 커스텀 데이터셋 클래스 정의
# -----------------------------------
class CropDataset(Dataset):
    """
    Dataset that crops every image to its first annotated bounding box.

    ``image_dir`` and ``annotation_dir`` are expected to share one layout: a
    sub-directory per class, where an image ``<stem>.<ext>`` is paired with the
    annotation file ``<stem>.json`` found under the same class name. Images
    without a matching annotation file are skipped.

    Each annotation JSON is read as ``annotation['annotations']['bbox'][0]``
    with the keys ``x``, ``y``, ``w`` and ``h``.

    Attributes:
        classes: Sorted list of class (sub-directory) names found in ``image_dir``.
        class_to_idx: Mapping from class name to its index in ``classes``.
        samples: List of ``(image_path, annotation_path, class_index)`` tuples.
    """

    # File extensions (compared case-insensitively) that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, image_dir, annotation_dir, transform=None):
        """
        Scan ``image_dir`` once and record every image that has an annotation.

        Args:
            image_dir: Root directory holding one sub-directory of images per class.
            annotation_dir: Root directory holding the JSON annotations, organised
                by the same class names.
            transform: Optional callable applied to the cropped PIL image.
        """
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform

        self.samples = []
        self.classes, self.class_to_idx = self._find_classes(self.image_dir)

        # Pre-scan image path, annotation path and label for every usable sample.
        for target_class in self.classes:
            class_idx = self.class_to_idx[target_class]
            img_class_dir = os.path.join(self.image_dir, target_class)
            ann_class_dir = os.path.join(self.annotation_dir, target_class)

            # os.listdir() returns entries in arbitrary order; sorting keeps the
            # sample order (and therefore any seeded shuffle) reproducible.
            for fname in sorted(os.listdir(img_class_dir)):
                if not fname.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue

                img_path = os.path.join(img_class_dir, fname)
                # The annotation shares the image's stem, with a .json extension.
                ann_fname = os.path.splitext(fname)[0] + '.json'
                ann_path = os.path.join(ann_class_dir, ann_fname)

                if os.path.exists(ann_path):
                    self.samples.append((img_path, ann_path, class_idx))

    def _find_classes(self, dir):
        """
        Return ``(classes, class_to_idx)`` for the sub-directories of ``dir``.

        ``classes`` is the alphabetically sorted list of directory names and
        ``class_to_idx`` maps each name to its position in that list.
        """
        classes = sorted(entry.name for entry in os.scandir(dir) if entry.is_dir())
        class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        return classes, class_to_idx

    def __len__(self):
        """Return the number of (image, annotation) pairs found at construction."""
        return len(self.samples)

    def __getitem__(self, index):
        """
        Load sample ``index``, crop it to its first bounding box and transform it.

        Returns:
            ``(cropped_image, label)`` where ``cropped_image`` is the RGB crop
            (after ``transform`` if one was given) and ``label`` is the class index.
        """
        img_path, ann_path, label = self.samples[index]

        # Read the annotation; only the first bounding box is used.
        with open(ann_path, 'r', encoding='utf-8') as f:
            annotation = json.load(f)
        first_bbox = annotation['annotations']['bbox'][0]
        left, top = first_bbox['x'], first_bbox['y']
        # Convert (x, y, w, h) into the [x1, y1, x2, y2] box that crop() expects.
        box = [left, top, left + first_bbox['w'], top + first_bbox['h']]

        # The context manager releases the underlying file as soon as the crop
        # (an independent in-memory image) has been produced.
        with Image.open(img_path) as image:
            cropped_image = image.convert("RGB").crop(box)

        # Apply the optional transform pipeline.
        if self.transform:
            cropped_image = self.transform(cropped_image)

        return cropped_image, label

In [2]:
# -----------------------------------
# 1. Load configuration and prepare the data
# -----------------------------------
print("1. Loading configuration and preparing dataset...")
CONFIG_PATH = "dino_train_bound.yaml"
try:
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
except FileNotFoundError:
    # Name the file that was actually looked up (the message used to say "config.yaml").
    print(f"ERROR: {CONFIG_PATH} 파일을 찾을 수 없습니다. 파일을 생성해주세요.")
    sys.exit(1)

MODEL_NAME = config['model_name']
DATA_PATH = config['data_path']
ANNOTATION_PATH = config['annotation_path']
MODEL_REPO = config['model_repo']
NUM_CLASSES = config['num_classes']
FREEZE_BACKBONE = config['freeze_backbone']
EPOCHS = config['epochs']
BATCH_SIZE = config['batch_size']
LEARNING_RATE = config['learning_rate']
# Fall back to the CPU whenever CUDA is unavailable, regardless of the configured device.
DEVICE = torch.device(config['device'] if torch.cuda.is_available() else "cpu")
print(f"--> Using device: {DEVICE}")

# Optional reproducibility knob: when the config provides a 'seed', seed torch's
# RNG (which drives DataLoader shuffling). Without the key nothing changes.
SEED = config.get('seed')
if SEED is not None:
    torch.manual_seed(SEED)

# --- Pre-processing and data loaders ---
data_transforms = {
    'train': transforms.Compose([
        # Applied to the bounding-box crop returned by CropDataset:
        # resize to 256, then take a random 224 crop and a random horizontal flip.
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'validation': transforms.Compose([
        # Deterministic counterpart of the training pipeline (center crop, no flip).
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
}

# CropDataset is used instead of datasets.ImageFolder so that every image is
# cropped to its annotated bounding box before the transforms run.
train_img_dir = os.path.join(DATA_PATH, 'train')
train_ann_dir = os.path.join(ANNOTATION_PATH, 'train')
train_dataset = CropDataset(image_dir=train_img_dir, annotation_dir=train_ann_dir, transform=data_transforms['train'])

val_img_dir = os.path.join(DATA_PATH, 'validation')
val_ann_dir = os.path.join(ANNOTATION_PATH, 'validation')
val_dataset = CropDataset(image_dir=val_img_dir, annotation_dir=val_ann_dir, transform=data_transforms['validation'])

# Each split derives its label indices from its own folder listing, so a missing
# or extra class folder would silently shift the labels between the splits.
if val_dataset.classes != train_dataset.classes:
    raise ValueError(
        f"Class folders differ between splits: train={train_dataset.classes}, "
        f"validation={val_dataset.classes}"
    )
# The classifier head has NUM_CLASSES outputs; every label index must fit into it.
if len(train_dataset.classes) > NUM_CLASSES:
    raise ValueError(
        f"Found {len(train_dataset.classes)} class folders but num_classes is {NUM_CLASSES}"
    )

# NOTE(review): num_workers=0 loads data in the main process; presumably chosen
# because the dataset class is defined inside the notebook — confirm before raising it.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
print(f"--> Found {len(train_dataset)} training images and {len(val_dataset)} validation images.")
print(f"--> Classes: {train_dataset.classes}")

1. Loading configuration and preparing dataset...
--> Using device: cuda
--> Found 28315 training images and 5662 validation images.
--> Classes: ['downy', 'healthy', 'powdery']


In [3]:
# -----------------------------------
# 2. Define the DINOv2 model (via torch.hub)
# -----------------------------------

print(f"\n2. Loading official DINOv2 model '{MODEL_NAME}' from torch.hub...")
try:
    model = torch.hub.load(MODEL_REPO, MODEL_NAME, pretrained=True)

    if FREEZE_BACKBONE:
        # Freeze every pretrained weight; the head created below is added
        # afterwards and therefore stays trainable.
        for param in model.parameters():
            param.requires_grad = False
        print("--> Backbone is frozen. Only the classifier head will be trained.")

    # Size of the feature vector fed to the classifier. It is read from the
    # loaded model so that other backbones selected through MODEL_NAME get a
    # matching head; 384 (the ViT-Small width used previously) remains the
    # fallback when the model does not expose `embed_dim`.
    num_features = getattr(model, 'embed_dim', 384)
    # Replace the model's existing head with our own linear classifier.
    model.head = nn.Linear(num_features, NUM_CLASSES)

    model = model.to(DEVICE)
    print("--> Model loaded and classifier head replaced successfully.")

except Exception as e:
    # Report the failure and stop the cell; nothing below can run without a model.
    print("\nERROR: An unexpected error occurred during model setup.")
    print(f"--> Original Error: {e}")
    sys.exit(1)


2. Loading official DINOv2 model 'dinov2_vits14' from torch.hub...


Using cache found in C:\Users\51100/.cache\torch\hub\facebookresearch_dinov2_main


--> Backbone is frozen. Only the classifier head will be trained.
--> Model loaded and classifier head replaced successfully.


In [4]:
# ----------------------------------
# 3. Training
# ----------------------------------

print("\n3. Starting the training process...")

# Loss and optimizer. Only parameters that still require gradients are handed
# to Adam, so frozen weights are never updated.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LEARNING_RATE)

# Number of training batches per epoch (used for the progress gauge).
total_batches = len(train_loader)

for epoch in range(EPOCHS):
    # --- Record the epoch start time ---
    epoch_start_time = time.time()

    # --- Training phase ---
    model.train()
    running_loss = 0.0
    running_corrects = 0  # plain int: number of correct predictions so far

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        _, preds = torch.max(outputs, 1)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by the batch size so that the
        # epoch average below is a true per-sample mean.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += (preds == labels).sum().item()

        # --- Progress gauge ---
        # '\r' returns to the start of the line so the gauge overwrites itself.
        progress = (batch_idx + 1) / total_batches
        gauge_bar = '=' * int(progress * 20)
        sys.stdout.write(f"\rEpoch {epoch+1:02d}/{EPOCHS} | Train | [{'%-20s' % gauge_bar}] {progress:.0%}")
        sys.stdout.flush()

    # max(..., 1) keeps the averages defined even for an empty dataset.
    epoch_loss = running_loss / max(len(train_dataset), 1)
    epoch_acc = running_corrects / max(len(train_dataset), 1)

    # Finish the gauge line.
    print()

    # --- Validation phase ---
    model.eval()
    val_loss = 0.0
    val_corrects = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            val_loss += loss.item() * inputs.size(0)
            val_corrects += (preds == labels).sum().item()

    val_epoch_loss = val_loss / max(len(val_dataset), 1)
    val_epoch_acc = val_corrects / max(len(val_dataset), 1)

    # --- Record the epoch end time and report ---
    epoch_end_time = time.time()
    epoch_duration = epoch_end_time - epoch_start_time
    # Split the duration into minutes and seconds.
    epoch_mins, epoch_secs = divmod(epoch_duration, 60)

    # Per-epoch summary line.
    print(
        f"Epoch {epoch+1:02d}/{EPOCHS} | "
        f"Time: {int(epoch_mins):02d}:{int(epoch_secs):02d} | "
        f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} | "
        f"Val Loss: {val_epoch_loss:.4f} Acc: {val_epoch_acc:.4f}"
    )

print("\nTraining complete!")
# Persist the weights as they are after the final epoch.
torch.save(model.state_dict(), 'dinov2_bound_crop.pth')
print("Model saved to dinov2_bound_crop.pth")


3. Starting the training process...
Epoch 01/20 | Time: 52:23 | Train Loss: 0.1685 Acc: 0.9430 | Val Loss: 0.0780 Acc: 0.9763
Epoch 02/20 | Time: 52:38 | Train Loss: 0.0665 Acc: 0.9797 | Val Loss: 0.0526 Acc: 0.9836
Epoch 03/20 | Time: 51:57 | Train Loss: 0.0510 Acc: 0.9848 | Val Loss: 0.0427 Acc: 0.9868
Epoch 04/20 | Time: 51:54 | Train Loss: 0.0434 Acc: 0.9875 | Val Loss: 0.0393 Acc: 0.9871
Epoch 05/20 | Time: 51:49 | Train Loss: 0.0363 Acc: 0.9889 | Val Loss: 0.0317 Acc: 0.9901
Epoch 06/20 | Time: 51:46 | Train Loss: 0.0344 Acc: 0.9900 | Val Loss: 0.0319 Acc: 0.9898
Epoch 07/20 | Time: 52:01 | Train Loss: 0.0315 Acc: 0.9905 | Val Loss: 0.0298 Acc: 0.9912
Epoch 08/20 | Time: 52:15 | Train Loss: 0.0307 Acc: 0.9906 | Val Loss: 0.0276 Acc: 0.9913
Epoch 09/20 | Time: 52:23 | Train Loss: 0.0281 Acc: 0.9914 | Val Loss: 0.0260 Acc: 0.9917
Epoch 10/20 | Time: 51:50 | Train Loss: 0.0278 Acc: 0.9913 | Val Loss: 0.0271 Acc: 0.9913
Epoch 11/20 | Time: 55:57 | Train Loss: 0.0282 Acc: 0.9900 | Va