In [37]:
import os
import time

import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from PIL import Image
## Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

cuda


In [31]:
import wandb

# Log in to Weights & Biases.
wandb.login()

# Sweep configuration: an exhaustive grid over the hyperparameters below
# (3 * 2 * 2 * 2 * 2 * 2 = 96 combinations).
sweep_config = {
    "method": "grid",  # other search strategies such as "random" can be used instead
    "metric": {
        # The metric name must be a key that the training code actually passes
        # to wandb.log(); validation accuracy is logged as "Validation Accuracy".
        "name": "Validation Accuracy",
        "goal": "maximize"  # higher accuracy is better
    },
    "parameters": {
        "learning_rate": {
            "values": [0.001, 0.01, 0.1]
        },
        "batch_size": {
            "values": [64, 128]
        },
        "epochs": {
            "values": [10,20]
        },
        "optimizer": {
            "values": ["SGD", "Adam"]
        },
        "momentum": {
            # Only consumed by SGD. NOTE(review): because this is a grid sweep,
            # every Adam configuration is still run once per momentum value.
            "values": [0.9, 0.95]
        },
        "dropout": {
            "values": [0.3, 0.5]
        },
    }
}

# Register the sweep; the returned id is what wandb.agent() needs to pull runs.
sweep_id = wandb.sweep(sweep_config, project="CV-ActiveLearning01-01")

Create sweep with ID: 0vqh32fq
Sweep URL: https://wandb.ai/djftk_major/CV-ActiveLearning01-01/sweeps/0vqh32fq


In [22]:
## Custom Dataset
class CUB2011(Dataset):
    """Image dataset read from flat per-split folders (``<root>/<mode>/``).

    Every file in the split folder is treated as one sample. The integer class
    label is parsed from the file name: it is the text after the last ``_`` and
    before the first ``.``; anything from a ``(`` onwards (e.g. the ``(1)``
    suffix of a duplicated file) is ignored.

    Args:
        transforms: Callable applied to the RGB ``PIL.Image`` before it is returned.
        mode: Split to load; one of ``'train'``, ``'valid'`` or ``'test'``.
        root: Directory containing the split folders. Defaults to ``DEFAULT_ROOT``.

    Raises:
        ValueError: If ``mode`` is not one of the supported splits.
    """

    # Machine-specific default location of the dataset; pass ``root=`` to override it.
    DEFAULT_ROOT = r'C:\Users\GACHON\Documents\jwkang\CUB_200_2011_repackage_class50_v1\CUB_200_2011_repackage_class50\datasets'
    MODES = ('train', 'valid', 'test')

    def __init__(self, transforms, mode='train', root=None):
        # Fail early: an unknown mode would otherwise only surface later as a
        # missing ``image_folder`` attribute.
        if mode not in self.MODES:
            raise ValueError(f"mode must be one of {self.MODES}, got {mode!r}")

        self.transforms = transforms
        self.mode = mode
        self.root = self.DEFAULT_ROOT if root is None else root

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs.
        self.image_folder = sorted(os.listdir(os.path.join(self.root, self.mode)))

    def __len__(self):
        """Return the number of files in the split folder."""
        return len(self.image_folder)

    @staticmethod
    def _parse_label(filename):
        """Extract the integer class label encoded at the end of ``filename``.

        Raises:
            ValueError: If the label part of the name is not an integer.
        """
        label_part = filename.split('_')[-1].split('.')[0]
        label_part = label_part.split('(')[0]  # drop a trailing "(n)" copy suffix
        try:
            return int(label_part.strip())
        except ValueError as err:
            raise ValueError(f"Error converting label for file: {filename}") from err

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        img_path = self.image_folder[idx]
        label = self._parse_label(img_path)

        # The context manager closes the underlying file once the pixels have
        # been converted into a new RGB image.
        with Image.open(os.path.join(self.root, self.mode, img_path)) as raw:
            img = raw.convert('RGB')
        img = self.transforms(img)

        return (img, label)

## Data Preprocessing
# Per-channel statistics that torchvision's pretrained ImageNet models expect
# their inputs to be normalized with; without this step the pretrained
# ResNet features see inputs on a different scale than they were trained on.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = (448, 448)

transforms_train = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Validation and test share one deterministic pipeline.
transforms_valtest = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

BATCH_SIZE = 32
train_set = CUB2011(mode = 'train',transforms = transforms_train)
val_set = CUB2011(mode = 'valid',transforms = transforms_valtest)
test_set = CUB2011(mode = 'test',transforms = transforms_valtest)

print('Num of each dataset: ',len(train_set), len(val_set), len(test_set))

# Only the training loader is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_set, batch_size = BATCH_SIZE, shuffle = True)
val_loader = DataLoader(val_set, batch_size = BATCH_SIZE, shuffle = False)
test_loader = DataLoader(test_set, batch_size = BATCH_SIZE, shuffle = False)

Num of each dataset:  2361 296 298


In [34]:
## Model / Optimizer
# Module-level defaults. In the cells visible here the sweep reads its epoch
# count and learning rate from wandb.config rather than from these two names.
EPOCH, lr = 30, 0.1

### Transfer Learning
# Start from a pretrained ResNet-18 and swap its final fully connected layer
# for a fresh 50-way classifier.
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=50)
model = model.to(DEVICE)

print("Created a learning model and optimizer")

def train(model, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch and log the mean batch loss to wandb.

    Args:
        model: Network to optimize; it is switched to training mode.
        train_loader: Iterable of ``(image, target)`` batches that supports ``len()``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch index, used only for progress printing and logging.

    Returns:
        float: Mean of the per-batch cross-entropy losses over the epoch.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader is empty; nothing to train on")

    model.train()
    running_loss = 0.0
    for i, (image, target) in enumerate(train_loader):
        image, target = image.to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(image)
        # The loss is computed from tensors that already live on DEVICE, so no
        # extra device transfer is needed.
        train_loss = F.cross_entropy(output, target)
        train_loss.backward()
        optimizer.step()

        batch_loss = train_loss.item()
        running_loss += batch_loss
        if i % 10 == 0:
            print(f'Train Epoch: {epoch} [{i}/{num_batches}] \t Loss: ({batch_loss:.6f})')

    avg_train_loss = running_loss / num_batches
    wandb.log({"Training Loss": avg_train_loss, "Epoch": epoch})

    # Return the epoch mean as a plain float instead of the last batch's loss
    # tensor, so callers get a representative value that holds no autograd graph.
    return avg_train_loss

def evaluate(model, val_loader, split="Validation"):
    """Compute mean cross-entropy loss and accuracy over a loader and log both.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: DataLoader yielding ``(image, target)`` batches; its
            ``dataset`` length is used as the number of samples.
        split: Prefix for the wandb keys (``"<split> Loss"`` and
            ``"<split> Accuracy"``). The default keeps the keys
            ``"Validation Loss"`` / ``"Validation Accuracy"``; pass e.g.
            ``"Test"`` when evaluating a different split.

    Returns:
        tuple: ``(eval_loss, eval_accuracy)`` where ``eval_loss`` is the
        per-sample mean loss and ``eval_accuracy`` is a percentage in [0, 100].

    Raises:
        ValueError: If the loader's dataset is empty.
    """
    num_samples = len(val_loader.dataset)
    if num_samples == 0:
        raise ValueError("val_loader.dataset is empty; nothing to evaluate")

    model.eval()
    eval_loss = 0.0
    correct = 0
    with torch.no_grad():
        for image, target in val_loader:
            image, target = image.to(DEVICE), target.to(DEVICE)
            output = model(image)

            # Sum (not mean) per batch so the final division by the dataset
            # size is exact even when the last batch is smaller.
            eval_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

    eval_loss /= num_samples
    eval_accuracy = 100. * correct / num_samples

    wandb.log({f"{split} Loss": eval_loss, f"{split} Accuracy": eval_accuracy})

    return eval_loss, eval_accuracy

Created a learning model and optimizer


In [35]:
# Sweep entry point: trains, validates and tests one hyperparameter configuration.
def sweep_train():
    """Run a single wandb sweep trial.

    Reads ``batch_size``, ``epochs``, ``learning_rate``, ``optimizer``,
    ``momentum`` and ``dropout`` from ``wandb.config``, fine-tunes a pretrained
    ResNet-18 with a dropout + 50-way linear head, saves the weights with the
    best validation accuracy to ``./best_model.pth`` and finally evaluates
    those weights on the test split.

    Raises:
        ValueError: If ``config.optimizer`` is neither ``"SGD"`` nor ``"Adam"``.
    """
    # Initialize the run exactly once; the hyperparameters chosen by the sweep
    # are then available through wandb.config.
    run = wandb.init()
    config = wandb.config

    # Run name in the form (batch size / epochs / lr / optimizer / momentum / dropout).
    # The name is assigned on the already-active run, because the config it is
    # built from only exists after init; a second wandb.init(name=...) is not
    # needed for this.
    experiment_name = f"{config.batch_size} / {config.epochs} / {config.learning_rate} / {config.optimizer} / {config.momentum} / {config.dropout}"
    run.name = experiment_name

    # Model (transfer learning): replace the classifier head.
    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(config.dropout),         # dropout probability from the sweep
        nn.Linear(num_features, 50)         # new output layer
    )
    model.to(DEVICE)

    # Optimizer; momentum is only used by SGD.
    if config.optimizer == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, momentum=config.momentum)
    elif config.optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        raise ValueError(f"Unsupported optimizer: {config.optimizer!r}")

    # Datasets and loaders sized by the sweep's batch size.
    train_loader = DataLoader(CUB2011(mode='train', transforms=transforms_train), batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(CUB2011(mode='valid', transforms=transforms_valtest), batch_size=config.batch_size, shuffle=False)

    # NOTE: this path is shared by every run, so each run overwrites the
    # checkpoint of the previous one.
    best_model_path = './best_model.pth'
    best_accuracy = 0
    saved_best = False  # True once THIS run has written a checkpoint

    for epoch in range(config.epochs):
        train_loss = train(model, train_loader, optimizer, epoch)
        val_loss, val_accuracy = evaluate(model, val_loader)

        # Keep the weights with the best validation accuracy so far.
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            saved_best = True

        # float() accepts both a Python number and a 0-dim loss tensor, so a
        # plain scalar (not a tensor) is what gets sent to wandb.
        wandb.log({
            'Train Loss': float(train_loss),
            'Validation Loss': val_loss,
            'Validation Accuracy': val_accuracy,
            'Epoch': epoch
        })
        print(f'[{epoch}] Validation Loss : {val_loss:.4f}, Accuracy: {val_accuracy:.4f}%')

    # Report test metrics for the selected (best-validation) weights rather
    # than for whatever the last epoch happened to produce. Only reload when
    # this run actually saved a checkpoint, to avoid picking up a stale file.
    if saved_best:
        model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))

    test_loader = DataLoader(CUB2011(mode='test', transforms=transforms_valtest), batch_size=config.batch_size, shuffle=False)
    # evaluate() logs its results under its own keys as well; the explicit
    # 'Test ...' keys below are the ones to read for the test split.
    test_loss, test_accuracy = evaluate(model, test_loader)
    wandb.log({
        'Test Loss': test_loss,
        'Test Accuracy': test_accuracy
    })

    print(f'Test Loss : {test_loss:.4f}, Accuracy: {test_accuracy:.4f}%')

# Run the wandb sweep: the agent invokes sweep_train for each run it starts
# for the sweep identified by sweep_id.
wandb.agent(sweep_id, function=sweep_train)


wandb: Agent Starting Run: 3y5kdqug with config:
wandb: 	batch_size: 64
wandb: 	dropout: 0.3
wandb: 	epochs: 10
wandb: 	learning_rate: 0.001
wandb: 	momentum: 0.95
wandb: 	optimizer: Adam


Train Epoch: 0 [0/37] 	 Loss: (4.149582)


Run 3y5kdqug errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

Train Epoch: 0 [0/37] 	 Loss: (4.266821)
Train Epoch: 0 [10/37] 	 Loss: (3.923336)


Run o7xgpt6e errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

Train Epoch: 0 [0/37] 	 Loss: (4.225710)
Train Epoch: 0 [10/37] 	 Loss: (4.717775)


Run kwrnxn99 errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

Train Epoch: 0 [0/37] 	 Loss: (4.035398)
Train Epoch: 0 [10/37] 	 Loss: (4.037912)


Run x01g7506 errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

Train Epoch: 0 [0/37] 	 Loss: (4.149684)
Train Epoch: 0 [10/37] 	 Loss: (4.368949)


Run kt7od8hy errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

Train Epoch: 0 [0/37] 	 Loss: (4.291768)
Train Epoch: 0 [10/37] 	 Loss: (3.552135)
Train Epoch: 0 [20/37] 	 Loss: (5.303220)


Run tbihdtzt errored:
Traceback (most recent call last):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
    self._function()
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\2804148834.py", line 37, in sweep_train
    train_loss = train(model, train_loader, optimizer, epoch)
  File "C:\Users\GACHON\AppData\Local\Temp\ipykernel_17256\3374298231.py", line 16, in train
    for i, (image, target) in enumerate(train_loader):
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "C:\Users\GACHON\anaconda3\envs\djftk\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possi

In [30]:
# Finish the current wandb run.
wandb.finish()