이번엔 Sweep 사용!!

In [1]:
pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score

In [3]:
pip install torchsummary

Note: you may need to restart the kernel to use updated packages.


In [7]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Mon_Apr__3_17:16:06_PDT_2023
Cuda compilation tools, release 12.1, V12.1.105
Build cuda_12.1.r12.1/compiler.32688072_0


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchsummary import summary

In [None]:
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

In [None]:
# Run on the first CUDA GPU when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device:', device)

In [None]:
class VGG16(nn.Module):
    """Small VGG-style CNN for single-channel 28x28 images (e.g. MNIST).

    Despite the name this is not the full 16-layer VGG: the feature extractor
    has four 3x3 convolutions (1 -> 64 -> 128 -> 256 -> 256 channels) and three
    2x2 max-pools, which shrink a 28x28 input to 3x3 (28 -> 14 -> 7 -> 3).
    The classifier is the usual VGG head: Linear -> ReLU -> Dropout twice,
    followed by the output layer.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )

        self.classifier = nn.Sequential(
            # 256 channels * 3 * 3 spatial positions for a 28x28 input.
            nn.Linear(256 * 3 * 3, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            # Without this activation the last two Linear layers collapse into
            # a single affine map at eval time. NOTE: inserting it shifts the
            # output layer from classifier.5 to classifier.6 in state_dict keys.
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for input `x`."""
        x = self.features(x)
        # Flatten everything except the batch dimension.
        x = x.flatten(1)
        return self.classifier(x)

In [None]:
# Stand-alone model instance for inspection. It is moved to `device` right
# away so that its weights live on the same device as any inputs fed to it
# (torchsummary, for example, creates CUDA inputs by default when a GPU exists).
model = VGG16(num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# torchsummary builds its dummy input on `device` ("cuda" or "cpu"); pass the
# device explicitly and make sure the model sits on it to avoid a mismatch.
summary(model.to(device), input_size=(1, 28, 28), device=device.type)

In [None]:
## Now bring in Weights & Biases (the `wandb` package must already be installed)
import wandb

## Authenticate with your personal API key first!
wandb.login()

Sweep: 하이퍼 파라미터 변경 시마다 새롭게 init!!

In [9]:
pip install torchvision

Collecting torchvision
  Downloading torchvision-0.21.0-cp39-cp39-manylinux1_x86_64.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: torchvision
Successfully installed torchvision-0.21.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

def create_data_loaders(batch_size):
    """Build the MNIST train and test DataLoaders.

    Images are converted to tensors and normalized with mean 0.5 / std 0.5.
    The data is stored under ``./mnistdata`` and downloaded if missing.

    Args:
        batch_size: Batch size used by both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # One pass per split: the train split is shuffled, the test split is not.
    loaders = []
    for is_train in (True, False):
        split = MNIST(root='./mnistdata', train=is_train, download=True, transform=preprocessing)
        loaders.append(DataLoader(dataset=split, batch_size=batch_size, shuffle=is_train))

    return tuple(loaders)

In [None]:
def create_train_loader(batch_size):
    """Return only the training DataLoader for `batch_size`.

    Delegates to `create_data_loaders`, which builds both loaders; the test
    loader is discarded here.
    """
    (train_split_loader, _discarded_test_loader) = create_data_loaders(batch_size)
    return train_split_loader

def create_test_loader(batch_size):
    """Return only the test DataLoader for `batch_size`.

    Delegates to `create_data_loaders`, which builds both loaders; the
    training loader is discarded here.
    """
    (_discarded_train_loader, test_split_loader) = create_data_loaders(batch_size)
    return test_split_loader

옵티마이저도 하나의 하이퍼 파라미터!!

In [11]:
def get_optimizer(optimizer_name, model_parameters, learning_rate, momentum=0.0, weight_decay=0.0):
    """Construct a torch optimizer selected by name.

    Args:
        optimizer_name: One of 'adam', 'sgd', 'rmsprop', 'adamw'.
        model_parameters: Parameters (or parameter groups) to optimize.
        learning_rate: Learning rate passed as ``lr``.
        momentum: Forwarded to SGD and RMSprop only; ignored by Adam/AdamW.
        weight_decay: Forwarded to every optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If `optimizer_name` is not one of the supported names.
    """
    base_kwargs = {'lr': learning_rate, 'weight_decay': weight_decay}
    momentum_kwargs = {'lr': learning_rate, 'momentum': momentum, 'weight_decay': weight_decay}

    # (name, optimizer class, keyword arguments) checked in declaration order.
    supported = (
        ('adam', torch.optim.Adam, base_kwargs),
        ('sgd', torch.optim.SGD, momentum_kwargs),
        ('rmsprop', torch.optim.RMSprop, momentum_kwargs),
        ('adamw', torch.optim.AdamW, base_kwargs),
    )
    for candidate, optimizer_cls, kwargs in supported:
        if optimizer_name == candidate:
            return optimizer_cls(model_parameters, **kwargs)

    raise ValueError(f"Unsupported Optimizer: {optimizer_name}")

모델 학습 Loop

In [None]:
# Training loop
def train_one_epoch(model, train_loader, optimizer, criterion):
    """Train `model` for one pass over `train_loader`.

    Each batch is moved to the module-level `device`, followed by a
    forward/backward pass and one optimizer step.

    Args:
        model: Network to train (put into train mode here).
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        The mean of the per-batch losses over the epoch, as a float. This
        matches how the validation loss is averaged, rather than reporting
        only the (noisy) loss of the final mini-batch.

    Raises:
        ValueError: If `train_loader` yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()     # back-propagation
        optimizer.step()

        # Accumulate as a detached tensor so the host only synchronizes with
        # the device once, at the end of the epoch.
        running_loss = running_loss + loss.detach()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_loader yielded no batches")

    return float(running_loss) / num_batches

# Validation pass (run on the test dataset)
def validate_one_epoch(model, test_loader, criterion):
    """Evaluate `model` on `test_loader` and log metrics to W&B.

    Predictions and labels are collected over the whole split, and accuracy
    and macro-F1 are computed once from them. Averaging per-batch values
    instead would over-weight a short final batch and distort macro-F1
    whenever a small batch happens to miss some classes.

    Args:
        model: Network to evaluate (put into eval mode here).
        test_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        The mean of the per-batch losses, as a float.

    Raises:
        ValueError: If `test_loader` yields no batches.

    Side effects:
        Calls ``wandb.log`` once with ``val_accuracy`` and ``val_f1_score``.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    batch_predictions = []
    batch_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total_loss += criterion(outputs, labels).item()
            num_batches += 1

            # Class index with the highest logit for every sample.
            batch_predictions.append(outputs.argmax(dim=1).cpu())
            batch_labels.append(labels.cpu())

    if num_batches == 0:
        raise ValueError("test_loader yielded no batches")

    predictions = torch.cat(batch_predictions)
    targets = torch.cat(batch_labels)

    accuracy = (predictions == targets).sum().item() / targets.numel()
    f1 = f1_score(targets.numpy(), predictions.numpy(), average='macro')

    # Validate once per epoch and record the result!
    wandb.log({
        'val_accuracy': accuracy,
        'val_f1_score': f1
    })

    return total_loss / num_batches

자, 이제 main train 함수에서 WandB 사용!

In [None]:
def train_model(config=None):
    """Run one W&B trial: build the model, train it, and log per-epoch losses.

    Intended as the ``function`` passed to ``wandb.agent``; hyperparameters
    are read from ``wandb.config`` after ``wandb.init``.

    Config keys read:
        batch_size, n_epochs, learning_rate, optimizer (required);
        momentum, weight_decay (optional, default 0.0).

    Args:
        config: Optional default configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        config = wandb.config

        model = VGG16(num_classes=10).to(device)

        # Build both loaders in a single call so the MNIST datasets are only
        # constructed once per run.
        train_loader, test_loader = create_data_loaders(config.batch_size)

        optimizer = get_optimizer(
            # The sweep stores the optimizer name under the key 'optimizer'.
            optimizer_name=config.optimizer,
            model_parameters=model.parameters(),
            learning_rate=config.learning_rate,
            momentum=getattr(config, 'momentum', 0.0),
            weight_decay=getattr(config, 'weight_decay', 0.0),
        )

        criterion = nn.CrossEntropyLoss()       # the loss function is kept fixed across runs

        wandb.watch(model)

        for epoch in range(config.n_epochs):
            train_loss = train_one_epoch(model, train_loader, optimizer, criterion)
            val_loss = validate_one_epoch(model, test_loader, criterion)
            wandb.log({
                'train_loss': train_loss,
                'val_loss': val_loss,
                'epoch': epoch
            })
            print(f"Epoch {epoch+1}/{config.n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

Sweep Configuration

In [None]:
## How should the sweep explore the search space?
# A sweep configuration must be a single flat dict; every hyperparameter to be
# searched has to live inside the 'parameters' mapping.
sweep_configuration = {
    'method': 'bayes',
    'name': 'sweep-bayes',         # free-form display name for this sweep
    'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
    ## Which hyperparameters are optimized?
    'parameters': {
        'batch_size': {'values': [16, 32, 64]},
        'n_epochs': {'values': [3, 5, 10]},
        'learning_rate': {'max': 0.1, 'min': 0.0001},
        'optimizer': {
            'values': ['adam', 'sgd', 'rmsprop', 'adamw']
        },
        'momentum': {'values': [0.0, 0.9]},
        'weight_decay': {'values': [0.0, 0.001, 0.0001]}
    }
}

# Register the sweep with W&B; the returned id is what wandb.agent is given.
sweep_id = wandb.sweep(
    sweep = sweep_configuration,
    entity = 'GDGC-CNU',        # team (entity) name
    project = 'Weights_Biases_Advanced'
)

In [None]:
wandb.agent(sweep_id, function=train_model, count=30)       # train_model is the entry point executed for every run => 30 runs (models) in total

In [None]:
# Close out any W&B run that is still open once the agent has returned
# (each train_model run is already wrapped in `with wandb.init(...)`).
wandb.finish()