In [1]:
import torch
from torchvision import datasets, transforms

from skimage.transform import resize

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
from tqdm.auto import tqdm
import wandb

In [2]:
# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

device = torch.device("cpu")

In [3]:
wandb.login(key='905a03c6f175d2e773d5a505f4806f09cd514e03')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mymkim78[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/student/.netrc


True

In [4]:
# Define the CNN module
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
    def predict_dight(self, x):
        with torch.no_grad():
            output = self(x)
            _, predicted = torch.max(output.data, 1)
            return predicted.item()

In [14]:
def model_pipeline(hyperparameters):

    # tell wandb to get started
    with wandb.init(project="db_hw_3-demo", config=hyperparameters):
      # access all logs through wandb.config, so logging matches execution!
      config = wandb.config

      # make the model, data, and optimization problem
      model, train_loader, test_loader, criterion, optimizer = make(config)
      print(model)

      # and use them to train the model
      train(model, train_loader, criterion, optimizer, config)

      # and test its final performance
      test(model, test_loader)

    return model

In [13]:
def make(config):
    # Make the data
    train, test = get_data(train=True), get_data(train=False)
    train_loader = make_loader(train, batch_size=config.batch_size)
    test_loader = make_loader(test, batch_size=config.batch_size)

    # Make the model
    model = CNN().to(device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=config.learning_rate)

    return model, train_loader, test_loader, criterion, optimizer

In [7]:
mnist_train_dataset = datasets.MNIST('../data/mnist', train=True, download=True,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.1307,), (0.3081,))
                                        ]))

mnist_test_dataset = datasets.MNIST('../data/mnist', train=False, download=True,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.1307,), (0.3081,))
                                        ]))


In [8]:

def group_data_by_digit(dataset, grouped_data, grouped_labels):
    for image, label in dataset:
        if label in [0, 1]:
            key = "01"
        elif label in [2, 3]:
            key = "23"
        elif label in [4, 5]:
            key = "45"
        elif label in [6, 7]:
            key = "67"
        elif label in [8, 9]:
            key = "89"
        
        grouped_data[key].append(image)
        grouped_labels[key].append(label)

In [9]:
# Create dictionaries to store the groups
grouped_data = {f"{i}{i+1}": [] for i in range(0, 10, 2)}
grouped_labels = {f"{i}{i+1}": [] for i in range(0, 10, 2)}

group_data_by_digit(mnist_train_dataset, grouped_data, grouped_labels)
group_data_by_digit(mnist_test_dataset, grouped_data, grouped_labels)

for key in grouped_data:
    grouped_data[key] = torch.stack(grouped_data[key])
    grouped_labels[key] = torch.tensor(grouped_labels[key])

class GroupedDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [10]:
def get_data(train=True):
    
    if train is False:
        return mnist_test_dataset
    else:
        return mnist_train_dataset



def make_loader(dataset, batch_size):
    if key is None:
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         pin_memory=True, num_workers=2)
    return loader

In [12]:
def train(model, loader, criterion, optimizer, config, key=None):
    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)

    if key is None:
        # Run training and track with wandb
        total_batches = len(loader) * config.epochs
        example_ct = 0  # number of examples seen
        batch_ct = 0
        for epoch in tqdm(range(config.epochs)):
            for _, (images, labels) in enumerate(loader):

                loss = train_batch(images, labels, model, optimizer, criterion)
                example_ct +=  len(images)
                batch_ct += 1

                # Report metrics every 25th batch
                if ((batch_ct + 1) % 25) == 0:
                    train_log(loss, example_ct, epoch)
    else:
        for key in loader:
            total_batches = len(loader[key]) * config.epochs
            example_ct = 0
            batch_ct = 0
            for epoch in tqdm(range(config.epochs)):
                for _, (images, labels) in enumerate(loader):

                    loss = train_batch(images, labels, model, optimizer, criterion)
                    example_ct +=  len(images)
                    batch_ct += 1

                    # Report metrics every 25th batch
                    if ((batch_ct + 1) % 25) == 0:
                        train_log(loss, example_ct, epoch)


def train_batch(images, labels, model, optimizer, criterion):
    images, labels = images.to(device), labels.to(device)

    # Forward pass ➡
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass ⬅
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss

In [11]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [20]:
def test(model, test_loader):
    model.eval()

    # Run the model on some test examples
    with torch.no_grad():
        correct, total = 0, 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Accuracy of the model on the {total} " +
              f"test images: {correct / total:%}")

        wandb.log({"test_accuracy": correct / total})

    # Save the model in the exchangeable ONNX format
    torch.onnx.export(model, images, "model.onnx")
    wandb.save("model.onnx")

In [15]:
# Sweep configuration
sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 10, 15]
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'learning_rate': {
            'min': 0.0001,
            'max': 0.1
        },
        'classes': {
            'values': [10]  # 예를 들어 MNIST 데이터셋을 사용하는 경우 10개의 클래스를 갖습니다.
        }
    }
}

In [16]:
# Initialize sweep
sweep_id = wandb.sweep(sweep_config, project="your_project_name")

# Function to be called for each sweep
def train_with_wandb():
    with wandb.init() as run:
        config = wandb.config
        model = model_pipeline(config)

# Run the sweep agent
wandb.agent(sweep_id, train_with_wandb, count=10)

Create sweep with ID: tcj781aa
Sweep URL: https://wandb.ai/ymkim78/your_project_name/sweeps/tcj781aa


[34m[1mwandb[0m: Agent Starting Run: wv5rnz24 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	classes: 10
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.05366512260532099
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112344333337711, max=1.0…

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/2229655354.py", line 8, in train_with_wandb
    model = model_pipeline(config)
            ^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value
Run wv5rnz24 errored:
Traceback (most recent call last):
  File "/home/student/anaconda3/envs/db_hw/lib/python3.12/site-packages/wandb/agents/pyagent.py", line 307, in _run_job
    self._function()
  File



VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112526633345018, max=1.0…

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/2229655354.py", line 8, in train_with_wandb
    model = model_pipeline(config)
            ^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value
Run 7i9yu45y errored:
Traceback (most recent call last):
  File "/home/student/anaconda3/envs/db_hw/lib/python3.12/site-packages/wandb/agents/pyagent.py", line 307, in _run_job
    self._function()
  File



VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113005422198006, max=1.0…

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Traceback (most recent call last):
  File "/tmp/ipykernel_20638/2229655354.py", line 8, in train_with_wandb
    model = model_pipeline(config)
            ^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1057894848.py", line 9, in model_pipeline
    model, train_loader, test_loader, criterion, optimizer = make(config)
                                                             ^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/3480473988.py", line 4, in make
    train_loader = make_loader(train, batch_size=config.batch_size)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_20638/1730621210.py", line 16, in make_loader
    return loader
           ^^^^^^
UnboundLocalError: cannot access local variable 'loader' where it is not associated with a value
Run shgpri8r errored:
Traceback (most recent call last):
  File "/home/student/anaconda3/envs/db_hw/lib/python3.12/site-packages/wandb/agents/pyagent.py", line 307, in _run_job
    self._function()
  File



VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112575444430048, max=1.0…