In [1]:
!pip install -qqq wandb
!pip install -qqq pytorch-lightning

In [2]:
import wandb
from pytorch_lightning.loggers import WandbLogger
import torch
from torch.nn import functional as F
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
import pytorch_lightning as pl
import torchmetrics
from torchvision.datasets import MNIST
from torchvision import transforms
import torchvision

In [3]:
class LitMNIST(LightningModule):

    def __init__(self, n_classes=10, n_layer_1=128, n_layer_2=256, lr=1e-3):
        super().__init__()
        self.layer_1 = torch.nn.Linear(3 * 32 * 32, n_layer_1)
        self.layer_2 = torch.nn.Linear(n_layer_1, n_layer_2)
        self.layer_3 = torch.nn.Linear(n_layer_2, n_classes)
        self.lr = lr
        self.accuracy = torchmetrics.Accuracy()
        self.save_hyperparameters()

    def forward(self, x):
        batch_size, channels, width, height = x.size()
        x = x.view(batch_size, -1)
        x = self.layer_1(x)
        x = F.relu(x)
        x = self.layer_2(x)
        x = F.relu(x)
        x = self.layer_3(x)
        x = F.log_softmax(x, dim=1)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('valid_loss', loss)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('test_loss', loss)
    
    def configure_optimizers(self):
        return Adam(self.parameters(), lr=self.lr)

In [4]:
class MNISTDataModule(LightningDataModule):

    def __init__(self, data_dir='./', batch_size=256):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.transform = transforms.ToTensor()

    def prepare_data(self):
        CIFAR10(self.data_dir, train=True, download=True)
        CIFAR10(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            mnist_train = CIFAR10(self.data_dir, train=True, transform=self.transform)
            print(len(mnist_train))
            self.mnist_train, self.mnist_val = random_split(mnist_train, [46000, 4000])
        if stage == 'test' or stage is None:
            self.mnist_test = CIFAR10(self.data_dir, train=False, transform=self.transform)

    def train_dataloader(self):
        mnist_train = DataLoader(self.mnist_train, batch_size=self.batch_size)
        return mnist_train

    def val_dataloader(self):
        mnist_val = DataLoader(self.mnist_val, batch_size=self.batch_size)
        return mnist_val

    def test_dataloader(self):
        mnist_test = DataLoader(self.mnist_test, batch_size=self.batch_size)
        return mnist_test

In [27]:
wandb.login()

True

In [28]:
wandb_logger = WandbLogger(project='2022707004_서정환_pytorch_lightning_Cifar10')



In [29]:
mnist = MNISTDataModule()
model = LitMNIST(n_layer_1=128, n_layer_2=256, lr=1e-3)

In [30]:
trainer = Trainer(
    logger=wandb_logger,    # W&B integration
    gpus=0,                # use all GPU's
    max_epochs=3            # number of epochs
    )

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [31]:
trainer.fit(model, mnist)

Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.callbacks.model_summary:
  | Name     | Type     | Params
--------------------------------------
0 | layer_1  | Linear   | 393 K 
1 | layer_2  | Linear   | 33.0 K
2 | layer_3  | Linear   | 2.6 K 
3 | accuracy | Accuracy | 0     
--------------------------------------
428 K     Trainable params
0         Non-trainable params
428 K     Total params
1.716     Total estimated model params size (MB)


50000


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.


In [32]:
trainer.test(model, datamodule=mnist)

Files already downloaded and verified
Files already downloaded and verified




Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            1.606942057609558
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 1.606942057609558}]

In [33]:
wandb.finish()

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▃▃▃▃▃▆▆▆▆█
test_loss,▁
train_loss,██▄▂▃▂▃▃▂▁
trainer/global_step,▁▂▂▃▃▄▅▅▅▆▇▇██
valid_loss,█▃▁

0,1
epoch,3.0
test_loss,1.60694
train_loss,1.5689
trainer/global_step,540.0
valid_loss,1.62034


In [34]:
sweep_config = {
  "method": "random",
  "metric": {
      "name": "valid_acc",
      "goal": "maximize"
  },
  "parameters": {
        "n_layer_1": {
            "values": [32, 64, 128, 256, 512]
        },
        "n_layer_2": {
            "values": [32, 64, 128, 256, 512, 1024]
        },
        "lr": {
            "distribution": "log_uniform",
            "min": -9.21,
            "max": -4.61
        }
    }
}

In [35]:
sweep_id = wandb.sweep(sweep_config, project="2022707004_서정환_pytorch_lightning_Cifar10")



Create sweep with ID: m9x55ysu
Sweep URL: https://wandb.ai/gaeul/2022707004_%EC%84%9C%EC%A0%95%ED%99%98_pytorch_lightning_Cifar10/sweeps/m9x55ysu


In [37]:
def sweep_iteration():
    wandb.init()
    wandb_logger = WandbLogger()
    mnist = MNISTDataModule()
    model = LitMNIST(
        n_layer_1=wandb.config.n_layer_1,
        n_layer_2=wandb.config.n_layer_2,
        lr=wandb.config.lr
    )
    trainer = Trainer(
        logger=wandb_logger,    # W&B integration
        gpus=0,                # use all GPU's
        max_epochs=3            # number of epochs
        )
    trainer.fit(model, mnist)

In [38]:
wandb.agent(sweep_id, function=sweep_iteration)

[34m[1mwandb[0m: Agent Starting Run: n1ykb37l with config:
[34m[1mwandb[0m: 	lr: 0.00018997025952529825
[34m[1mwandb[0m: 	n_layer_1: 128
[34m[1mwandb[0m: 	n_layer_2: 32
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.callbacks.model_summary:
  | Name     | Type     | Params
--------------------------------------
0 | layer_1  | Linear   | 393 K 
1 | layer_2  | Linear   | 4.1 K 
2 | layer_3  | Linear   | 330   
3 | accuracy | Accuracy | 0     
--------------------------------------
397 K     Trainable params
0         Non-trainable params
397 K     Total params
1.591     Total estimated model params size (MB)


50000


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▅▅▅▅▅████
train_loss,█▅▄▃▂▁▂▁▂▂
trainer/global_step,▁▂▂▃▃▄▅▅▅▆▇▇█
valid_loss,█▄▁

0,1
epoch,2.0
train_loss,1.83219
trainer/global_step,539.0
valid_loss,1.77253


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
