In [1]:
import os
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision as tv
from pl_bolts.transforms import dataset_normalizations
from pytorch_lightning import seed_everything

In [2]:
# Fix the global random seed up front so repeated runs start from the same state.
seed_everything(seed=123)

Global seed set to 123


123

In [3]:
# The `data` folder is expected directly under the project root, which is
# taken to be the parent of the current working directory.
PATH_ROOT = Path(os.getcwd()).absolute().parent
PATH_DATASETS = os.path.join(PATH_ROOT, 'data')
print('Data path:', PATH_DATASETS)

# Use a larger batch when a CUDA device is available.
BATCH_SIZE = 256 if torch.cuda.is_available() else 64
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 0 so the DataLoader always receives an integer worker count.
NUM_WORKERS = os.cpu_count() or 0

Data path: /home/lukec/workspace/baard_v4/data


In [4]:
# Evaluation pipeline: tensor conversion followed by CIFAR-10 normalisation.
transform_test = tv.transforms.Compose(
    [
        tv.transforms.ToTensor(),
        dataset_normalizations.cifar10_normalization(),
    ]
)

# Training pipeline: random crop (with 4 px padding) and random horizontal
# flip as augmentation, then the same conversion + normalisation as above.
transform_train = tv.transforms.Compose(
    [
        tv.transforms.RandomCrop(size=32, padding=4),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        dataset_normalizations.cifar10_normalization(),
    ]
)

In [5]:
# Build the train and test splits (train first), each with its own transform.
dataset_train, dataset_test = (
    tv.datasets.CIFAR10(PATH_DATASETS, train=is_train, download=True, transform=split_transform)
    for is_train, split_transform in ((True, transform_train), (False, transform_test))
)

# Only the training loader shuffles; the test loader keeps dataset order.
loader_train = DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
loader_test = DataLoader(
    dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
def create_model():
    """Build an untrained torchvision ResNet-18 with 10 output classes.

    The first convolution is replaced by a 3x3, stride-1, padding-1 layer and
    the max-pool is replaced by an identity, so the stem does not reduce the
    spatial size of the input.

    Returns:
        The modified ResNet-18 module.
    """
    net = tv.models.resnet18(weights=None, num_classes=10)
    # Swap out the two stem layers that would otherwise downsample the input.
    net.maxpool = nn.Identity()
    net.conv1 = nn.Conv2d(
        in_channels=3,
        out_channels=64,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
    )
    return net

In [7]:
from torchinfo import summary

model = create_model()

# Pull one training sample to read off the per-image tensor shape, then
# prepend the batch dimension for the layer-by-layer summary.
i = iter(dataset_train)
x, y = next(i)
input_size = (BATCH_SIZE, *x.shape)
summary(model, input_size=input_size)

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [256, 10]                 --
├─Conv2d: 1-1                            [256, 64, 32, 32]         1,728
├─BatchNorm2d: 1-2                       [256, 64, 32, 32]         128
├─ReLU: 1-3                              [256, 64, 32, 32]         --
├─Identity: 1-4                          [256, 64, 32, 32]         --
├─Sequential: 1-5                        [256, 64, 32, 32]         --
│    └─BasicBlock: 2-1                   [256, 64, 32, 32]         --
│    │    └─Conv2d: 3-1                  [256, 64, 32, 32]         36,864
│    │    └─BatchNorm2d: 3-2             [256, 64, 32, 32]         128
│    │    └─ReLU: 3-3                    [256, 64, 32, 32]         --
│    │    └─Conv2d: 3-4                  [256, 64, 32, 32]         36,864
│    │    └─BatchNorm2d: 3-5             [256, 64, 32, 32]         128
│    │    └─ReLU: 3-6                    [256, 64, 32, 32]         --
│

In [8]:
from torch.optim.lr_scheduler import OneCycleLR
from pytorch_lightning import LightningModule


class LitResnet(LightningModule):
    """Lightning module wrapping the network returned by `create_model`.

    Trains with SGD (momentum 0.9, weight decay 5e-4) under a per-step
    one-cycle learning-rate schedule, and logs loss/accuracy for the
    train, validation and test stages.

    Args:
        lr: Learning rate handed to the SGD optimizer. The one-cycle
            scheduler drives the learning rate during training, so the peak
            value is governed by `max_lr` (see the OneCycleLR documentation).
        max_lr: Peak learning rate of the one-cycle schedule. Defaults to
            0.1, the value that used to be hard-coded.
    """

    def __init__(self, lr, max_lr=0.1):
        super().__init__()

        # Stores `lr` and `max_lr` under `self.hparams`.
        self.save_hyperparameters()
        self.model = create_model()
        self.loss_fn = nn.CrossEntropyLoss()
        # Single dummy image used by Lightning for the In/Out sizes summary.
        self.example_input_array = torch.zeros((1, 3, 32, 32), dtype=torch.float32)

    def forward(self, x):
        """Return the raw network outputs (logits) for a batch of inputs."""
        return self.model(x)

    def _loss_and_accuracy(self, batch):
        """Compute cross-entropy loss and mean accuracy for an `(x, y)` batch."""
        x, y = batch
        outputs = self.model(x)
        loss = self.loss_fn(outputs, y)
        acc = (outputs.argmax(dim=-1) == y).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Log training accuracy (per epoch) and loss, and return the loss."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log("train_acc", acc, on_step=False, on_epoch=True)
        self.log("train_loss", loss)
        return loss

    def evaluate(self, batch, stage=None):
        """Compute loss/accuracy for a batch and log them as `{stage}_loss`
        and `{stage}_acc`. Nothing is logged when `stage` is falsy."""
        loss, acc = self._loss_and_accuracy(batch)

        if stage:
            self.log(f"{stage}_loss", loss)
            self.log(f"{stage}_acc", acc)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, "val")

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, "test")

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return the network outputs for a prediction batch.

        The loaders in this notebook yield `(inputs, labels)` pairs, which
        cannot be passed to `forward` as-is; take the inputs out first. A bare
        input tensor is accepted unchanged.
        """
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        return self(x)

    def configure_optimizers(self):
        """Create the SGD optimizer and its per-step one-cycle scheduler."""
        optimizer = torch.optim.SGD(
            self.parameters(),
            lr=self.hparams.lr,
            momentum=0.9,
            weight_decay=5e-4,
        )
        scheduler_dict = {
            "scheduler": OneCycleLR(
                optimizer,
                max_lr=self.hparams.max_lr,
                # Ask the trainer for the number of optimizer steps instead of
                # reading a notebook-global dataloader, so the schedule length
                # follows whatever data the module is actually fitted on.
                total_steps=self.trainer.estimated_stepping_batches,
            ),
            "interval": "step",
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler_dict}

In [9]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.callbacks.progress import TQDMProgressBar

# Training hyperparameters.
MAX_EPOCHS = 5
LEARNING_RATE = 0.05

# Output locations, all relative to the project root.
MODEL_NAME = 'cifar10-resnet18'
PATH_CHECKPOINT = os.path.join(PATH_ROOT, 'pretrained_models')
PATH_MODEL = os.path.join(PATH_CHECKPOINT, MODEL_NAME)
# NOTE(review): PATH_LOG is defined but not used in this cell; the CSV logger
# below writes under PATH_CHECKPOINT. Confirm which directory is intended.
PATH_LOG = os.path.join(PATH_ROOT, 'logs')

trainer = Trainer(
    max_epochs=MAX_EPOCHS,
    accelerator="auto",
    devices=1 if torch.cuda.is_available() else None,  # limiting for iPython runs
    logger=CSVLogger(save_dir=PATH_CHECKPOINT),
    callbacks=[
        # Checkpoint selection is driven by the highest "val_acc".
        ModelCheckpoint(save_weights_only=True, mode="max", monitor="val_acc"),
        LearningRateMonitor(logging_interval="step"),
        TQDMProgressBar(refresh_rate=10),
    ],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
# Reuse a cached checkpoint when one exists; otherwise train from scratch and
# cache the result so the next run can skip training.
pretrained_filename = PATH_MODEL + ".ckpt"
if os.path.isfile(pretrained_filename):
    model = LitResnet.load_from_checkpoint(pretrained_filename)
else:
    model = LitResnet(lr=LEARNING_RATE)
    # NOTE(review): the test loader doubles as the validation loader here, so
    # "val_acc"-based checkpoint selection sees the test data.
    trainer.fit(model, train_dataloaders=loader_train, val_dataloaders=loader_test)
    # Without this save, nothing ever writes `pretrained_filename` and the
    # cache check above can never succeed. An interrupted fit is not cached,
    # so a partially trained model is not later mistaken for the pretrained one.
    if not trainer.interrupted:
        os.makedirs(PATH_CHECKPOINT, exist_ok=True)
        trainer.save_checkpoint(pretrained_filename, weights_only=True)

trainer.test(model, dataloaders=loader_test)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | In sizes       | Out sizes
--------------------------------------------------------------------------
0 | model   | ResNet           | 11.2 M | [1, 3, 32, 32] | [1, 10]  
1 | loss_fn | CrossEntropyLoss | 0      | ?              | ?        
--------------------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.696    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc             0.453000009059906
        test_loss           1.4720160961151123
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 1.4720160961151123, 'test_acc': 0.453000009059906}]

In [None]:
import pandas as pd
import seaborn as sn

# Load the CSV logger's metrics, drop the step counter and index by epoch.
metrics = (
    pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
    .drop(columns="step")
    .set_index("epoch")
)
# Preview only the columns that contain at least one value.
display(metrics.dropna(axis=1, how="all").head())
sn.relplot(data=metrics, kind="line")

In [None]:
# Run inference over the test loader and display what the trainer returns.
predictions = trainer.predict(model, dataloaders=loader_test)
predictions