In [1]:
import torch
from torch import Tensor
from torch import optim
from torch.optim.adam import Adam
from torch.utils.data import DataLoader

from torchvision.datasets import CIFAR10 # type: ignore
from torchvision.transforms import Compose, ToTensor, Normalize # type: ignore

import utils

In [2]:
# --- Data --------------------------------------------------------------------
# Convert images to tensors, then normalize every channel with mean 0.5 / std 0.5.
transform = Compose([ # type: ignore
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits, stored under ./data (downloaded if missing).
train_dataset = CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Training batches are shuffled; test batches keep the dataset order.
train_loader: DataLoader[tuple[Tensor, Tensor]] = DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
)
test_loader: DataLoader[tuple[Tensor, Tensor]] = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

# --- Model -------------------------------------------------------------------
# Layer specifications per VGG variant: integers and 'M' markers, consumed by
# utils.BayesianVGG.
# NOTE(review): presumably ints are conv output channels and 'M' is max-pooling
# — confirm against utils.BayesianVGG.
cfgs = {
    'VGG11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'VGG19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

model = utils.BayesianVGG(cfgs['VGG16'])

# --- Optimization ------------------------------------------------------------
# Adam at lr=1e-4; the scheduler multiplies the lr by 0.99 on every scheduler step.
optimizer = Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=1,
    gamma=0.99,
)


def reg_coef_lambda(epoch: int) -> float:
    """Return the regularization coefficient to use at a given epoch.

    The schedule has three phases:

    * ``epoch < 100``: constant warm-up value ``1e-6``;
    * ``100 <= epoch < 200``: linear ramp ``(epoch - 100) / 100``, floored at
      the warm-up value so the schedule never decreases;
    * ``epoch >= 200``: constant ``1.0``.

    The previous ramp used ``(epoch - 109) / 100``, which produced *negative*
    coefficients for epochs 100-108 and only reached 0.90 before jumping to 1.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        A coefficient in ``[1e-6, 1.0]`` that is non-decreasing in ``epoch``.
    """
    warmup_end = 100   # last epoch (exclusive) of the constant warm-up phase
    ramp_end = 200     # epoch at which the coefficient reaches 1
    floor = 1e-6       # warm-up value; also the lower bound during the ramp

    if epoch < warmup_end:
        return floor
    if epoch < ramp_end:
        # Linear interpolation from 0 to 1 over [warmup_end, ramp_end); the
        # floor keeps the value from dipping below the warm-up coefficient.
        return max(floor, (epoch - warmup_end) / (ramp_end - warmup_end))
    return 1.0

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Build the trainer from the model, both loaders (with their dataset sizes),
# the optimizer and the lr scheduler, then run training for 500 epochs using
# the reg_coef_lambda schedule.
trainer = utils.Trainer(
    model,
    train_loader,
    len(train_dataset),
    test_loader,
    len(test_dataset),
    optimizer,
    scheduler,
    epochs=500,
    reg_coef_lambda=reg_coef_lambda,
)
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mantonii-belyshev[0m. Use [1m`wandb login --relogin`[0m to force relogin


KeyboardInterrupt: 

In [18]:
# Sanity check: for every test batch, print the mean log-probability the model
# assigns to the true class (computed by hand) next to CrossEntropyLoss. The
# two numbers should be equal in magnitude and opposite in sign.

# NOTE(review): the model is deliberately left in train mode, as in the
# original cell — confirm this is intended for an evaluation-style check.
model.train()

# Run on whatever device the model lives on instead of hard-coding CUDA.
device = next(model.parameters()).device
criterion = torch.nn.CrossEntropyLoss()  # built once, not once per batch

# No gradients are needed for a diagnostic pass; this avoids building an
# autograd graph for every batch.
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)

        # log-softmax via logsumexp: same value as pred - log(sum(exp(pred)))
        # but does not overflow for large logits.
        log_probs = pred - torch.logsumexp(pred, dim=1, keepdim=True)
        rows = torch.arange(len(y), device=device)

        print(log_probs[rows, y].mean())
        print(criterion(pred, y))
        print()

tensor(-1.7062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.7062, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-1.7137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.7137, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-1.7296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.7296, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-2.0127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.0127, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-2.4567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4567, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-1.2312, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2312, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-1.3176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3176, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-2.1945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.1945, device='cuda:0', grad_fn=<NllLossBackward0>)

tensor(-0.6489, device='cuda:0', grad_fn=<MeanBa

tensor(-1.7852, device='cuda:0', grad_fn=<MeanBackward0>)

In [17]:
import torch

# Mean cross-entropy between the most recent `pred` and its labels `y`
# (both left over from the batch loop); displayed as the cell's output.
torch.nn.functional.cross_entropy(pred, y.cuda())

tensor(1.7852, device='cuda:0', grad_fn=<NllLossBackward0>)