In [None]:
#| default_exp training

# Initial setup

In [None]:
#| export
import torch
import torch.nn.functional as F

In [None]:
from pathlib import Path
import pickle
import gzip

from fastcore.test import test_close
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

torch.manual_seed(1103)

matplotlib.rcParams["image.cmap"] = "gray"
torch.set_printoptions(precision=2, linewidth=160, sci_mode=False)
np.set_printoptions(precision=2, linewidth=160)

# Location of the gzipped MNIST pickle, relative to the current working directory.
data_path = Path("data")
mnist_path = data_path / "mnist.pkl.gz"
# NOTE(review): unpickling can execute arbitrary code; only load this archive from a trusted source.
with gzip.open(mnist_path) as f:
    # The pickle is unpacked as a 3-tuple: two (inputs, labels) pairs plus a third element discarded here.
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="bytes")
# Convert the four loaded objects to torch tensors.
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))

In [None]:
import torch.nn as nn

In [None]:
class Model(nn.Module):
    """Two-layer perceptron: ``Linear -> ReLU -> Linear``.

    The layers live in a plain Python list (``self.layers``). ``nn.Module`` does not
    register the contents of a list as sub-modules, so ``parameters()`` yields nothing
    for this model; training code has to walk ``self.layers`` itself.
    """

    def __init__(self, n_in, n_h, n_out):
        """Create the layers for ``n_in`` inputs, ``n_h`` hidden units and ``n_out`` outputs."""
        super().__init__()
        self.layers = [nn.Linear(n_in, n_h), nn.ReLU(), nn.Linear(n_h, n_out)]

    def __call__(self, x, target=None):
        """Run ``x`` through every layer in order and return the final activations.

        ``target`` is never read. It is kept (now optional) only so that existing
        two-argument calls keep working.
        """
        for l in self.layers:
            x = l(x)
        return x

In [None]:
nh = 50
c = y_train.max().item() + 1
c

10

In [None]:
model = Model(x_train.shape[1], nh, c)

In [None]:
preds = model(x_train, x_valid)

In [None]:
preds.shape

torch.Size([50000, 10])

## Cross entropy loss

In [None]:
def log_softmax(x):
    """Naive log-softmax over dim 1: exponentiate, normalise each row, then take the log."""
    numer = x.exp()
    denom = numer.sum(dim=1, keepdim=True)
    return (numer / denom).log()

In [None]:
res = log_softmax(preds)
res.shape, res

(torch.Size([50000, 10]),
 tensor([[-2.40, -2.41, -2.43,  ..., -2.20, -2.25, -2.14],
         [-2.30, -2.51, -2.23,  ..., -2.18, -2.30, -2.26],
         [-2.34, -2.43, -2.34,  ..., -2.23, -2.20, -2.20],
         ...,
         [-2.37, -2.42, -2.38,  ..., -2.18, -2.27, -2.23],
         [-2.31, -2.40, -2.42,  ..., -2.21, -2.22, -2.25],
         [-2.41, -2.43, -2.38,  ..., -2.17, -2.30, -2.19]], grad_fn=<LogBackward0>))

In [None]:
def log_softmax(x):
    """Log-softmax over dim 1, written as ``x - log(sum(exp(x)))`` per row."""
    log_norm = x.exp().sum(dim=1, keepdim=True).log()
    return x - log_norm

In [None]:
res = log_softmax(preds)
res.shape, res

(torch.Size([50000, 10]),
 tensor([[-2.40, -2.41, -2.43,  ..., -2.20, -2.25, -2.14],
         [-2.30, -2.51, -2.23,  ..., -2.18, -2.30, -2.26],
         [-2.34, -2.43, -2.34,  ..., -2.23, -2.20, -2.20],
         ...,
         [-2.37, -2.42, -2.38,  ..., -2.18, -2.27, -2.23],
         [-2.31, -2.40, -2.42,  ..., -2.21, -2.22, -2.25],
         [-2.41, -2.43, -2.38,  ..., -2.17, -2.30, -2.19]], grad_fn=<SubBackward0>))

In [None]:
test_close(F.log_softmax(preds, dim=1), res)

In [None]:
def logsumexp(x):
    """Numerically stable ``log(sum(exp(x)))`` over dim 1, returned with dim 1 kept (size 1).

    Each row is shifted by its own maximum before exponentiating. Shifting by the
    global maximum of the whole tensor makes every entry of a row that sits far below
    that maximum underflow to zero, giving ``log(0) = -inf`` for that row.
    """
    row_max = x.max(dim=1, keepdim=True).values
    # A row whose maximum is +/-inf would produce inf - inf = nan; shift such rows by 0 instead.
    shift = torch.where(torch.isfinite(row_max), row_max, torch.zeros_like(row_max))
    return (x - shift).exp().sum(dim=1, keepdim=True).log() + shift

In [None]:
def log_softmax(x):
    """Log-softmax: subtract the value returned by `logsumexp(x)` from `x`."""
    log_norm = logsumexp(x)
    return x - log_norm

In [None]:
test_close(res, log_softmax(preds))

In [None]:
ll = F.log_softmax(preds, dim=1)

In [None]:
ll.shape, y_train.shape

(torch.Size([50000, 10]), torch.Size([50000]))

In [None]:
ll[range(y_train.shape[0]), y_train].shape

torch.Size([50000])

In [None]:
def nll(x, target):
    """Mean negative log-likelihood of `target` under the log-softmax (dim 1) of `x`."""
    log_probs = F.log_softmax(x, dim=1)
    rows = torch.arange(target.shape[0])
    # Pick, for every row, the log-probability at that row's target index.
    picked = log_probs[rows, target]
    return picked.mean().neg()

In [None]:
res = nll(preds, y_train)

In [None]:
test_close(res, F.cross_entropy(preds, y_train))

## Basic training loop

In [None]:
xb = x_train[:512]
yb = y_train[:512]
preds_b = model(xb, yb)

In [None]:
loss = F.cross_entropy(preds_b, yb)

In [None]:
cls = preds_b.argmax(dim=1)

In [None]:
cls.shape

torch.Size([512])

In [None]:
def accuracy(out, yb):
    """Fraction of entries of `out` that equal `yb`, as a float tensor."""
    matches = out == yb
    return matches.float().mean()

In [None]:
accuracy(cls, yb)

tensor(0.14)

In [None]:
#| export
def accuracy(preds, yb):
    """Fraction of rows of `preds` whose arg-max over dim 1 equals `yb`, as a Python float."""
    predicted = preds.argmax(dim=1)
    correct = (predicted == yb).float()
    return correct.mean().item()

In [None]:
accuracy(preds_b, yb)

0.13671875

In [None]:
lr = 0.1
epochs = 5
bs = 512

In [None]:
hasattr(epochs, "1")

False

In [None]:
#| export
def report(loss, preds, yb):
    """Print the loss (4 decimals) and the accuracy of `preds` against `yb` (2 decimals)."""
    message = f"loss={loss.item():.4f}, accuracy={accuracy(preds, yb):.2f}"
    print(message)

In [None]:
# Baseline: evaluate the untrained model on the first mini-batch, without tracking gradients.
xb = x_train[:bs]
yb = y_train[:bs]
with torch.no_grad():
    predb = model(xb, yb)
    loss = F.cross_entropy(predb, yb)
report(loss, predb, yb)

# Manual SGD: walk the training set in consecutive slices of `bs` rows, once per epoch.
for i in range(epochs):
    for b in range(0, len(x_train), bs):
        xb = x_train[b:b+bs]
        yb = y_train[b:b+bs]
        predb = model(xb, yb)
        loss = F.cross_entropy(predb, yb)
        loss.backward()

        # Update in place under no_grad so the update itself is not recorded by autograd.
        with torch.no_grad():
            for l in model.layers:
                # Only layers exposing a `weight` attribute are updated; others are skipped.
                if hasattr(l, "weight"):
                    l.weight -= l.weight.grad * lr
                    l.bias -= l.bias.grad * lr
                    # Reset the gradients so the next backward() does not add to them.
                    l.weight.grad.zero_()
                    l.bias.grad.zero_()
    # Reports the loss/accuracy of the last mini-batch of the epoch only.
    report(loss, predb, yb)

loss=2.2953, accuracy=0.14
loss=0.7081, accuracy=0.84
loss=0.5040, accuracy=0.87
loss=0.4349, accuracy=0.90
loss=0.3978, accuracy=0.91
loss=0.3731, accuracy=0.92


## Using parameters and optim

### Parameters

In [None]:
dummy = nn.Module()
dummy

Module()

In [None]:
dummy.a = 1
dummy.b = 2
dummy

Module()

In [None]:
dummy.c = nn.Module()
dummy.c.a = nn.Module()
dummy.d = nn.Module()
dummy

Module(
  (c): Module(
    (a): Module()
  )
  (d): Module()
)

In [None]:
for name, nc in list(dummy.named_children()):
    print(name, nc)

c Module(
  (a): Module()
)
d Module()


In [None]:
list(dummy.parameters())

[]

In [None]:
dummy.l = nn.Linear(2, 3)
list(dummy.parameters())

[Parameter containing:
 tensor([[ 0.25, -0.53],
         [ 0.45, -0.38],
         [ 0.37, -0.13]], requires_grad=True),
 Parameter containing:
 tensor([ 0.30, -0.41, -0.18], requires_grad=True)]

In [None]:
class MLP(nn.Module):
    """Perceptron with one hidden layer; its layers are attributes and therefore registered."""

    def __init__(self, nin, nh, nout):
        """Create ``l1`` (nin -> nh), ``relu`` and ``l2`` (nh -> nout)."""
        super().__init__()
        self.l1 = nn.Linear(nin, nh)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(nh, nout)

    def forward(self, x):
        """Apply ``l1``, then ``relu``, then ``l2`` to ``x``."""
        hidden = self.l1(x)
        activated = self.relu(hidden)
        return self.l2(activated)

In [None]:
model = MLP(x_train.shape[1], nh, c)
model

MLP(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
for name, nc in model.named_children():
    print(f"{name}: {nc}")

l1: Linear(in_features=784, out_features=50, bias=True)
relu: ReLU()
l2: Linear(in_features=50, out_features=10, bias=True)


In [None]:
for p in model.parameters():
    print(p.shape)

torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [None]:
def fit():
    """Train the global `model` on `x_train`/`y_train` with hand-written SGD.

    Reads the notebook globals `epochs`, `bs` and `lr`. After each epoch it reports
    the loss and accuracy of the last mini-batch processed.
    """
    n_rows = len(x_train)
    for epoch in range(epochs):
        for start in range(0, n_rows, bs):
            xb, yb = x_train[start:start + bs], y_train[start:start + bs]
            predb = model(xb)
            loss = F.cross_entropy(predb, yb)
            loss.backward()

            # Parameter update and gradient reset happen outside autograd tracking.
            with torch.no_grad():
                for param in model.parameters():
                    param.sub_(param.grad * lr)
                model.zero_grad()
        report(loss, predb, yb)

In [None]:
fit()

loss=0.7223, accuracy=0.84
loss=0.5041, accuracy=0.89
loss=0.4316, accuracy=0.90
loss=0.3919, accuracy=0.91
loss=0.3660, accuracy=0.92


In [None]:
class MyModule():
    """Minimal re-implementation of ``nn.Module``'s sub-module bookkeeping.

    Every public attribute (name not starting with ``_``) that is an ``nn.Module`` or
    another ``MyModule`` is recorded in ``self._modules``, and ``parameters()`` walks
    that registry.
    """

    def __init__(self):
        # Underscore-prefixed, so __setattr__ stores it without trying to register it.
        self._modules = {}

    def __setattr__(self, k, v):
        """Store attribute ``k`` and keep the ``_modules`` registry in sync with it."""
        if not k.startswith("_"):
            if isinstance(v, (nn.Module, MyModule)):
                self._modules[k] = v
            elif k in self.__dict__.get("_modules", ()):
                # Rebinding a registered name to a non-module must drop the stale child,
                # otherwise parameters() would keep yielding its parameters.
                del self._modules[k]
        super().__setattr__(k, v)

    def __repr__(self):
        return f"{self._modules}"

    def parameters(self):
        """Yield the parameters of every registered child, in registration order."""
        for module in self._modules.values():
            yield from module.parameters()

In [None]:
dummy = MyModule()

In [None]:
dummy.a = 1
dummy._b = nn.Linear(2, 2)
dummy.c = nn.Linear(3, 2)
dummy.c.a = nn.Linear(1, 2)
dummy.d = nn.Linear(1, 1)
dummy

{'c': Linear(
  in_features=3, out_features=2, bias=True
  (a): Linear(in_features=1, out_features=2, bias=True)
), 'd': Linear(in_features=1, out_features=1, bias=True)}

In [None]:
for p in dummy.parameters():
    print(p.shape)

torch.Size([2, 3])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2])
torch.Size([1, 1])
torch.Size([1])


### Registering modules

In [None]:
nn.Module.add_module?

Signature:
nn.Module.add_module(
    self,
    name: str,
    module: Optional[ForwardRef('Module')],
) -> None
Docstring:
Adds a child module to the current module.

The module can be accessed as an attribute using the given name.

Args:
    name (str): name of the child module. The child module can be
        accessed from this module using the given name
    module (Module): child module to be added to the module.
File:      ~/miniforge3/envs/fastai/lib/python3.11/site-packages/torch/nn/modules/module.py
Type:      function

In [None]:
class Model(nn.Module):
    """Chain of layers, each registered explicitly through ``add_module``."""

    def __init__(self, layers):
        """Register every element of ``layers`` as ``layer_<index>`` and keep ``layers`` itself."""
        super().__init__()
        for i, layer in enumerate(layers):
            self.add_module(f"layer_{i}", layer)
        # The original container is kept as a plain attribute; forward() iterates it.
        self.layers = layers

    def forward(self, x):
        """Feed ``x`` through the layers in order."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

In [None]:
layers = [nn.Linear(x_train.shape[1], nh), nn.ReLU(), nn.Linear(nh, c)]
model = Model(layers)
model

Model(
  (layer_0): Linear(in_features=784, out_features=50, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
model(xb).shape, xb.shape

(torch.Size([336, 10]), torch.Size([336, 784]))

### nn.ModuleList

In [None]:
class SequentialModel(nn.Module):
    """Chain of layers held in an ``nn.ModuleList``."""

    def __init__(self, layers):
        """Wrap ``layers`` in an ``nn.ModuleList`` stored as ``self.layers``."""
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Feed ``x`` through the layers in order."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

In [None]:
layers = [nn.Linear(x_train.shape[1], nh), nn.ReLU(), nn.Linear(nh, c)]
model = SequentialModel(layers)
model

SequentialModel(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [None]:
fit()

loss=0.6743, accuracy=0.85
loss=0.4682, accuracy=0.90
loss=0.4068, accuracy=0.91
loss=0.3734, accuracy=0.92
loss=0.3507, accuracy=0.92


### nn.Sequential

In [None]:
layers = [nn.Linear(x_train.shape[1], nh), nn.ReLU(), nn.Linear(nh, c)]
model = nn.Sequential(*layers)
fit()

loss=0.6929, accuracy=0.84
loss=0.4940, accuracy=0.89
loss=0.4289, accuracy=0.90
loss=0.3913, accuracy=0.91
loss=0.3651, accuracy=0.92


### optim

In [None]:
class Optimizer:
    """Plain stochastic gradient descent: ``p <- p - lr * p.grad``."""

    def __init__(self, parameters, lr):
        """Keep a list copy of ``parameters`` (so a generator can be passed) and the learning rate."""
        self.ps = list(parameters)
        self.lr = lr

    def step(self):
        """Apply one in-place update to every parameter that currently has a gradient."""
        with torch.no_grad():
            for p in self.ps:
                # A parameter that took no part in the last backward pass has grad None.
                if p.grad is not None:
                    p -= p.grad * self.lr

    def zero_grad(self):
        """Reset the existing gradients to zero in place; parameters without one are skipped."""
        with torch.no_grad():
            for p in self.ps:
                if p.grad is not None:
                    p.grad.zero_()
    

In [None]:
layers = [nn.Linear(x_train.shape[1], nh), nn.ReLU(), nn.Linear(nh, c)]
model = nn.Sequential(*layers)
opt = Optimizer(model.parameters(), lr)

In [None]:
n = len(x_train)

In [None]:
# Same mini-batch loop as before, but the parameter update and the gradient reset
# are delegated to `opt` (step / zero_grad).
for e in range(epochs):
    for b in range(0, n, bs):
        s = slice(b, b + bs)
        xb, yb = x_train[s], y_train[s]
        predb = model(xb)
        loss = F.cross_entropy(predb, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only.
    report(loss, predb, yb)

loss=0.6846, accuracy=0.86
loss=0.4903, accuracy=0.88
loss=0.4245, accuracy=0.90
loss=0.3855, accuracy=0.92
loss=0.3558, accuracy=0.93


In [None]:
from torch.optim import SGD

In [None]:
def get_model():
    """Build a fresh ``Linear -> ReLU -> Linear`` network and an SGD optimizer for it.

    Sizes and learning rate come from the notebook globals `x_train`, `nh`, `c` and `lr`.
    Returns the pair ``(model, optimizer)``.
    """
    n_in = x_train.shape[1]
    net = nn.Sequential(nn.Linear(n_in, nh), nn.ReLU(), nn.Linear(nh, c))
    return net, SGD(net.parameters(), lr=lr)

In [None]:
model, opt = get_model()

In [None]:
# Loss before training, computed on whatever `xb`/`yb` hold from the preceding cells.
print(F.cross_entropy(model(xb), yb))
# Mini-batch loop over consecutive slices of `bs` rows; `opt` performs the update.
for e in range(epochs):
    for b in range(0, n, bs):
        s = slice(b, b + bs)
        xb, yb = x_train[s], y_train[s]
        predb = model(xb)
        loss = F.cross_entropy(predb, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only.
    report(loss, predb, yb)

tensor(2.29, grad_fn=<NllLossBackward0>)
loss=0.6792, accuracy=0.84
loss=0.4877, accuracy=0.88
loss=0.4237, accuracy=0.89
loss=0.3879, accuracy=0.91
loss=0.3631, accuracy=0.91


## Dataset and DataLoader

### Dataset

In [None]:
#| export

class Dataset:
    """Pairs two parallel indexables so that one index returns ``(x[i], y[i])``."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The length is taken from `x` alone; `y` is not checked against it.
        return len(self.x)

    def __getitem__(self, i):
        """Index both containers with the same ``i`` (whatever index type they accept)."""
        item_x = self.x[i]
        item_y = self.y[i]
        return item_x, item_y

In [None]:
ds_train = Dataset(x_train, y_train)
ds_valid = Dataset(x_valid, y_valid)

In [None]:
xb, yb = ds_train[:5]
xb.shape, yb.shape

(torch.Size([5, 784]), torch.Size([5]))

In [None]:
model, opt = get_model()
# Loss before training, computed on whatever `xb`/`yb` hold from the preceding cells.
print(F.cross_entropy(model(xb), yb))
for e in range(epochs):
    for b in range(0, n, bs):
        # One slice of the dataset returns the inputs and the labels together.
        xb, yb = ds_train[b:b+bs]
        predb = model(xb)
        loss = F.cross_entropy(predb, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only.
    report(loss, predb, yb)

tensor(2.31, grad_fn=<NllLossBackward0>)
loss=0.6922, accuracy=0.85
loss=0.4931, accuracy=0.89
loss=0.4264, accuracy=0.91
loss=0.3888, accuracy=0.91
loss=0.3616, accuracy=0.93


### DataLoader

In [None]:
class DataLoader:
    """Iterates a sliceable dataset in consecutive batches of ``bs`` items."""

    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs

    def __iter__(self):
        """Yield ``ds[b:b+bs]`` for ``b = 0, bs, 2*bs, ...``; the last batch may be shorter."""
        # Use the instance's own batch size rather than a module-level `bs`.
        for b in range(0, len(self.ds), self.bs):
            yield self.ds[b:b+self.bs]

In [None]:
dl_train = DataLoader(ds_train, bs)
dl_valid = DataLoader(ds_valid, bs)

In [None]:
xb, yb = next(iter(dl_train))
xb.shape, yb.shape

(torch.Size([512, 784]), torch.Size([512]))

In [None]:
def fit():
    """Train the global `model` with the global `opt` on the batches of `dl_train`.

    Runs for the global number of `epochs` and reports the last batch of every epoch.
    """
    for epoch in range(epochs):
        for batch in dl_train:
            xb, yb = batch
            predb = model(xb)
            loss = F.cross_entropy(predb, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        report(loss, predb, yb)

In [None]:
model, opt = get_model()
fit()

loss=0.6949, accuracy=0.84
loss=0.4973, accuracy=0.88
loss=0.4316, accuracy=0.90
loss=0.3955, accuracy=0.91
loss=0.3709, accuracy=0.91


### Random sampling

In [None]:
import random

In [None]:
class Sampler:
    """Yields the indices ``0 .. len(ds) - 1``, optionally in shuffled order."""

    def __init__(self, ds, shuffle=False):
        # Only the length of the dataset is kept, not the dataset itself.
        self.n, self.shuffle = len(ds), shuffle

    def __iter__(self):
        """Build a fresh index list per iteration, so each pass is shuffled anew."""
        indices = [i for i in range(self.n)]
        if self.shuffle:
            random.shuffle(indices)
        for idx in indices:
            yield idx

In [None]:
sampler = Sampler(ds_train, True)

In [None]:
it = iter(sampler)
for i in range(5):
    print(next(it))

49050
32152
15156
15577
5772


In [None]:
import fastcore.all as fc

In [None]:
fc.chunked?

Signature: fc.chunked(it, chunk_sz=None, drop_last=False, n_chunks=None)
Docstring: Return batches from iterator `it` of size `chunk_sz` (or return `n_chunks` total)
File:      ~/miniforge3/envs/fastai/lib/python3.11/site-packages/fastcore/basics.py
Type:      function

In [None]:
class BatchSampler:
    """Groups the indices produced by `sampler` into chunks of `bs` via ``fc.chunked``."""

    def __init__(self, sampler, bs, drop_last=False):
        # store_attr() saves sampler, bs and drop_last as attributes of the same names.
        fc.store_attr()

    def __iter__(self):
        for chunk in fc.chunked(self.sampler, self.bs, self.drop_last):
            yield chunk

In [None]:
sampler = BatchSampler(Sampler(ds_train, True), 5)
it = iter(sampler)
for i in range(5):
    print(next(it))

[29458, 972, 15537, 39180, 9944]
[18789, 5883, 36352, 29319, 25918]
[41655, 17080, 43608, 31881, 48094]
[26847, 36816, 9073, 33588, 47073]
[9705, 33646, 38182, 22450, 14457]


In [None]:
batch = ds_train[next(it)]

In [None]:
batch[0].shape, batch[1].shape

(torch.Size([5, 784]), torch.Size([5]))

In [None]:
def collate(b):
    """Turn an iterable of ``(x, y)`` tensor pairs into one stacked ``x`` and one stacked ``y``."""
    xs, ys = zip(*b)
    inputs = torch.stack(xs)
    targets = torch.stack(ys)
    return inputs, targets

In [None]:
class DataLoader:
    """Builds batches by fetching items one index at a time and merging them with `collate_fn`.

    `batches` is an iterable of index collections (one per batch). For every batch the
    items ``ds[i]`` are passed to `collate_fn` as a generator.
    """

    def __init__(self, ds, batches, collate_fn=collate): fc.store_attr()

    def __iter__(self):
        # Collate lazily, one batch per step, instead of materialising every batch of
        # the epoch in a list before the first one is yielded.
        for b in self.batches:
            yield self.collate_fn(self.ds[i] for i in b)

In [None]:
s_train = BatchSampler(Sampler(ds_train, True), bs)
s_valid = BatchSampler(Sampler(ds_valid, False), bs * 2)

In [None]:
dl_train = DataLoader(ds_train, s_train)
dl_valid = DataLoader(ds_valid, s_valid)

In [None]:
xb, yb = next(iter(dl_train))
xb.shape, yb.shape

(torch.Size([512, 784]), torch.Size([512]))

In [None]:
model, opt = get_model()
fit()

loss=0.6354, accuracy=0.81
loss=0.4308, accuracy=0.88
loss=0.3326, accuracy=0.89
loss=0.3861, accuracy=0.88
loss=0.3546, accuracy=0.91


### Multiprocessing DataLoader

In [None]:
import torch.multiprocessing as mp

In [None]:
ds_train[[0,1,2,3]]

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([5, 0, 4, 1]))

In [None]:
for sb in map(ds_train.__getitem__, [[0,1],[2,3]]): print(sb)

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([5, 0]))
(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([4, 1]))


In [None]:
x, y = next(map(ds_train.__getitem__, iter(s_train)))
x.shape, y.shape

(torch.Size([512, 784]), torch.Size([512]))

In [None]:
with mp.Pool(2) as pool:
    res = pool.map(ds_train.__getitem__, iter([[0,1],[2,3]]))
    print(type(res), len(res[0]), len(res[1]))

<class 'list'> 2 2


In [None]:
tmp = Dataset(x_train[:16], y_train[:16])
res = list(map(tmp.__getitem__, iter(Sampler(tmp, False))))
print(len(res))

16


In [None]:
with mp.Pool(2) as pool:
    res = pool.map(tmp.__getitem__, iter(Sampler(tmp, False)))
    print(len(res))

16


In [None]:
from time import perf_counter

In [None]:
class DataLoader:
    """Fetches whole batches with ``ds[batch_indices]``, optionally through a process pool.

    `collate_fn` is stored by ``store_attr`` but is not used by ``__iter__``.
    """

    def __init__(self, ds, batches, n_workers=0, collate_fn=collate): fc.store_attr()

    def __iter__(self):
        fetch = self.ds.__getitem__
        if not self.n_workers:
            # Single-process path: batches are fetched lazily as they are consumed.
            yield from map(fetch, iter(self.batches))
            return
        with mp.Pool(self.n_workers) as pool:
            # pool.map returns a complete list, so every batch is fetched before the first is yielded.
            yield from pool.map(fetch, iter(self.batches))

In [None]:
dl_train = DataLoader(ds_train, s_train, 4)
st = perf_counter()
_ = next(iter(dl_train))
print(perf_counter() - st)

1.1399545919998673


In [None]:
st = perf_counter()
_ = next(iter(dl_valid))
print(perf_counter() - st)

0.15937871899996026


### PyTorch DataLoader

In [None]:
#| export
from torch.utils.data import DataLoader, BatchSampler, RandomSampler, SequentialSampler

In [None]:
BatchSampler?

Init signature:
BatchSampler(
    sampler: Union[torch.utils.data.sampler.Sampler[int], Iterable[int]],
    batch_size: int,
    drop_last: bool,
) -> None
Docstring:
Wraps another sampler to yield a mini-batch of indices.

Args:
    sampler (Sampler or Iterable): Base sampler. Can be any iterable object
    batch_size (int): Size of mini-batch.
    drop_last (bool): If ``True``, the sampler will drop the last batch if
        its size would be less than ``batch_size``

Example:
    >>> list(BatchSample

In [None]:
# Shuffled index batches for training, sequential index batches for validation.
s_train = BatchSampler(RandomSampler(ds_train), bs, False)
# The validation sampler has to be built over ds_valid: sampling over ds_train would
# produce indices beyond the end of the validation set it is later paired with.
s_valid = BatchSampler(SequentialSampler(ds_valid), bs, False)

In [None]:
dl_train = DataLoader(ds_train, batch_sampler=s_train, collate_fn=collate)
dl_valid = DataLoader(ds_valid, batch_sampler=s_valid, collate_fn=collate)

In [None]:
xb, yb = next(iter(dl_train))
xb.shape, yb.shape

(torch.Size([512, 784]), torch.Size([512]))

In [None]:
model, opt = get_model()
fit()

loss=0.6344, accuracy=0.85
loss=0.3726, accuracy=0.90
loss=0.3693, accuracy=0.90
loss=0.4026, accuracy=0.88
loss=0.3488, accuracy=0.91


In [None]:
dl_train = DataLoader(ds_train, batch_size=bs, shuffle=True, drop_last=False, num_workers=2)
dl_valid = DataLoader(ds_valid, batch_size=bs*2, shuffle=False, drop_last=False, num_workers=2)

In [None]:
xb, yb = next(iter(dl_train))
xb.shape, yb.shape

(torch.Size([512, 784]), torch.Size([512]))

In [None]:
model, opt = get_model()
fit()

loss=0.6148, accuracy=0.85
loss=0.5123, accuracy=0.83
loss=0.4209, accuracy=0.90
loss=0.3258, accuracy=0.91
loss=0.2468, accuracy=0.92


In [None]:
dl_train = DataLoader(ds_train, sampler=s_train)
dl_valid = DataLoader(ds_valid, sampler=s_valid)

In [None]:
xb, yb = next(iter(dl_train))
xb.shape, yb.shape

(torch.Size([1, 512, 784]), torch.Size([1, 512]))

## Validation

In [None]:
#| export

def fit(epochs, model, opt, loss_func, dl_train, dl_valid):
    """Train `model` for `epochs` epochs and validate after each one.

    Args:
        epochs: number of passes over `dl_train`.
        model: module called as ``model(xb)``; switched between train() and eval() mode.
        opt: optimizer exposing ``step()`` and ``zero_grad()``.
        loss_func: callable ``loss_func(predictions, targets)`` returning a scalar tensor;
            used for both the training and the validation loss.
        dl_train: iterable of ``(xb, yb)`` training batches.
        dl_valid: iterable of ``(xb, yb)`` validation batches.

    Returns:
        ``(loss, accuracy)`` of the last epoch, each averaged over the validation
        samples (weighted by batch size). Both are ``nan`` when no epoch ran or the
        validation loader yielded no samples.
    """
    val_loss = val_acc = float("nan")
    for e in range(epochs):
        model.train()
        for xb, yb in dl_train:
            predb = model(xb)
            loss = loss_func(predb, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

        model.eval()
        count = 0
        loss_acc = 0.
        acc_acc = 0.
        with torch.no_grad():
            for xb, yb in dl_valid:
                predb = model(xb)
                loss = loss_func(predb, yb)
                size = len(xb)
                count += size
                # Weight by batch size so a shorter final batch does not skew the mean.
                loss_acc += loss.item() * size
                acc_acc += accuracy(predb, yb) * size
        if count:
            val_loss, val_acc = loss_acc / count, acc_acc / count
        else:
            # Nothing was validated this epoch; avoid dividing by zero.
            val_loss = val_acc = float("nan")
        print(f"Epoch {e}: loss={val_loss:.4f}, accuracy={val_acc:.2f}")
    return val_loss, val_acc

In [None]:
#| export
def get_dls(ds_train, ds_valid, bs, **kwargs):
    """Create the training and validation ``DataLoader`` pair.

    The training loader shuffles and uses batch size `bs`; the validation loader keeps
    the order and uses ``bs * 2``. Extra keyword arguments are forwarded to both loaders.
    """
    train_loader = DataLoader(ds_train, batch_size=bs, shuffle=True, **kwargs)
    valid_loader = DataLoader(ds_valid, batch_size=bs * 2, shuffle=False, **kwargs)
    return train_loader, valid_loader

In [None]:
dl_train, dl_valid = get_dls(ds_train, ds_valid, bs)
model, opt = get_model()

In [None]:
%time loss, acc = fit(5, model, opt, F.cross_entropy, dl_train, dl_valid)
print(loss, acc)

Epoch 0: loss=0.5993, accuracy=0.87
Epoch 1: loss=0.4047, accuracy=0.89
Epoch 2: loss=0.3487, accuracy=0.90
Epoch 3: loss=0.3219, accuracy=0.91
Epoch 4: loss=0.3072, accuracy=0.91
CPU times: user 20.2 s, sys: 112 ms, total: 20.3 s
Wall time: 6.41 s
0.30717602343559264 0.9095999980926514


In [None]:
dl_train, dl_valid = get_dls(ds_train, ds_valid, 64)
model, opt = get_model()

In [None]:
%time loss, acc = fit(5, model, opt, F.cross_entropy, dl_train, dl_valid)
print(loss, acc)

Epoch 0: loss=0.2867, accuracy=0.92
Epoch 1: loss=0.3424, accuracy=0.90
Epoch 2: loss=0.1913, accuracy=0.95
Epoch 3: loss=0.1677, accuracy=0.95
Epoch 4: loss=0.1459, accuracy=0.96
CPU times: user 54.3 s, sys: 206 ms, total: 54.6 s
Wall time: 12.7 s
0.1459468141913414 0.9612


# Export

In [None]:
import nbdev; nbdev.nbdev_export()