# MyModule -> Foundations

In [None]:
%load_ext autoreload 
%autoreload 2

In [None]:
import os
import gzip
import numpy as np
from itertools import islice
import torch
from torch import tensor
from matplotlib import pyplot as plt
from fastcore.test import test_eq, test_close
from numba import njit
import pdb
from torch import nn
import torch.nn.functional as F

def load_mnist(path, kind='train'):
    """Read one gzip-compressed MNIST-format split stored under `path`.

    `kind` is the filename prefix of the split (e.g. 'train' or 't10k').
    Returns `(images, labels)`: `images` is a uint8 array with one row of
    784 values per label, `labels` is a flat uint8 array.
    """
    labels_file = '%s-labels-idx1-ubyte.gz' % kind
    images_file = '%s-images-idx3-ubyte.gz' % kind

    # The first 8 bytes of the label file are skipped as header.
    with gzip.open(os.path.join(path, labels_file), 'rb') as fh:
        raw_labels = fh.read()
    labels = np.frombuffer(raw_labels, dtype=np.uint8, offset=8)

    # The first 16 bytes of the image file are skipped as header; the rest
    # is reshaped to one 784-value row per label.
    with gzip.open(os.path.join(path, images_file), 'rb') as fh:
        raw_images = fh.read()
    flat_pixels = np.frombuffer(raw_images, dtype=np.uint8, offset=16)
    images = flat_pixels.reshape(len(labels), 784)

    return images, labels

# Class names of the Fashion-MNIST dataset (presumably ordered so that the
# list position matches the integer label — confirm against the dataset docs).
labels = ['T-shirt/top', 
          'Trouser',
          'Pullover',
          'Dress',
          'Coat',
          'Sandal',
          'Shirt',
          'Sneaker',
          'Bag',
          'Ankle boot'
         ]

# Read the 'train' split and the 't10k' split from the same data directory.
x_train, y_train = load_mnist('../../data/fashion_mnist', kind='train')
x_valid, y_valid = load_mnist('../../data/fashion_mnist', kind='t10k')

# Cast the pixel arrays to float32 and divide by 255 to rescale them.
x_train, x_valid = x_train.astype('float32') / 255., x_valid.astype('float32') / 255.
# Wrap all four NumPy arrays as torch tensors (the label arrays are not cast here).
x_train, x_valid, y_train, y_valid = map(tensor, (x_train, x_valid, y_train, y_valid))

## MyModule

In [None]:
class MyModule:
    """Minimal hand-rolled module container.

    Every public attribute (name not starting with '_') assigned on an
    instance is recorded in `self._module`, in assignment order.
    `parameters()` walks those recorded children; calling the instance
    dispatches to `forward`, which subclasses must override.
    """

    def __init__(self):
        # Leading underscore keeps the registry itself out of the registry.
        self._module = {}

    def __setattr__(self, k, v):
        """Record public attributes in the registry, then set them normally."""
        if not k.startswith('_'): self._module[k] = v
        super().__setattr__(k, v)

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """Compute the output for `x`; must be overridden by subclasses."""
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError("Not implemented")

    def __repr__(self):
        return f'{self._module}'

    def parameters(self):
        """Yield the parameters of every registered child, in registration order.

        Registered values without a callable `parameters` attribute (plain
        numbers, strings, ...) are skipped instead of raising AttributeError.
        """
        for l in self._module.values():
            child_parameters = getattr(l, 'parameters', None)
            if callable(child_parameters): yield from child_parameters()

In [None]:
m, nh, c = 784, 50, 10

In [None]:
class Model(MyModule):
    """Linear -> ReLU -> Linear network built on the hand-rolled `MyModule`.

    m: input features, nh: hidden units, c: output units.
    """

    def __init__(self, m, nh, c):
        super().__init__()
        # Assignment order determines the order in which children are recorded.
        self.l1 = nn.Linear(m, nh)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(nh, c)

    def forward(self, x):
        """Run `x` through l1, relu and l2 in turn."""
        x = self.l1(x)
        x = self.relu(x)
        return self.l2(x)

In [None]:
model = Model(m, nh, c)

In [None]:
model

{'l1': Linear(in_features=784, out_features=50, bias=True), 'relu': ReLU(), 'l2': Linear(in_features=50, out_features=10, bias=True)}

In [None]:
for p in model.parameters():
    print(p.shape)

torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [None]:
model(x_valid).shape

torch.Size([10000, 10])

In [None]:
class Model(nn.Module):
    """Linear -> ReLU -> Linear network as a regular `nn.Module`.

    m: input features, nh: hidden units, c: output units.
    """

    def __init__(self, m, nh, c):
        super().__init__()
        self.l1 = nn.Linear(m, nh)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(nh, c)

    def forward(self, x):
        """Return l2(relu(l1(x)))."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

In [None]:
model = Model(m, nh, c)

In [None]:
model

Model(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
for p in model.parameters():
    print(p.shape)

torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [None]:
model(x_valid).shape

torch.Size([10000, 10])

## Register Modules

In [None]:
layers = [nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, c)]

In [None]:
from functools import reduce

In [None]:
class Model(nn.Module):
    """Module that registers each entry of the global `layers` list by hand.

    A plain Python list stored on an `nn.Module` is not tracked as a set of
    sub-modules, so every layer is registered explicitly as `layer_<index>`.
    """

    def __init__(self):
        super().__init__()
        self.layers = layers
        for idx, layer in enumerate(self.layers):
            self.register_module(f'layer_{idx}', layer)

    def forward(self, x):
        """Feed `x` through the layers in list order and return the result."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

In [None]:
model = Model(); model

Model(
  (layer_0): Linear(in_features=784, out_features=50, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
for p in model.parameters():
    print(p.shape)

torch.Size([50, 784])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [None]:
list(model.named_children())

[('layer_0', Linear(in_features=784, out_features=50, bias=True)),
 ('layer_1', ReLU()),
 ('layer_2', Linear(in_features=50, out_features=10, bias=True))]

In [None]:
model(x_valid).shape

torch.Size([10000, 10])

## `nn.ModuleList`

In [None]:
class Model(nn.Module):
    """Linear -> ReLU -> Linear network whose layers live in an `nn.ModuleList`.

    m: input features, nh: hidden units, c: output units.
    """

    def __init__(self, m, nh, c):
        super().__init__()
        stack = [nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, c)]
        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        """Apply every layer of `self.layers` to `x`, in order."""
        return reduce(lambda acc, layer: layer(acc), self.layers, x)

In [None]:
model = Model(m, nh, c); model

Model(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [None]:
list(model.named_children())

[('layers',
  ModuleList(
    (0): Linear(in_features=784, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=10, bias=True)
  ))]

In [None]:
model(x_valid).shape

torch.Size([10000, 10])

## `nn.Sequential`

In [None]:
model = nn.Sequential(nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, c))

In [None]:
model

Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
model(x_valid).shape

torch.Size([10000, 10])

In [None]:
list(model.named_children())

[('0', Linear(in_features=784, out_features=50, bias=True)),
 ('1', ReLU()),
 ('2', Linear(in_features=50, out_features=10, bias=True))]

## Training 

In [None]:
def accuracy(preds, act):
    """Return the fraction of rows whose arg-max over the last dim equals `act`."""
    predicted = preds.argmax(dim=-1)
    hits = predicted == act
    return hits.float().mean()

In [None]:
def report(loss, preds, act):
    """Print `loss` together with the accuracy of `preds` against `act`."""
    acc = accuracy(preds, act)
    print(f"loss: {loss}, accuracy: {acc}")

In [None]:
epochs = 3
bs = 64
lr = 5e-3
loss_func = F.cross_entropy

In [None]:
def train(epochs, lr):
    """Train the global `model` on `x_train`/`y_train` with manual SGD updates.

    epochs: number of passes over the training data.
    lr:     step size used in the `p -= p.grad * lr` update.

    Batches are consecutive slices of size `bs` (a global). After each epoch
    the loss and accuracy of the final batch of that epoch are printed.
    """
    n_samples = len(x_train)
    for _ in range(epochs):
        for start in range(0, n_samples, bs):
            batch = slice(start, start + bs)
            xb = x_train[batch]
            yb = y_train[batch]
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            # The update itself must not be recorded by autograd.
            with torch.no_grad():
                for param in model.parameters():
                    param -= param.grad * lr
                model.zero_grad()
        report(loss.item(), preds, yb)

In [None]:
train(epochs, lr)

loss: 1.0674773454666138, accuracy: 0.65625
loss: 0.8739128708839417, accuracy: 0.71875
loss: 0.7890669703483582, accuracy: 0.71875


## Optimizer (`Opt`)

In [None]:
class Opt:
    """Bare-bones SGD optimizer over a fixed collection of parameters.

    params: iterable of tensors to update; materialised into a list so it
            can be traversed again on every call.
    lr:     step size applied by `step`.
    """

    def __init__(self, params, lr=0.001):
        self.params = list(params)
        self.lr = lr

    def step(self):
        """Apply `p -= p.grad * self.lr` to every parameter that has a gradient."""
        with torch.no_grad():
            for p in self.params:
                # Use the instance's own learning rate, not a module-level `lr`.
                # Parameters that never received a gradient are left untouched.
                if p.grad is not None: p -= p.grad * self.lr

    def zero_grad(self):
        """Reset the gradient of every parameter to zero, in place."""
        for p in self.params:
            # `.grad` is None until the first backward pass reaches `p`.
            if p.grad is not None: p.grad.zero_()

In [None]:
# Pass the configured learning rate explicitly instead of relying on Opt's default.
opt = Opt(model.parameters(), lr=lr)

In [None]:
# Training loop where the parameter update and gradient reset are delegated
# to `opt` instead of being written out inline.
for epoch in range(epochs):
    for i in range(0, len(x_train), bs):
        # Consecutive, non-shuffled slice of `bs` samples.
        s = slice(i, i + bs)
        xb, yb = x_train[s], y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only, not an epoch-wide average.
    report(loss.item(), preds, yb)

loss: 0.7346241474151611, accuracy: 0.75
loss: 0.6982141137123108, accuracy: 0.8125
loss: 0.672683596611023, accuracy: 0.8125


In [None]:
from torch import optim

In [None]:
def get_model(lr=5e-3):
    """Build a fresh Linear -> ReLU -> Linear model and an SGD optimizer for it.

    lr: learning rate handed to `optim.SGD`; defaults to 5e-3, the value
        that used to be hard-coded here.

    Layer sizes come from the globals `m`, `nh` and `c`.
    Returns `(model, optimizer)`.
    """
    model = nn.Sequential(nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, c))
    return model, optim.SGD(model.parameters(), lr=lr)

In [None]:
model, opt = get_model()
loss_func(model(x_train), y_train)

tensor(2.3213, grad_fn=<NllLossBackward0>)

In [None]:
# Training loop over consecutive slices of the raw tensors; `model` and `opt`
# are whatever the preceding `get_model()` call returned.
for epoch in range(epochs):
    for i in range(0, len(x_train), bs):
        s = slice(i, i + bs)
        xb, yb = x_train[s], y_train[s]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only.
    report(loss.item(), preds, yb)

loss: 1.0670915842056274, accuracy: 0.6875
loss: 0.86658775806427, accuracy: 0.71875
loss: 0.7830334901809692, accuracy: 0.71875


## Dataset

In [None]:
class Dataset:
    """Pair two equally indexed collections so they can be indexed together."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        # Length is defined by `x` alone.
        return len(self.x)

    def __getitem__(self, idx):
        """Return `(x[idx], y[idx])`; `idx` may be anything both collections accept."""
        inputs, targets = self.x[idx], self.y[idx]
        return inputs, targets

In [None]:
train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

In [None]:
xb, yb = train_ds[:5]

In [None]:
assert len(train_ds) == len(x_train)
assert xb.shape == (5, 28*28)
assert yb.shape == (5,)

In [None]:
model, opt = get_model()

In [None]:
# Training loop that obtains each mini-batch by slicing `train_ds`, which
# returns the inputs and the targets together.
for epoch in range(epochs):
    for i in range(0, len(train_ds), bs):
        xb, yb = train_ds[i: i+bs]
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only; `loss` is passed as a tensor here.
    report(loss, preds, yb)

loss: 1.0604238510131836, accuracy: 0.625
loss: 0.8579964637756348, accuracy: 0.6875
loss: 0.7736406326293945, accuracy: 0.71875


## DataLoader

In [None]:
class DataLoader:
    """Iterate over a dataset in consecutive, fixed-size batches.

    ds: object supporting `len()` and slice indexing.
    bs: batch size; the final batch may be shorter.
    """

    def __init__(self, ds, bs):
        self.ds = ds
        self.bs = bs

    def __iter__(self):
        """Yield `ds[i : i + bs]` for i = 0, bs, 2*bs, ..."""
        # Slice with the instance's own batch size, not a module-level `bs`.
        for i in range(0, len(self.ds), self.bs): yield self.ds[i: i + self.bs]

In [None]:
train_dl = DataLoader(train_ds, bs)

In [None]:
# Training loop that takes its mini-batches straight from iterating `train_dl`.
for epoch in range(epochs):
    for xb, yb in train_dl:
        preds = model(xb)
        loss = loss_func(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Reports the last mini-batch of the epoch only; `loss` is passed as a tensor here.
    report(loss, preds, yb)

loss: 0.724354088306427, accuracy: 0.75
loss: 0.6934358477592468, accuracy: 0.8125
loss: 0.6712349653244019, accuracy: 0.8125


## Random Sampling

### Sampler

In [None]:
import random; random.shuffle([1, 2, 3, 4, 5])

In [None]:
class Sampler:
    """Produce the indices 0 .. len(ds)-1, optionally in random order.

    ds:      only its length is used, and it is read once at construction.
    shuffle: when true, every new iteration gets a fresh random permutation.
    """

    def __init__(self, ds, shuffle=False):
        self.n, self.shuffle = len(ds), shuffle

    def __iter__(self):
        order = [*range(self.n)]
        if self.shuffle:
            random.shuffle(order)
        return iter(order)

In [None]:
ss = Sampler(train_ds)

In [None]:
it = iter(ss)

In [None]:
for i in range(5): print(next(it))

0
1
2
3
4


In [None]:
list(islice(it, 5))

[5, 6, 7, 8, 9]

In [None]:
random_sampler = Sampler(train_ds, shuffle=True)

In [None]:
it = iter(random_sampler)

In [None]:
for i in range(5): print(next(it))

7447
10894
41340
30521
15198


In [None]:
list(islice(it, 5))

[47048, 58428, 11329, 36380, 48454]

## Batch Sampler

In [None]:
import fastcore.all as fc

In [None]:
class BatchSampler:
    """Group the items produced by `sampler` into chunks of `bs`.

    `drop_last` is forwarded to `fc.chunked` unchanged.
    """

    def __init__(self, sampler, bs, drop_last=False):
        # Stores sampler, bs and drop_last as attributes of the same name.
        fc.store_attr()

    def __iter__(self):
        for batch in fc.chunked(self.sampler, self.bs, self.drop_last):
            yield batch

In [None]:
batch_sampler = BatchSampler(Sampler(train_ds, shuffle=True), 4)

In [None]:
list(islice(batch_sampler, 5))

[[18767, 24837, 40120, 12476],
 [50289, 53807, 15181, 54935],
 [44117, 33315, 13845, 42563],
 [5075, 4980, 29009, 28210],
 [50110, 31810, 53922, 46702]]

In [None]:
class DataLoader:
    """Yield `ds[batch]` for every batch of indices from `batch_sampler`.

    The whole index batch is used as a single index into `ds`.
    """

    def __init__(self, ds, batch_sampler):
        # Stores ds and batch_sampler as attributes of the same name.
        fc.store_attr()

    def __iter__(self):
        for batch_idxs in self.batch_sampler:
            yield self.ds[batch_idxs]

In [None]:
train_dl = DataLoader(train_ds, BatchSampler(Sampler(train_ds, shuffle=True), bs))
model, opt = get_model()

In [None]:
def fit():
    """Train the global `model` for `epochs` passes over `train_dl`.

    Each batch gets a forward pass, a backward pass and one `opt` step
    followed by a gradient reset. After every epoch the loss and accuracy
    of that epoch's final batch are printed.
    """
    for _ in range(epochs):
        for inputs, targets in train_dl:
            outputs = model(inputs)
            batch_loss = loss_func(outputs, targets)
            batch_loss.backward()
            opt.step()
            opt.zero_grad()
        report(batch_loss, outputs, targets)

In [None]:
def collate(b):
    """Turn an iterable of `(x, y)` samples into a pair of stacked tensors.

    `b` is consumed exactly once, so a generator is acceptable.
    """
    inputs, targets = zip(*b)
    stacked_inputs = torch.stack(inputs)
    stacked_targets = torch.stack(targets)
    return stacked_inputs, stacked_targets

In [None]:
class DataLoader:
    """Fetch samples one index at a time and merge each batch with `collate_fn`.

    For every batch of indices from `batch_sampler`, a lazy stream of
    `ds[i]` samples is handed to `collate_fn`, and its result is yielded.
    """

    def __init__(self, ds, batch_sampler, collate_fn=collate):
        # Stores ds, batch_sampler and collate_fn as attributes of the same name.
        fc.store_attr()

    def __iter__(self):
        for batch_idxs in self.batch_sampler:
            samples = (self.ds[i] for i in batch_idxs)
            yield self.collate_fn(samples)

In [None]:
train_dl = DataLoader(train_ds, BatchSampler(Sampler(train_ds, shuffle=True), bs))

In [None]:
xb, yb = next(iter(train_dl))
xb.shape, yb.shape

(torch.Size([64, 784]), torch.Size([64]))

## Multiprocessing DataLoader

In [None]:
import torch.multiprocessing as mp

In [None]:
def square(x):
    """Return `x` raised to the power of two."""
    return pow(x, 2)

In [None]:
with mp.Pool(2) as process:
    res = process.map(square, torch.tensor([2, 4, 5, 6, 7, 8]))

In [None]:
train_ds[[2, 3]]

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([0, 3], dtype=torch.uint8))

In [None]:
train_ds.__getitem__([2, 3])

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([0, 3], dtype=torch.uint8))

In [None]:
class DataLoader:
    """Load batches through a pool of `n_workers` worker processes.

    Each batch of indices from `batch_sampler` is passed to
    `ds.__getitem__` in a worker. `Pool.map` returns only once every batch
    has been fetched, so nothing is yielded until the whole pass is loaded.
    """

    def __init__(self, ds, batch_sampler, n_workers=1):
        # Stores ds, batch_sampler and n_workers as attributes of the same name.
        fc.store_attr()

    def __iter__(self):
        with mp.Pool(self.n_workers) as process:
            batches = process.map(self.ds.__getitem__, self.batch_sampler)
            yield from batches

In [None]:
train_dl = DataLoader(train_ds, BatchSampler(Sampler(train_ds, shuffle=True), bs), 2)

In [None]:
it = iter(train_dl)
xb, yb = next(it)
xb.shape, yb.shape

(torch.Size([64, 784]), torch.Size([64]))

## PyTorch DataLoader

In [None]:
from torch.utils.data import DataLoader, RandomSampler, BatchSampler, SequentialSampler

In [None]:
train_samp = BatchSampler(RandomSampler(train_ds), batch_size=bs, drop_last=False)
valid_samp = BatchSampler(SequentialSampler(valid_ds), batch_size=bs*2, drop_last=False)

In [None]:
train_dl = DataLoader(train_ds, batch_sampler=train_samp)
valid_dl = DataLoader(valid_ds, batch_sampler=valid_samp)

In [None]:
it = iter(train_dl)
xb, yb = next(it)
xb.shape, yb.shape

(torch.Size([64, 784]), torch.Size([64]))

In [None]:
it = iter(valid_dl)
xb, yb = next(it)
xb.shape, yb.shape

(torch.Size([128, 784]), torch.Size([128]))

In [None]:
train_dl = DataLoader(train_ds, batch_size=bs, )