In [None]:
#| default_exp training

In [None]:
#|export
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
from pathlib import Path
from torch import tensor,nn
import torch.nn.functional as F
import fastcore.all as fc

In [None]:
from functools import partial

# Mini Batch Training 

In [None]:
from fastcore.test import test_close

# Notebook-wide display settings and a fixed torch RNG seed
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray'

# Location of the gzipped pickle (presumably MNIST, going by the file name), relative to the working directory
path_data = Path('../../course22p2/data')
path_gz = path_data/'mnist.pkl.gz'
# NOTE(review): pickle.load can execute arbitrary code; only open this file if it comes from a trusted source.
# The third element of the unpickled tuple is discarded (`_`).
with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
# Convert the four loaded objects to torch tensors
x_train, y_train, x_valid, y_valid = map(tensor, [x_train, y_train, x_valid, y_valid])

## Datasets and DataLoaders

In [None]:


def dl(x_train, bs = 64):
    """Lazily yield consecutive slices of `x_train`, each at most `bs` items long.

    The last slice is shorter when `len(x_train)` is not a multiple of `bs`.
    """
    yield from (x_train[start:start+bs] for start in range(0, len(x_train), bs))

In [None]:
# Bind `x_train` as dl's first argument; every call to `f()` creates a fresh batch generator
f = partial(dl, x_train)

In [None]:
# Show the first batch produced by a new generator (dl's default bs=64)
next(f())

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
#| export 
class dataset():
    """Pairs inputs `x` with targets `y` so that both are indexed together."""

    def __init__(self, x, y):
        # store_attr() keeps the constructor arguments as `self.x` and `self.y`
        fc.store_attr()

    def __len__(self):
        """Number of items, taken from `x`."""
        return len(self.x)

    def __getitem__(self,i):
        """Return the tuple `(x[i], y[i])`; `i` is handed to both indexers unchanged."""
        return (self.x[i], self.y[i])

In [None]:
# Wrap the training and validation tensors so inputs and targets are indexed together
train_ds = dataset(x_train, y_train)
valid_ds = dataset(x_valid, y_valid)

In [None]:
#| export
class DL:
    """Minimal sequential data loader: walks `ds` front to back in slices of `bs` items."""

    def __init__(self, ds, bs):
        # store_attr() keeps the constructor arguments as `self.ds` and `self.bs`
        fc.store_attr()

    def __iter__(self):
        """Yield `ds[start:start+bs]` for each batch start; the final batch may be shorter."""
        yield from (self.ds[start:start+self.bs] for start in range(0, len(self.ds), self.bs))
        
        
        

In [None]:
# Training batches of 64 items, served in dataset order (DL does not shuffle)
train_dl = DL(train_ds, bs = 64)

In [None]:
# Validation batches of 512 items, served in dataset order
valid_dl = DL(valid_ds, bs = 512)

In [None]:
# Sanity check: print the shapes of the first training batch only, then stop
for xb, yb in train_dl:
    print(xb.shape, yb.shape)
    break

torch.Size([64, 784]) torch.Size([64])


## Loss Function 

In [None]:
#| export 
# Loss used by the training code below; F.cross_entropy takes raw logits and integer class targets
loss_func = F.cross_entropy

In [None]:
# (preds.argmax(1) == yb).float().mean()

In [None]:
#|export
# Exported because the exported `report` calls it; without the directive the
# generated module would reference an undefined name.
def accuracy(preds, yb):
    """Fraction of rows in `preds` whose arg-max over dim 1 equals the matching entry of `yb`.

    Returns a 0-dim float tensor.
    """
    return (preds.argmax(dim=1) == yb).float().mean()

In [None]:
#|export
def report(loss, preds, yb,train="training"):
    """Print `loss` and the accuracy of `preds` against `yb`, prefixed by the `train` label."""
    message = f' {train} Loss: {loss:.2f}, Accuracy: {accuracy(preds, yb):.2f}'
    print(message)

## Model 

In [None]:
# Model: 784 inputs -> 50 hidden units with ReLU -> 10 outputs
model = nn.Sequential(nn.Linear(784,50), nn.ReLU(),nn.Linear(50,10))

In [None]:
# NOTE(review): displays whatever `yb` currently lives in the kernel. The recorded output has
# 16 labels rather than the 64 of the batch printed earlier, so this cell appears to have been
# run out of order - confirm it still makes sense after Restart & Run All.
yb

tensor([9, 3, 7, 0, 9, 0, 8, 5, 5, 2, 4, 5, 0, 8, 4, 8])

## Training Loop

In [None]:
# Training loop: plain mini-batch gradient descent written out by hand
epochs = 3
lr = 0.1

for epoch in range(epochs):
    for xb, yb in train_dl:
        preds = model(xb)
        # The logits are already (batch, classes), so they go to the loss unchanged,
        # exactly as `fit` does further down.
        loss = loss_func(preds, yb)
        loss.backward()

        with torch.no_grad():
            # Update the weights in place without recording the step in the autograd graph
            for params in model.parameters():
                params -= lr * params.grad

        # Clear the gradients so the next batch does not accumulate onto this one
        model.zero_grad()
                
        
        
        


In [None]:
# Loss of the last mini-batch processed by the training loop above
loss

In [None]:
# NOTE(review): same expression as the previous cell; it only shows something new if the
# training-loop cell was re-run in between, which Restart & Run All will not do.
loss

In [None]:
# NOTE(review): third display of the same `loss` value; candidate for removal.
loss

## Optimiser

In [None]:
# list(model.parameters())[0].grad.data.zero_()

In [None]:

# Switch the model to evaluation mode; `fit` calls model.train() again at the start of every epoch
model.eval()

In [None]:
#| export
class opt():
    """Minimal SGD optimiser: `step` applies `p -= lr * p.grad`, `zero_grad` clears gradients.

    Parameters whose gradient is `None` (never part of a backward pass, or frozen)
    are skipped by both methods instead of raising.
    """

    def __init__(self, params , lr=0.5):
        # Materialise `params` so a one-shot iterator such as `model.parameters()`
        # can be walked again on every `step` / `zero_grad` call.
        self.params,self.lr=list(params),lr

    def zero_grad(self):
        """Zero every existing gradient in place."""
        with torch.no_grad():
            for p in self.params:
                if p.grad is not None: p.grad.zero_()

    def step(self):
        """Apply one gradient-descent update to each parameter that has a gradient."""
        # no_grad: the in-place update must not be tracked by autograd
        with torch.no_grad():
            for p in self.params:
                if p.grad is None: continue
                p -= self.lr * p.grad
    

In [None]:
# class Optimizer():
#     def __init__(self, params, lr=0.5): self.params,self.lr=list(params),lr

#     def step(self):
#         with torch.no_grad():
#             for p in self.params: p -= p.grad * self.lr

#     def zero_grad(self):
#         for p in self.params: p.grad.data.zero_()

In [None]:
#| export
def fit(model, epochs=3, lr = 0.2):
    """Train `model` with `opt` on `train_dl`, evaluating on `valid_dl` after every epoch.

    Args:
        model: module to train; it is updated in place.
        epochs: number of full passes over `train_dl`.
        lr: learning rate handed to the optimiser.

    Relies on the module-level `train_dl`, `valid_dl`, `loss_func`, `opt` and `report`.
    Prints one training line and one validation line per epoch and returns None.
    """
    # Forward `lr` so the caller's learning rate is the one actually used
    o = opt(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            preds = model(xb)
            loss = loss_func(preds, yb)

            loss.backward()
            o.step()
            o.zero_grad()

        # Reports the final training batch of the epoch, not an epoch average
        report(loss, preds, yb, "training ")

        model.eval()
        # No gradients are needed while evaluating
        with torch.no_grad():
            for xb, yb in valid_dl:
                preds = model(xb)
                loss = loss_func(preds, yb)

        # Likewise, only the final validation batch is reported
        report(loss, preds, yb, "validation ")
            
        

In [None]:
# Run training with `fit`'s default arguments; prints one training and one validation line per epoch
fit(model)

 training  Loss: 0.02, Accuracy: 1.00
 validation  Loss: 0.18, Accuracy: 0.97
 training  Loss: 0.01, Accuracy: 1.00
 validation  Loss: 0.18, Accuracy: 0.97
 training  Loss: 0.01, Accuracy: 1.00
 validation  Loss: 0.20, Accuracy: 0.97


In [None]:
#|export
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler, BatchSampler

# Export 

In [None]:
#| hide 
# Run nbdev's export step for this notebook (the target module is named by `default_exp` in the first cell)
import nbdev; nbdev.nbdev_export()