In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
#export
from exp.nb_02 import *
import torch.nn.functional as F

## Initial setup

### Data

In [3]:
# Make grayscale the default colormap for image plots.
mpl.rcParams['image.cmap'] = 'gray'

In [4]:
# get_data is not defined in this notebook — presumably it comes in through
# `from exp.nb_02 import *`. Its four return values are unpacked as the
# training and validation inputs/targets.
x_train, y_train, x_valid, y_valid = get_data()

In [13]:
n,m = x_train.shape  # the two dimensions of x_train: n rows, m columns
c = y_train.max()+1  # largest label value plus one — presumably the number of classes (a tensor, not a Python int)
nh = 50  # hidden-layer width handed to Model

In [14]:
# Display the first dimension of x_train.
n

50000

In [15]:
class Model(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    Args:
        n_in: number of input features.
        nh: number of hidden units.
        n_out: number of outputs.

    Attributes:
        layers: the three layers, in application order. Iterable and
            indexable like a list.
    """
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        # nn.ModuleList (instead of a plain Python list) registers the layers
        # with nn.Module, so parameters(), state_dict(), .to(), etc. see them.
        # It still supports `for l in model.layers`, so existing loops work.
        self.layers = nn.ModuleList([nn.Linear(n_in,nh),nn.ReLU(), nn.Linear(nh,n_out)])

    def forward(self, x):
        """Feed `x` through each layer in turn and return the final output."""
        # Implementing forward (rather than overriding __call__) leaves
        # nn.Module.__call__ in charge, so registered hooks still run.
        for l in self.layers: x=l(x)
        return x

In [16]:
# Size the output layer from the labels (c = max label + 1) instead of a
# hard-coded 10; int() turns the 0-d tensor into a plain Python int.
model = Model(m,nh,int(c))

In [17]:
# One forward pass over all of x_train at once (no minibatching yet).
pred = model(x_train)

In [23]:
def log_softmax(x):
    """Return log(softmax(x)) taken over the last dimension.

    Args:
        x: tensor of raw scores; the last dimension is the one normalised.

    Returns:
        Tensor of the same shape as `x` holding log-probabilities.
    """
    # Subtract the per-row maximum before exponentiating. The result is
    # mathematically unchanged, but exp() can no longer overflow to inf
    # (which previously produced inf/inf = nan), nor can every term
    # underflow to 0 at once.
    shifted = x - x.max(-1, keepdim=True)[0]
    return shifted - shifted.exp().sum(-1, keepdim=True).log()

In [24]:
# Turn the raw model outputs into log-probabilities, row by row.
sm_pred = log_softmax(pred)

In [25]:
# First three target labels.
y_train[:3]

tensor([5, 0, 4])

In [30]:
# For rows 0, 1, 2 pick columns 5, 0, 4 — the target labels shown by y_train[:3].
sm_pred[[0,1,2],[5,0,4]]

tensor([-2.1853, -2.4032, -2.1699], grad_fn=<IndexBackward>)

In [27]:
# Number of target labels (length of the first dimension of y_train).
y_train.shape[0]

50000

In [32]:
def nll(input, target):
    """Mean negative log-likelihood.

    Args:
        input: 2-d tensor of log-probabilities, one row per sample.
        target: 1-d integer tensor giving the column to pick in each row.

    Returns:
        Scalar tensor: minus the mean of the picked entries.
    """
    rows = range(target.shape[0])
    # Pair row i with column target[i] to pull out one value per sample.
    picked = input[rows, target]
    return -picked.mean()

In [33]:
# Negative log-likelihood of the targets under the model's log-probabilities.
loss = nll(sm_pred, y_train)

In [34]:
# Display the loss computed above.
loss

tensor(2.3159, grad_fn=<NegBackward>)

In [35]:
def log_softmax(x): return x - x.logsumexp(-1, keepdim=True)

## Basic training loop

In [37]:
# Switch to PyTorch's built-in cross-entropy; from here on the loss is computed
# from the raw model outputs rather than via the hand-written log_softmax + nll.
loss_func = F.cross_entropy

In [38]:
#export
def accuracy(out, yb):
    """Fraction of rows in `out` whose arg-max along dim 1 equals `yb`.

    Args:
        out: 2-d tensor of scores, one row per sample.
        yb: tensor of target indices, one per row of `out`.

    Returns:
        Scalar float tensor between 0 and 1.
    """
    predicted = torch.argmax(out, dim=1)
    hits = (predicted == yb).float()
    return hits.mean()

In [39]:
bs=64 #batch size

# Take the first bs rows as a minibatch and run them through the model.
xb = x_train[0:bs]
preds = model(xb)
# Show one row of raw outputs together with the shape of the whole batch output.
preds[0], preds.shape

(tensor([-0.1154, -0.0852,  0.1446, -0.0250,  0.1456,  0.1460,  0.2136, -0.0887,
         -0.0673, -0.0520], grad_fn=<SelectBackward>), torch.Size([64, 10]))

In [40]:
# Targets for the same first minibatch, then the loss of the untrained model on it.
yb = y_train[:bs]
loss_func(preds, yb)

tensor(2.3109, grad_fn=<NllLossBackward>)

In [41]:
# Accuracy on that same minibatch, before any training.
accuracy(preds, yb)

tensor(0.0781)

In [49]:
lr = 0.5  # learning rate: multiplies each gradient in the manual update step
epochs = 1  # number of passes of the outer training loop

In [59]:
# Hand-written minibatch SGD over x_train / y_train.
n_batches = (n-1)//bs + 1  # ceil(n/bs); the final batch may be shorter than bs
for epoch in range(epochs):
    for i in range(n_batches):
        start_i, end_i = i*bs, (i+1)*bs
        xb, yb = x_train[start_i:end_i], y_train[start_i:end_i]

        # Forward pass and loss, then back-propagate to fill the .grad fields.
        loss = loss_func(model(xb), yb)
        loss.backward()

        # Apply the update with autograd switched off so the step itself
        # is not recorded in the graph.
        with torch.no_grad():
            for l in model.layers:
                # Skip layers that have no `weight` attribute (nothing to update).
                if not hasattr(l, 'weight'): continue
                for p in (l.weight, l.bias):
                    p -= p.grad*lr
                    # Gradients accumulate across backward() calls, so clear
                    # them before the next batch.
                    p.grad.zero_()

In [60]:
# NOTE: xb/yb still hold whatever the training loop assigned last, i.e. the
# final minibatch — so this is loss/accuracy on a single training batch,
# not a validation score.
loss_func(model(xb),yb), accuracy(model(xb),yb)

(tensor(0.0261, grad_fn=<NllLossBackward>), tensor(1.))

## Using parameters and optim

In [62]:
class Model(nn.Module):
    """Two-layer fully connected network with a ReLU in between.

    Args:
        n_in: number of input features.
        nh: number of hidden units.
        n_out: number of outputs.

    Attributes:
        l1: first linear layer (n_in -> nh).
        l2: second linear layer (nh -> n_out).
    """
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in,nh)
        self.l2 = nn.Linear(nh,n_out)

    def forward(self, x):
        """Compute l2(relu(l1(x)))."""
        # forward (not __call__) is the method nn.Module expects subclasses to
        # define; nn.Module.__call__ wraps it and runs any registered hooks.
        return self.l2(F.relu(self.l1(x)))

In [63]:
# Size the output layer from the labels (c = max label + 1) instead of a
# hard-coded 10; int() turns the 0-d tensor into a plain Python int.
model = Model(m,nh,int(c))

In [68]:
# List each direct sub-module of the model together with its attribute name.
for name, l in model.named_children():
    print(f"{name} : {l}")

l1 : Linear(in_features=784, out_features=50, bias=True)
l2 : Linear(in_features=50, out_features=10, bias=True)


In [67]:
?named_children()

Object `named_children()` not found.
