In [3]:
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import optim
from torch.utils.data import DataLoader, RandomSampler

In [4]:
# download mnist dataset
mnist_train = torchvision.datasets.MNIST(root='./data', train=True, download=True)
mnist_valid = torchvision.datasets.MNIST(root='./data', train=False, download=True)
x_train, y_train = mnist_train.data, mnist_train.targets
x_valid, y_valid = mnist_valid.data, mnist_valid.targets

### Dataset & Dataloader

- for details on the evolution of dataset & dataloader, see notebook **1. PyTorch Dataset & DataLoader**
- note that we added `self.x[i].view(-1)` below to flatten the input image (2D) into 1-dimensional array

In [5]:
class Dataset():
    def __init__(self, x, y): self.x,self.y = x,y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): 
        return self.x[i].view(-1).float(), self.y[i]

    
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))


# create dataset object around MNIST dataset
train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

# create dataloaders
bs = 64
dl_train, dl_valid = get_dls(train_ds, valid_ds, bs)

### model

- for detaiils on construction of model using nn.Module, see notebook **2. PyTorch Dataset & DataLoader**

In [6]:
# model
model = nn.Sequential(
            nn.Linear(784,100),
            nn.ReLU(),
            nn.Linear(100,10),
            nn.Sigmoid()
        )
model

Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
  (3): Sigmoid()
)

### optimizer & loss function
- for optimizer & loss function, we will use native PyTorch functions
- in PyTorch, the `torch.nn.functional` modules is often just referred to as `F`

In [7]:
# optimizer
opt = optim.SGD(model.parameters(), lr=0.1)
# loss function
loss_func = F.cross_entropy

### evaluation metrics
- for this toy mnist problem, we will just use accuracy as the metric of evaluation

In [8]:
def accuracy(out, yb):
    return (torch.argmax(out, dim=1)==yb).float().mean()

### basic training loop

In [9]:
epochs=5

In [10]:
def fit(epochs, model, loss_func, opt):
    for epoch in range(epochs):
        print('epoch {}'.format(epoch))
        for xb, yb in dl_train:
            # forward pass; compute prediction
            pred = model(xb)
            # compute loss
            loss = loss_func(pred, yb)
            # backward pass; compute gradients
            loss.backward()
            # update model parameters
            opt.step()
            # zero gradients
            opt.zero_grad()

In [11]:
fit(5, model, loss_func, opt)

epoch 0
epoch 1
epoch 2
epoch 3
epoch 4


In [12]:
xb, yb = next(iter(dl_train))
loss,acc = loss_func(model(xb), yb), accuracy(model(xb), yb)
print(loss, acc)

tensor(1.7211, grad_fn=<NllLossBackward>) tensor(0.4531)


### add validation

- it is import to evaluate the performance of the model on the validation dataset as we train our model to detect overfitting
- below, we modify our basic training loop to account for this

In [13]:
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        # set to training mode for layers such as batchnorm, dropout
        model.train()
        for xb, yb in train_dl:
            # forward pass; compute prediction
            pred = model(xb)
            # compute loss
            loss = loss_func(pred, yb)
            # backward pass; compute gradients
            loss.backward()
            # update model parameters
            opt.step()
            # zero gradients
            opt.zero_grad()
        
        model.eval()
        with torch.no_grad():
            tot_loss, tot_acc = 0., 0.
            for xb, yb in valid_dl:
                pred = model(xb)
                tot_loss += loss_func(pred, yb)
                tot_acc += accuracy(pred, yb)
        nv = len(valid_dl)
        print(epoch, tot_loss/nv, tot_acc/nv)
    return tot_loss/nv, tot_acc/nv

In [14]:
loss, acc = fit(5, model, loss_func, opt, dl_train, dl_valid)

0 tensor(1.7051) tensor(0.5022)
1 tensor(1.6714) tensor(0.5475)
2 tensor(1.6568) tensor(0.5829)
3 tensor(1.6440) tensor(0.6228)
4 tensor(1.6048) tensor(0.6376)


### Databunch & Learner

- In the code above, `fit(...)` still requires us passing in many objects, this is inconvenient & easy to make a mistake.
- In FastAI, databunch & learner classes are wrapper classes around dataloaders and model + optimizer, respectively create to simplify the training code even further
- The code below shows the overall concept of classes `DataBunch` and `Learner`
- Note that the parameter c is for number of classes (for classification problems)

In [28]:
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))


def get_model(data, lr=0.1, nh=50):
    m = next(iter(data.train_dl))[0].shape[1]
    model = nn.Sequential(nn.Linear(m,nh), nn.ReLU(), nn.Linear(nh,data.c), nn.Sigmoid())
    return model, optim.SGD(model.parameters(), lr=lr)

In [29]:
class DataBunch():
    def __init__(self, train_dl, valid_dl, c=None):  
        self.train_dl, self.valid_dl, self.c = train_dl, valid_dl, c
        
    @property
    def train_ds(self): return self.train_dl.dataset

    @property
    def valid_ds(self): return self.valid_dl.dataset
    
    
class Learner():
    def __init__(self, model, opt, loss_func, data):
        self.model, self.opt, self.loss_func, self.data = model, opt, loss_func, data

In [30]:
# instantiate DataBunch object
data = DataBunch(*get_dls(train_ds, valid_ds, bs=64), c=y_train.max().item()+1)

# instantiate Learner object
learn = Learner(*get_model(data), loss_func, data)

- after all these abstractions & wrappers, the basic training loop simplifies to the following

In [31]:
def fit(epochs, learn):
    for epoch in range(epochs):
        learn.model.train()
        for xb,yb in learn.data.train_dl:
            loss = learn.loss_func(learn.model(xb), yb)
            loss.backward()
            learn.opt.step()
            learn.opt.zero_grad()

        learn.model.eval()
        with torch.no_grad():
            tot_loss,tot_acc = 0.,0.
            for xb,yb in learn.data.valid_dl:
                pred = learn.model(xb)
                tot_loss += learn.loss_func(pred, yb)
                tot_acc  += accuracy (pred,yb)
        nv = len(learn.data.valid_dl)
        print(epoch, tot_loss/nv, tot_acc/nv)
    return tot_loss/nv, tot_acc/nv

- the `fit` function simplifies to a 1-liner

In [32]:
loss, acc = fit(5, learn)

0 tensor(2.0595) tensor(0.0970)
1 tensor(2.0701) tensor(0.0970)
2 tensor(1.9802) tensor(0.2095)
3 tensor(1.8819) tensor(0.4069)
4 tensor(1.8772) tensor(0.3447)
