In [42]:
#hide
from fastai.vision.all import *
from fastbook import *

# Use the 'Greys' colormap by default whenever matplotlib renders an image.
matplotlib.rc('image', cmap='Greys')
# Fetch the MNIST_SAMPLE dataset through fastai's untar_data; `path` is its root folder.
# The cells below read path/'train' and path/'valid', each with '3' and '7' subfolders.
path = untar_data(URLs.MNIST_SAMPLE)
# NOTE(review): presumably makes displayed paths relative to the dataset root — confirm in the fastcore docs.
Path.BASE_PATH = path


# 4 DIFFERENT WAYS TO CREATE 3/7 CLASSIFIER MODEL

##### First off, we'll set up the data shared by all four approaches, and then we'll go through each method:

Ensuring same order of files:

In [43]:
# List the training image files for each digit; .sorted() gives a fixed file order.
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

Creating tensor with all of the training 3s stacked together and all of the 7s stacked together:

In [44]:
# Open every image file and convert it to a tensor (one tensor per image).
three_tensors = [tensor(Image.open(o)) for o in threes]
seven_tensors = [tensor(Image.open(o)) for o in sevens]

# Stack the per-image tensors into one tensor per digit, convert to float and divide by 255
# (assumes pixel values are 0-255, so the result lies in 0-1 — TODO confirm).
stacked_threes = torch.stack(three_tensors).float()/255
stacked_sevens = torch.stack(seven_tensors).float()/255

Same thing for the validation 3s and 7s:

In [45]:
# Load the validation images the same way as the training images. The file lists are
# sorted (as for `threes`/`sevens`) so the validation tensors are built in the same
# order on every run, rather than in whatever order the directory listing returns.
valid_3_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'3').ls().sorted()])
valid_7_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'7').ls().sorted()])

# Convert to float and scale by 1/255, matching the training tensors.
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = valid_7_tens.float()/255

Now we create the training set: the 3s and 7s are concatenated and each 28x28 image is flattened, giving one row per image and 28*28 = 784 columns:

In [46]:
# Concatenate the 3s and 7s (3s first), then reshape to rows of 28*28 values;
# -1 lets PyTorch infer the number of rows (one per image, assuming 28x28 images).
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)

Then we create the label set for the training set:

In [47]:
# One label per training file, in the same order as train_x: 1 for each 3, 0 for each 7.
# unsqueeze(1) adds a second dimension so the labels form an (N, 1) column.
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)

Next, create a dataset where each tuple is the data with its label:

In [48]:
# Pair each row of train_x with its label, giving a list of (x, y) tuples.
dset = list(zip(train_x,train_y))

Then, we're going to do the same for the validation set:

In [49]:
# Validation inputs: 3s followed by 7s, reshaped to rows of 28*28 values.
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28*28)
# Matching labels: 1 for each validation 3, 0 for each validation 7, as an (N, 1) column.
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
# List of (x, y) tuples, built the same way as dset.
valid_dset = list(zip(valid_x,valid_y))

### 1. CREATE A CUSTOM MODEL:

First thing is we need to initialize the parameters (i.e. set a random value for all the weights) and tell PyTorch that we're going to want to calculate the gradient for each value in this set:

In [50]:
def init_params(size, std=1.0):
    """Return a tensor of shape `size` filled with random normal values scaled by `std`.

    The returned tensor has requires_grad switched on, so PyTorch tracks
    gradients for it.
    """
    values = torch.randn(size) * std
    return values.requires_grad_()

# One weight per pixel, stored as a (28*28, 1) column.
weights = init_params((28*28,1))

# A single bias value.
bias = init_params(1)

Now we're going to create a linear model.

This model scales the input data by the weights. It then adds bias (another parameter that adds flexibility).

Weights and bias (the parameters) are used alongside the input data to generate a prediction.

The formula for the prediction is as follows:

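`y = x@w + b`

- Input data (`x`) = one row per image, each row holding that image's $28*28$ pixels (the 28x28 matrix flattened into 784 values)
- Weights (`w`) = $28*28$ rows, 1 column (one weight per pixel)
- Bias (`b`) = a single value added to every prediction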

In [51]:
def linear1(xb):
    """Linear model: matrix-multiply the batch `xb` by the global `weights`, then add the global `bias`."""
    weighted = torch.matmul(xb, weights)
    return weighted + bias

Therefore using this model we can get predictions:

In [52]:
# Run the still-untrained model on the whole training set; the bare `preds` displays the result.
preds = linear1(train_x)
preds

tensor([[-9.5725],
        [-0.2023],
        [-6.3332],
        ...,
        [ 4.1000],
        [-3.6134],
        [-1.7304]], grad_fn=<AddBackward0>)

Let's first see how accurate our model is (i.e. the proportion of correct guesses):

In [53]:
# Treat a raw output above 0 as "predicts 3" (label 1) and compare it with the true label.
corrects = (preds>0.0).float() == train_y
# Fraction of training predictions that match their label.
corrects.float().mean().item()

0.5458212494850159

Now we need a loss function. 

This loss function first applies sigmoid to the raw predictions to squash them into a range from 0 to 1 (i.e. something we can treat as a probability). 

Next, it will check if the target for that prediction is 1. If it is 1, then return 1-prediction else return prediction.

This is done so that items labelled 1 get a low loss when their prediction is close to 1, and items labelled 0 get a low loss when their prediction is close to 0.

It's a simple way to create a distance function between prediction and target.

In this case, the label 1 means it is a 3, and the label 0 means it is not a 3 (i.e. it is a 7).

In [54]:
def mnist_loss(predictions, targets):
    """Mean distance between sigmoid(predictions) and the 0/1 targets.

    Where the target is 1 the per-item loss is 1 - p; elsewhere it is p,
    with p being the sigmoid of the raw prediction.
    """
    probs = torch.sigmoid(predictions)
    target_is_one = targets == 1
    per_item = torch.where(target_is_one, 1 - probs, probs)
    return per_item.mean()

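We now want to create a DataLoader so that it can split a dataset into mini-batches. We want mini-batches since calculating the loss over the entire dataset for every update would take too long. (Note that the DataLoader below is created without a shuffle option, so if the batches should be randomized, that has to be requested explicitly.)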

In [55]:
# Mini-batches of 256 (x, y) pairs drawn from the training set.
# NOTE(review): no shuffle argument is passed, and dset lists all 3s before all 7s —
# confirm whether the batches are meant to be shuffled.
dl = DataLoader(dset, batch_size=256)
# dset contains data and target for all 3s and 7s
valid_dl = DataLoader(valid_dset, batch_size=256)
# valid_dset is same as dset but from validation folder

Now we want to create a function that will calculate the gradient for each batch we pass into it:

In [56]:
def calc_grad(xb, yb, model):
    """Compute the mnist_loss of `model` on the batch (xb, yb) and backpropagate it.

    Returns nothing; the result is left in the .grad of the tensors involved.
    """
    batch_loss = mnist_loss(model(xb), yb)
    batch_loss.backward()

Now we want to write a function that will update the weights based on the gradient calculated for each mini batch:

In [57]:
def train_epoch(model, lr, params):
    """Run one epoch: for every mini-batch in `dl`, compute gradients and update `params`.

    model  -- callable mapping a batch of inputs to predictions
    lr     -- learning rate that multiplies each gradient
    params -- iterable of tensors to update in place
    """
    for batch_x, batch_y in dl:
        calc_grad(batch_x, batch_y, model)
        for param in params:
            # Gradient-descent step for this batch, applied through .data.
            param.data -= param.grad*lr
            # backward() adds to whatever is already in .grad, so clear it before the next batch.
            param.grad.zero_()

Now we need a way to evaluate the performance of the model (generate the metrics so we can see if the model improves).

To do this we create a function that returns the accuracy of a batch (i.e. the fraction of its predictions that are correct):

In [58]:
def batch_accuracy(xb, yb):
    """Fraction of items in the batch whose thresholded prediction equals the label.

    xb -- raw model outputs; sigmoid is applied and values above 0.5 count as label 1
    yb -- the 0/1 labels
    """
    probs = torch.sigmoid(xb)
    predicted_one = probs > 0.5
    hits = predicted_one == yb
    return hits.float().mean()

Then we create a function that uses `batch_accuracy` to generate the average accuracy of the model.

This is done by finding the average accuracy of every batch in valid_dl.

In [59]:
def validate_epoch(model):
    """Return the mean of the per-batch accuracies over `valid_dl`, rounded to 4 decimals.

    Every batch counts equally in the mean, regardless of how many items it holds.
    """
    per_batch = []
    for batch_x, batch_y in valid_dl:
        per_batch.append(batch_accuracy(model(batch_x), batch_y))
    mean_acc = torch.stack(per_batch).mean()
    return round(mean_acc.item(), 4)

Now we're going to train the model over 20 epochs. We initialize the learning rate and the parameters, then train the model and print its overall accuracy.

In [60]:
# Learning rate used for every update step.
lr = 1.
# The tensors that train_epoch will update.
params = weights,bias

# 20 epochs: one training pass, then print the validation accuracy for that epoch.
for i in range(20):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0.6547 0.8158 0.8914 0.929 0.9412 0.9486 0.9529 0.9559 0.9573 0.9588 0.9598 0.9612 0.9642 0.9647 0.9661 0.9652 0.9661 0.9691 0.9686 0.9701 

### 2. USING PYTORCH'S LINEAR MODEL

In [62]:
# A PyTorch linear layer with 28*28 inputs and 1 output.
linear_model = nn.Linear(28*28,1)

# Stands in for the hand-written init_params + linear1 pair used in section 1.

Next, we define an optimizer (an object that will update/change weights and zero out the gradients)

In [63]:
class BasicOptim:
    """Bare-bones optimizer: holds parameters and a learning rate, steps by plain gradient descent."""

    def __init__(self, params, lr):
        # Materialize the parameters so they can be iterated on every step.
        self.params = list(params)
        self.lr = lr

    def step(self, *args, **kwargs):
        """Subtract lr * gradient from every parameter (extra arguments are ignored)."""
        for param in self.params:
            param.data -= param.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        """Drop the stored gradient of every parameter (extra arguments are ignored)."""
        for param in self.params:
            param.grad = None
        
# Optimizer over linear_model's parameters, using the learning rate `lr` set earlier.
opt = BasicOptim(linear_model.parameters(), lr)

Thus, our training loop becomes:

In [None]:
def train_epoch(model):
    """Run one epoch over `dl`, delegating the parameter update to the global optimizer `opt`."""
    for batch_x, batch_y in dl:
        # Fill in the gradients for this batch...
        calc_grad(batch_x, batch_y, model)
        # ...apply them, then reset them ready for the next batch.
        opt.step()
        opt.zero_grad()

We can then simplify things even further by creating a train_model loop (similar to for loop from custom model)

In [65]:
def train_model(model, epochs):
    """Train `model` for `epochs` epochs, printing the validation accuracy after each one."""
    for _ in range(epochs):
        train_epoch(model)
        epoch_acc = validate_epoch(model)
        print(epoch_acc, end=' ')

train_model(linear_model, 20)

0.4932 0.8017 0.854 0.915 0.935 0.9477 0.956 0.9634 0.9658 0.9678 0.9692 0.9712 0.9726 0.9751 0.9751 0.9765 0.9775 0.9775 0.9785 0.9785 

### 3. USING SGD CLASS (OPTIMIZER) AS WELL AS PYTORCH LINEAR MODEL

In [None]:
# Fresh linear layer for this approach.
linear_model = nn.Linear(28*28, 1)

# train_epoch reads the module-level `opt`, so rebinding it here is what swaps
# the hand-written BasicOptim for the library-provided SGD optimizer.
opt = SGD(linear_model.parameters(), lr)

# mnist_loss, calc_grad, batch_accuracy, validate_epoch, train_epoch and train_model
# are reused exactly as defined in the earlier cells; only the model and the
# optimizer change in this section.
train_model(linear_model, 20)

0.4932 0.852 0.8349 0.9101 0.9326 0.9468 0.9546 0.9624 0.9653 0.9678 0.9692 0.9707 0.9731 0.9746 0.9761 0.977 0.9775 0.978 0.9785 0.979 

### 4. USING FASTAI LEARNER CLASS

In [None]:
# Bundle the training and validation DataLoaders for the Learner.
dls = DataLoaders(dl, valid_dl)

# mnist_loss and batch_accuracy are the functions defined in section 1; they are
# passed in as the loss function and the reported metric rather than redefined here.
learn = Learner(
    dls,
    nn.Linear(28*28,1),
    opt_func=SGD,
    loss_func=mnist_loss,
    metrics=batch_accuracy
)

Next, we call the fit method and set number of epochs and learning rate:

In [68]:
# Train for 10 epochs with the learning rate `lr` defined earlier.
learn.fit(10, lr=lr)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.637295,0.503271,0.495584,00:00
1,0.460429,0.225399,0.800294,00:00
2,0.171302,0.167958,0.848381,00:00
3,0.076295,0.102711,0.916094,00:00
4,0.041308,0.07592,0.93474,00:00
5,0.027613,0.061219,0.947988,00:00
6,0.021953,0.051976,0.95633,00:00
7,0.019416,0.045791,0.962709,00:00
8,0.018115,0.04143,0.965653,00:00
9,0.017321,0.038207,0.966634,00:00
