## A condensed version of the fastai course example: training a single-layer neural network on MNIST with SGD

# Imports

In [None]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()
#hide
from fastai.vision.all import *
from fastbook import *
matplotlib.rc('image', cmap='Greys')

# Define the path

Here we define the path under which all the images can be found. This folder already contains both the training and the validation images.

In [None]:
# Fetch the MNIST_SAMPLE dataset via fastai's untar_data and keep the
# returned location; the cells below build the train/valid image folders
# from this `path`.
path = untar_data(URLs.MNIST_SAMPLE)
# NOTE(review): BASE_PATH is a fastcore setting on Path; presumably it makes
# paths display relative to the dataset folder -- confirm in fastcore docs.
Path.BASE_PATH = path

# Create dataloader objects
- define the paths for sevens and threes
- open the images and place them in a tensor
- create a stack of all threes and of all sevens
- create a training x set including images of threes and sevens
- create a training y set of labels of threes and sevens
- create the dataset which includes x's and y's
- create a dataloader object and define the batch size

In [None]:
# Sorted file listings for each digit class in the training folder.
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()

# Decode every image file into its own tensor, one list per class.
three_tensors = [tensor(Image.open(img_file)) for img_file in threes]
seven_tensors = [tensor(Image.open(img_file)) for img_file in sevens]

# Stack each class into one float tensor. Dividing by 255 rescales the
# pixel values (assumed to be in 0-255) to the range [0, 1].
stacked_threes = torch.stack(three_tensors).float() / 255
stacked_sevens = torch.stack(seven_tensors).float() / 255

# Inputs: all threes followed by all sevens, each image flattened to one
# row of 28*28 values.
train_x = torch.cat([stacked_threes, stacked_sevens])
train_x = train_x.view(-1, 28*28)

# Labels in the same order as train_x: 1 for a three, 0 for a seven.
# unsqueeze(1) turns the flat label vector into a column.
train_y = tensor([1] * len(threes) + [0] * len(sevens)).unsqueeze(1)

# Pair each image row with its label and serve them in batches of 256.
# NOTE(review): no `shuffle` argument is passed, so unless the DataLoader
# shuffles by default the batches follow the threes-then-sevens order.
dset = list(zip(train_x, train_y))
dl = DataLoader(dset, batch_size=256)

# Now do the same thing for the validation data

In [None]:
# Sorted file listings for each digit class in the validation folder.
valid_threes = (path/'valid'/'3').ls().sorted()
valid_sevens = (path/'valid'/'7').ls().sorted()

# Decode every validation image into its own tensor, one list per class.
valid_three_tensors = [tensor(Image.open(img_file)) for img_file in valid_threes]
valid_seven_tensors = [tensor(Image.open(img_file)) for img_file in valid_sevens]

# Stack each class into one float tensor. Dividing by 255 rescales the
# pixel values (assumed to be in 0-255) to the range [0, 1].
valid_stacked_threes = torch.stack(valid_three_tensors).float() / 255
valid_stacked_sevens = torch.stack(valid_seven_tensors).float() / 255

# Inputs: all threes followed by all sevens, each image flattened to one
# row of 28*28 values.
valid_x = torch.cat([valid_stacked_threes, valid_stacked_sevens])
valid_x = valid_x.view(-1, 28*28)

# Labels in the same order as valid_x: 1 for a three, 0 for a seven.
valid_y = tensor([1] * len(valid_threes) + [0] * len(valid_sevens)).unsqueeze(1)

# Pair each image row with its label and serve them in batches of 256.
valid_dset = list(zip(valid_x, valid_y))
valid_dl = DataLoader(valid_dset, batch_size=256)

# Define the initial weights and biases
- First define a function init_params which takes in the dimensions of a parameter tensor, fills it with random values and marks it as requiring gradients (i.e. differentiable).
- Then create the weights (one per pixel, 28*28 in total) and a single bias.

In [None]:
def init_params(size, std=1.0):
    """Create a randomly initialised tensor that tracks gradients.

    Args:
        size: shape passed to ``torch.randn`` (an int or a tuple of ints).
        std: factor the standard-normal samples are multiplied by.

    Returns:
        A tensor of the requested shape with ``requires_grad`` switched on.
    """
    samples = torch.randn(size) * std
    # Enable gradient tracking only after scaling, so the returned tensor
    # is itself the leaf that accumulates `.grad`.
    return samples.requires_grad_()
# One weight per pixel, stored as a (28*28, 1) column so that a batch of
# flattened images can be matrix-multiplied with it.
weights = init_params((28 * 28, 1))
# A single bias value.
bias = init_params(1)
# No gradient reset is done here: nothing has called backward() on these
# tensors yet at this point in the notebook.

# Define functions for training
- batch_accuracy takes a batch of model outputs, squashes them to between 0 and 1 with the sigmoid, thresholds them at 0.5 and compares the result with the true labels. The output is the fraction of correct predictions in the batch, and therefore isn't the same as the loss function.
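- validate_epoch computes batch_accuracy for each batch of the validation set and returns the mean over the batches, rounded to four decimals.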
- linear1 takes in all images and outputs the linear transformation from the weights and biases.
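- sigmoid maps any real number to a value between 0 and 1 using 1/(1+exp(-x)). (The functions in this cell call the tensor method `.sigmoid()` rather than this helper.)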
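- mnist_loss takes in the predictions (outputs of the model), squashes them to between 0 and 1 with the sigmoid and compares them to the targets: the loss of each item is its distance from the correct label, and the result is the mean over the batch.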
- calc_grad runs the model on a batch to get the predictions, calculates the loss and then the gradients using backpropagation. These gradients are attached to all parameters which have been defined previously as differentiable.
- train_epoch loops over the training batches. For each batch it passes the images and labels, together with the model (the linear model in this case), to calc_grad. It then runs through all parameters, applies one step of gradient descent using the learning rate (parameter minus gradient times learning rate), and resets the gradients to zero.

In [None]:
def batch_accuracy(xb, yb):
    """Return the fraction of a batch that is classified correctly.

    Args:
        xb: raw model outputs for the batch (before the sigmoid).
        yb: true labels, compared element-wise with the thresholded outputs.

    Returns:
        A scalar tensor holding the mean of the per-item hit/miss values.
    """
    # An output counts as a positive prediction when its sigmoid is above 0.5.
    is_positive = xb.sigmoid() > 0.5
    return (is_positive == yb).float().mean()
def validate_epoch(model):
    """Return the model's accuracy on the validation loader, rounded to 4 decimals.

    The accuracy of every batch in `valid_dl` is computed with
    `batch_accuracy`, and the per-batch values are then averaged.
    Note that this is an unweighted mean over batches, so a smaller final
    batch counts as much as a full one.
    """
    per_batch = []
    for images, labels in valid_dl:
        per_batch.append(batch_accuracy(model(images), labels))
    mean_accuracy = torch.stack(per_batch).mean()
    return round(mean_accuracy.item(), 4)

def linear1(xb):
    """Apply the linear model to a batch: `xb @ weights + bias`.

    Reads the module-level `weights` and `bias` tensors.
    """
    weighted_sum = xb @ weights
    return weighted_sum + bias
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of tensor `x`, element-wise."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator
def mnist_loss(predictions, targets):
    """Mean distance between the sigmoid of the predictions and the targets.

    Args:
        predictions: raw model outputs (before the sigmoid).
        targets: labels; entries equal to 1 are treated as the positive class.

    Returns:
        A scalar tensor: for items whose target is 1 the per-item loss is
        ``1 - sigmoid(prediction)``, otherwise it is ``sigmoid(prediction)``;
        the result is the mean of these values.
    """
    probs = predictions.sigmoid()
    is_positive = targets == 1
    per_item = torch.where(is_positive, 1 - probs, probs)
    return per_item.mean()
def calc_grad(xb, yb, model):
    """Back-propagate the MNIST loss of `model` on one batch.

    Runs `model` on `xb`, scores the outputs against `yb` with
    `mnist_loss`, and calls `backward()` on the resulting loss. Nothing is
    returned; the effect is the gradients that `backward()` accumulates.
    """
    mnist_loss(model(xb), yb).backward()
def train_epoch(model, lr, params):
    """Run one epoch of mini-batch gradient descent over the training loader `dl`.

    For every batch, `calc_grad` back-propagates the loss so that the
    tensors in `params` carry gradients; each parameter is then moved
    against its gradient, scaled by `lr`, and its gradient is reset.

    Args:
        model: callable that maps a batch of inputs to predictions.
        lr: learning rate, i.e. the factor applied to each gradient.
        params: iterable of the tensors to update. It is iterated once
            per batch, so it must be re-iterable (e.g. a tuple or list).
    """
    for xb, yb in dl:
        calc_grad(xb, yb, model)
        # The parameter update must not be recorded by autograd.
        # torch.no_grad() is the supported way to do this; unlike writing
        # through `.data`, it keeps autograd's in-place safety checks.
        with torch.no_grad():
            for p in params:
                p -= p.grad * lr
                # backward() accumulates into .grad, so clear it before
                # the next batch.
                p.grad.zero_()

# Run for 20 epochs
- Define the parameters, and run 20 epochs of train_epoch with a linear model and a learning rate of 1. After each epoch, print out the accuracy using validate_epoch on the model.

In [None]:
# Learning rate and the tensors that train_epoch is allowed to update.
lr = 1.0
params = (weights, bias)

for i in range(20):
    # One pass over the training batches with the linear model...
    train_epoch(linear1, lr, params)
    # ...then report the validation accuracy; all epochs print on one line.
    print(validate_epoch(linear1), end=' ')