In [None]:
from fastbook import *
from fastai.vision.all import *
import random

In [None]:
def obtain_stacked_tensors(data_path, number):
    """
    This function receives the path where the images are stored + the label
    (which in this case happens to be a number) and returns a tensor of rank 3
    and shape [number of images, 28, 28] where each pixel goes from 0 (white) to 1 (black).
    """
    values = (path/'{}'.format(data_path)/'{}'.format(number)).ls().sorted()
    tensors = [tensor(Image.open(o)) for o in values]
    stacked_tensors = torch.stack(tensors).float()/255
    
    return stacked_tensors

In [None]:
def obtain_labels(dataset_rows, data_path):
    """
    This function receives the total number of rows in the dataset (i.e. the number of images)
    and the path where the images are stored, and returns a tensor of rank 2 and shape [dataset_rows, numbers],
    where every row is an image and every column is a label (a number from 0 to 9 in this case). So, for each
    image it has a vector that is 1 at the number it corresponds, and 0 for the rest.
    """
    labels = []
    start = 0
    numbers = len((path/'{}'.format(data_path)).ls())
    for number in range(numbers): 
        labels_column = tensor([0]*dataset_rows)
        number_rows = len((path/'{}'.format(data_path)/'{}'.format(number)).ls().sorted())
        if number != 9:
            labels_column[start:number_rows] = 1
            start = number_rows
        else:
            labels_column[start:] = 1
            
        labels.append(labels_column)
        
    labels_tensor = torch.stack(labels)
    
    return torch.transpose(labels_tensor, 0, 1).view(-1, numbers).float()

In [None]:
def obtain_dataloader(data_path, batch_size=32):
    """
    Given a data path and an optional batch size, returns a DataLoader object composed by x, a tensor of
    rank 2 with shape [dataset_size, 28*28] (28*28 consists of putting height and weight in the same dimension) and
    x, a tensor of rank 2 with shape [dataset_size, 10], where the column of size 10 is the labels vector.
    """
    x = torch.cat([obtain_stacked_tensors(data_path, number) for number in range(10)]).view(-1, 28*28)
    y = obtain_labels(x.shape[0], data_path)
    
    dset = list(zip(x, y))
    
    return DataLoader(dset, batch_size, shuffle=True)

In [None]:
def obtain_dataloaders(data_path, batch_size=32, validation_set=0.2):
    """
    Given a data path and an optional batch size, returns a DataLoader object composed by x, a tensor of
    rank 2 with shape [dataset_size, 28*28] (28*28 consists of putting height and weight in the same dimension) and
    x, a tensor of rank 2 with shape [dataset_size, 10], where the column of size 10 is the labels vector.
    """
    x = torch.cat([obtain_stacked_tensors(data_path, number) for number in range(10)]).view(-1, 28*28)
    y = obtain_labels(x.shape[0], data_path)
    
    dset = list(zip(x, y))
    random.shuffle(dset)
    
    valid_dl_size = int(round(len(dset)*validation_set, 0))
    valid_dl = DataLoader(dset[:valid_dl_size], batch_size, shuffle=True)
    
    training_dl = DataLoader(dset[valid_dl_size:], batch_size, shuffle=True)
    
    return training_dl, valid_dl

In [None]:
def init_params(size, std=1.0):
    """
    Given the parameters size and std, returns a tensor that allows gradeint calculations
    of the size and std indicated.
    """
    return (torch.randn(size)*std).requires_grad_()

In [None]:
def linear1(xb):
    """
    Given a tensor xb, returns xb tensor multiplied by weights tensor + bias tensor
    """
    return xb@weights + bias

In [None]:
def obtain_preds(preds):
    """
    Given a tensor preds, returns a new tensor of the same shape of the input but where only the maximum value
    of the input tensor is different from zero (being one), and returning the rest of values as zeros.
    """
    return torch.where(preds == max(preds), 1, 0).float()

In [9]:
def obtain_correct(pred, target):
    """
    Given a prediction tensor and a target tensor, returns True if the prediction tensor has the same values as
    the target tensor for every column.
    """
    return (pred == target).sum().item() == target.shape[0]

In [10]:
def batch_accuracy(preds, targets):
    """
    Given a predictions tensor and a targets tensor, returns the accuracy of the predictions against the targets.
    """
    cleaned_preds = torch.stack([obtain_preds(pred) for pred in preds])
    return tensor([obtain_correct(cleaned_preds[item], targets[item]) for item in range(targets.shape[0])]).float().mean()

In [11]:
def mnist_loss(preds, targets):
    """
    Given a predictions tensor and a targets tensor, returns the mean of the sigmoid function
    of the predictions and returns 1-prediction for the cases where the target value == 1 and
    the prediction for the cases where the target value == 0.
    This is because we want to minimize the difference between 1 and the prediction when the target is one
    and the difference between the prediction and zero when the target is 0.
    """
    my_preds = preds.sigmoid()
    return (torch.where(targets==1, 1-my_preds, my_preds)).mean()

In [12]:
def calc_grad(xb, yb, model):
    """
    Given a independient variables tensor xb, a dependent variables tensor yb and a model,
    calculates the predictions of xb given the model, the loss function between the predictions and the actual
    values yb, and computes the backpropagation of the loss function for each parameter of the model, obtaining
    their gradients.
    """
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [13]:
def train_epoch(model, dl, lr, params):
    """
    Performs the training of the model for one epoch. Given a model, a DataLoader object, a learning rate
    and a params tuple, calculates the gradient for those params and performs one step on them.
    """
    for xb, yb in dl:
        calc_grad(xb, yb, model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()

In [14]:
def validate_epoch(model, dl):
    """
    Given a model and a DataLoader object, obtains the accuracy of the model for that DataLoader.
    """
    accs = [batch_accuracy(model(xb), yb) for xb, yb in dl]
    return round(torch.stack(accs).mean().item(), 4)

###  ----

In [15]:
path = untar_data(URLs.MNIST)
Path.BASE_PATH = path

In [17]:
"""
The problem with the function we used is that is does not stratify our dataset, and thus we will get
an uneven representation of the full dataset in both our training and validation datasets.
"""

training_dl, valid_dl = obtain_dataloaders('training')

In [18]:
weights = init_params((28*28, 10))

In [19]:
bias = init_params((1, 10))

In [21]:
validate_epoch(linear1, valid_dl)

0.2775

In [22]:
lr = 1.
params = weights, bias
for i in range(5):
    train_epoch(linear1, training_dl, lr, params)
    print('{} '.format(validate_epoch(linear1, valid_dl)))

0.8857 
0.8858 
0.8859 
0.886 
0.886 


In [24]:
dls = DataLoaders(training_dl, valid_dl)

In [28]:
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,10)
)

In [29]:
learn = Learner(
    dls,
#     nn.Linear(28*28, 10),
    simple_net,
    opt_func=SGD,
    loss_func=mnist_loss,
    metrics=batch_accuracy
)

In [31]:
learn.fit(10)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.215184,0.21014,0.886,00:07
1,0.111236,0.110689,0.886,00:07
2,0.073621,0.071684,0.886,00:07
3,0.054111,0.054692,0.886,00:07
4,0.046644,0.045836,0.886,00:07
5,0.039246,0.040582,0.886,00:08
6,0.037916,0.037161,0.886,00:07
7,0.033086,0.034784,0.886,00:07
8,0.031624,0.033048,0.886,00:08
9,0.031364,0.031731,0.886,00:07
