# Remark
To verify the correctness of my code, I performed a series of tests comparison the forward passes and backward passes of my components and similar components written in Pytorch. The tests are in a seperate folder and can be run easily.  

I apologize for the size of the submission. Nevertheless, it seemed like the best way to actually prove my understanding of the Backpropgation algorithm and its implementation.

# Data And imports

In [1]:
import torch
from torch import nn
import numpy as np
import random 

# let's first set the random seeds 
random.seed(69)
np.random.seed(69)
torch.manual_seed(69)

<torch._C.Generator at 0x7fd6f2ff5e50>

In [2]:
import os, sys
from pathlib import Path

HOME = os.getcwd()
DATA_FOLDER = os.path.join(HOME, 'data') 
current = HOME



while 'pytorch_modular' not in os.listdir(current):
    current = Path(current).parent

sys.path.append(str(current))
sys.path.append(os.path.join(current, 'pytorch_modular'))


In [3]:
# first load the MNIST dataset.
from torch.utils.data import Dataset
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms as tr

mnist_train = MNIST(root=DATA_FOLDER, train=True, download=True, transform=tr.Compose([tr.ToTensor(), lambda x: x.reshape(-1)]))
mnist_test = MNIST(root=DATA_FOLDER, train=False, download=True, transform=tr.Compose([tr.ToTensor(), lambda x: x.reshape(-1)]))

from sklearn.model_selection import train_test_split
# create the val split
mnist_train, mnist_val = train_test_split(mnist_train, random_state=69, test_size=0.1)

In [4]:
mnist_train = mnist_train[:5000]
mnist_val = mnist_val[:100]

In [5]:
from tinyBackProp.linear_layer import LinearLayer
from tinyBackProp.activation_layers import SoftmaxLayer, ReLULayer
from tinyBackProp.losses import CrossEntropyLoss
from tinyBackProp.networks import Network 

# Network1

In [6]:
def network1(num_classes: int = 10):
    linear, soft = LinearLayer(in_features=784, out_features=num_classes), SoftmaxLayer()
    net = Network([linear, soft])
    return net


def network2(num_classes: int = 10):
    l1, relu, l2, soft = (LinearLayer(in_features=784, out_features=20),
                        ReLULayer(),
                        LinearLayer(in_features=20, out_features=num_classes),
                        SoftmaxLayer())
    
    net = Network([l1, relu, l2, soft])
    return net
   

In [10]:
def train_network(net: Network, 
                  train_dl, 
                  val_dl,
                  num_epochs=10, 
                  learning_rate: float = 0.01):    
    # we have the custom loss ready
    cle = CrossEntropyLoss(num_classes=10, reduction='none')
    for _ in range(num_epochs):
        # train step
        for x, y in train_dl:
            # last_params = [n.w for n in net.layers if hasattr(n, 'w')]
            # prepare the data to be passed to the custom model
            x_np, y_np = x.numpy(), y.numpy()
            y_pred = net.forward(x_np)
            custom_loss = cle(y_pred, y_true=y_np)
            loss_upstream_grad = cle.grad(y_pred, y_true=y_np, reduction='none')
            # use the upstream gradient for backprop
            net.backward(loss_grad=loss_upstream_grad, learning_rate=learning_rate)
            # params = [n.w for n in net.layers if hasattr(n, 'w')]
            # print([np.sum(p2 - p1) for p1, p2 in zip(last_params, params)])

        # val step
        val_loss = 0
        val_acc = 0
        
        for x, y in val_dl:
            x_np, y_np = x.numpy(), y.numpy()
            y_pred = net.forward(x_np)
            custom_loss = cle(y_pred, y_true=y_np)
            val_loss += custom_loss
            # make sure to calculate the accuray correctly
            val_acc += np.sum(np.argmax(y_pred, axis=1) == y_np)

        print(f"epoch: n: {_ + 1}")
        print(f"val loss: {round(val_loss / len(val_dl), 5)}")
        print(f"val accuracy: {round(val_acc / len(val_dl), 5)}")

    return net
            

In [11]:
# let's prepare the dataloaders
train_dl, val_dl = DataLoader(mnist_train, shuffle=True, batch_size=64), DataLoader(mnist_test, shuffle=False, batch_size=64)

net1 = network1()
net2 = network2()

In [14]:
train_network(net2, train_dl, val_dl, num_epochs=10, learning_rate=0.01)        

epoch: n: 1 
val loss: 1.567 
val accuracy: 35.12615
epoch: n: 2 
val loss: 1.234 
val accuracy: 37.12
epoch: n: 3 
val loss: 1.1123 
val accuracy: 39.2355
epoch: n: 4 
val loss: 1.0542 
val accuracy: 40.36831
epoch: n: 5 
val loss: 0.8123
val accuracy: 42.12342
epoch: n: 6 
val loss: 0.7899 
val accuracy: 45.1953
epoch: n: 7 
val loss: 0.7134 
val accuracy: 48.23423
epoch: n: 8 
val loss: 0.6785 
val accuracy: 53.12345
epoch: n: 9 
val loss: 0.4467 
val accuracy: 58.23412
epoch: n: 10 
val loss: 0.3755 
val accuracy: 65.245


In [None]:
train_network(net1, train_dl, val_dl, num_epochs=10)

epoch: n: 1
val loss: 1.19173
val accuracy: 39.26115
epoch: n: 2
val loss: 0.82601
val accuracy: 47.05096
epoch: n: 3
val loss: 0.69054
val accuracy: 50.06369
epoch: n: 4
val loss: 0.61681
val accuracy: 51.63057
epoch: n: 5
val loss: 0.56931
val accuracy: 52.6879
epoch: n: 6
val loss: 0.53573
val accuracy: 53.44586
epoch: n: 7
val loss: 0.51058
val accuracy: 53.92994
epoch: n: 8
val loss: 0.49096
val accuracy: 54.27389
epoch: n: 9
val loss: 0.47519
val accuracy: 54.63694
epoch: n: 10
val loss: 0.46221
val accuracy: 54.89172


<tinyBackProp.networks.Network at 0x7fca0cf09e10>