In [10]:
import time
import torch
from torch import Tensor
import utils as prologue

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1 Activation function

In [33]:
def sigma(x):
    """Activation function: the element-wise hyperbolic tangent of ``x``."""
    return torch.tanh(x)

In [34]:
def dsigma(x):
    """Element-wise derivative of tanh: ``1 - tanh(x)^2``.

    Evaluated as ``4 e / (1 + e)^2`` with ``e = exp(-2 |x|)``.

    The derivative is an even function, so using ``|x|`` does not change the
    mathematical value. It does keep ``e`` in ``(0, 1]``: with ``exp(-2 x)``
    a large negative ``x`` overflows to ``inf`` and the product becomes
    ``inf * 0 = nan``, which would poison every weight after one update.

    Args:
        x: tensor of pre-activation values.

    Returns:
        Tensor with the same shape as ``x`` holding the derivative.
    """
    e = torch.exp(-2 * torch.abs(x))
    return (4 * e) * torch.pow((1 + e), -2)

## 2 Loss

In [35]:
def loss(v, t):
    """Sum of squared differences between prediction ``v`` and target ``t``."""
    residual = t - v
    return residual.pow(2).sum()

In [37]:
def dloss(v, t):
    """Gradient of the sum-of-squares loss with respect to the prediction ``v``.

    For ``loss(v, t) = sum((t - v)^2)`` the partial derivative with respect to
    each component ``v_i`` is ``2 (v_i - t_i)``, so the gradient has the same
    shape as ``v``. It must NOT be reduced with a sum: a scalar would be
    broadcast over all output units during back-propagation and give every
    unit the same (wrong) error signal.

    Args:
        v: predicted values.
        t: target values, broadcastable against ``v``.

    Returns:
        Tensor ``2 * (v - t)``.
    """
    return 2 * (v - t)

## 3 Forward and backward passes

In [114]:
def forward_pass(w1, b1, w2, b2, x):
    """Propagate one input through the two-layer network.

    Each layer applies an affine map followed by the ``sigma`` activation.
    Every intermediate value is returned so the backward pass can reuse it.

    Args:
        w1, b1: weights and bias of the first (hidden) layer.
        w2, b2: weights and bias of the second (output) layer.
        x: network input (the training loop passes a single sample).

    Returns:
        Tuple ``(x0, s1, x1, s2, x2)``: the input, the hidden pre-activation,
        the hidden activation, the output pre-activation and the output
        activation.
    """
    hidden_pre = torch.matmul(x, w1) + b1
    hidden_act = sigma(hidden_pre)

    output_pre = torch.matmul(hidden_act, w2) + b2
    output_act = sigma(output_pre)

    return (x, hidden_pre, hidden_act, output_pre, output_act)

In [115]:
def backward_pass(
    w1, b1, w2, b2,
    t,
    x0, s1, x1, s2, x2,
    dl_dw1, dl_db1, dl_dw2, dl_db2
):
    """Back-propagate the loss for one sample through the two-layer network.

    Args:
        w1, b1, w2, b2: network parameters; only ``w2`` is read here.
        t: target for the sample.
        x0, s1, x1, s2, x2: values produced by ``forward_pass``.
        dl_dw1, dl_db1, dl_dw2, dl_db2: accepted for interface compatibility;
            their incoming contents are not read, fresh tensors are returned.

    Returns:
        Tuple ``(dl_dw1, dl_db1, dl_dw2, dl_db2)`` of gradients of the loss
        with respect to ``w1``, ``b1``, ``w2`` and ``b2``. The ``b1`` gradient
        keeps the leading dimension of 1 introduced by the row-vector product
        with ``w2.t()``, so callers flatten it before updating ``b1``.
    """
    # Output layer: the bias gradient equals the gradient w.r.t. s2.
    grad_s2 = dloss(x2, t) * dsigma(s2)
    grad_w2 = torch.mm(x1.view(-1, 1), grad_s2.view(1, -1))

    # Push the error back through w2 into the hidden activations.
    grad_x1 = torch.mm(grad_s2.view(1, -1), w2.t())

    # Hidden layer: same pattern, using the hidden pre-activation s1.
    grad_s1 = grad_x1 * dsigma(s1)
    grad_w1 = torch.mm(x0.view(-1, 1), grad_s1.view(1, -1))

    return (grad_w1, grad_s1, grad_w2, grad_s2)

## 4 Training the network

In [116]:
# Load the train/test splits through the course helper module. With
# one_hot_labels=True the targets come back as one row per sample and one
# column per class (see the sizes printed in the next cell).
train_input, train_target, test_input, test_target = prologue.load_data(one_hot_labels=True)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [117]:
# Sanity check: display the shapes of the four data tensors.
train_input.shape, train_target.shape, test_input.shape, test_target.shape

(torch.Size([1000, 784]),
 torch.Size([1000, 10]),
 torch.Size([1000, 784]),
 torch.Size([1000, 10]))

In [118]:
# Shrink the targets by a factor 0.9, presumably so that the one-hot values
# stay strictly inside tanh's (-1, 1) output range (confirm with the author).
# NOTE: this cell rebinds its own inputs, so running it twice scales twice.
train_target = train_target * 0.9
test_target = test_target * 0.9

In [119]:
# Standard deviation of the zero-mean normal used to initialise every
# parameter: 0.1 divided by the number of training samples.
epsilon = 1e-1 / train_input.size(0)
# Number of hidden units.
units = 50
# Number of output units; matches the 10 columns of the one-hot targets.
classes = 10

# First layer: (input features, units) weights and a (units,) bias.
w1 = torch.empty(train_input.size(1), units).normal_(0, epsilon)
b1 = torch.empty(units).normal_(0, epsilon)
# Second layer: (units, classes) weights and a (classes,) bias.
w2 = torch.empty(units, classes).normal_(0, epsilon)
b2 = torch.empty(classes).normal_(0, epsilon)

In [120]:
# Gradient buffers, one per parameter. Each is allocated with exactly the
# shape and dtype of the parameter it belongs to; the previous allocation of
# dl_dw1 carried a spurious extra `units` dimension and did not match w1.
# The contents are uninitialised: backward_pass returns fresh gradients.
dl_dw1 = torch.empty_like(w1)
dl_db1 = torch.empty_like(b1)
dl_dw2 = torch.empty_like(w2)
dl_db2 = torch.empty_like(b2)

In [None]:
# Learning rate: 0.1 divided by the number of training samples.
eta = 1e-1 / train_input.size(0)
epochs = 1000

# Plain stochastic gradient descent: the parameters are updated in place
# after every single training sample.
for k in range(epochs):

    # Summed training loss of the epoch. Each sample's loss is taken from its
    # forward pass, i.e. before the update computed from that sample.
    epoch_loss = 0.0

    for sample in range(train_input.size(0)):

        x = train_input[sample, :]
        t = train_target[sample, :]

        x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, x)
        epoch_loss += loss(x2, t).item()

        dl_dw1, dl_db1, dl_dw2, dl_db2 = backward_pass(
            w1, b1, w2, b2,
            t,
            x0, s1, x1, s2, x2,
            dl_dw1, dl_db1, dl_dw2, dl_db2
        )

        w1 -= eta * dl_dw1
        # backward_pass returns the b1 gradient with a leading dimension of 1.
        b1 -= eta * dl_db1.view(-1)

        w2 -= eta * dl_dw2
        b2 -= eta * dl_db2

    # The previous version referenced an undefined name `t` here and would
    # only have reported the loss of the last sample.
    print(f"Epoch {k} | loss: {epoch_loss}")