In [1]:
import time
import torch
from torch import Tensor
import utils as prologue

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1 Activation function

In [2]:
def sigma(x):
    """Activation function: hyperbolic tangent of `x` (via `x.tanh()`)."""
    return x.tanh()

In [3]:
def dsigma(x):
    """Derivative of the activation, computed as ``1 - sigma(x) ** 2``."""
    activation = sigma(x)
    return 1 - activation.pow(2)

## 2 Loss

In [4]:
def loss(v, t):
    """Sum of squared differences between the prediction `v` and the target `t`."""
    residual = t - v
    return residual.pow(2).sum()

In [5]:
def dloss(v, t):
    """Gradient of the summed squared error with respect to `v`: ``2 * (v - t)``."""
    residual = v - t
    return 2 * residual

In [6]:
# Sanity check: for a length-10 vector, the loss gradient keeps the input's shape.
test = torch.rand(10)
dloss(test, test).shape

torch.Size([10])

## 3 Forward and backward passes

In [7]:
def forward_pass(w1, b1, w2, b2, x):
    """Run one input through the two-layer network and keep every intermediate.

    Args:
        w1, b1: weight matrix and bias of the first (hidden) layer.
        w2, b2: weight matrix and bias of the second (output) layer.
        x: network input; the training loop passes a single sample
           (``train_input[sample]``).

    Returns:
        Tuple ``(x0, s1, x1, s2, x2)``: the input itself, followed by the
        pre-activation and the `sigma` activation of each layer, in order.
        These are the values `backward_pass` consumes.
    """
    hidden_pre = torch.matmul(w1, x) + b1
    hidden_act = sigma(hidden_pre)

    output_pre = torch.matmul(w2, hidden_act) + b2
    output_act = sigma(output_pre)

    return (x, hidden_pre, hidden_act, output_pre, output_act)

In [8]:
def backward_pass(
    w1, b1, w2, b2,
    t,
    x0, s1, x1, s2, x2,
    dl_dw1, dl_db1, dl_dw2, dl_db2
):
    """Back-propagate one sample and add its gradients to the given buffers.

    Args:
        w1, b1, w2, b2: network parameters. Only `w2` is read here; the
            others are accepted so the signature mirrors `forward_pass`.
        t: target for this sample.
        x0, s1, x1, s2, x2: the values returned by `forward_pass`.
        dl_dw1, dl_db1, dl_dw2, dl_db2: gradient accumulators, updated
            in place.

    Returns:
        The same four accumulators ``(dl_dw1, dl_db1, dl_dw2, dl_db2)``.
    """
    # Output layer: gradient w.r.t. the pre-activation s2.
    grad_s2 = dloss(x2, t) * dsigma(s2)

    # Weight gradient is the outer product (column vector @ row vector).
    dl_dw2.add_(torch.matmul(grad_s2.view(-1, 1), x1.view(1, -1)))
    dl_db2.add_(grad_s2)

    # Hidden layer: push the gradient back through w2, then through sigma.
    grad_s1 = torch.matmul(w2.t(), grad_s2) * dsigma(s1)

    dl_dw1.add_(torch.matmul(grad_s1.view(-1, 1), x0.view(1, -1)))
    dl_db1.add_(grad_s1)

    return (dl_dw1, dl_db1, dl_dw2, dl_db2)

## 4 Training the network

In [9]:
# Load the train/test inputs and targets through the project helper module
# (`utils`, imported as `prologue`). `one_hot_labels=True` is passed so the
# targets presumably come back one-hot encoded — confirm against
# `utils.load_data`.
train_input, train_target, test_input, test_target = prologue.load_data(one_hot_labels=True)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [10]:
# Shapes of the four data tensors, in order: train input, train target,
# test input, test target.
tuple(
    tensor.shape
    for tensor in (train_input, train_target, test_input, test_target)
)

(torch.Size([1000, 784]),
 torch.Size([1000, 10]),
 torch.Size([1000, 784]),
 torch.Size([1000, 10]))

In [13]:
# Train the two-layer network with full-batch gradient descent: gradients are
# accumulated over every training sample, then a single parameter step is
# taken per epoch. Train and test accuracy are reported after each epoch.
start = time.perf_counter()

N, C = train_target.shape  # training examples and classes
K = 50  # hidden units
D = train_input.size(1)  # input features
n_test = test_input.size(0)  # test examples (need not equal N)

# Scale the targets by zeta before training on them.
# NOTE(review): this rebinds the notebook-level `train_target`/`test_target`,
# so re-running this cell without reloading the data scales them again
# (0.9, 0.81, ...). Re-run the data-loading cell first.
zeta = 0.9
train_target, test_target = train_target * zeta, test_target * zeta

eta = 1e-2 / float(N)  # step size, divided by N because gradients are summed over samples
epochs = 1000
epsilon = 1e-6  # standard deviation of the initial weights

w1 = torch.empty(K, D).normal_(0, epsilon)
b1 = torch.zeros(K)
w2 = torch.empty(C, K).normal_(0, epsilon)
b2 = torch.zeros(C)

for k in range(epochs):

    # Fresh gradient accumulators and counters for this epoch.
    dl_dw1 = torch.zeros(w1.size())
    dl_db1 = torch.zeros(b1.size())
    dl_dw2 = torch.zeros(w2.size())
    dl_db2 = torch.zeros(b2.size())

    loss_ = 0
    train_acc = 0
    test_acc = 0

    # Accumulate loss, accuracy and gradients over the whole training set.
    for sample in range(N):

        x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, train_input[sample])

        loss_ += loss(x2, train_target[sample])

        # A prediction is correct when the arg-max output lands on the
        # non-zero entry of the target row.
        train_acc += int(train_target[sample, x2.argmax(0)] != 0)

        backward_pass(
            w1, b1, w2, b2,
            train_target[sample],
            x0, s1, x1, s2, x2,
            dl_dw1, dl_db1, dl_dw2, dl_db2
        )

    # One gradient step per epoch, using the accumulated gradients.
    w1 -= eta * dl_dw1
    b1 -= eta * dl_db1

    w2 -= eta * dl_dw2
    b2 -= eta * dl_db2

    # Evaluate on the test set; iterate over its own size, not the training size.
    for i in range(n_test):

        test_sample = test_input[i]

        _, _, _, _, v = forward_pass(w1, b1, w2, b2, test_sample)

        test_acc += int(test_target[i, v.argmax(0)] != 0)

    print(f"Epoch {k} | loss: {loss_:.3f} | train accuracy: {(100 * train_acc / N):.3f} | test accuracy: {(100 * test_acc / n_test):.3f}")

duration = time.perf_counter() - start

print(f"Elapsed time for training: {duration} s")

Epoch 0 | loss: 531.439 | train accuracy: 13.300 | test accuracy: 9.900
Epoch 1 | loss: 529.315 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 2 | loss: 527.231 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 3 | loss: 522.861 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 4 | loss: 498.140 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 5 | loss: 478.668 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 6 | loss: 477.812 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 7 | loss: 477.573 | train accuracy: 11.700 | test accuracy: 9.900
Epoch 8 | loss: 476.786 | train accuracy: 11.700 | test accuracy: 15.600
Epoch 9 | loss: 476.443 | train accuracy: 17.000 | test accuracy: 9.900
Epoch 10 | loss: 476.082 | train accuracy: 11.700 | test accuracy: 12.100
Epoch 11 | loss: 476.879 | train accuracy: 14.300 | test accuracy: 15.100
Epoch 12 | loss: 476.380 | train accuracy: 14.000 | test accuracy: 21.200
Epoch 13 | loss: 472.685 | train accuracy: 21.900 | test 