In [1]:
# Lab 3
# Implement in Python + Numpy
# a Neural Network ( sigmoid( W0 * sigmoid( W1 * x ) ) )
# + Gradient Descent.
# Use 1/2 L2 as loss function.

import pickle
import numpy as np

BATCH_SIZE = 64

with open('data.pkl', 'rb') as f:
    data = pickle.load(f)
training_data, test_data = data[0], data[2]

In [2]:
np.random.seed( 1000 )

n_input, n_hidden, n_output = 784, 30, 10
biases = [ np.random.randn(n_hidden, 1), np.random.randn(n_output, 1) ]
weights = [ np.random.randn(n_hidden, n_input), np.random.randn(n_output, n_hidden) ]

n_epochs, lr = 100, 3.

In [3]:
def sigmoid(z, deriv = False):
    if deriv:
        sig = sigmoid(z)
        return sig * (1. - sig)
    else:
        return 1. / (1. + np.exp(-z))


def forward(x):
    wxb0 = np.dot(weights[0], x) + biases[0]
    hidden = sigmoid(wxb0)
    wxb1 = np.dot(weights[1], hidden) + biases[1]
    output = sigmoid(wxb1)
    return wxb0, hidden, wxb1, output
  

def backprop(x, y):
    nabla_b = [ np.zeros(biases[0].shape), np.zeros(biases[1].shape) ]
    nabla_w = [ np.zeros(weights[0].shape), np.zeros(weights[1].shape) ]
  
    # forward pass
    wxb0, hidden, wxb1, output = forward( x )
  
    # backward pass
    nabla_b[1] = (output - y) * sigmoid(wxb1, deriv=True) / 10
    nabla_w[1] = np.dot(nabla_b[1], hidden.T)
    
    nabla_b[0] = np.dot(weights[1].T, nabla_b[1]) * sigmoid(wxb0, deriv = True)
    nabla_w[0] = np.dot(nabla_b[0], x.T)
    
    nabla_b[1] = np.mean(nabla_b[1], axis=1)[..., np.newaxis]
    nabla_b[0] = np.mean(nabla_b[0], axis=1)[..., np.newaxis]
    
    nabla_w[1] /= BATCH_SIZE
    nabla_w[0] /= BATCH_SIZE
    
    return nabla_w, nabla_b

In [4]:
def batch(iterable, n=BATCH_SIZE):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]


for ep in range(n_epochs):
    # train
    nabla_w = [ np.zeros(weights[0].shape), np.zeros(weights[1].shape) ]
    nabla_b = [ np.zeros(biases[0].shape), np.zeros(biases[1].shape) ]
    for batch_ in batch(training_data):
        x, y = list(map(lambda p: p[0], batch_)), list(map(lambda p: p[1], batch_))
        x, y = np.concatenate(x, axis=1), np.concatenate(y, axis=1)
        nabla_wi, nabla_bi = backprop(x, y)
        
        weights = [weights[0] - lr * nabla_wi[0], weights[1] - lr * nabla_wi[1]]
        biases = [biases[0] - lr * nabla_bi[0], biases[1] - lr * nabla_bi[1]]

    # evaluate
    s = 0
    for x, y in test_data:
        _, _, _, output = forward( x )
        s += int(np.argmax(output) == y)
    print("Epoch {} : {} / {}".format( ep, s, len(test_data) ))

Epoch 0 : 4489 / 10000
Epoch 1 : 6013 / 10000
Epoch 2 : 6600 / 10000
Epoch 3 : 7031 / 10000
Epoch 4 : 7657 / 10000
Epoch 5 : 7915 / 10000
Epoch 6 : 8082 / 10000
Epoch 7 : 8181 / 10000
Epoch 8 : 8287 / 10000
Epoch 9 : 8375 / 10000
Epoch 10 : 8460 / 10000
Epoch 11 : 8531 / 10000
Epoch 12 : 8567 / 10000
Epoch 13 : 8614 / 10000
Epoch 14 : 8651 / 10000
Epoch 15 : 8670 / 10000
Epoch 16 : 8709 / 10000
Epoch 17 : 8750 / 10000
Epoch 18 : 8773 / 10000
Epoch 19 : 8793 / 10000
Epoch 20 : 8820 / 10000
Epoch 21 : 8835 / 10000
Epoch 22 : 8851 / 10000
Epoch 23 : 8868 / 10000
Epoch 24 : 8877 / 10000
Epoch 25 : 8888 / 10000
Epoch 26 : 8901 / 10000
Epoch 27 : 8915 / 10000
Epoch 28 : 8927 / 10000
Epoch 29 : 8933 / 10000
Epoch 30 : 8943 / 10000
Epoch 31 : 8954 / 10000
Epoch 32 : 8972 / 10000
Epoch 33 : 8982 / 10000
Epoch 34 : 8999 / 10000
Epoch 35 : 9005 / 10000
Epoch 36 : 9013 / 10000
Epoch 37 : 9018 / 10000
Epoch 38 : 9028 / 10000
Epoch 39 : 9036 / 10000
Epoch 40 : 9045 / 10000
Epoch 41 : 9056 / 10000
Ep