In [1]:
import random


import numpy as np

In [2]:
import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

In [32]:
def sigmoid(z):
    """Calculate the sigmoid function."""
    return 1.0/(1.0 + np.exp(-z))


def sigmoid_prime(z):
    """Calculate the derivateive of sigmoid function."""
    return sigmoid(z) * (1-sigmoid(z))


class Network:
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(m,1) for m in sizes[1:]]
        self.weights = [np.random.randn(m, n)/np.sqrt(m)
                        for n, m in zip(sizes[:-1], sizes[1:])]
        
        self.v_biases = [np.zeros_like(b) for b in self.biases]
        self.v_weights = [np.zeros_like(w) for w in self.weights]
        
        self.deltas = {
            "mse": lambda a, y, z: (a - y) * sigmoid_prime(z),
            "xentropy": lambda a, y, _z: (a - y)
        }
        
    def feedforward(self, a):
        """Walk through each layer of the network."""
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, lmbda, mu,
            test_data=None, cost="xentropy"):
        "Given all these hyperparams, run a stochastic gradient descent."
        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch_matrix(mini_batch, eta, lmbda, mu, cost, n)
            if test_data:
                n_correct = self.evaluate(test_data, cost)
                print("Epoch {}: {} / {}".format(
                    j, n_correct, n_test))
            else:
                print("Epoch {} complete.".format(j))

    def update_mini_batch_matrix(self, mini_batch, eta, lmbda, mu, cost, n):
        arr = np.array(mini_batch)
        x_mat, y_mat = (np.hstack(arr[:,0]), np.hstack(arr[:,1]))
        nabla_b, nabla_w = self.backprop_matrix(x_mat, y_mat, cost)
        # nabla_b, nabla_w are collapsed into a single column vector each
        m = float(len(mini_batch))
        self.v_b = [mu * v_b - (eta/m) * nb for v_b, nb in zip(self.v_biases, nabla_b)]
        self.biases = [b + v_b for b, v_b in zip(self.biases, self.v_biases)]
        self.v_weights = [(mu - eta * lmbda / n) * v_w - (eta/m)*nw for v_w, nw in zip(self.v_weights, nabla_w)]
        self.weights = [w + v_w for w, v_w in zip(self.weights, self.v_weights)]

        
    def backprop_matrix(self, x_mat, y_mat, cost):
        num_inputs = x_mat.shape[1]
        deltas = [np.zeros((b.shape[0], num_inputs)) for b in self.biases] # index by layer
        nabla_w = [np.zeros(w.shape + (num_inputs,)) for w in self.weights]
        activations = [x_mat]
        zs = []
        
        for b, w in zip(self.biases, self.weights):
            zs.append(np.matmul(w, activations[-1]) + b)
            activations.append(sigmoid(zs[-1]))

        last_error = self.deltas[cost](activations[-1], y_mat, zs[-1])
        
        deltas[-1] = last_error
        
        nabla_w[-1] = np.matmul(last_error, activations[-2].transpose())
        
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            error = np.matmul(self.weights[-l+1].transpose(), deltas[-1]) * sp
            deltas[-l] = error
            ac = activations[-l]
            nabla_w[-l] = np.matmul(error, activations[-l-1].transpose())
                   
        return [nb.sum(axis=1, keepdims=True) for nb in deltas], nabla_w
            
    def evaluate(self, test_data, cost):
        "Return # of correct results"
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for x, y in test_results)

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)



In [39]:
net = Network([784, 30, 10])
net.SGD(training_data[:1000], 50, 10, 0.5, 0.1, 0.0, test_data=test_data, cost="xentropy")

Epoch 0: 7539 / 10000
Epoch 1: 8476 / 10000
Epoch 2: 8462 / 10000
Epoch 3: 8618 / 10000
Epoch 4: 8673 / 10000
Epoch 5: 8685 / 10000
Epoch 6: 8662 / 10000
Epoch 7: 8621 / 10000
Epoch 8: 8691 / 10000
Epoch 9: 8675 / 10000
Epoch 10: 8708 / 10000
Epoch 11: 8719 / 10000
Epoch 12: 8714 / 10000
Epoch 13: 8728 / 10000
Epoch 14: 8697 / 10000
Epoch 15: 8723 / 10000
Epoch 16: 8706 / 10000
Epoch 17: 8709 / 10000
Epoch 18: 8739 / 10000
Epoch 19: 8734 / 10000
Epoch 20: 8722 / 10000
Epoch 21: 8732 / 10000
Epoch 22: 8718 / 10000
Epoch 23: 8729 / 10000
Epoch 24: 8730 / 10000
Epoch 25: 8722 / 10000
Epoch 26: 8737 / 10000
Epoch 27: 8717 / 10000
Epoch 28: 8730 / 10000
Epoch 29: 8708 / 10000
Epoch 30: 8717 / 10000
Epoch 31: 8724 / 10000
Epoch 32: 8728 / 10000
Epoch 33: 8722 / 10000
Epoch 34: 8723 / 10000
Epoch 35: 8720 / 10000
Epoch 36: 8738 / 10000
Epoch 37: 8720 / 10000
Epoch 38: 8705 / 10000
Epoch 39: 8725 / 10000
Epoch 40: 8723 / 10000
Epoch 41: 8715 / 10000
Epoch 42: 8714 / 10000
Epoch 43: 8713 / 1000

In [38]:
net = Network([784, 30, 10])
net.SGD(training_data[:1000], 50, 10, 0.5, 0.1, 0.1, test_data=test_data, cost="xentropy")

Epoch 0: 7862 / 10000
Epoch 1: 8123 / 10000
Epoch 2: 8500 / 10000
Epoch 3: 8659 / 10000
Epoch 4: 8694 / 10000
Epoch 5: 8654 / 10000
Epoch 6: 8668 / 10000
Epoch 7: 8733 / 10000
Epoch 8: 8663 / 10000
Epoch 9: 8720 / 10000
Epoch 10: 8662 / 10000
Epoch 11: 8724 / 10000
Epoch 12: 8735 / 10000
Epoch 13: 8716 / 10000
Epoch 14: 8765 / 10000
Epoch 15: 8749 / 10000
Epoch 16: 8752 / 10000
Epoch 17: 8767 / 10000
Epoch 18: 8755 / 10000
Epoch 19: 8714 / 10000
Epoch 20: 8752 / 10000
Epoch 21: 8751 / 10000
Epoch 22: 8738 / 10000
Epoch 23: 8762 / 10000
Epoch 24: 8737 / 10000
Epoch 25: 8753 / 10000
Epoch 26: 8760 / 10000
Epoch 27: 8742 / 10000
Epoch 28: 8766 / 10000
Epoch 29: 8758 / 10000
Epoch 30: 8734 / 10000
Epoch 31: 8750 / 10000
Epoch 32: 8743 / 10000
Epoch 33: 8734 / 10000
Epoch 34: 8743 / 10000
Epoch 35: 8751 / 10000
Epoch 36: 8755 / 10000
Epoch 37: 8761 / 10000
Epoch 38: 8747 / 10000
Epoch 39: 8747 / 10000
Epoch 40: 8760 / 10000
Epoch 41: 8754 / 10000
Epoch 42: 8752 / 10000
Epoch 43: 8749 / 1000