In [18]:
import random

import numpy as np

In [19]:
import mnist_loader

# Load the three dataset splits via the project-local mnist_loader helper.
# NOTE(review): later cells call len() on training_data and slice it
# (training_data[:1000]), so the loader presumably returns list-like
# sequences rather than lazy iterators — confirm against mnist_loader.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

In [20]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)); element-wise when z is an array."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator


def sigmoid_prime(z):
    """Derivative of the sigmoid function evaluated at z: s(z) * (1 - s(z))."""
    s = sigmoid(z)
    return s * (1 - s)


class Network:
    """Fully connected feed-forward network of sigmoid neurons.

    Training is mini-batch stochastic gradient descent; the gradient for a
    whole mini-batch is computed in one pass with matrix operations, each
    column of the input matrix being one training example.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        sizes -- list with the number of neurons per layer, input layer
                 first.  Layer i+1 gets a (sizes[i+1], 1) bias column and a
                 (sizes[i+1], sizes[i]) weight matrix, both drawn from a
                 standard normal distribution.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(m, 1) for m in sizes[1:]]
        self.weights = [np.random.randn(m, n)
                        for n, m in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input ``a``.

        ``a`` is a column vector (or a matrix with one example per column);
        it is passed through every layer in turn.
        """
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        training_data   -- iterable of (x, y) pairs, each a column vector.
        epochs          -- number of full passes over the training data.
        mini_batch_size -- number of examples per gradient step (the last
                           batch of an epoch may be smaller).
        eta             -- learning rate.
        test_data       -- optional iterable of (x, label) pairs; when given
                           and non-empty, the number of correctly classified
                           examples is printed after every epoch.

        The caller's ``training_data`` is left untouched: shuffling happens
        on a private copy.
        """
        # Copy so that random.shuffle does not reorder the caller's list, and
        # so that one-shot iterables (e.g. zip objects) can be re-used,
        # measured with len() and sliced.
        training_data = list(training_data)
        if test_data is not None:
            test_data = list(test_data)
        n_test = len(test_data) if test_data else 0
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch_matrix(mini_batch, eta)
            if test_data:
                print("Epoch {}: {} / {}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {} complete.".format(j))

    def update_mini_batch_matrix(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        mini_batch -- sequence of (x, y) column-vector pairs.
        eta        -- learning rate; the summed gradient is scaled by
                      eta / len(mini_batch).

        An empty mini-batch is a no-op.
        """
        m = len(mini_batch)
        if m == 0:
            return
        # Stack inputs and targets separately.  Building one np.array from
        # the (x, y) tuples would create a ragged array (x and y generally
        # have different heights), which NumPy >= 1.24 rejects.
        x_mat = np.hstack([x for x, _ in mini_batch])
        y_mat = np.hstack([y for _, y in mini_batch])
        # nabla_b / nabla_w are already summed over the examples of the batch.
        nabla_b, nabla_w = self.backprop_matrix(x_mat, y_mat)
        self.biases = [b - (eta/m)*nb for b, nb in zip(self.biases, nabla_b)]
        self.weights = [w - (eta/m)*nw for w, nw in zip(self.weights, nabla_w)]

    def backprop_matrix(self, x_mat, y_mat):
        """Back-propagate a whole mini-batch at once.

        x_mat -- inputs, one example per column.
        y_mat -- desired outputs, one example per column.

        Returns ``(nabla_b, nabla_w)``: two lists, indexed by layer, holding
        the gradient of the cross-entropy cost summed over all examples.
        Each ``nabla_b[i]`` is a column with the shape of ``self.biases[i]``
        and each ``nabla_w[i]`` has the shape of ``self.weights[i]``.
        """
        # Forward pass, remembering weighted inputs and activations per layer.
        activations = [x_mat]
        zs = []
        for b, w in zip(self.biases, self.weights):
            zs.append(np.matmul(w, activations[-1]) + b)
            activations.append(sigmoid(zs[-1]))

        # Per-layer errors (one column per example) and weight gradients.
        deltas = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)

        # Output-layer error for the cross-entropy cost.  For a quadratic
        # (MSE) cost it would additionally be multiplied by
        # sigmoid_prime(zs[-1]).
        deltas[-1] = activations[-1] - y_mat
        # The matrix product sums the per-example outer products.
        nabla_w[-1] = np.matmul(deltas[-1], activations[-2].transpose())

        # Walk backwards; l counts layers from the end (l=2 is the last
        # hidden layer).  The error of layer -l is derived from the error of
        # the layer directly after it, i.e. deltas[-l+1].
        for l in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-l])
            deltas[-l] = np.matmul(self.weights[-l+1].transpose(),
                                   deltas[-l+1]) * sp
            nabla_w[-l] = np.matmul(deltas[-l], activations[-l-1].transpose())

        # Collapse the per-example bias errors into one column per layer.
        return [d.sum(axis=1, keepdims=True) for d in deltas], nabla_w

    def evaluate(self, test_data):
        """Return the number of examples in ``test_data`` classified correctly.

        Each element is an (x, y) pair where y is compared for equality with
        the index of the strongest output neuron, so y must be a class index
        rather than a vector.
        """
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for x, y in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the difference between the actual and the desired output."""
        return (output_activations - y)

In [21]:
# Layer sizes: 784 inputs, one hidden layer of 30 neurons, 10 outputs.
net = Network(sizes=[784, 30, 10])

In [22]:
# Train for 10 epochs with mini-batches of 9 examples and learning rate 0.5,
# printing the number of correctly classified test examples after each epoch.
net.SGD(training_data, epochs=10, mini_batch_size=9, eta=0.5,
        test_data=test_data)

Epoch 0: 9108 / 10000
Epoch 1: 9209 / 10000
Epoch 2: 9304 / 10000
Epoch 3: 9375 / 10000
Epoch 4: 9362 / 10000
Epoch 5: 9397 / 10000
Epoch 6: 9425 / 10000
Epoch 7: 9416 / 10000
Epoch 8: 9453 / 10000
Epoch 9: 9437 / 10000


In [24]:
# Overfitting demo: start from a fresh network and train it for 100 epochs on
# only the first 1000 training examples, still scoring on the full test set.
net = Network(sizes=[784, 30, 10])
net.SGD(training_data[:1000], epochs=100, mini_batch_size=10, eta=0.5,
        test_data=test_data)

Epoch 0: 5544 / 10000
Epoch 1: 6954 / 10000
Epoch 2: 7166 / 10000
Epoch 3: 7537 / 10000
Epoch 4: 7815 / 10000
Epoch 5: 7901 / 10000
Epoch 6: 8061 / 10000
Epoch 7: 8037 / 10000
Epoch 8: 8119 / 10000
Epoch 9: 8114 / 10000
Epoch 10: 8174 / 10000
Epoch 11: 8223 / 10000
Epoch 12: 8236 / 10000
Epoch 13: 8233 / 10000
Epoch 14: 8257 / 10000
Epoch 15: 8256 / 10000
Epoch 16: 8230 / 10000
Epoch 17: 8284 / 10000
Epoch 18: 8299 / 10000
Epoch 19: 8279 / 10000
Epoch 20: 8292 / 10000
Epoch 21: 8336 / 10000
Epoch 22: 8331 / 10000
Epoch 23: 8330 / 10000
Epoch 24: 8349 / 10000
Epoch 25: 8334 / 10000
Epoch 26: 8367 / 10000
Epoch 27: 8336 / 10000
Epoch 28: 8384 / 10000
Epoch 29: 8375 / 10000
Epoch 30: 8365 / 10000
Epoch 31: 8372 / 10000
Epoch 32: 8390 / 10000
Epoch 33: 8390 / 10000
Epoch 34: 8382 / 10000
Epoch 35: 8368 / 10000
Epoch 36: 8385 / 10000
Epoch 37: 8392 / 10000
Epoch 38: 8401 / 10000
Epoch 39: 8388 / 10000
Epoch 40: 8404 / 10000
Epoch 41: 8395 / 10000
Epoch 42: 8389 / 10000
Epoch 43: 8403 / 1000