In [37]:
# MNIST handwritten-digit recognition: ~95% accuracy using a 2-layer neural network

In [38]:
import numpy as np
import random

# Fixed seed so that the random weight/bias initialisation below and the
# random.shuffle() calls made during training are reproducible on a fresh
# kernel ("Restart Kernel -> Run All"). Change SEED to try another draw.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# number of neurons in each layer, ordered from the input layer to the output layer
neurons = [784, 30, 10]

# number of layers (the input layer is counted)
layers = len(neurons)

# bias arrays: one (n, 1) column of standard-normal samples per non-input layer
bias = [np.random.randn(y, 1) for y in neurons[1:]]

# weights arrays: one standard-normal matrix per pair of adjacent layers,
# shaped (neurons in the next layer, neurons in the previous layer)
weights = [np.random.randn(y, x) for x, y in zip(neurons[:-1], neurons[1:])]

In [39]:
#sigmoid activation function
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)).

    When z is a numpy array the function is applied element-wise and the
    result has the same shape as z.
    """
    exp_neg_z = np.exp(-z)
    return 1.0 / (1.0 + exp_neg_z)

In [40]:
#feed-forward functionality

def feedforward(a):
    """Propagate the input a through every layer and return the final activation.

    Each layer computes sigmoid(w . activation + b) using the module-level
    `weights` and `bias` lists, pairing them up in order.
    """
    activation = a
    for layer_bias, layer_weights in zip(bias, weights):
        weighted_input = np.dot(layer_weights, activation) + layer_bias
        activation = sigmoid(weighted_input)
    return activation

In [41]:
# stochastic gradient descent

def sgd(training_data, count, mini_batch_size, learning_rate, test_data=None):
    """Train the network with mini-batch stochastic gradient descent.

    training_data   -- mutable sequence of (x, y) pairs supporting slicing.
                       NOTE: it is shuffled IN PLACE at the start of every epoch.
    count           -- number of epochs (full passes over training_data).
    mini_batch_size -- number of pairs per mini-batch; the last batch of an
                       epoch is shorter when the data does not divide evenly.
    learning_rate   -- step size forwarded to update_mini_batch().
    test_data       -- optional; when truthy, evaluate() is run on it after
                       each epoch and "Epoch j: correct / total" is printed,
                       otherwise only "Epoch j complete" is printed.

    Returns None; the trained parameters live in the module-level
    `weights` and `bias`, which update_mini_batch() rebinds.
    """
    n_test = len(test_data) if test_data else 0
    n = len(training_data)

    # range() and single-argument print(...) behave the same on Python 2 and
    # Python 3, unlike the xrange / print-statement forms.
    for j in range(count):
        random.shuffle(training_data)
        mini_batches = [training_data[k:k + mini_batch_size]
                        for k in range(0, n, mini_batch_size)]

        for mini_batch in mini_batches:
            update_mini_batch(mini_batch, learning_rate)

        if test_data:
            print("Epoch {0}: {1} / {2}".format(j, evaluate(test_data), n_test))
        else:
            print("Epoch {0} complete".format(j))

In [42]:
#update weights and bias for each mini batch

def update_mini_batch(mini_batch, learning_rate):
    """Apply one gradient-descent step computed from a single mini-batch.

    mini_batch    -- sized iterable of (x, y) training pairs.
    learning_rate -- step size; the summed gradients are scaled by
                     learning_rate / len(mini_batch).

    Rebinds the module-level `weights` and `bias` to the updated arrays and
    returns None. An empty mini_batch leaves the parameters untouched.
    """
    global weights, bias

    batch_size = len(mini_batch)
    if batch_size == 0:
        # No samples means no gradient; also avoids dividing by zero below.
        return

    gradient_cost_weights = [np.zeros(w.shape) for w in weights]
    gradient_cost_bias = [np.zeros(b.shape) for b in bias]

    # Sum the per-sample gradients returned by back_prop over the batch.
    for x, y in mini_batch:
        delta_c_b, delta_c_w = back_prop(x, y)
        gradient_cost_bias = [gcb + dcb for gcb, dcb in zip(gradient_cost_bias, delta_c_b)]
        gradient_cost_weights = [gcw + dcw for gcw, dcw in zip(gradient_cost_weights, delta_c_w)]

    # Dividing by float(batch_size) forces true division: on Python 2 an
    # integer learning_rate (e.g. 3) divided by an int length would truncate
    # to 0 and silently disable learning.
    step = learning_rate / float(batch_size)

    weights = [w - step * nw for w, nw in zip(weights, gradient_cost_weights)]
    bias = [b - step * nb for b, nb in zip(bias, gradient_cost_bias)]

In [43]:
#back propagation - explanation later

def back_prop(x, y):
    """Compute the gradient of the cost for one training pair (x, y).

    Returns a tuple (nabla_b, nabla_w): two lists with one array per
    non-input layer, shaped like the entries of the module-level `bias`
    and `weights` respectively.

    NOTE(review): x is presumably a column vector matching the input layer
    (it is multiplied by the first weight matrix and later transposed) and
    y an array shaped like the output activation -- confirm against the
    data loader.
    """
    nabla_b = [np.zeros(b.shape) for b in bias]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward, remembering every intermediate value for the backward pass
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []  # list to store all the z vectors (weighted inputs), layer by layer
    for b, w in zip(bias, weights):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    # backward pass: error term of the output layer first
    delta = cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # l counts layers from the end: l = 2 is the second-to-last layer, etc.
    # range() is used instead of xrange() so the loop also runs on Python 3.
    for l in range(2, layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        # pull the error of the following layer back through its weights
        delta = np.dot(weights[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
    return (nabla_b, nabla_w)
    
#cost derivative

def cost_derivative(output_activations, y):
    """Return output_activations minus the target y (element-wise for arrays)."""
    error = output_activations - y
    return error
    
def sigmoid_prime(z):
    """Derivative of the sigmoid function: sigmoid(z) * (1 - sigmoid(z))."""
    s = sigmoid(z)  # evaluated once and reused for both factors
    return s * (1 - s)

def evaluate(test_data):
    """Count the pairs in test_data that the network gets right.

    For every (x, y) pair the prediction is the index of the largest output
    activation of feedforward(x); it counts as correct when it equals y.
    Returns the number of correct predictions as an int.
    """
    correct = 0
    for x, y in test_data:
        predicted = np.argmax(feedforward(x))
        correct += int(predicted == y)
    return correct


In [44]:
#data loader
import mnist_loader

# load_data_wrapper() is unpacked into three datasets, in this order;
# validation_data is loaded but not used by the cells shown here.
mnist_splits = mnist_loader.load_data_wrapper()
training_data, validation_data, test_data = mnist_splits

In [45]:
# Train for 30 epochs with mini-batches of 10 samples and a learning rate of
# 3.0, reporting the number of correctly classified test samples per epoch.
sgd(
    training_data,
    count=30,
    mini_batch_size=10,
    learning_rate=3.0,
    test_data=test_data,
)

Epoch 0: 9020 / 10000
Epoch 1: 9232 / 10000
Epoch 2: 9281 / 10000
Epoch 3: 9289 / 10000
Epoch 4: 9348 / 10000
Epoch 5: 9383 / 10000
Epoch 6: 9346 / 10000
Epoch 7: 9415 / 10000
Epoch 8: 9370 / 10000
Epoch 9: 9421 / 10000
Epoch 10: 9427 / 10000
Epoch 11: 9435 / 10000
Epoch 12: 9439 / 10000
Epoch 13: 9419 / 10000
Epoch 14: 9423 / 10000
Epoch 15: 9453 / 10000
Epoch 16: 9429 / 10000
Epoch 17: 9461 / 10000
Epoch 18: 9467 / 10000
Epoch 19: 9455 / 10000
Epoch 20: 9456 / 10000
Epoch 21: 9447 / 10000
Epoch 22: 9465 / 10000
Epoch 23: 9475 / 10000
Epoch 24: 9455 / 10000
Epoch 25: 9435 / 10000
Epoch 26: 9464 / 10000
Epoch 27: 9456 / 10000
Epoch 28: 9461 / 10000
Epoch 29: 9459 / 10000
