In [13]:
import numpy as np
import random

In [14]:
class Network():
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent,
    with gradients computed by backpropagation.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Sequence of layer widths, input layer first. One bias
                column of shape ``(y, 1)`` is drawn for every layer after
                the first, and one weight matrix of shape ``(y, x)`` for
                every pair of consecutive layers of widths ``x`` and ``y``.
        """
        self.num_layers = len(sizes)
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, inputs):
        """Return the output of the network for ``inputs``.

        ``inputs`` is assumed to be a column vector of shape
        ``(sizes[0], 1)`` so that it lines up with the first weight matrix.
        """
        for biases, weights in zip(self.biases, self.weights):
            inputs = sigmoid(np.dot(weights, inputs) + biases)
        return inputs

    def SGD(self, training_data, epochs, batch_size, lr, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: List of ``(x, y)`` pairs. It is shuffled IN PLACE
                at the start of every epoch, so it must be a mutable
                sequence that supports ``len``.
            epochs: Number of passes over ``training_data``.
            batch_size: Number of examples per mini-batch (the last batch of
                an epoch may be smaller).
            lr: Learning rate.
            test_data: Optional collection of ``(x, label)`` pairs. When it
                is given and non-empty, the number of correctly classified
                test examples is printed after each epoch.
        """
        n_test = len(test_data) if test_data else 0
        n = len(training_data)
        for epoch in range(epochs):
            random.shuffle(training_data)
            batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for batch in batches:
                self.update_params(batch, lr)

            if test_data:
                print("Epoch {0}: {1} / {2}".format(epoch, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(epoch))

    def update_params(self, batch, lr):
        """Apply one gradient-descent step using the examples in ``batch``.

        The per-example gradients returned by ``backprop`` are summed, and
        each parameter is moved against the summed gradient scaled by
        ``lr / len(batch)`` (i.e. against the batch-average gradient).
        An empty batch is a no-op.
        """
        if not batch:
            # Nothing to average over; also avoids dividing by zero below.
            return

        new_b = [np.zeros(b.shape) for b in self.biases]
        new_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in batch:
            # NOTE: backprop returns the weight gradients first.
            delta_w, delta_b = self.backprop(x, y)
            new_b = [nb + db for nb, db in zip(new_b, delta_b)]
            new_w = [nw + dw for nw, dw in zip(new_w, delta_w)]

        step = lr / len(batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, new_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, new_b)]

    def backprop(self, x, y):
        """Return the gradient of the cost for a single example.

        Args:
            x: Network input; assumed to be a column vector of shape
                ``(sizes[0], 1)``.
            y: Desired output, broadcast-compatible with the output layer.

        Returns:
            A tuple ``(nabla_w, nabla_b)`` of layer-by-layer lists of arrays
            shaped like ``self.weights`` and ``self.biases`` respectively.
        """
        # Forward pass: record every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            # Each layer consumes the PREVIOUS layer's activation, not the
            # raw network input.
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass.
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Error at the output layer.
        delta = self.cost_deriv(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Walk backwards through the remaining layers; L = 2 is the
        # second-to-last layer, addressed with negative indices.
        for L in range(2, self.num_layers):
            delta = np.dot(self.weights[-L + 1].transpose(), delta) * sigmoid_prime(zs[-L])
            nabla_b[-L] = delta
            # Outer product of this layer's error with the incoming
            # activation yields an array shaped like the weight matrix.
            nabla_w[-L] = np.dot(delta, activations[-L - 1].transpose())

        return (nabla_w, nabla_b)

    def cost_deriv(self, prediction, y):
        """Return the derivative of the squared-error cost.

        The derivative of ``(prediction - y) ** 2`` with respect to
        ``prediction`` is ``2 * (prediction - y)``.
        """
        return 2 * (prediction - y)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
    
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + e**-z)`` to ``z``.

    Works element-wise when ``z`` is a NumPy array.
    """
    decay = np.exp(-z)
    return 1.0 / (1.0 + decay)

def sigmoid_prime(z):
    """Return the derivative of the sigmoid evaluated at ``z``.

    Uses the identity ``s'(z) = s(z) * (1 - s(z))``.
    """
    s = sigmoid(z)
    return s * (1 - s)
        
                
                                               