# **Handwriting Classifier**
**From “Neural Networks and Deep Learning,” by Michael A. Nielsen**

<br>
A simple module to implement the stochastic gradient descent learning algorithm
for a feedforward neural network.

We evaluate the implementation as a classifier for handwritten digits (0–9).
Training and testing data is from the MNIST data set.

In [None]:
import random
import numpy as np

In [None]:
class Network(object):
  '''
  A simple feedforward neural network which uses the stochastic gradient descent
  learning algorithm. Gradients are computed using backpropagation.

  Attributes set by the constructor:
    numLayers -- number of layers, counting the input layer.
    sizes     -- the layer sizes exactly as passed to the constructor.
    biases    -- one (y, 1) array per non-input layer of y neurons.
    weights   -- one (y, x) array per pair of adjacent layers, where x is the
                 size of the earlier layer and y the size of the later one.
  '''

  def __init__(self, sizes):
    '''
    Initialize the Network using random weights and biases. (There exist better
    ways of initializing weights and biases.) Assume the first layer of neurons
    is an input layer and do not set biases for this layer.

    sizes -- sequence giving the number of neurons in each layer, input layer
             first (for example [784, 30, 10]).
    '''
    self.numLayers = len(sizes)
    self.sizes = sizes
    # Biases are drawn before weights; keep this order so that a seeded
    # np.random produces the same initial network as before.
    self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
    self.weights = [
      np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])
    ]

  def feedforward(self, a):
    '''
    Compute the output of the network for the input vector a.

    a is propagated through every layer as sigmoid(w . a + b); the activation
    of the final layer is returned.
    '''
    for b, w in zip(self.biases, self.weights):
      a = sigmoid(np.dot(w, a) + b)

    return a

  def SGD(self, trainingData, epochs, miniBatchSize, eta, testData=None):
    '''
    Train the neural network using mini-batch stochastic gradient descent.

    trainingData  -- iterable of (x, y) pairs: an input and its desired output.
    epochs        -- number of passes over the training data.
    miniBatchSize -- number of samples per gradient step; must be >= 1.
    eta           -- learning rate.
    testData      -- optional iterable of (x, label) pairs. When it is given
                     and non-empty, the number of correctly classified test
                     inputs is printed after every epoch.

    Raises ValueError if miniBatchSize is less than 1.
    '''
    if miniBatchSize < 1:
      # A negative step would make range() below empty and silently skip all
      # training; zero would only fail once the first epoch starts.
      raise ValueError("miniBatchSize must be at least 1")

    # Work on private lists: the caller's data is not reordered by the shuffle
    # below, and one-shot iterables (e.g. zip objects) are accepted.
    trainingData = list(trainingData)
    if testData is not None:
      testData = list(testData)

    nTest = len(testData) if testData else 0
    n = len(trainingData)

    for j in range(epochs):
      random.shuffle(trainingData)

      for k in range(0, n, miniBatchSize):
        self.updateMiniBatch(trainingData[k:(k + miniBatchSize)], eta)

      if testData:
        print("Epoch {0} : {1} / {2}".format(j, self.evaluate(testData), nTest))
      else:
        print("Epoch {0} complete.".format(j))

  def updateMiniBatch(self, miniBatch, eta):
    '''
    Update the network's weights and biases by applying gradient descent using
    backpropagation to a single mini batch.

    miniBatch -- sequence of (x, y) training pairs.
    eta       -- learning rate; each parameter moves by eta times the gradient
                 averaged over the mini batch.

    An empty mini batch leaves the network unchanged.
    '''
    batchSize = len(miniBatch)
    if batchSize == 0:
      return  # Nothing to average over; also avoids dividing by zero below.

    Vb = [np.zeros(b.shape) for b in self.biases]  # "del-b"
    Vw = [np.zeros(w.shape) for w in self.weights] # "del-w"

    for x, y in miniBatch:
      dVb, dVw = self.backprop(x, y) # "delta-del-b," "delta-del-w"

      Vb = [nb + dnb for nb, dnb in zip(Vb, dVb)]
      Vw = [nw + dnw for nw, dnw in zip(Vw, dVw)]

    # The step factor is the same for every layer, so compute it once.
    step = eta / batchSize

    self.weights = [w - nw * step for w, nw in zip(self.weights, Vw)]
    self.biases = [b - nb * step for b, nb in zip(self.biases, Vb)]

  def backprop(self, x, y):
    '''
    Compute a tuple (Vb, Vw) representing the gradient of the cost function.

    Vb and Vw ("del-b and del-w") are layer-by-layer lists of numpy arrays,
    similar to self.biases and self.weights.

    x -- network input; it must be a column vector with sizes[0] rows so that
         the products with the weight matrices are defined.
    y -- desired output, compared against the final activation.
    '''
    # Every entry is assigned in the backward pass, so placeholders suffice.
    Vb = [None] * len(self.biases)
    Vw = [None] * len(self.weights)

    # Feedforward, remembering every weighted input z and every activation.
    activation = x
    activations = [x] # A list to store all activations, layer by layer.
    zs = []           # A list to store all z vectors, layer by layer.

    for b, w in zip(self.biases, self.weights):
      z = np.dot(w, activation) + b
      zs.append(z)
      activation = sigmoid(z)
      activations.append(activation)

    # Backward pass, starting from the error at the output layer.
    delta = self.costDerivative(activations[-1], y) * sigmoidPrime(zs[-1])

    Vb[-1] = delta
    Vw[-1] = np.dot(delta, activations[-2].transpose())

    # layer = 1 ==> last layer, layer = 2 ==> second-last layer, and so on.
    for layer in range(2, self.numLayers):
      sp = sigmoidPrime(zs[-layer])

      # Pull the error back through the weights of the following layer.
      delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp

      Vb[-layer] = delta
      Vw[-layer] = np.dot(delta, activations[-layer - 1].transpose())

    return (Vb, Vw)

  def evaluate(self, testData):
    '''
    Compute the number of test inputs for which the neural network outputs the
    correct result.

    Assume the output is the index of whichever neuron in the final layer has
    the highest activation.

    testData -- iterable of (x, y) pairs where y is the expected index.
    '''
    return sum(
      int(np.argmax(self.feedforward(x)) == y) for (x, y) in testData
    )

  def costDerivative(self, outputActivations, y):
    '''
    Compute the vector of partial derivatives (of the cost function with respect
    to activation a) for the output activations.

    Returns outputActivations - y.
    '''
    return (outputActivations - y)

'''
GENERAL HELPER FUNCTIONS
'''

def sigmoid(z):
  '''
  Compute the sigmoid function 1 / (1 + exp(-z)) of z.

  z may be a real number or a numpy array; for an array the function is applied
  element by element and the result has the same shape.
  '''
  # For very negative z, exp(-z) overflows to inf and the quotient is exactly
  # 0.0, which is the correct limit. Silence only that overflow warning here
  # instead of letting it surface during training.
  with np.errstate(over='ignore'):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidPrime(z):
  '''
  Compute the derivative of the sigmoid function at z.

  z may be a real number or a numpy array; for an array the derivative is
  evaluated element by element.
  '''
  # Evaluate sigmoid once and reuse it: sigma'(z) = sigma(z) * (1 - sigma(z)).
  s = sigmoid(z)

  return s * (1 - s)

### **Training and Evaluation**

Train and evaluate the neural network as a handwriting classifier using images from the MNIST data set.

In [None]:
import mnist_loader
# Load the three data splits from the mnist_loader module. trainingData and
# testData are wrapped in list() and passed to Network.SGD in the next cell,
# which consumes their items as (x, y) pairs.
# NOTE(review): validationData is not used anywhere in the visible code.
trainingData, validationData, testData = mnist_loader.load_data_wrapper()

In [None]:
# Layer sizes: 784 input neurons, one hidden layer of 30 neurons, and 10 output
# neurons (the classifier picks the output neuron with the highest activation).
net = Network([784, 30, 10])

# SGD calls len() on its data and shuffles the training set, so both splits
# are materialized as lists before being handed over.
net.SGD(
  list(trainingData),
  epochs=30,
  miniBatchSize=10,
  eta=3.0,
  testData=list(testData),
)

Epoch 0 : 9157 / 10000
Epoch 1 : 9284 / 10000
Epoch 2 : 9289 / 10000
Epoch 3 : 9353 / 10000
Epoch 4 : 9321 / 10000
Epoch 5 : 9418 / 10000
Epoch 6 : 9452 / 10000
Epoch 7 : 9469 / 10000
Epoch 8 : 9467 / 10000
Epoch 9 : 9468 / 10000
Epoch 10 : 9508 / 10000
Epoch 11 : 9501 / 10000
Epoch 12 : 9487 / 10000
Epoch 13 : 9513 / 10000
Epoch 14 : 9508 / 10000
Epoch 15 : 9537 / 10000
Epoch 16 : 9541 / 10000
Epoch 17 : 9526 / 10000
Epoch 18 : 9536 / 10000
Epoch 19 : 9541 / 10000
Epoch 20 : 9522 / 10000
Epoch 21 : 9514 / 10000
Epoch 22 : 9543 / 10000
Epoch 23 : 9539 / 10000
Epoch 24 : 9546 / 10000
Epoch 25 : 9538 / 10000
Epoch 26 : 9544 / 10000
Epoch 27 : 9533 / 10000
Epoch 28 : 9548 / 10000
Epoch 29 : 9533 / 10000
