# Neural Network Code Implementation

## 1: Libraries 

In [None]:
import random
import mnist_loader
import numpy as np

## 2: Class definition

In [None]:
# NOTE(review): this notebook cell contains only the class header. The
# definitions in the following cells (`__init__`, `feedforward`, `SGD`,
# `update_mini_batch`) all take `self` and are presumably meant to be
# indented under this header as its methods -- confirm before running the
# cells as they stand.
class Network(object):

### 2.1: Initialization 

In [None]:
def __init__(self, sizes):
    """Create randomly initialised biases and weights for every layer.

    ``sizes[i]`` is the number of neurons in layer ``i``; for example
    ``[2, 3, 1]`` describes 2 inputs, 3 hidden neurons and 1 output.
    The input layer has no biases, so ``self.biases`` and
    ``self.weights`` each hold ``len(sizes) - 1`` arrays, all filled by
    ``np.random.randn``.
    """
    self.num_layers = len(sizes)
    self.sizes = sizes

    layer_inputs = sizes[:-1]    # e.g. [2, 3]: neurons feeding each layer
    layer_outputs = sizes[1:]    # e.g. [3, 1]: neurons in each non-input layer

    # One column vector per non-input layer:
    # sizes == [2, 3, 1] gives shapes (3, 1) and (1, 1).
    self.biases = [np.random.randn(n_out, 1) for n_out in layer_outputs]

    # One (n_out, n_in) matrix per pair of adjacent layers, so that
    # np.dot(weights[i], activation) maps layer i onto layer i + 1:
    # sizes == [2, 3, 1] gives shapes (3, 2) and (1, 3).
    self.weights = [
        np.random.randn(n_out, n_in)
        for n_in, n_out in zip(layer_inputs, layer_outputs)
    ]

For example, consider a $[2,3,1]$ neural network: 2 neurons in the input layer, 3 neurons in the hidden layer, and 1 neuron in the output layer.

The calculation of the second (hidden) layer, before the activation function defined in Section 2.2 is applied, can be expressed as:


$$
\left[\begin{array}{cc}
w_{11} & w_{12} \\
w_{21} & w_{22} \\
w_{31} & w_{32}
\end{array}\right] \left[\begin{array}{c}
x_{11} \\
x_{12}
\end{array}\right]+\left[\begin{array}{c}
b_1 \\
b_2 \\
b_3
\end{array}\right]=\left[\begin{array}{c}
x_{21} \\
x_{22} \\
x_{23}
\end{array}\right]
$$


The calculation of the third (output) layer can be expressed as:

$$
\left[\begin{array}{ccc}
w_{11} & w_{12} & w_{13}
\end{array}\right] \left[\begin{array}{c}
x_{21} \\
x_{22} \\
x_{23}
\end{array}\right] + [b_4] = [x_{out}]
$$

### 2.2: Definition of activation function   

In [None]:
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))``, element-wise.

    ``z`` may be a scalar, a NumPy array, or anything ``np.asarray``
    accepts. Scalar input yields a NumPy scalar; array input yields an
    array of the same shape.

    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Only ``exp(-|z|)`` is evaluated here, which lies in
    ``[0, 1]``, so no overflow can occur:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # The numerator selects which of the two equivalent forms applies.
    numerator = np.where(z >= 0, 1.0, decay)
    result = numerator / (1.0 + decay)
    # Indexing with () unwraps a 0-d result to a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]
def feedforward(self, a):
    """Return the network's output for the input activation ``a``.

    Each layer's bias/weight pair is applied in order: the current
    activation is multiplied by the weights with ``np.dot``, the bias is
    added, and the result is passed through ``sigmoid``.
    """
    activation = a
    for layer_bias, layer_weight in zip(self.biases, self.weights):
        weighted_input = np.dot(layer_weight, activation) + layer_bias
        activation = sigmoid(weighted_input)
    return activation

`feedforward` returns the output of the network for the input $a$.

### 2.3: Stochastic Gradient Descent 

In [None]:
def SGD(self, training_data, epochs, mini_batch_size, eta,
        test_data=None):
    """Train the network with mini-batch stochastic gradient descent.

    ``training_data`` is copied into a list and reshuffled at the start
    of each of the ``epochs`` passes. Every consecutive slice of
    ``mini_batch_size`` items is handed to ``self.update_mini_batch``
    together with the learning rate ``eta``.

    If ``test_data`` is truthy it is also copied into a list, and after
    each epoch the value of ``self.evaluate(test_data)`` is printed next
    to the number of test items. Otherwise only a completion message is
    printed.
    """
    samples = list(training_data)
    total = len(samples)

    if test_data:
        test_data = list(test_data)
        n_test = len(test_data)

    for epoch in range(epochs):
        # A new random order each epoch, so the mini-batches differ.
        random.shuffle(samples)
        for start in range(0, total, mini_batch_size):
            batch = samples[start:start + mini_batch_size]
            self.update_mini_batch(batch, eta)

        if not test_data:
            print("Epoch {} complete".format(epoch))
            continue
        score = self.evaluate(test_data)
        print("Epoch {} : {} / {}".format(epoch, score, n_test))


In [None]:
def update_mini_batch(self, mini_batch, eta):
    """Apply one gradient-descent step computed from a single mini-batch.

    ``mini_batch`` is an iterable of ``(x, y)`` pairs and ``eta`` is the
    learning rate. For every pair, ``self.backprop(x, y)`` must return
    ``(delta_nabla_b, delta_nabla_w)``: two lists of arrays laid out like
    ``self.biases`` and ``self.weights``. These are summed over the
    batch, and each parameter is moved by ``eta / len(mini_batch)`` times
    its summed gradient.

    An empty mini-batch leaves the network unchanged (previously it
    raised ``ZeroDivisionError``).
    """
    # Materialise once so iterators/generators are accepted and len() works.
    mini_batch = list(mini_batch)
    if not mini_batch:
        return

    # Loop-invariant step size: learning rate averaged over the batch.
    step = eta / len(mini_batch)

    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    for x, y in mini_batch:
        delta_nabla_b, delta_nabla_w = self.backprop(x, y)
        nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

    self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
    self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]
