## Backpropagation implementation using Python

All the parameters

1. def  `__init__`: 

`layers`: A list of integers which represents the actual architecture of the feedforward network. For example, a value of [2, 2, 1] would imply that the first input layer has two nodes, the hidden layer has two nodes, and the final output layer has one node.

`alpha`: specifies the learning rate of the neural network. This value is applied during the weight update phase.

2. def `fit`: 

`X`: training data.

`y`: corresponding class labels for each entry in `X`.

`epochs`: the number of epochs we’ll train the network for.

`displayUpdate`: controls how often training progress is printed to the terminal, i.e. once every N epochs.

3. def `fit_partial`:

`x`: An individual data point from the design matrix.

`y`: The corresponding class label.


In [3]:
import numpy as np

In [8]:
class NeuralNetwork:
    def __init__(self, layers, alpha=0.1):
        # initialise the list of weight matrices
        # store the network architecture & learning rate
        self.W = []
        self.layers = layers
        self.alpha = alpha 
        # loop from index of 1st layer but 
        # stop before reaching last 2 layers
        for i in np.arange(0, len(layers) - 2):
            # randomly initialize a weight matrix 
            # connecting no. of nodes in each respective layer together,
            # adding extra node for the bias
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            # normalizing the variance of each neuron’s output
            self.W.append(w / np.sqrt(layers[i]))
            
            # handling a special case where the input 
            # connections need a bias term 
            # but output does not
            w = np.random.randn(layers[-2] + 1, layers[-1])
            self.W.append(w / np.sqrt(layers[-2]))
        
        def __repr__(self):
            # a string that represents the network architecture
            return "NeuralNetwork: {}".format("-".join(str(l) for l in self.layers))
        
        def sigmoid(self, x):
            """Apply the logistic function 1 / (1 + e^-x) to ``x``."""
            # NOTE(review): this def is indented one level inside __init__ in
            # this file, which makes it a local function rather than a method
            # -- confirm the intended indentation.
            denominator = 1 + np.exp(-x)
            return 1.0 / denominator
        
        def sigmoid_deriv(self, x):
            """Return ``x * (1 - x)``, the sigmoid derivative in terms of its output.

            ``x`` is expected to be a value that has already been passed
            through the sigmoid, not the raw pre-activation.
            """
            # NOTE(review): this def is indented one level inside __init__ in
            # this file, which makes it a local function rather than a method
            # -- confirm the intended indentation.
            complement = 1 - x
            return x * complement
        
        ### Training the Neural Network ###
        
        def fit(self, x, y, epochs=1000, displayUpdate=100):
            # insert a columns of 1's as last entry in the feature matrix
            # to allow us create a bias as a trainable parameter with the weight matrix
            X = np.c_[X, np.ones((X.shape[0]))]
            
            for epoch in np.arange(0, epochs):
                # loop over individual data point and train NN on it
                for (x, target) in zip(X, y):
                    self.fit_partial(x, target)
                    
                    # check to see if we should display a training update
                    if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                        loss = self.calculate_loss(X, y)
                        print("[INFO] epoch={}, loss={:.7f}".format(epoch + 1, loss))
                        
        def fit_partial(self, x, y):
            # construct the list of output activations for each layer
            # as the data point flows through the network; the first
            # activation is a special case -- it's just the input
            # feature vector itself
            A = [np.atleast_2d(x)]