# Neural network

In [1]:
import numpy as np
import math

#### Activation function

In [2]:
# Sigmoid activation function.
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Derivative of activation function.
def d_sigmoid(x):
    return x * (1.0 - x)

## Neural network class object

In [3]:
class NeuralNetwork(object):
    def __init__(self, n_nodes_hidden, n_hidden_layers, learning_rate, n_epochs):
        """Class constructor."""
        self.n_nodes_hidden = n_nodes_hidden
        self.n_hidden_layers = n_hidden_layers
        self.learning_rate = np.array([learning_rate])
        self.n_epochs = n_epochs

    def initalise_parameters(self):
        """Initialise the neural network parameters (weights and biases) with uniform 
        random variates in the range 0 to 1."""
        self.w_input = np.random.rand(self.n_nodes_input, self.n_nodes_hidden)
        self.b_input = np.random.rand(self.n_nodes_hidden)

        self.w_hidden = np.random.rand(self.n_nodes_hidden, self.n_nodes_hidden, self.n_hidden_layers - 1)
        self.b_hidden = np.random.rand(self.n_nodes_hidden, self.n_hidden_layers - 1)

        self.w_output = np.random.rand(self.n_nodes_hidden, self.n_nodes_output)
        self.b_output = np.random.rand(self.n_nodes_output)
        
    def initialise_z_a_cache(self):
        """Initialise the z and a cache. This is necessary for updating the parameters during backpropogation."""
        z_hidden = np.zeros(shape = [self.n_training_samples, self.n_nodes_hidden, self.n_hidden_layers])
        a_hidden = np.zeros(shape = [self.n_training_samples, self.n_nodes_hidden, self.n_hidden_layers])
    
        return z_hidden, a_hidden
    
    def propogate_forward(self, z_hidden, a_hidden):
        """Propogate information forward through the network."""
        z = np.dot(self.x, self.w_input) + self.b_input
        a = sigmoid(z)

        z_hidden[:, :, 0] = z
        a_hidden[:, :, 0] = a

        for i in range(0, self.n_hidden_layers - 1):
            z = np.dot(a, self.w_hidden[:, :, i]) + self.b_hidden[:, i]
            a = sigmoid(z)
            z_hidden[:, :, i + 1] = z
            a_hidden[:, :, i + 1] = a

        z_output = np.dot(z, self.w_output) + self.b_output
        a_output = sigmoid(z_output)

        return z_hidden, a_hidden, z_output, a_output
    
    def calc_loss(self, a_output):
        """Calculate loss using a mean squared error loss function."""
        return 0.5 * np.sum((a_output.T - self.y) ** 2) / self.n_training_samples
    
    def propogate_backward(self, a_hidden, z_hidden, a_output, z_output):
        """Propogate backward through the network, updating the parameters via stochastic gradient descent."""
        d_z = (a_output.T - self.y) * d_sigmoid(a_output).T / self.n_training_samples
        d_w = np.dot(d_z, a_hidden[:, :, self.n_hidden_layers - 1])
        d_b = np.sum(d_z, axis = 1)

        self.w_output -= (self.learning_rate * d_w).T
        self.b_output -= self.learning_rate * d_b

        w_next = self.w_output

        for i in range(self.n_hidden_layers - 1, 0, -1):
            d_z = np.dot(w_next, d_z) * d_sigmoid(a_hidden[:, :, i]).T / self.n_training_samples
            d_w = np.dot(d_z, a_hidden[:, :, i - 1])
            d_b = np.sum(d_z, axis = 1)

            self.w_hidden[:, :, i - 1] -= self.learning_rate * d_w
            self.b_hidden[:, i - 1] -= self.learning_rate * d_b

            w_next = self.w_hidden[:, :, i - 1]

        d_z = np.dot(w_next, d_z) * d_sigmoid(a_hidden[:, :, 0]).T / self.n_training_samples
        d_w = np.dot(d_z, self.x)
        d_b = np.sum(d_z, axis = 1)

        self.w_input -= (self.learning_rate * d_w).T
        self.b_input -= self.learning_rate * d_b
    
    def train(self, x, y):
        """Train the neural network object on the training data."""
        np.random.seed(42)
        
        self.x = x
        self.y = y
        
        self.n_training_samples = x.shape[0]
        self.n_nodes_input = x.shape[1]
        
        if (len(y.shape) == 1):
            self.n_nodes_output = 1
        else:
            self.n_nodes_output = y.shape[1]
            
        self.initalise_parameters()
        
        z_hidden, a_hidden = self.initialise_z_a_cache()

        for i in range(0, self.n_epochs):
            z_hidden, a_hidden, z_output, a_output = self.propogate_forward(z_hidden, a_hidden)

            self.propogate_backward(a_hidden, z_hidden, a_output, z_output)
    
    def predict(self, x):
        """Predict the output y for a given input x."""
        z = np.dot(x, self.w_input) + self.b_input
        a = sigmoid(z)
        
        for i in range(0, self.n_hidden_layers - 1):
            z = np.dot(a, self.w_hidden[:, :, i]) + self.b_hidden[:, i]
            a = sigmoid(z)

        z_output = np.dot(z, self.w_output) + self.b_output
        a_output = sigmoid(z_output)

        return a_output

## Testing

### XOR operator

In [4]:
net = NeuralNetwork(n_nodes_hidden = 2,
                    n_hidden_layers = 2,
                    learning_rate = 5.0,
                    n_epochs = 10000)

In [5]:
net.train(x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
          y = np.array([0, 1, 1, 0]))

In [6]:
net.predict(x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))

array([[0.03842953],
       [0.96968537],
       [0.97106383],
       [0.03861592]])

### AND operator

In [7]:
net = NeuralNetwork(n_nodes_hidden = 5,
                    n_hidden_layers = 2,
                    learning_rate = 3.0,
                    n_epochs = 10000)

In [8]:
net.train(x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
          y = np.array([0, 0, 0, 1]))

In [9]:
net.predict(x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))

array([[1.35427802e-06],
       [1.61786156e-02],
       [1.67356623e-02],
       [9.76561842e-01]])