In [1]:
import numpy as np
import math

Define the activation function and its derivative.

In [2]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):
    return x * (1.0 - x)

Initialise random seed.

In [3]:
np.random.seed(42)

Define feature and target data. There are three feature variables, one target variable, and four data items.

In [4]:
def initialise_x_y():
    x = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ])

    y = np.array([0, 1, 1, 0])
    
    return x, y

Define the number of hidden layers, nodes in each hidden layer, and nodes in the output layer.

In [33]:
def initialise_hyperparameters(x):
    n_training_samples = x.shape[0]
    n_nodes_input = x.shape[1]
    n_nodes_hidden = 2
    n_nodes_output = 1
    n_hidden_layers = 2
    learning_rate = np.array([5])
    n_epochs = 10000
    
    return n_training_samples, n_nodes_input, n_nodes_hidden, n_nodes_output, n_hidden_layers, learning_rate, n_epochs

Initialise the weights and biases.

In [6]:
def initalise_parameters(n_training_samples, n_nodes_input, n_nodes_hidden, n_nodes_output, n_hidden_layers, learning_rate, n_epochs):
    w_input = np.random.rand(n_nodes_input, n_nodes_hidden)
    b_input = np.random.rand(n_nodes_hidden)

    w_hidden = np.random.rand(n_nodes_hidden, n_nodes_hidden, n_hidden_layers - 1)
    b_hidden = np.random.rand(n_nodes_hidden, n_hidden_layers - 1)

    w_output = np.random.rand(n_nodes_hidden, n_nodes_output)
    b_output = np.random.rand(n_nodes_output)
    
    return w_input, b_input, w_hidden, b_hidden, w_output, b_output

Initialise a cache to store the activations of each node in the network.

In [7]:
def initialise_a_z_cache(n_training_samples, n_nodes_hidden, n_hidden_layers):
    z_hidden = np.zeros(shape = [n_training_samples, n_nodes_hidden, n_hidden_layers])
    a_hidden = np.zeros(shape = [n_training_samples, n_nodes_hidden, n_hidden_layers])
    
    return z_hidden, a_hidden

Define a function for the forward propogation through the neural network.

In [26]:
def propogate_forward(x, n_hidden_layers,
            w_input, b_input,
            w_hidden, b_hidden, 
            w_output, b_output,
            z_hidden, a_hidden):
    
    z = np.dot(x, w_input) + b_input
    a = sigmoid(z)
    
    z_hidden[:, :, 0] = z
    a_hidden[:, :, 0] = a
    
    for i in range(0, n_hidden_layers - 1):
        z = np.dot(a, w_hidden[:, :, i]) + b_hidden[:, i]
        a = sigmoid(z)
        z_hidden[:, :, i + 1] = z
        a_hidden[:, :, i + 1] = a
        
    z_output = np.dot(z, w_output) + b_output
    a_output = sigmoid(z_output)
    
    return z_hidden, a_hidden, z_output, a_output

Define a loss function.

In [9]:
def calc_loss(a_output, y, n_training_samples):
    return 0.5 * np.sum((a_output.T - y) ** 2) / n_training_samples

In [20]:
def propogate_backward(a_hidden, z_hidden,
                       a_output, z_output,
                       w_input, b_input,
                       w_hidden, b_hidden, 
                       w_output, b_output,
                       n_hidden_layers,
                       learning_rate, y):
    
    d_z = (a_output.T - y) * d_sigmoid(a_output).T / n_training_samples
    d_w = np.dot(d_z, a_hidden[:, :, n_hidden_layers - 1])
    d_b = np.sum(d_z, axis = 1)
    
    w_output -= (learning_rate * d_w).T
    b_output -= learning_rate * d_b
    
    w_next = w_output
    
    for i in range(n_hidden_layers - 1, 0, -1):
        d_z = np.dot(w_next, d_z) * d_sigmoid(a_hidden[:, :, i]).T / n_training_samples
        d_w = np.dot(d_z, a_hidden[:, :, i - 1])
        d_b = np.sum(d_z, axis = 1)
        
        w_hidden[:, :, i - 1] -= learning_rate * d_w
        b_hidden[:, i - 1] -= learning_rate * d_b
        
        w_next = w_hidden[:, :, i - 1]
        
    d_z = np.dot(w_next, d_z) * d_sigmoid(a_hidden[:, :, 0]).T / n_training_samples
    d_w = np.dot(d_z, x)
    d_b = np.sum(d_z, axis = 1)
    
    w_input -= (learning_rate * d_w).T
    b_input -= learning_rate * d_b
        
    return w_input, b_input, w_hidden, b_hidden, w_output, b_output

In [17]:
for i in range(n_hidden_layers - 1, 0, -1):
    print(i)

1


Train neural network.

In [11]:
def train(x, y, 
          n_epochs, n_training_samples, n_hidden_layers,
          w_input, b_input,
          w_hidden, b_hidden,
          w_output, b_output,
          z_hidden, a_hidden):

    for i in range(0, n_epochs):
        z_hidden, a_hidden, z_output, a_output = propogate_forward(x, n_hidden_layers,
                                                                   w_input, b_input,
                                                                   w_hidden, b_hidden, 
                                                                   w_output, b_output,
                                                                   z_hidden, a_hidden)

        w_input, b_input, w_hidden, b_hidden, w_output, b_output = propogate_backward(a_hidden, z_hidden,
                                                                                      a_output, z_output,
                                                                                      w_input, b_input,
                                                                                      w_hidden, b_hidden, 
                                                                                      w_output, b_output,
                                                                                      n_hidden_layers,
                                                                                      learning_rate, y)
    
    z_hidden, a_hidden, z_output, a_output = propogate_forward(x, n_hidden_layers,
                                                               w_input, b_input,
                                                               w_hidden, b_hidden, 
                                                               w_output, b_output,
                                                               z_hidden, a_hidden)
    
    loss = calc_loss(a_output, y, n_training_samples)
    
    return a_output, y, loss

In [12]:
def initialise_a_z_cache(n_training_samples, n_nodes_hidden, n_hidden_layers):
    z_hidden = np.zeros(shape = [n_training_samples, n_nodes_hidden, n_hidden_layers])
    a_hidden = np.zeros(shape = [n_training_samples, n_nodes_hidden, n_hidden_layers])
    
    return z_hidden, a_hidden

In [34]:
np.random.seed(42)

x, y = initialise_x_y()

n_training_samples, n_nodes_input, n_nodes_hidden, \
n_nodes_output, n_hidden_layers, learning_rate, n_epochs = initialise_hyperparameters(x)

w_input, b_input, w_hidden, b_hidden, w_output, b_output = \
initalise_parameters(n_training_samples, n_nodes_input, n_nodes_hidden, n_nodes_output, 
                     n_hidden_layers, learning_rate, n_epochs)

z_hidden, a_hidden = initialise_a_z_cache(n_training_samples, n_nodes_hidden, n_hidden_layers)

a_output, y, loss = train(x, y, 
                          n_epochs, n_training_samples, n_hidden_layers,
                          w_input, b_input,
                          w_hidden, b_hidden,
                          w_output, b_output,
                          z_hidden, a_hidden)

print(a_output.T)
print(y)
print(loss)

[[0.03842953 0.96968537 0.97106383 0.03861592]]
[0 1 1 0]
0.0005905371826493944
