# Backpropagation with NumPy

In [2]:
import numpy as np
from data_prep import features, targets, features_test, targets_test

np.random.seed(21)

We use the sigmoid function as activation function:

In [3]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

We define the hyperparameters of the network:

In [4]:
# Hyperparameters
n_hidden = 2  # number of hidden units
epochs = 900
learnrate = 0.005

n_records, n_features = features.shape

The error term for the output layer is

$$
\large \delta_k^0 = (y_k - \hat y_k) f'(a_k)
$$

where $a_k$ is the input of activation function of the output layer

The error term for the hidden layer is:

$$
\large \delta_j^h = \sum_k w_{jk} \delta_k^0 f'(h_j)
$$

where $h_j$ is the input of activation function of the considered hidden layer and $w_{jk}$ the weights of the hidden layer.

For now we'll only consider a simple network with one hidden layer and one output unit. Here's the general algorithm for updating the weights with backpropagation:

- Set the weight steps for each layer to zero
    - The input to hidden weights $\large \Delta w_{ij} = 0$
    - The hidden to output weights $\large \Delta W_j = 0$

- For each record in the training data:
    - Make a forward pass through the network, calculating the output $\hat y$
    - Calculate the error gradient in the output unit, $\large \delta^o = (y - \hat y) f'(z)$ where $\large z = \sum_j W_j a_j$, the input to the output unit.
    - Propagate the errors to the hidden layer $\large \delta^h_j = \delta^o W_j f'(h_j)$
    - Update the weight steps:
        - $\large \Delta W_j = \Delta W_j + \delta^o a_j$
        - $\large \Delta w_{ij} = \Delta w_{ij} + \delta^h_j a_i$
    - Update the weights, where $\eta$ is the learning rate and m is the number of records:
        - $\large W_j = W_j + \eta \Delta W_j / m$ 
        - $\large w_{ij} = w_{ij} + \eta \Delta w_{ij} / m$

Repeat for each epochs.

It is worth mentioning than the derivative of the sigmoid function is:

$$
\large \frac{d\sigma(x)}{dx}=\frac{d(\frac{1}{1 + e^{-x}})}{dx}=\sigma(x) (1 - \sigma(x))
$$


Initialisations:

In [12]:
# Initialize weights
last_loss = None
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

Training the neural network:

In [15]:
for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # TODO: Calculate the output
        hidden_input = np.dot(x, weights_input_hidden) # we do this instead of w * x due to broadcasting issues
        hidden_output = sigmoid(hidden_input)
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))

        ## Backward pass ##
        # TODO: Calculate the network's prediction error
        error = y - output

        # TODO: Calculate error term for the output unit
        output_error_term = error * output * (1 - output) # the error term is error * derivative of sigmoid

        ## propagate errors to hidden layer

        # TODO: Calculate the hidden layer's contribution to the error
        hidden_error = np.dot(output_error_term, weights_hidden_output) # we use weights of hidden layer
        
        # TODO: Calculate the error term for the hidden layer
        hidden_error_term = hidden_error * hidden_output * (1 - hidden_output) #we use sigmoid of hidden layer
        
        # TODO: Update the change in weights
        del_w_hidden_output += output_error_term * hidden_output # we multiply by input of output layer
        del_w_input_hidden += hidden_error_term * x[:, None] # we multiply by input of hidden layer

    # TODO: Update weights  (don't forget to division by n_records or number of samples)
    weights_input_hidden += del_w_input_hidden / n_records
    weights_hidden_output += del_w_hidden_output / n_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss



Neural Network accuracy:

In [14]:
# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Prediction accuracy: 0.650
