# Gradient Descent

**Steps:**

1. Define feedforward network and calculate mean squared error loss
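2. Increase each weight and bias value by a very small amount, one at a time, and recalculate the mean squared error loss after each change
3. Estimate each gradient as the change in loss divided by that small amount, then update the parameter by subtracting the gradient multiplied by the learning rate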

Here the very small amount is `0.0001`; the learning rate is passed to `update_weights` as `lr`.

In [3]:
import numpy as np
from copy import deepcopy

In [2]:
# FeedForward Network

def feed_forward(inputs, outputs, weights):
    """Run a forward pass and return the mean squared error.

    The network has one sigmoid hidden layer followed by a linear output
    layer. ``weights`` is indexed as: ``weights[0]`` hidden-layer weights,
    ``weights[1]`` hidden-layer bias, ``weights[2]`` output-layer weights,
    ``weights[3]`` output-layer bias.

    Returns the mean of the squared differences between the network's
    prediction for ``inputs`` and the target ``outputs``.
    """
    # Hidden layer: affine transform followed by the sigmoid activation.
    hidden_input = np.dot(inputs, weights[0]) + weights[1]
    hidden_activation = 1 / (1 + np.exp(-hidden_input))

    # Output layer: plain affine transform, no activation.
    predicted = np.dot(hidden_activation, weights[2]) + weights[3]

    residual = predicted - outputs
    return np.mean(np.square(residual))

In [None]:
# Weight update using numerical (finite-difference) gradients

def update_weights(inputs, outputs, weights, lr, epsilon=0.0001):
    """Perform one gradient-descent step using numerically estimated gradients.

    Every individual parameter is nudged by ``epsilon`` in turn, the loss is
    recomputed with ``feed_forward``, and the forward-difference quotient
    ``(loss_plus - original_loss) / epsilon`` is used as that parameter's
    gradient. Each parameter is then moved against its gradient by ``lr``.

    Args:
        inputs: Input data, passed through to ``feed_forward``.
        outputs: Target values, passed through to ``feed_forward``.
        weights: Sequence of NumPy arrays, one per weight/bias tensor, in the
            order ``feed_forward`` expects. It is not modified.
        lr: Learning rate that scales each gradient step.
        epsilon: Size of the perturbation used to estimate each gradient.

    Returns:
        A tuple ``(updated_weights, original_loss)``: a deep copy of
        ``weights`` with the step applied, and the loss measured before the
        update.
    """
    # Work on copies so the caller's weights are never changed.
    updated_weights = deepcopy(weights)
    perturbed_weights = deepcopy(weights)

    # Baseline loss that every perturbed loss is compared against.
    original_loss = feed_forward(inputs, outputs, weights)

    # Loop through all layers of the network
    for i, layer in enumerate(weights):

        # Layers have different shapes, so np.ndenumerate is used to visit
        # every scalar parameter regardless of dimensionality.
        for index, _ in np.ndenumerate(layer):
            # Perturb a single parameter; restoring it afterwards avoids
            # deep-copying the whole network for every parameter.
            original_value = perturbed_weights[i][index]
            perturbed_weights[i][index] = original_value + epsilon

            loss_plus = feed_forward(inputs, outputs, perturbed_weights)

            perturbed_weights[i][index] = original_value

            # Forward-difference estimate of d(loss)/d(parameter).
            grad = (loss_plus - original_loss) / epsilon

            updated_weights[i][index] = updated_weights[i][index] - (grad * lr)

    return updated_weights, original_loss
