In [1]:
from sklearn.metrics import mean_squared_error

# training data: four single-feature samples (x) and the target value for each (y)
x = [[feature] for feature in (1, 2, 3, 4)]
y = [[target] for target in (3, 4, 9, 12)]

In [2]:
# define feedforward function, no hidden layer, architecture: y=w*x+b
from copy import deepcopy
import numpy as np
def feed_forward(inputs, outputs, weights):
    """Return the mean squared error of the linear model y = w*x + b.

    Inputs:
     inputs: input values, combined with the weight via np.dot
     outputs: target values the prediction is compared against
     weights: pair where weights[0] is the weight (w) and weights[1] the bias (b)
    Returns:
     the mean of the squared differences between prediction and outputs
    """
    slope, intercept = weights[0], weights[1]
    prediction = np.dot(inputs, slope) + intercept
    residuals = prediction - outputs
    return np.mean(np.square(residuals))

In [3]:
# define the update_weights function (performs one gradient descent step)
def update_weights(inputs, outputs, weights, lr, eps=0.0001):
    """Perform one gradient descent step using forward-difference gradients.

    Each parameter is increased by ``eps`` in turn, the loss is re-evaluated
    with ``feed_forward``, and ``(loss_plus - original_loss) / eps`` is used
    as the gradient of that parameter. All gradients are measured at the
    incoming weights; the updates are collected in a separate copy.

    Inputs:
     inputs: input variable to the network
     outputs: output variable to the network
     weights: sequence of numpy arrays, one per layer (indexed as
              weights[layer][index]); the caller's arrays are left untouched
     lr: learning rate
     eps: amount added to a parameter when probing the loss (default 0.0001)
    Returns:
     a deep copy of ``weights`` in which every parameter has been moved by
     ``-lr * gradient``
    """
    # updated_weights collects the result; probe_weights is a scratch copy that is
    # perturbed one parameter at a time, so the caller's weights are never modified
    updated_weights = deepcopy(weights)
    probe_weights = deepcopy(weights)

    # calculate the loss value with the original set of weights
    original_loss = feed_forward(inputs, outputs, probe_weights)

    # loop through all the layers of the network
    for i, layer in enumerate(weights):
        # loop through all the individual parameters
        for index, original_value in np.ndenumerate(layer):
            # select a weight and increase it by a small value
            probe_weights[i][index] += eps
            loss_plus = feed_forward(inputs, outputs, probe_weights)
            # put the saved value back so the next probe starts from the original
            # weights again (avoids deep-copying the whole network per parameter)
            probe_weights[i][index] = original_value

            # forward-difference estimate of the gradient for this parameter
            grad = (loss_plus - original_loss) / eps

            # update the parameter in the corresponding layer and index of updated_weights
            updated_weights[i][index] -= grad * lr
    return updated_weights

In [5]:
# initialize the weight and the bias to zero, each as a separate 1x1 float32 array
W = [np.zeros((1, 1), dtype=np.float32) for _layer in range(2)]

In [12]:
# leverage the update_weights function and check how the weight value varies over increasing epochs
# NOTE: W is rebound on every iteration, so re-running this cell continues from the
# current W; re-run the cell that initializes W first to start again from scratch
N_EPOCHS = 1000
LEARNING_RATE = 0.01

weight_value = []
for epx in range(N_EPOCHS):
    W = update_weights(x, y, W, LEARNING_RATE)
    # record the weight (not the bias) reached at the end of this epoch
    weight_value.append(W[0][0][0])

In [None]:
# plot the value of weight parameter at the end of each epoch
import matplotlib.pyplot as plt
%matplotlib inline
epochs = range(1, 1001)
plt.plot(epochs, weight_value)
plt.title("weight value over increasing epochs when the lr is 0.01")
plt.xlabel("epochs")
plt.ylabel("weight value")

Text(0, 0.5, 'weight value')

In general, a low learning rate is the safer choice: the model
 learns more slowly, but the weights move steadily toward an
 optimal value instead of overshooting it. Typical learning rate
 values range between 0.0001 and 0.01.