In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Toy regression set: ten samples, four input columns (X1..X4) and one target column (Y).
# The frame only exists to label the columns; everything downstream works on the raw array.
data = pd.DataFrame(
    [
        [2, 5, 3, 7, 15],
        [4, 6, 2, 8, 20],
        [5, 7, 4, 6, 22],
        [3, 8, 5, 5, 21],
        [6, 5, 6, 7, 24],
        [7, 3, 5, 8, 23],
        [8, 4, 7, 6, 25],
        [5, 6, 4, 9, 24],
        [6, 7, 3, 5, 21],
        [4, 5, 6, 7, 22],
    ],
    columns=['X1', 'X2', 'X3', 'X4', 'Y'],
).to_numpy()

# Every column except the last, transposed so that each column of X is one sample: (4, 10).
X = data[:, :-1].T
# Last column, indexed with a list so the result stays two-dimensional: (10, 1).
y = data[:, [-1]]

In [3]:
def init_parameters(layer_dims, seed=None):
    """Create randomly initialised weights and biases for a fully connected network.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units in each layer, input layer first
        (e.g. ``[4, 2, 1]`` -> 4 inputs, 2 hidden units, 1 output).
    seed : int or None, optional
        When given, values are drawn from a private ``np.random.RandomState(seed)``
        so the result is reproducible and the global NumPy random state is left
        untouched. When ``None`` (default) the global ``np.random`` state is used,
        exactly as before this parameter existed.

    Returns
    -------
    dict
        ``"W<l>"`` of shape ``(layer_dims[l-1], layer_dims[l])`` and ``"b<l>"`` of
        shape ``(1, layer_dims[l])`` for ``l = 1 .. len(layer_dims) - 1``. All
        values are uniform in ``[-0.5, 0.5)``. Empty when fewer than two layer
        sizes are given.
    """
    # Both the np.random module and a RandomState instance expose .rand(...)
    rng = np.random if seed is None else np.random.RandomState(seed)

    parameters = {}
    for layer in range(1, len(layer_dims)):
        n_prev, n_curr = layer_dims[layer - 1], layer_dims[layer]

        # Rows index the nodes of the previous layer, columns the nodes of the
        # current layer, so a forward step is A_prev @ W.
        parameters["W" + str(layer)] = rng.rand(n_prev, n_curr) - .5
        parameters["b" + str(layer)] = rng.rand(1, n_curr) - .5

    return parameters

In [4]:
def linear_forward(A_prev, W, b):
    """Affine step of one layer: the dot product of ``A_prev`` and ``W``, plus ``b``."""
    weighted_sum = np.dot(A_prev, W)
    return weighted_sum + b

In [5]:
def forward_prop(X, parameters):
    """Push ``X`` through every layer stored in ``parameters``.

    ``parameters`` holds one ``"W<l>"`` / ``"b<l>"`` pair per layer, so the layer
    count is half the number of entries. Each layer is a plain affine step
    (``linear_forward``); no activation function is applied.

    Returns
    -------
    tuple
        ``(A, A_prev)``: the output of the last layer and the input that was
        fed into that last layer.

    Note: ``A_prev`` is only assigned inside the loop, so calling this with an
    empty ``parameters`` dict fails at the return statement.
    """
    n_layers = len(parameters) // 2
    activation = X

    for idx in range(1, n_layers + 1):
        A_prev = activation
        weights = parameters[f"W{idx}"]
        bias = parameters[f"b{idx}"]
        activation = linear_forward(A_prev, weights, bias)

    return activation, A_prev

In [6]:
def compute_gradients(X, y, parameters):
    """Forward and backward pass of the two-layer, activation-free network.

    The model is ``A1 = X @ W1 + b1`` followed by ``y_hat = A1 @ W2 + b2``.
    The gradients are those of the squared error ``(y - y_hat) ** 2`` summed
    over the rows of ``X``.

    Parameters
    ----------
    X : ndarray, shape (m, n_features)
        One sample per row. The training loops in this notebook pass m = 1.
    y : scalar or ndarray
        Target(s); must broadcast against ``y_hat`` of shape (m, 1).
    parameters : dict
        Must contain ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``. The shape comments
        below assume the layout produced by ``init_parameters``: W is
        (inputs, outputs) and b is (1, outputs).

    Returns
    -------
    dict
        ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"`` with the same shapes as the
        corresponding parameters, plus the forward-pass values ``"y_hat"`` and
        ``"A1"``.
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    # Forward pass (identity activation, so A1 is simply Z1)
    Z1 = linear_forward(X, W1, b1)   # (m, h)
    A1 = Z1
    Z2 = linear_forward(A1, W2, b2)  # (m, 1)
    y_hat = Z2

    # Backward pass
    dL_dyhat = -2 * (y - y_hat)      # (m, 1)

    dW2 = A1.T @ dL_dyhat            # (h, 1)
    # The matrix products for dW already add up the per-row contributions;
    # the bias gradients need the same reduction so they keep the shape of b.
    # With a single row this is the same value as dL_dyhat itself.
    db2 = np.sum(dL_dyhat, axis=0, keepdims=True)  # (1, 1)

    dA1 = dL_dyhat @ W2.T            # (m, h)
    dZ1 = dA1                        # identity activation: dZ1 == dA1
    dW1 = X.T @ dZ1                  # (n_features, h)
    db1 = np.sum(dZ1, axis=0, keepdims=True)       # (1, h)

    grads = {"dW1": dW1, "db1": db1,
             "dW2": dW2, "db2": db2,
             "y_hat": y_hat, "A1": A1}

    return grads

In [7]:
def update_parameters(parameters, grads, lr):
    """Take one gradient-descent step on both layers, modifying ``parameters`` in place.

    For each of ``W1``, ``b1``, ``W2``, ``b2`` the matching ``grads["d<name>"]``
    scaled by the learning rate ``lr`` is subtracted from the stored value.
    Nothing is returned.
    """
    for name in ("W1", "b1", "W2", "b2"):
        parameters[name] -= lr * grads["d" + name]

In [8]:
# This is an older version of update_parameters, kept only for reference.
# The version above is better: it calculates the gradients separately and then updates the parameters.


# def update_parameters(parameters, y, y_hat, lr, A1, X):
#     scalar = -2 * (y - y_hat)

#     parameters["W2"] = parameters['W2'] - (lr * scalar * A1.T)
#     parameters["b2"] = parameters["b2"] - (lr * scalar)

#     parameters["W1"] = parameters["W1"] - (lr * scalar * np.dot(parameters["W2"], X).T)
#     parameters["b1"] = parameters["b1"] - (lr * scalar * parameters["W2"].T)

In [9]:
# epochs implementation: per-sample gradient descent on the 4-feature dataset in `data`

parameters = init_parameters([4, 2, 1])
epochs = 50
lr = 0.001

for i in range(epochs):

    Loss = []  # squared error of each sample seen during this epoch

    # Visit every row of `data` instead of relying on a hard-coded sample count
    for j in range(data.shape[0]):

        X = data[j, :4].reshape(1, 4)  # shape (1, 4): one training example, four features
        y = data[j, 4]

        # compute_gradients runs the forward pass itself and returns the prediction,
        # so a separate forward_prop call would only repeat the same computation.
        grads = compute_gradients(X, y, parameters)
        y_hat = grads["y_hat"][0][0]

        # The prediction above was made before this update, so the logged loss
        # reflects the parameters the sample was actually evaluated with.
        update_parameters(parameters, grads, lr)

        Loss.append((y - y_hat) ** 2)
    print('Epoch - ',i+1,'Loss - ',np.array(Loss).mean())

parameters

Epoch -  1 Loss -  195.5232299027706
Epoch -  2 Loss -  1.7279994044502374
Epoch -  3 Loss -  1.5311742139138413
Epoch -  4 Loss -  1.3766854832695383
Epoch -  5 Loss -  1.254353000906228
Epoch -  6 Loss -  1.1567605831349803
Epoch -  7 Loss -  1.078381057815306
Epoch -  8 Loss -  1.0150425424016134
Epoch -  9 Loss -  0.9635623548199582
Epoch -  10 Loss -  0.9214902930025396
Epoch -  11 Loss -  0.8869251921198991
Epoch -  12 Loss -  0.8583817223825013
Epoch -  13 Loss -  0.8346923288807646
Epoch -  14 Loss -  0.8149341840218088
Epoch -  15 Loss -  0.7983742147458802
Epoch -  16 Loss -  0.7844273647184965
Epoch -  17 Loss -  0.7726246599044693
Epoch -  18 Loss -  0.7625886090919
Epoch -  19 Loss -  0.7540141409783255
Epoch -  20 Loss -  0.7466537527436771
Epoch -  21 Loss -  0.7403058840085427
Epoch -  22 Loss -  0.7348057758554343
Epoch -  23 Loss -  0.7300182547777154
Epoch -  24 Loss -  0.7258320148312125
Epoch -  25 Loss -  0.7221550709138947
Epoch -  26 Loss -  0.7189111311182612
E

{'W1': array([[-0.20104244,  0.82975883],
        [ 0.12214386,  0.77455879],
        [ 0.15281256,  0.72235402],
        [-0.59299841,  0.52750985]]),
 'b1': array([[-0.16588189,  0.38340484]]),
 'W2': array([[-0.36323589],
        [ 1.31060347]]),
 'b2': array([[0.21533049]])}

In [11]:
# Another example used in CampusX's backpropagation videos

In [12]:
# Four-sample toy set: `cgpa` and `profile_score` are used as inputs and `lpa` as the target below.
df = pd.DataFrame({
    'cgpa': [8, 7, 6, 5],
    'profile_score': [8, 9, 10, 12],
    'lpa': [4, 5, 6, 7],
})

In [15]:
# epochs implementation: per-sample gradient descent on the cgpa / profile_score -> lpa example

parameters = init_parameters([2, 2, 1])
epochs = 5
# NOTE: `lr` is not set in this cell; it is the value assigned in the earlier training cell.

# Extract the arrays once instead of re-slicing the DataFrame for every sample
inputs = df[['cgpa', 'profile_score']].values
targets = df['lpa'].values

for i in range(epochs):

    Loss = []  # squared error of each sample seen during this epoch

    # Visit every row of `df` instead of relying on a hard-coded sample count
    for j in range(len(df)):

        X = inputs[j].reshape(1, 2)  # shape (1, 2): one training example, two features
        y = targets[j]

        # compute_gradients runs the forward pass itself and returns the prediction,
        # so a separate forward_prop call would only repeat the same computation.
        grads = compute_gradients(X, y, parameters)
        y_hat = grads["y_hat"][0][0]

        # The prediction above was made before this update, so the logged loss
        # reflects the parameters the sample was actually evaluated with.
        update_parameters(parameters, grads, lr)

        Loss.append((y - y_hat) ** 2)
    print('Epoch - ',i+1,'Loss - ',np.array(Loss).mean())

parameters

Epoch -  1 Loss -  27.08286100691636
Epoch -  2 Loss -  10.858235118624997
Epoch -  3 Loss -  2.941541078583097
Epoch -  4 Loss -  1.0261381881056533
Epoch -  5 Loss -  0.8975238641227243


{'W1': array([[0.3981836 , 0.21162732],
        [0.10289689, 0.6800775 ]]),
 'b1': array([[-0.3591457 , -0.03739591]]),
 'W2': array([[0.41611166],
        [0.53862126]]),
 'b2': array([[0.11536361]])}