In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Ten samples; each row holds four input features (X1..X4) followed by the
# target Y. The frame is converted straight to a NumPy array so `data` is
# only ever bound to an ndarray.
data = pd.DataFrame(
    data=[
        [2, 5, 3, 7, 15],
        [4, 6, 2, 8, 20],
        [5, 7, 4, 6, 22],
        [3, 8, 5, 5, 21],
        [6, 5, 6, 7, 24],
        [7, 3, 5, 8, 23],
        [8, 4, 7, 6, 25],
        [5, 6, 4, 9, 24],
        [6, 7, 3, 5, 21],
        [4, 5, 6, 7, 22],
    ],
    columns=['X1', 'X2', 'X3', 'X4', 'Y'],
).to_numpy()

# Every column except the last, transposed to (features, samples) = (4, 10).
# NOTE(review): linear_forward computes np.dot(A_prev, W) with W shaped
# (n_in, n_out), i.e. it expects samples as rows; the training loop rebuilds
# X as a (1, 4) row per sample, so this transposed X is not fed to the network.
X = data[:, :-1].T

# Last column kept two-dimensional: (10, 1).
y = data[:, [-1]]

In [3]:
def init_parameters(layer_dims):
    """Create randomly initialised weights and biases for a dense network.

    Args:
        layer_dims: Sequence of layer sizes, input layer first
            (for example ``[4, 2, 1]``).

    Returns:
        dict: For every layer ``k`` from 1 to ``len(layer_dims) - 1`` the
        entries ``"W<k>"`` with shape ``(layer_dims[k-1], layer_dims[k])``
        and ``"b<k>"`` with shape ``(1, layer_dims[k])``. Values come from
        ``np.random.rand`` shifted by -0.5, i.e. they lie in ``[-0.5, 0.5)``.
    """
    params = {}

    # Pair each layer size with the next one; layer numbering starts at 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(size_pairs, start=1):
        # Row r / column c of W is the weight from node r of the previous
        # layer to node c of the current layer.
        params[f"W{idx}"] = np.random.rand(n_in, n_out) - 0.5
        params[f"b{idx}"] = np.random.rand(1, n_out) - 0.5

    return params

In [4]:
def linear_forward(A_prev, W, b):
    """Affine step of a dense layer: ``np.dot(A_prev, W) + b``.

    Args:
        A_prev: Activations entering the layer.
        W: Weight matrix of the layer.
        b: Bias added to the product (NumPy broadcasting applies).

    Returns:
        The pre-activation output of the layer.
    """
    weighted_sum = np.dot(A_prev, W)
    return weighted_sum + b

In [5]:
def forward_prop(X, parameters):
    """Run the input through every linear layer stored in ``parameters``.

    No non-linearity is applied: each layer's output is exactly what
    ``linear_forward`` returns.

    Args:
        X: Input activations; must be compatible with
            ``np.dot(X, parameters["W1"])``.
        parameters: Dict holding ``"W<k>"`` / ``"b<k>"`` for layers
            ``1..L`` where ``L = len(parameters) // 2``.

    Returns:
        tuple: ``(A, A_prev)`` where ``A`` is the output of the last layer
        and ``A_prev`` is the input that was fed to that last layer. When
        ``parameters`` holds no layers both elements are ``X`` itself.
    """
    n_layers = len(parameters) // 2

    A = X
    # Bound before the loop so a network with zero layers returns (X, X)
    # instead of raising UnboundLocalError at the return statement.
    A_prev = X

    for layer in range(1, n_layers + 1):
        A_prev = A
        W = parameters["W" + str(layer)]
        b = parameters["b" + str(layer)]
        A = linear_forward(A_prev, W, b)

    return A, A_prev

In [6]:
def compute_gradients(X, y, parameters):
    """Forward pass plus squared-error gradients for the two-layer linear net.

    The loss differentiated here is ``sum((y - y_hat) ** 2)`` over the rows
    of ``X``; both layers use the identity activation.

    Args:
        X: Inputs with samples as rows, shape ``(m, n_in)``.
        y: Targets; either a scalar (single sample) or an array that
            broadcasts against ``y_hat`` of shape ``(m, n_out)``.
            NOTE(review): a 1-D ``y`` of length ``m`` would broadcast to
            ``(m, m)``; pass it as ``(m, 1)``.
        parameters: Dict with ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``.

    Returns:
        dict: ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"`` (each shaped like
        the parameter it belongs to), plus the forward results ``"y_hat"``
        and ``"A1"``.
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    # Forward pass (identity activation on both layers)
    Z1 = linear_forward(X, W1, b1)   # (m, n_hidden)
    A1 = Z1
    Z2 = linear_forward(A1, W2, b2)  # (m, n_out)
    y_hat = Z2

    # Backward pass
    dL_dyhat = -2 * (y - y_hat)      # (m, n_out)

    dW2 = A1.T @ dL_dyhat            # (n_hidden, n_out)
    # The bias is shared by all samples, so its gradient is the sum over the
    # sample axis. For m == 1 this equals dL_dyhat; for m > 1 it keeps db2 at
    # (1, n_out) so the in-place bias update still broadcasts.
    db2 = np.sum(dL_dyhat, axis=0, keepdims=True)

    dA1 = dL_dyhat @ W2.T            # (m, n_hidden)
    dZ1 = dA1                        # identity activation: derivative is 1
    dW1 = X.T @ dZ1                  # (n_in, n_hidden)
    db1 = np.sum(dZ1, axis=0, keepdims=True)  # (1, n_hidden)

    grads = {"dW1": dW1, "db1": db1,
             "dW2": dW2, "db2": db2,
             "y_hat": y_hat, "A1": A1}

    return grads

In [7]:
def update_parameters(parameters, grads, lr):
    """Apply one gradient-descent step, in place, to every layer.

    The number of layers is taken as ``len(parameters) // 2`` (one ``W`` and
    one ``b`` per layer), so the function works for any network depth rather
    than only for two layers.

    Args:
        parameters: Dict with ``"W<k>"`` / ``"b<k>"`` arrays; the arrays are
            modified in place.
        grads: Dict with ``"dW<k>"`` / ``"db<k>"`` for every layer in
            ``parameters``. Extra keys are ignored.
        lr: Learning rate multiplied into each gradient.

    Returns:
        None. ``parameters`` is updated in place.

    Raises:
        KeyError: If ``grads`` lacks the gradient for a layer present in
            ``parameters``.
    """
    n_layers = len(parameters) // 2

    for layer in range(1, n_layers + 1):
        # Same order as before: weights first, then the bias of that layer.
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            parameters[key] -= lr * grads["d" + key]

In [8]:
# This is an older version of update_parameters, kept commented out for reference only.
# The version above is better: it computes the gradients separately and then updates the parameters.


# def update_parameters(parameters, y, y_hat, lr, A1, X):
#     scalar = -2 * (y - y_hat)

#     parameters["W2"] = parameters['W2'] - (lr * scalar * A1.T)
#     parameters["b2"] = parameters["b2"] - (lr * scalar)

#     parameters["W1"] = parameters["W1"] - (lr * scalar * np.dot(parameters["W2"], X).T)
#     parameters["b1"] = parameters["b1"] - (lr * scalar * parameters["W2"].T)

In [10]:
# Train the 4-2-1 network with per-sample gradient descent:
# one parameter update after every row of `data`.

parameters = init_parameters([4, 2, 1])
epochs = 50
lr = 0.001

for i in range(epochs):

    Loss = []  # squared error of each sample seen during this epoch

    # Iterate over however many rows `data` has instead of a literal 10.
    for j in range(data.shape[0]):

        X = data[j, :4].reshape(1, 4)  # one sample as a row vector: (1, n_features)
        y = data[j, 4]                 # scalar target for that sample

        # Forward pass with the current (pre-update) parameters; the
        # prediction is only used for the loss that gets reported.
        y_hat, A1 = forward_prop(X, parameters)
        y_hat = y_hat[0][0]

        grads = compute_gradients(X, y, parameters)
        update_parameters(parameters, grads, lr)

        Loss.append((y - y_hat) ** 2)
    print('Epoch - ',i+1,'Loss - ',np.array(Loss).mean())
  

Epoch -  1 Loss -  205.78238002173862
Epoch -  2 Loss -  3.008717539293672
Epoch -  3 Loss -  2.5970403350934363
Epoch -  4 Loss -  2.2837504049340906
Epoch -  5 Loss -  2.0403465856750644
Epoch -  6 Loss -  1.8476338330742543
Epoch -  7 Loss -  1.6921793236811848
Epoch -  8 Loss -  1.564512443482407
Epoch -  9 Loss -  1.4579071645975847
Epoch -  10 Loss -  1.3675489102261333
Epoch -  11 Loss -  1.2899600573297798
Epoch -  12 Loss -  1.2226016844675744
Epoch -  13 Loss -  1.163596487907929
Epoch -  14 Loss -  1.1115355053058165
Epoch -  15 Loss -  1.0653430437713438
Epoch -  16 Loss -  1.024182141769514
Epoch -  17 Loss -  0.9873883089586546
Epoch -  18 Loss -  0.9544230144945101
Epoch -  19 Loss -  0.9248409738555899
Epoch -  20 Loss -  0.8982670773427317
Epoch -  21 Loss -  0.8743800536201196
Epoch -  22 Loss -  0.8529008352835575
Epoch -  23 Loss -  0.8335842048586176
Epoch -  24 Loss -  0.8162127279768306
Epoch -  25 Loss -  0.8005922807193573
Epoch -  26 Loss -  0.7865486885626976

In [None]:
# Another example used in CampusX's backpropagation videos

In [346]:
# Small four-row example table: the inputs are cgpa and profile_score,
# lpa is the value the network is trained to predict.
df = pd.DataFrame(
    {
        'cgpa': [8, 7, 6, 5],
        'profile_score': [8, 9, 10, 12],
        'lpa': [4, 5, 6, 7],
    }
)

In [352]:
# Train a 2-2-1 network on the cgpa / profile_score example,
# updating the parameters after every single row.

parameters = init_parameters([2, 2, 1])
epochs = 5

for i in range(epochs):

    Loss = []  # squared error of each sample seen during this epoch

    # Iterate over however many rows `df` has instead of a literal 4.
    for j in range(len(df)):

        X = df[['cgpa', 'profile_score']].values[j].reshape(1, 2)  # one sample: (1, n_features)
        y = df[['lpa']].values[j][0]                               # scalar target

        # Forward pass with the current (pre-update) parameters, used for
        # the reported loss.
        y_hat, A1 = forward_prop(X, parameters)
        y_hat = y_hat[0][0]

        # update_parameters is defined as (parameters, grads, lr). The former
        # six-argument call matched a retired signature and raises TypeError
        # on a fresh kernel, so the gradients are computed explicitly first.
        grads = compute_gradients(X, y, parameters)
        update_parameters(parameters, grads, 0.001)

        Loss.append((y - y_hat) ** 2)
    print('Epoch - ',i+1,'Loss - ',np.array(Loss).mean())
    print("\n")
  

# parameters

Epoch -  1 Loss -  7.903061447212958


Epoch -  2 Loss -  3.190277370979962


Epoch -  3 Loss -  1.3875770144953572


Epoch -  4 Loss -  0.9535781917194645


Epoch -  5 Loss -  0.8961373349576718


