In [None]:
# Backpropagation on a single neuron

import numpy as np

# Step size applied to every gradient-descent update
learning_rate = 0.001

# Single training example: the input vector and the output we want for it
inputs = np.array([1.0, -2.0, 3.0])
target_output = 0.0

# Initial parameters of the neuron: one weight per input plus a scalar bias
weights = np.array([-3.0, -1.0, 2.0])
bias = 1.0

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Accepts a scalar or anything NumPy can broadcast and returns the
    result of ``np.maximum`` unchanged.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU evaluated at ``x``.

    The result is 1.0 where ``x > 0`` and 0.0 everywhere else (including
    exactly at 0).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s) at which to evaluate the derivative.

    Returns
    -------
    float or numpy.ndarray
        A plain ``float`` for scalar input; for array-like input, an array
        of 1.0/0.0 values with the same shape as ``x``.
    """
    # np.where evaluates the condition element-wise, so array inputs no longer
    # hit the "truth value of an array is ambiguous" error of a bare `if`.
    gradient = np.where(np.asarray(x) > 0, 1.0, 0.0)

    # Scalar input yields a 0-d array; unwrap it so scalar callers still get
    # an ordinary Python float.
    if gradient.ndim == 0:
        return float(gradient)
    return gradient

# Train the single neuron with 200 steps of gradient descent.
for i in range(200):
    # Forward pass: weighted sum -> ReLU -> squared error against the target
    linear_output = np.dot(weights, inputs) + bias
    relu_output = relu(linear_output)
    loss = (relu_output - target_output) ** 2

    # Backward pass: local derivative of each step of the forward pass
    dloss_doutput = 2 * (relu_output - target_output)  # d(loss)/d(relu_output)
    doutput_dlinear = relu_derivative(linear_output)  # d(relu_output)/d(linear_output)
    dlinear_dweights = inputs  # d(linear_output)/d(weights): each weight multiplies its input
    dlinear_dbias = 1.0  # d(linear_output)/d(bias): the bias is simply added

    # Chain rule: combine the local derivatives into gradients of the loss
    dloss_dlinear = dloss_doutput * doutput_dlinear  # d(loss)/d(linear_output)
    dloss_dweights = dloss_dlinear * dlinear_dweights  # d(loss)/d(weights)
    dloss_dbias = dloss_dlinear * dlinear_dbias  # d(loss)/d(bias)

    # Gradient-descent step: move the parameters against the gradient
    weights -= learning_rate * dloss_dweights
    bias -= learning_rate * dloss_dbias

    # Report the loss from this iteration's forward pass (computed before the update)
    print(f"Iteration {i + 1}'s Loss: {loss}")

print("Final weights:", weights)
print("Final bias:", bias)


In [None]:
# Backpropagation through an entire layer

import numpy as np

# Step size for gradient descent and the value the layer's summed output should reach
learning_rate = 0.001
target_output = 0

# Input vector fed to every neuron in the layer
inputs = np.array([1, 2, 3, 4])

# Layer parameters: each row of `weights` holds one neuron's weights (3 neurons x 4 inputs),
# and `biases` holds one bias per neuron
weights = np.array(
    [
        [0.1, 0.2, 0.3, 0.4],
        [0.5, 0.6, 0.7, 0.8],
        [0.9, 1.0, 1.1, 1.2],
    ]
)
biases = np.array([0.1, 0.2, 0.3])

def relu(x):
    """Apply ReLU element-wise: values below zero become 0, the rest pass through."""
    lower_bound = 0
    activated = np.maximum(lower_bound, x)
    return activated

def relu_derivative(x):
    """Element-wise derivative of ReLU.

    Where ``x > 0`` ReLU is the identity, so its derivative is 1; everywhere
    else ReLU is constant at 0, so its derivative is 0. The result is an
    integer array of ones and zeros.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)
    

# Train the layer with 200 steps of gradient descent so that the sum of its
# ReLU activations approaches target_output.
for i in range(200):
    # Forward pass
    z = np.dot(weights, inputs) + biases  # pre-activation of each neuron
    a = relu(z)  # activation of each neuron
    y = np.sum(a)  # layer output: sum of the activations

    # Squared error against the target. The original hard-coded y**2, which
    # silently ignored target_output; the two agree when target_output is 0.
    error = y - target_output
    loss = error**2

    # Backward pass

    # Gradient of loss with respect to y
    dL_dy = 2 * error

    # Gradient of y with respect to a: y is a plain sum, so dy/da_k = 1 for every neuron
    dy_da = np.ones_like(a)

    # Gradient of loss with respect to a
    dL_da = dL_dy * dy_da

    # Gradient of a with respect to z (ReLU derivative)
    da_dz = relu_derivative(z)

    # Gradient of loss with respect to z (chain rule: dL/da * da/dz)
    dL_dz = dL_da * da_dz

    # Gradient of loss with respect to weights and biases.
    # z_k = sum_j weights[k, j] * inputs[j] + biases[k], so
    # dL/dW[k, j] = dL_dz[k] * inputs[j], which is the outer product.
    dL_dW = np.outer(dL_dz, inputs)
    dL_db = dL_dz  # biases are added directly, so dz/db = 1

    # Update weights and biases (gradient-descent step)
    weights -= learning_rate * dL_dW
    biases -= learning_rate * dL_db

    # Print loss every 20 iterations (the loss shown was computed before this update)
    if i % 20 == 0:
        print(f"Iteration {i + 1}, Loss: {loss}")

# Final weights and biases
print("Final weights:\n", weights, "\n")
print("Final biases:\n", biases)

Iteration 1, Loss: 466.56000000000006
Iteration 21, Loss: 5.32959636083938
Iteration 41, Loss: 0.41191523404899866
Iteration 61, Loss: 0.031836212079467595
Iteration 81, Loss: 0.002460565465389601
Iteration 101, Loss: 0.000190172825660145
Iteration 121, Loss: 1.4698126966451542e-05
Iteration 141, Loss: 1.1359926717815175e-06
Iteration 161, Loss: 8.779889800154524e-08
Iteration 181, Loss: 6.7858241357822796e-09
Final weights:
 [[-0.00698895 -0.01397789 -0.02096684 -0.02795579]
 [ 0.25975286  0.11950572 -0.02074143 -0.16098857]
 [ 0.53548461  0.27096922  0.00645383 -0.25806156]] 

Final biases:
 [-0.00698895 -0.04024714 -0.06451539]
