In [1]:
import matplotlib.pyplot as plt
import numpy as np

## **Backpropagation Single Variate**

In [2]:
# Define the network architecture: a 1-1-1 network (one unit in every layer).

input_size = 1   # number of input features per sample
hidden_size = 1  # number of units in the hidden layer
output_size = 1  # number of values the network predicts per sample

In [3]:
# Define the weights and biases.
# A seeded generator is used so that Restart Kernel -> Run All always starts
# training from the same parameters and reproduces the same loss curve.
rng = np.random.default_rng(42)

# Weights are drawn from a standard normal, biases from a uniform [0, 1)
# distribution (the same distributions as np.random.randn / np.random.rand).
w1 = rng.standard_normal((input_size, hidden_size))   # input  -> hidden
b1 = rng.random((1, hidden_size))
w2 = rng.standard_normal((hidden_size, output_size))  # hidden -> output
b2 = rng.random((1, output_size))


In [4]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), computed without overflow.

    The naive formula evaluates exp(-x), which overflows (RuntimeWarning, or
    FloatingPointError under np.errstate(over='raise')) for large negative x.
    Here only exp(-|x|) is evaluated, which always lies in (0, 1].

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The sigmoid of `x`, with the same shape as `x` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 it is the algebraically
    # equivalent exp(x) / (1 + exp(x)).
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as arrays.
    return result[()]

In [5]:
def forward(input_data):
    """Run one forward pass through the two-layer sigmoid network.

    Reads the module-level parameters `w1`, `b1`, `w2` and `b2`.

    Args:
        input_data: the network input, multiplied with `w1` via np.dot.

    Returns:
        A tuple `(a1, output)`: the hidden-layer activations and the final
        network output.
    """
    # Hidden layer: affine transform followed by the sigmoid non-linearity.
    hidden_activation = sigmoid(np.dot(input_data, w1) + b1)
    # Output layer: same pattern applied to the hidden activations.
    network_output = sigmoid(np.dot(hidden_activation, w2) + b2)
    return hidden_activation, network_output

In [6]:
# Calculate the mean squared error between prediction and target.
def loss(predicted, target):
    """Return the mean of the squared element-wise differences."""
    residual = predicted - target
    return np.mean(residual * residual)

In [7]:
def backward(input_data, target, learning_rate):
    """Perform one gradient-descent update of `w1`, `b1`, `w2` and `b2`.

    Runs its own forward pass via `forward(input_data)`, back-propagates the
    error and updates the module-level parameters in place. Returns nothing.

    Args:
        input_data: batch of inputs, one sample per row.
        target: desired outputs, broadcastable against the network output.
        learning_rate: step size applied to every gradient.

    NOTE(review): the output-layer error term is `output - target` with no
    `output * (1 - output)` sigmoid-derivative factor (the hidden layer does
    apply that factor). That matches the pre-activation gradient of binary
    cross-entropy rather than of the mean squared error reported by `loss` --
    confirm this is intended.
    """
    global w1, w2, b1, b2  # parameters live at module level and are updated in place

    hidden_act, prediction = forward(input_data)

    # Error terms for each layer. The hidden term must be computed with the
    # current w2, i.e. before any parameter is updated below.
    delta_output = prediction - target
    delta_hidden = np.dot(delta_output, w2.T) * (hidden_act * (1 - hidden_act))

    # Gradients, summed over the samples in the batch.
    grad_w2 = np.dot(hidden_act.T, delta_output)
    grad_b2 = delta_output.sum(axis=0)
    grad_w1 = np.dot(input_data.T, delta_hidden)
    grad_b1 = delta_hidden.sum(axis=0)

    # Gradient-descent step.
    w2 -= learning_rate * grad_w2
    b2 -= learning_rate * grad_b2
    w1 -= learning_rate * grad_w1
    b1 -= learning_rate * grad_b1

In [9]:
# Training loop: every epoch performs one update using all samples at once.
num_epochs = 1000
learning_rate = 0.1

# The dataset never changes between epochs, so build the arrays once instead
# of re-creating them on every iteration.
input_data = np.array([[0], [1]])
target = np.array([[1], [0]])

for epoch in range(num_epochs):
    # Forward pass, used only to report the loss *before* this epoch's update
    # (backward() runs its own forward pass internally).
    _, predicted = forward(input_data)
    current_loss = loss(predicted, target)

    # Backward pass: updates w1, b1, w2 and b2 in place.
    backward(input_data, target, learning_rate)

    if epoch % 100 == 0:
        print(f'Epoch {epoch}: loss {current_loss}')

Epoch 0: loss 0.1606045948309189
Epoch 100: loss 0.06877819829832899
Epoch 200: loss 0.027170691801158376
Epoch 300: loss 0.012680057354919581
Epoch 400: loss 0.0069467285414194875
Epoch 500: loss 0.004275445787671714
Epoch 600: loss 0.002858655974182389
Epoch 700: loss 0.0020305734546652923
Epoch 800: loss 0.0015096525347742954
Epoch 900: loss 0.001162770127466281


In [110]:
# Query the trained single-input network at a point between the two training inputs.
input_data = 0.5
# Wrap the scalar as a (1, 1) batch: a weight matrix of the wrong shape (e.g. left
# over from another section) then raises instead of silently broadcasting.
_, predicted = forward(np.array([[input_data]]))
# Report the value that was actually fed to the network (not `input_size`), and
# use `.3f` (three decimals) rather than `3f` (a minimum field width of 3).
print(f'Input: {input_data}: Prediction: {predicted[0][0]:.3f}')

Input: 0.5: Prediction: 0.380091


## **Backpropagation Multivariate**

In [124]:
# Define the network architecture: a 2-4-1 network.

input_size = 2   # number of input features per sample
hidden_size = 4  # number of units in the hidden layer
output_size = 1  # number of values the network predicts per sample

In [125]:
# Sigmoid activation function.
# NOTE(review): this re-defines (and shadows) the `sigmoid` declared earlier in
# the notebook -- consider keeping a single definition.
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), computed without overflow.

    The naive formula evaluates exp(-x), which overflows (RuntimeWarning, or
    FloatingPointError under np.errstate(over='raise')) for large negative x.
    Here only exp(-|x|) is evaluated, which always lies in (0, 1].

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The sigmoid of `x`, with the same shape as `x` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 it is the algebraically
    # equivalent exp(x) / (1 + exp(x)).
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as arrays.
    return result[()]

In [126]:
# Define the weights and biases for the 2-4-1 network.
# A seeded generator is used so that Restart Kernel -> Run All always starts
# training from the same parameters and reproduces the same loss curve.
rng = np.random.default_rng(42)

# Weights are drawn from a standard normal, biases from a uniform [0, 1)
# distribution (the same distributions as np.random.randn / np.random.rand).
w1 = rng.standard_normal((input_size, hidden_size))   # input  -> hidden
b1 = rng.random((1, hidden_size))
w2 = rng.standard_normal((hidden_size, output_size))  # hidden -> output
b2 = rng.random((1, output_size))


In [127]:
def forward(input_data):
    """Propagate `input_data` through the hidden and output sigmoid layers.

    Uses the module-level parameters `w1`, `b1`, `w2` and `b2`.

    NOTE(review): re-defines (and shadows) the `forward` declared earlier in
    the notebook.

    Args:
        input_data: the network input, multiplied with `w1` via np.dot.

    Returns:
        A tuple `(a1, output)`: the hidden-layer activations and the final
        network output.
    """
    hidden_pre = np.dot(input_data, w1) + b1
    hidden_activation = sigmoid(hidden_pre)

    output_pre = np.dot(hidden_activation, w2) + b2
    return hidden_activation, sigmoid(output_pre)

In [128]:
# Calculate the mean squared error between prediction and target.
def loss(predicted, target):
    """Return the mean of the squared element-wise differences."""
    squared_error = (predicted - target) ** 2
    return squared_error.mean()

In [129]:
def backward(input_data, target, learning_rate):
    """Apply one in-place gradient-descent step to `w1`, `b1`, `w2` and `b2`.

    Calls `forward(input_data)` itself, back-propagates the resulting error
    and mutates the module-level parameters. Returns nothing.

    Args:
        input_data: batch of inputs, one sample per row.
        target: desired outputs, broadcastable against the network output.
        learning_rate: step size applied to every gradient.

    NOTE(review): the output-layer error term is `output - target` with no
    `output * (1 - output)` sigmoid-derivative factor (the hidden layer does
    apply that factor). That matches the pre-activation gradient of binary
    cross-entropy rather than of the mean squared error reported by `loss` --
    confirm this is intended.
    """
    global w1, w2, b1, b2  # parameters live at module level and are updated in place

    hidden_act, prediction = forward(input_data)

    # Output layer: error term and the gradients derived from it.
    delta_output = prediction - target
    grad_w2 = np.dot(hidden_act.T, delta_output)
    grad_b2 = delta_output.sum(axis=0)

    # Hidden layer: push the error back through w2 (still un-updated here) and
    # scale by the sigmoid derivative a1 * (1 - a1).
    sigmoid_slope = hidden_act * (1 - hidden_act)
    delta_hidden = np.dot(delta_output, w2.T) * sigmoid_slope
    grad_w1 = np.dot(input_data.T, delta_hidden)
    grad_b1 = delta_hidden.sum(axis=0)

    # Gradient-descent step.
    w2 -= learning_rate * grad_w2
    b2 -= learning_rate * grad_b2
    w1 -= learning_rate * grad_w1
    b1 -= learning_rate * grad_b1

In [130]:
# Training loop: every epoch performs one update using all four XOR samples.
num_epochs = 1000
learning_rate = 0.1

# The XOR truth table never changes between epochs, so build the arrays once
# instead of re-creating them on every iteration.
input_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
target = np.array([[0], [1], [1], [0]])

for epoch in range(num_epochs):
    # Forward pass, used only to report the loss *before* this epoch's update
    # (backward() runs its own forward pass internally).
    _, predicted = forward(input_data)
    current_loss = loss(predicted, target)

    # Backward pass: updates w1, b1, w2 and b2 in place.
    backward(input_data, target, learning_rate)

    if epoch % 100 == 0:
        print(f'Epoch {epoch}: loss {current_loss}')

Epoch 0: loss 0.3606515349410203
Epoch 100: loss 0.2509884526306542
Epoch 200: loss 0.2495598908957077
Epoch 300: loss 0.2476955939374783
Epoch 400: loss 0.24326783838967908
Epoch 500: loss 0.23014906716203945
Epoch 600: loss 0.19459171313667345
Epoch 700: loss 0.131046030878966
Epoch 800: loss 0.06369295403301803
Epoch 900: loss 0.026118234547952236


In [136]:
# Evaluate the trained network on all four XOR input combinations
# (rows are listed in the reverse order of the training set).
_, predicted = forward(
    np.array([
        [1, 1],
        [1, 0],
        [0, 1],
        [0, 0],
    ])
)
print(predicted)

[[0.15012591]
 [0.92126791]
 [0.87463893]
 [0.05445411]]
