In [42]:
import torch

In [43]:
class Activation_Sigmoid:
    def forward(self, x):
        self.output = 1 / (1 + torch.exp(-x))
    
    def backward(self):
        return self.output * (1 - self.output)

In [44]:
class Activation_Linear:
    def forward(self, x):
        self.output = x
    
    def backward(self):
        return torch.ones_like(self.output)

In [45]:
class DenseLayer:
    def __init__(self, num_inputs, num_neurons):
        self.weights = 0.001 * torch.rand(num_inputs, num_neurons, requires_grad=True)
        self.biases = torch.rand((1, num_neurons), requires_grad=True)

    def forward(self, inputs):
        self.inputs = inputs
        self.output = torch.matmul(inputs, self.weights) + self.biases

    def backward(self, grad):
        self.weights.grad = torch.matmul(self.inputs.T, grad)
        self.biases.grad = torch.sum(grad, dim=0, keepdim=True)
        return torch.matmul(grad, self.weights.T)

In [46]:
hidden_layer = DenseLayer(2, 4)
activation1 = Activation_Sigmoid()
output_layer = DenseLayer(4, 2)
activation2 = Activation_Linear()

In [47]:
def forward_pass(X):
    hidden_layer.forward(X)
    activation1.forward(hidden_layer.output)
    output_layer.forward(activation1.output)
    activation2.forward(output_layer.output)
    return activation2.output

In [48]:
def back_prop(y_pred):
    # Calculate the loss
    loss = 0.5 * torch.mean((y - y_pred) ** 2)
    
    # Compute the gradient of the loss
    grad = y_pred - y
    
    # Backpropagate through the layers starting from the output layer
    # Calculate gradients with respect to the layers and activations
    
    activation2_grad = activation2.backward() * grad
    output_layer_grad = output_layer.backward(activation2_grad)
    activation1_grad = activation1.backward() * output_layer_grad
    hidden_layer.backward(activation1_grad)
    
    # Update weights and biases using the gradients with a learning rate of 0.01
    # We can iterate over the layers instead of hardcoding everything
    with torch.no_grad():
        for layer in [hidden_layer, output_layer]:
            layer.weights -= 0.01 * layer.weights.grad
            layer.biases -= 0.01 * layer.biases.grad
            
            # Reset gradients to zero after updating weights and biases
            layer.weights.grad.zero_()
            layer.biases.grad.zero_()


In [49]:

loss_threshold = 0.0001
y_pred = forward_pass(X)
err = torch.mean(0.5 * (y - y_pred) ** 2)
print("Initial loss:", err.item())
print("Initial prediction:", y_pred)

Initial loss: 0.21378612518310547
Initial prediction: tensor([[0.9127, 0.6170]], grad_fn=<AddBackward0>)


In [50]:
while err > loss_threshold:
    back_prop(y_pred)
    y_pred = forward_pass(X)
    err = torch.mean(0.5 * (y - y_pred) ** 2)
    print("Current loss:", err.item())

print("Final loss:", err.item())
print("Final prediction:", y_pred)
print("Target value:", y)

Current loss: 0.20310473442077637
Current loss: 0.19295696914196014
Current loss: 0.18331599235534668
Current loss: 0.17415645718574524
Current loss: 0.1654542237520218
Current loss: 0.157186359167099
Current loss: 0.1493312120437622
Current loss: 0.14186805486679077
Current loss: 0.13477736711502075
Current loss: 0.12804046273231506
Current loss: 0.12163974344730377
Current loss: 0.11555841565132141
Current loss: 0.10978050529956818
Current loss: 0.1042909026145935
Current loss: 0.09907524287700653
Current loss: 0.0941198319196701
Current loss: 0.08941169828176498
Current loss: 0.08493853360414505
Current loss: 0.08068863302469254
Current loss: 0.07665086537599564
Current loss: 0.07281464338302612
Current loss: 0.06916993856430054
Current loss: 0.06570722162723541
Current loss: 0.062417395412921906
Current loss: 0.05929188430309296
Current loss: 0.05632247030735016
Current loss: 0.05350140109658241
Current loss: 0.05082125961780548
Current loss: 0.04827503114938736
Current loss: 0.045