In [None]:
# Problem Statement:  Implement the backpropagation algorithm from scratch to train a neural network for a simple regression task. You are not allowed to use high-level libraries like TensorFlow or PyTorch for this task. Evaluate the network's performance and explain how the gradients are propagated through the network. 

In [10]:
import numpy as np

class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Weights and biases initialization
        self.w1 = np.random.randn(input_size, hidden_size)  # Weights for input to hidden layer
        self.b1 = np.zeros((1, hidden_size))  # Biases for hidden layer
        self.w2 = np.random.randn(hidden_size, output_size)  # Weights for hidden to output layer
        self.b2 = np.zeros((1, output_size))  # Biases for output layer
        
        # Learning rate
        self.learning_rate = 0.01
        
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_derivative(self, z):
        return z * (1 - z)
    
    def forward(self, X):
        # Forward pass
        self.z1 = np.dot(X, self.w1) + self.b1  # Input to hidden layer
        self.a1 = self.sigmoid(self.z1)  # Activation in hidden layer
        self.z2 = np.dot(self.a1, self.w2) + self.b2  # Hidden to output layer
        self.a2 = self.z2  # Linear output for regression task (no activation in output layer)
        return self.a2
    
    def compute_loss(self, Y, Y_pred):
        # Mean Squared Error (MSE) loss
        return np.mean((Y_pred - Y) ** 2)

    def backpropagate(self, X, Y):
        # Backward pass (backpropagation)
        m = Y.shape[0]  # Number of samples

        # Calculate the gradients
        dL_da2 = 2 * (self.a2 - Y) / m  # Derivative of the loss w.r.t output (MSE gradient)
        
        # Gradients for weights between hidden and output layer
        dL_dw2 = np.dot(self.a1.T, dL_da2)
        dL_db2 = np.sum(dL_da2, axis=0, keepdims=True)
        
        # Gradient for hidden layer activation
        dL_da1 = np.dot(dL_da2, self.w2.T) * self.sigmoid_derivative(self.a1)

        # Gradients for weights between input and hidden layer
        dL_dw1 = np.dot(X.T, dL_da1)
        dL_db1 = np.sum(dL_da1, axis=0, keepdims=True)
        
        # Update weights and biases using gradients
        self.w2 -= self.learning_rate * dL_dw2
        self.b2 -= self.learning_rate * dL_db2
        self.w1 -= self.learning_rate * dL_dw1
        self.b1 -= self.learning_rate * dL_db1
    
    def train(self, X, Y, epochs):
        # Train the network
        for epoch in range(epochs):
            Y_pred = self.forward(X)  # Forward pass
            loss = self.compute_loss(Y, Y_pred)  # Compute loss
            
            self.backpropagate(X, Y)  # Backward pass (backpropagation)
            
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")
    
    def predict(self, X):
        # Predict output for new inputs
        return self.forward(X)


In [11]:
# Generate synthetic data for a simple regression task
X = np.random.randn(100, 1)  # 100 samples, 1 feature
Y = 2 * X + 1  # Linear relation

# Initialize and train the network
nn = SimpleNeuralNetwork(input_size=1, hidden_size=10, output_size=1)
nn.train(X, Y, epochs=1000)

# Predict and evaluate performance
Y_pred = nn.predict(X)
print("Predicted values:\n", Y_pred[:5])
print("Actual values:\n", Y[:5])


Epoch 0, Loss: 3.5847918757897292
Epoch 100, Loss: 0.1563983738299983
Epoch 200, Loss: 0.04983915591220746
Epoch 300, Loss: 0.04234053764835421
Epoch 400, Loss: 0.040748545411325165
Epoch 500, Loss: 0.03954843286780654
Epoch 600, Loss: 0.03842836145999104
Epoch 700, Loss: 0.037367719197446955
Epoch 800, Loss: 0.03636175594780193
Epoch 900, Loss: 0.035406766007345346
Predicted values:
 [[ 0.4806405 ]
 [ 2.97051891]
 [ 2.57931127]
 [-1.97220613]
 [ 2.89888564]]
Actual values:
 [[ 0.52879706]
 [ 2.83116717]
 [ 2.41001394]
 [-1.96772864]
 [ 2.75067768]]
