## Training Neural Networks

In [2]:
import numpy as np

# Activation Functions and Their Derivatives
def relu(z):
    """Rectified linear unit.

    :param z: Scalar or array of pre-activation values.
    :return: ``z`` with every negative entry replaced by zero (element-wise).
    """
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(z):
    """Slope of ReLU at the pre-activation ``z``.

    :param z: Scalar or array of pre-activation values.
    :return: Integer mask that is 1 where ``z > 0`` and 0 elsewhere
        (the slope at exactly ``z == 0`` is taken to be 0).
    """
    is_active = z > 0
    return np.where(is_active, 1, 0)

def sigmoid(z):
    """Numerically stable logistic sigmoid, ``1 / (1 + exp(-z))``.

    Evaluating ``np.exp(-z)`` directly overflows for large negative ``z``
    and emits a RuntimeWarning. Here the exponential is only ever taken of a
    non-positive number, so it stays within (0, 1] and cannot overflow.

    :param z: Scalar or array of pre-activation values.
    :return: Element-wise sigmoid of ``z``; a scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is 1 / (1 + e^-z); for z < 0 the algebraically equal
    # e^z / (1 + e^z) is used so that no large exponent is ever formed.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps a 0-d result to a scalar and
    # leaves n-d arrays unchanged.
    return out[()]

def sigmoid_derivative(a):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    :param a: Sigmoid activation value(s), i.e. ``sigmoid(z)`` rather than
        the pre-activation ``z`` itself.
    :return: ``a * (1 - a)``, computed element-wise.
    """
    complement = 1 - a
    return complement * a

# Binary Cross-Entropy Loss
def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    :param y_true: Ground-truth labels (0 or 1).
    :param y_pred: Predicted probabilities, broadcastable against ``y_true``.
    :return: Scalar loss averaged over every element.
    """
    eps = 1e-15  # keeps both logarithms finite when a prediction is exactly 0 or 1
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return -np.mean(positive_term + negative_term)

# Neural Network Implementation
class NeuralNetwork:
    """Fully connected feed-forward network for binary classification.

    Hidden layers use ReLU and the output layer uses a sigmoid. Training is
    full-batch gradient descent on the binary cross-entropy loss. Data is
    laid out column-wise: ``x`` has one column per training example.
    """

    def __init__(self, layer_sizes, learning_rate=0.1):
        """
        Initialize the neural network with given layer sizes and learning rate.
        :param layer_sizes: Sequence of integers specifying the number of neurons
            in each layer, input layer first and output layer last.
        :param learning_rate: Learning rate for gradient descent.
        :raises ValueError: If fewer than two layer sizes are given, since no
            weight matrix could be built and backpropagation would fail later.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes must contain at least an input and an output layer"
            )

        self.layer_sizes = layer_sizes
        self.learning_rate = learning_rate
        self.weights = []
        self.biases = []

        # One (n_out, n_in) weight matrix and one (n_out, 1) bias column per
        # pair of consecutive layers, drawn from a standard normal. Weights
        # are drawn before biases for each layer so that a seeded global RNG
        # yields the same parameters as before.
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.randn(n_out, n_in))
            self.biases.append(np.random.randn(n_out, 1))

    def forward(self, x):
        """
        Perform forward propagation.
        :param x: Input data, one column per example.
        :return: Tuple ``(activations, z_values)``. ``activations[0]`` is ``x``
            and ``activations[i + 1]`` is the output of layer ``i``;
            ``z_values[i]`` is the weighted sum feeding layer ``i``'s activation.
        """
        activations = [x]
        z_values = []
        output_index = len(self.weights) - 1

        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(w, activations[-1]) + b
            z_values.append(z)
            # Sigmoid on the output layer, ReLU on every hidden layer.
            activations.append(sigmoid(z) if i == output_index else relu(z))

        return activations, z_values

    def backward(self, x, y, activations, z_values):
        """
        Perform backpropagation.
        :param x: Input data (only its column count is used, as the batch size).
        :param y: Ground truth output.
        :param activations: List of activations from forward pass.
        :param z_values: List of weighted sums from forward pass.
        :return: Tuple ``(dw, db)`` of per-layer gradient lists, ordered like
            ``self.weights`` and ``self.biases``.
        """
        m = x.shape[1]  # Number of training examples
        n_layers = len(self.weights)
        dw = [None] * n_layers
        db = [None] * n_layers

        # Output layer: with a sigmoid output and binary cross-entropy loss,
        # the gradient w.r.t. the weighted sum simplifies to (prediction - label).
        delta = activations[-1] - y
        dw[-1] = (1 / m) * np.dot(delta, activations[-2].T)
        db[-1] = (1 / m) * np.sum(delta, axis=1, keepdims=True)

        # Hidden layers, last to first: pull the error back through the next
        # layer's weights and gate it with the ReLU slope.
        for i in range(n_layers - 2, -1, -1):
            delta = np.dot(self.weights[i + 1].T, delta) * relu_derivative(z_values[i])
            dw[i] = (1 / m) * np.dot(delta, activations[i].T)
            db[i] = (1 / m) * np.sum(delta, axis=1, keepdims=True)

        return dw, db

    def update_parameters(self, dw, db):
        """
        Update weights and biases in place with one gradient-descent step.
        :param dw: Per-layer weight gradients, ordered like ``self.weights``.
        :param db: Per-layer bias gradients, ordered like ``self.biases``.
        """
        for i, (grad_w, grad_b) in enumerate(zip(dw, db)):
            self.weights[i] -= self.learning_rate * grad_w
            self.biases[i] -= self.learning_rate * grad_b

    def train(self, x, y, epochs, verbose=True):
        """
        Train the neural network using gradient descent.
        :param x: Input data, one column per example.
        :param y: Ground truth output.
        :param epochs: Number of training epochs.
        :param verbose: When true (the default), print the loss and the updated
            parameters after every epoch.
        :return: List with the loss of each epoch, measured before that
            epoch's parameter update.
        """
        losses = []

        for epoch in range(epochs):
            # Forward propagation
            activations, z_values = self.forward(x)

            # Loss of the current parameters, i.e. before this epoch's update
            loss = binary_cross_entropy(y, activations[-1])
            losses.append(float(loss))

            # Backward propagation and parameter update
            dw, db = self.backward(x, y, activations, z_values)
            self.update_parameters(dw, db)

            if verbose:
                print(f"Epoch {epoch + 1}, Loss: {loss:.4f}")
                print("Updated Weights and Biases:")
                for i, (w, b) in enumerate(zip(self.weights, self.biases), start=1):
                    print(f"W[{i}]:\n", w)
                    print(f"b[{i}]:\n", b)

        return losses

# Architecture of the worked example: 2 inputs -> 2 hidden neurons -> 1 output neuron
layer_sizes = [2, 2, 1]

# Build the model; parameters are drawn randomly, so results vary between runs
nn = NeuralNetwork(layer_sizes, learning_rate=0.1)

# A single training example stored as columns: features x = [1, 2], label y = 1
x = np.array([1, 2]).reshape(2, 1)
y = np.array([1]).reshape(1, 1)

# Run 5 epochs of gradient descent, printing the loss and parameters after each
nn.train(x, y, epochs=5)


Epoch 1, Loss: 3.3137
Updated Weights and Biases:
W[1]:
 [[ 0.93154205  1.37203345]
 [ 0.74225569 -0.4905506 ]]
b[1]:
 [[-0.00659469]
 [-0.12003103]]
W[2]:
 [[-0.26436085 -1.10019516]]
b[2]:
 [[-0.53181587]]
Epoch 2, Loss: 1.7029
Updated Weights and Biases:
W[1]:
 [[ 0.90992164  1.32879265]
 [ 0.74225569 -0.4905506 ]]
b[1]:
 [[-0.02821509]
 [-0.12003103]]
W[2]:
 [[ 0.03570463 -1.10019516]]
b[2]:
 [[-0.45003219]]
Epoch 3, Loss: 0.8680
Updated Weights and Biases:
W[1]:
 [[ 0.91199329  1.33293593]
 [ 0.74225569 -0.4905506 ]]
b[1]:
 [[-0.02614345]
 [-0.12003103]]
W[2]:
 [[ 0.24106026 -1.10019516]]
b[2]:
 [[-0.39201052]]
Epoch 4, Loss: 0.4878
Updated Weights and Biases:
W[1]:
 [[ 0.92129815  1.35154566]
 [ 0.74225569 -0.4905506 ]]
b[1]:
 [[-0.01683858]
 [-0.12003103]]
W[2]:
 [[ 0.3781558  -1.10019516]]
b[2]:
 [[-0.35341078]]
Epoch 5, Loss: 0.3104
Updated Weights and Biases:
W[1]:
 [[ 0.93138819  1.37172574]
 [ 0.74225569 -0.4905506 ]]
b[1]:
 [[-0.00674855]
 [-0.12003103]]
W[2]:
 [[ 0.474413