Problem Definition
Dataset:
The dataset for this task is the classic "OR problem," a simple binary classification dataset. The inputs are all four combinations of two binary values — (0, 0), (0, 1), (1, 0), (1, 1) — and each input is labeled with the result of the logical OR of its two values. The output labels are binary (0 or 1).

Task:
The task is to train a neural network to predict the output of the OR operation. The model should learn the correct classification for each combination of inputs (X = [0, 0], [0, 1], [1, 0], [1, 1]) and produce the corresponding output (Y = [0], [1], [1], [1]). In the OR operation, the output is 1 if at least one of the inputs is 1, and 0 only when both inputs are 0.

In [1]:
import numpy as np

# Sigmoid Activation Function: Maps any input to a value between 0 and 1.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is computed from ``exp(-|x|)`` instead of ``exp(-x)`` so that
    large-magnitude negative inputs do not overflow (the naive form evaluates
    ``exp(+large)``, which overflows to ``inf`` and emits a RuntimeWarning).

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the shape of
        *x*, holding values between 0 and 1.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow for any finite x.
    z = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 it is the algebraically
    # equal exp(x) / (1 + exp(x)), written with z = exp(x).
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves arrays of higher rank as they are.
    return result[()]

# Derivative of the Sigmoid Activation Function, expressed in terms of the sigmoid's output: Used during backpropagation to calculate the gradient.
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope written in terms of its output.

    The formula is the derivative of the sigmoid only when *x* is already a
    sigmoid output (an activation), not the raw pre-activation value.

    Args:
        x: A scalar or NumPy array of sigmoid outputs.

    Returns:
        The element-wise product ``x * (1 - x)``, same shape as *x*.
    """
    complement = 1 - x
    return x * complement

# Neural Network Class Definition
class NeuralNetwork:
    """Feed-forward network with one sigmoid hidden layer and a sigmoid output layer.

    ``forward`` computes and caches the activations, ``backward`` uses those
    cached activations to update the weights and biases in place, and ``train``
    alternates the two over the whole dataset for a number of epochs.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Store the layer sizes and create weights and biases that match them.

        For the 2-3-1 layout the fixed starting values below are used, so a run
        is exactly reproducible. For any other layout the weights are drawn
        from a fixed-seed uniform distribution on [-0.5, 0.5) with the shapes
        the sizes call for; biases start at 0.1 in both cases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of neurons in the hidden layer.
            output_size: Number of output neurons.
        """
        self.input_size = input_size      # Number of input features
        self.hidden_size = hidden_size    # Number of neurons in the hidden layer
        self.output_size = output_size    # Number of output neurons

        if (input_size, hidden_size, output_size) == (2, 3, 1):
            # Hand-picked small starting values for the 2-3-1 network.
            self.weights_input_hidden = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])  # Input to hidden
            self.bias_hidden = np.array([[0.1, 0.1, 0.1]])  # Hidden layer bias

            self.weights_hidden_output = np.array([[0.2], [0.3], [0.4]])  # Hidden to output
            self.bias_output = np.array([[0.1]])  # Output layer bias
        else:
            # The fixed values above only fit a 2-3-1 network; for any other
            # layout build arrays whose shapes follow the requested sizes.
            # A fixed seed keeps construction deterministic.
            rng = np.random.default_rng(0)
            self.weights_input_hidden = rng.uniform(-0.5, 0.5, (input_size, hidden_size))
            self.bias_hidden = np.full((1, hidden_size), 0.1)

            self.weights_hidden_output = rng.uniform(-0.5, 0.5, (hidden_size, output_size))
            self.bias_output = np.full((1, output_size), 0.1)

    def forward(self, X):
        """Run a forward pass and cache every intermediate value on ``self``.

        Args:
            X: Array of inputs with one sample per row and ``input_size`` columns.

        Returns:
            The output-layer activations, one row per sample.
        """
        self.input_layer = X    # Store the input data

        # Hidden layer: affine transform followed by the sigmoid activation
        self.hidden_layer_input = np.dot(self.input_layer, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = sigmoid(self.hidden_layer_input)

        # Output layer: affine transform followed by the sigmoid activation
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output_layer_output = sigmoid(self.output_layer_input)

        return self.output_layer_output

    def backward(self, X, y, learning_rate):
        """Backpropagate the error and update all weights and biases in place.

        Reads the activations cached by the most recent ``forward`` call, so
        ``forward`` must have been called on the same *X* first.

        Args:
            X: The inputs that were passed to ``forward``.
            y: Target outputs, one row per sample.
            learning_rate: Step size applied to every update.
        """
        # Compute the error in the output layer
        error_output = y - self.output_layer_output

        # Calculate the gradient (delta) for the output layer
        output_layer_delta = error_output * sigmoid_derivative(self.output_layer_output)

        # Propagate the output delta back through the hidden-to-output weights.
        # This must happen before those weights are updated below.
        error_hidden = output_layer_delta.dot(self.weights_hidden_output.T)

        # Calculate the gradient (delta) for the hidden layer
        hidden_layer_delta = error_hidden * sigmoid_derivative(self.hidden_layer_output)

        # Update weights and biases; the error is (target - output), so the
        # corrections are added rather than subtracted.
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_layer_delta) * learning_rate
        self.bias_output += np.sum(output_layer_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += X.T.dot(hidden_layer_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_layer_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):
        """Train on the full dataset for *epochs* iterations.

        Every 1000th epoch (starting with epoch 0) the mean squared error is
        printed. It is computed from that epoch's forward pass, i.e. with the
        weights as they were before that epoch's update.

        Args:
            X: Training inputs, one sample per row.
            y: Target outputs, one row per sample.
            epochs: Number of forward/backward passes to run.
            learning_rate: Step size passed to ``backward``.
        """
        for epoch in range(epochs):
            self.forward(X)   # Perform a forward pass
            self.backward(X, y, learning_rate)     # Perform a backward pass (backpropagation)

            # Print loss (mean squared error) every 1000 epochs
            if epoch % 1000 == 0:
                loss = np.mean(np.square(y - self.output_layer_output))
                print(f"Epoch {epoch} - Loss: {loss}")

# Main Program
if __name__ == "__main__":
    # OR truth table: the inputs are every pairing of two binary values, and
    # each label is 1 when at least one value in the row is 1.
    X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
    y = np.any(X, axis=1, keepdims=True).astype(int)

    # Two inputs, three hidden neurons, one output neuron.
    nn = NeuralNetwork(2, 3, 1)

    # 10,000 full-batch epochs with a learning rate of 0.05.
    nn.train(X, y, 10000, 0.05)

    print("\nPredictions after training:")
    print(nn.forward(X))

Epoch 0 - Loss: 0.1872569373156708
Epoch 1000 - Loss: 0.13091205241705686
Epoch 2000 - Loss: 0.048059258618477305
Epoch 3000 - Loss: 0.017784068110331508
Epoch 4000 - Loss: 0.009223223381770408
Epoch 5000 - Loss: 0.005852448783419247
Epoch 6000 - Loss: 0.004165096539805562
Epoch 7000 - Loss: 0.0031829531234926728
Epoch 8000 - Loss: 0.0025512737606144895
Epoch 9000 - Loss: 0.002115524661126214

Predictions after training:
[[0.06696032]
 [0.96362056]
 [0.96311294]
 [0.99467589]]
