In [1]:
import numpy as np

# Network parameters
input_neurons = 2
hidden_neurons = 4  # Increased from 2 to 4 (enough for XOR)
output_neurons = 1
learning_rate = 0.5  # Increased from 0.1
epochs = 20000       # Increased from 10,000

# XOR dataset
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# Initialize weights with larger scale (avoid vanishing gradients)
W1 = np.random.randn(input_neurons, hidden_neurons) * 0.1  # Changed from *0.01
b1 = np.zeros((1, hidden_neurons))                          # Explicit zeros for bias
W2 = np.random.randn(hidden_neurons, output_neurons) * 0.1
b2 = np.zeros((1, output_neurons))

# Sigmoid and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Training loop
for epoch in range(epochs):
    # Forward pass
    hidden_input = np.dot(X, W1) + b1
    hidden_output = sigmoid(hidden_input)
    output_input = np.dot(hidden_output, W2) + b2
    output_output = sigmoid(output_input)

    # Backpropagation
    output_error = Y - output_output
    output_delta = output_error * sigmoid_derivative(output_output)

    hidden_error = output_delta.dot(W2.T)
    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)

    # Update weights and biases
    W2 += hidden_output.T.dot(output_delta) * learning_rate
    b2 += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    W1 += X.T.dot(hidden_delta) * learning_rate
    b1 += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    # Print loss every 5000 epochs
    if epoch % 5000 == 0:
        loss = np.mean(np.abs(output_error))
        print(f"Epoch {epoch}, Loss: {loss:.6f}")

# Test the network
print("\nFinal Predictions:")
hidden_output = sigmoid(np.dot(X, W1) + b1)
output_output = sigmoid(np.dot(hidden_output, W2) + b2)
print("Input:\n", X)
print("Predicted Output:\n", np.round(output_output, 4))
print("Expected Output:\n", Y)

Epoch 0, Loss: 0.500000
Epoch 5000, Loss: 0.500000
Epoch 10000, Loss: 0.499999
Epoch 15000, Loss: 0.499984

Final Predictions:
Input:
 [[0 0]
 [0 1]
 [1 0]
 [1 1]]
Predicted Output:
 [[0.0284]
 [0.9714]
 [0.9712]
 [0.0322]]
Expected Output:
 [[0]
 [1]
 [1]
 [0]]
