In [2]:
import numpy as np

# XOR data
X = np.array([[0, 0, 1, 1], 
              [0, 1, 0, 1]])  # shape: (2, 4)
Y = np.array([[0, 1, 1, 0]])  # shape: (1, 4)


In [7]:
def tanh(z):
    return np.tanh(z)

def tanh_derivative(a):
    return 1 - np.square(a)

In [8]:
# Activation function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(a):
    return a * (1 - a)

# Initialize parameters
def init_params():
    np.random.seed(1)
    W1 = np.random.randn(2, 2)  # 2 inputs → 2 hidden
    b1 = np.zeros((2, 1))
    W2 = np.random.randn(1, 2)  # 2 hidden → 1 output
    b2 = np.zeros((1, 1))
    return W1, b1, W2, b2

# Forward propagation
def forward(X, W1, b1, W2, b2):
    Z1 = np.dot(W1, X) + b1
    A1 = tanh(Z1)

    Z2 = np.dot(W2, A1) + b2
    A2 = tanh(Z2)
    
    return A1, A2

# Compute loss
def compute_loss(A2, Y):
    m = Y.shape[1]
    return -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2)) / m

# Backpropagation
def backward(X, Y, A1, A2, W2):
    m = X.shape[1]
    
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * tanh_derivative(A1)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)
    
    return dW1, db1, dW2, db2

# Training loop
def train(X, Y, iterations=10000, learning_rate=0.1):
    W1, b1, W2, b2 = init_params()
    
    for i in range(iterations):
        A1, A2 = forward(X, W1, b1, W2, b2)
        loss = compute_loss(A2, Y)

        dW1, db1, dW2, db2 = backward(X, Y, A1, A2, W2)
        
        # Gradient descent
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        if i % 1000 == 0:
            print(f"Iteration {i}: Loss = {loss:.4f}")
    
    return W1, b1, W2, b2

# Predict
def predict(X, W1, b1, W2, b2):
    _, A2 = forward(X, W1, b1, W2, b2)
    return (A2 > 0.5).astype(int)

# Train
W1, b1, W2, b2 = train(X, Y)

# Predict on XOR
predictions = predict(X, W1, b1, W2, b2)
print("Predictions:", predictions)


  return -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2)) / m
  return -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2)) / m
  return -np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2)) / m


Iteration 0: Loss = nan
Iteration 1000: Loss = 0.3806
Iteration 2000: Loss = 0.3621
Iteration 3000: Loss = 0.3564
Iteration 4000: Loss = 0.3537
Iteration 5000: Loss = 0.3521
Iteration 6000: Loss = 0.3511
Iteration 7000: Loss = 0.3504
Iteration 8000: Loss = 0.3499
Iteration 9000: Loss = 0.3495
Predictions: [[0 0 1 1]]
