In [29]:
import numpy as np

# XOR truth table: every combination of two binary inputs, one per row.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Labels are the bitwise XOR of the two input columns, kept as a column vector
# (list-style column indexing preserves the 2-D shape).
y = X[:, [0]] ^ X[:, [1]]


In [54]:
def sigmoid(z):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-z)).

    The textbook formula overflows in ``np.exp(-z)`` for large negative ``z``
    (emitting a RuntimeWarning). This version only ever exponentiates a
    non-positive number, so the intermediate value stays in (0, 1].

    Args:
        z: Scalar or NumPy array of pre-activations.

    Returns:
        Element-wise sigmoid of ``z``; a NumPy scalar for 0-d input, otherwise
        an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) is always in (0, 1], so it can never overflow.
    e = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + exp(-z));  for z < 0: exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a scalar and is a no-op
    # for arrays of higher rank.
    return out[()]

def sigmoid_grad(a):
    """Derivative of the sigmoid expressed via its output.

    Args:
        a: Sigmoid activation value(s), i.e. ``sigmoid(z)``.

    Returns:
        ``a * (1 - a)``, computed element-wise.
    """
    complement = 1 - a
    return a * complement

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        ``z`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, z)

def relu_grad(z):
    """Derivative of ReLU with respect to its input.

    Args:
        z: Array of pre-activations.

    Returns:
        Float array with 1.0 where ``z > 0`` and 0.0 elsewhere (including at
        exactly zero).
    """
    is_active = z > 0
    return is_active.astype(float)


In [55]:
def initialize_parameters(input_dim, hidden_dim, output_dim, seed=42):
    """Create He-scaled weights and zero biases for a two-layer network.

    A private ``RandomState`` is used instead of ``np.random.seed`` so that
    calling this function does not reset the global NumPy random stream as a
    side effect. With the default seed the returned values are identical to
    those produced by seeding the global generator with 42.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.
        output_dim: Number of output units.
        seed: Seed for the private generator. ``None`` draws a fresh,
            non-reproducible seed.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(input_dim, hidden_dim)``,
        ``(1, hidden_dim)``, ``(hidden_dim, output_dim)`` and
        ``(1, output_dim)``.
    """
    rng = np.random.RandomState(seed)
    # Standard-normal draws scaled by sqrt(2 / fan_in).
    W1 = rng.randn(input_dim, hidden_dim) * np.sqrt(2. / input_dim)
    b1 = np.zeros((1, hidden_dim))
    W2 = rng.randn(hidden_dim, output_dim) * np.sqrt(2. / hidden_dim)
    b2 = np.zeros((1, output_dim))
    return W1, b1, W2, b2


In [56]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass: affine -> ReLU -> affine -> sigmoid.

    Args:
        X: Input batch.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the sigmoid output and ``cache``
        is ``(Z1, A1, Z2, A2)``, the intermediate values of both layers.
    """
    # Hidden layer
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)

    # Output layer
    out_pre = np.dot(hidden_act, W2) + b2
    out_act = sigmoid(out_pre)

    return out_act, (hidden_pre, hidden_act, out_pre, out_act)


In [57]:
def compute_loss(y, y_hat):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Args:
        y: Array of 0/1 labels; its first axis length is used as the sample
            count.
        y_hat: Predicted probabilities, same shape as ``y``.

    Returns:
        Scalar loss. A small constant (1e-9) is added inside each logarithm so
        that probabilities of exactly 0 or 1 do not produce ``log(0)``.
    """
    eps = 1e-9
    n_samples = y.shape[0]
    positive_term = y * np.log(y_hat + eps)
    negative_term = (1 - y) * np.log(1 - y_hat + eps)
    return - (1 / n_samples) * np.sum(positive_term + negative_term)


In [58]:
def backward_propagation(X, y, W1, b1, W2, b2, cache):
    """Compute parameter gradients for the two-layer network.

    Args:
        X: Input batch; its first axis length is used as the sample count.
        y: Target labels.
        W1, b1: Hidden-layer parameters (not read by this function).
        W2, b2: Output-layer parameters (only ``W2`` is read).
        cache: ``(Z1, A1, Z2, A2)`` as returned by the forward pass.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` of gradients averaged over the batch.
    """
    hidden_pre, hidden_act, _, out_act = cache
    inv_m = 1 / X.shape[0]

    # Output layer: the error signal is prediction minus target.
    delta_out = out_act - y
    grad_W2 = inv_m * np.dot(hidden_act.T, delta_out)
    grad_b2 = inv_m * np.sum(delta_out, axis=0, keepdims=True)

    # Hidden layer: push the error back through W2, then gate it by the
    # ReLU derivative.
    delta_hidden = np.dot(delta_out, W2.T) * relu_grad(hidden_pre)
    grad_W1 = inv_m * np.dot(X.T, delta_hidden)
    grad_b1 = inv_m * np.sum(delta_hidden, axis=0, keepdims=True)

    return grad_W1, grad_b1, grad_W2, grad_b2


In [59]:
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, lr=0.1):
    """Apply one gradient-descent step and return the updated parameters.

    The update is computed out-of-place: the arrays passed in are left
    untouched, so the caller's previous parameters remain valid, and
    integer-dtype inputs are promoted to float instead of failing on an
    in-place cast.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching each parameter.
        lr: Learning rate (step size).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of new arrays, each equal to
        ``param - lr * grad``.
    """
    new_W1 = W1 - lr * dW1
    new_b1 = b1 - lr * db1
    new_W2 = W2 - lr * dW2
    new_b2 = b2 - lr * db2
    return new_W1, new_b1, new_W2, new_b2


In [60]:
def train(X, y, hidden_dim=4, lr=0.1, epochs=10000):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input batch; ``X.shape[1]`` sets the input dimension.
        y: Target labels.
        hidden_dim: Number of hidden units.
        lr: Learning rate passed to the parameter update.
        epochs: Number of gradient-descent iterations.

    Returns:
        Tuple ``(W1, b1, W2, b2, losses)`` where ``losses`` holds the loss
        measured at the start of every epoch (before that epoch's update).
    """
    # The network has a single output unit.
    params = initialize_parameters(X.shape[1], hidden_dim, 1)
    losses = []

    for i in range(epochs):
        # Forward pass and loss for the current parameters.
        y_hat, cache = forward_propagation(X, *params)
        loss = compute_loss(y, y_hat)
        losses.append(loss)

        # Gradients, then one descent step.
        grads = backward_propagation(X, y, *params, cache)
        params = update_parameters(*params, *grads, lr)

        # Progress report every 1000 epochs (shows the pre-update loss).
        if i % 1000 == 0:
            print(f"Epoch {i}, Loss: {loss:.4f}")

    return (*params, losses)



In [61]:
# Entry point: train on the XOR dataset, then compare predictions with labels.
if __name__ == "__main__":
    # train() prints the loss every 1000 epochs and returns the learned
    # parameters together with the per-epoch loss history.
    W1, b1, W2, b2, losses = train(X, y)
    
    # Run one more forward pass with the trained parameters; the cache of
    # intermediate values is not needed here.
    y_hat, _ = forward_propagation(X, W1, b1, W2, b2)
    # Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
    predictions = (y_hat > 0.5).astype(int)
    print("Predictions after training:")
    print(predictions)
    print("True labels:")
    print(y)


Epoch 0, Loss: 0.8147
Epoch 1000, Loss: 0.0770
Epoch 2000, Loss: 0.0191
Epoch 3000, Loss: 0.0101
Epoch 4000, Loss: 0.0067
Epoch 5000, Loss: 0.0050
Epoch 6000, Loss: 0.0039
Epoch 7000, Loss: 0.0032
Epoch 8000, Loss: 0.0027
Epoch 9000, Loss: 0.0024
Predictions after training:
[[0]
 [1]
 [1]
 [0]]
True labels:
[[0]
 [1]
 [1]
 [0]]
