In [1]:
# Cell 1: Imports and helper functions

import numpy as np

def sigmoid(x):
    """Numerically stable logistic sigmoid, ``1 / (1 + exp(-x))``.

    Evaluating ``np.exp(-x)`` directly overflows for large negative ``x``
    (emitting a RuntimeWarning). This version only ever exponentiates a
    non-positive number, so it cannot overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        Sigmoid of ``x`` with the same shape as the input; a scalar input
        yields a NumPy scalar rather than a 0-d array.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1]: it may underflow to 0 but never overflows.
    decay = np.exp(-np.abs(x))
    # x >= 0 -> 1 / (1 + exp(-x))          (the textbook form)
    # x <  0 -> exp(x) / (1 + exp(x))      (algebraically identical)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; n-d arrays pass through.
    return result[()]

def sigmoid_derivative(x):
    """Slope of the sigmoid, expressed in terms of the sigmoid's output.

    ``x`` must already be an activation (``x = sigmoid(z)``), not the
    pre-activation ``z``. The result is ``x * (1 - x)``.
    """
    complement = 1 - x
    return x * complement


In [2]:
# Cell 2: Define the network class

class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid units.

    Trained with full-batch gradient descent on squared error. The
    intermediate values of the most recent ``forward`` call are cached on
    the instance (``z1``, ``a1``, ``z2``, ``a2``) and reused by ``backward``.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=42):
        """Create the weight matrices and zero-filled biases.

        Args:
            input_size: Number of input features (columns of ``X``).
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Seed for the weight initialisation. ``None`` seeds from
                OS entropy.
        """
        # A private RandomState produces the same draws as
        # np.random.seed(seed) followed by np.random.randn(...), but leaves
        # the global NumPy RNG untouched, so constructing a model does not
        # reset randomness used elsewhere in the notebook.
        rng = np.random.RandomState(seed)
        # Xavier-style fan-in scaling: std = sqrt(1 / fan_in)
        self.W1 = rng.randn(input_size, hidden_size) * np.sqrt(1 / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size) * np.sqrt(1 / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and cache the intermediate values.

        Args:
            X: Input array whose last dimension equals ``input_size``.

        Returns:
            The output-layer activations ``a2``.
        """
        # Hidden layer
        self.z1 = X.dot(self.W1) + self.b1
        self.a1 = sigmoid(self.z1)
        # Output layer
        self.z2 = self.a1.dot(self.W2) + self.b2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output, lr):
        """Apply one gradient-descent update, in place, to all parameters.

        Must be called right after ``forward(X)``: it reads the cached hidden
        activations ``self.a1`` from that call.

        Args:
            X: The inputs that produced ``output``.
            y: Targets with the same shape as ``output``.
            output: The value returned by ``forward(X)``.
            lr: Learning rate. Gradients are summed over the batch, not
                averaged, so the effective step grows with batch size.
        """
        # Output error (sign chosen so the updates below are additive)
        error2 = y - output
        delta2 = error2 * sigmoid_derivative(output)
        # Hidden error, propagated through W2 *before* W2 is modified
        error1 = delta2.dot(self.W2.T)
        delta1 = error1 * sigmoid_derivative(self.a1)
        # Update weights/biases
        self.W2 += self.a1.T.dot(delta2) * lr
        self.b2 += np.sum(delta2, axis=0, keepdims=True) * lr
        self.W1 += X.T.dot(delta1) * lr
        self.b1 += np.sum(delta1, axis=0, keepdims=True) * lr

    def train(self, X, y, epochs=10000, lr=0.1, print_every=1000):
        """Fit the network with full-batch gradient descent.

        Args:
            X: Training inputs.
            y: Training targets, same shape as the network output.
            epochs: Number of full passes over ``(X, y)``.
            lr: Learning rate passed to ``backward``.
            print_every: Log the MSE on epoch 1 and on every multiple of
                this value. ``0`` or ``None`` disables logging.
        """
        # A falsy print_every would otherwise fail on `i % print_every`.
        log_progress = bool(print_every)
        for i in range(1, epochs + 1):
            out = self.forward(X)
            self.backward(X, y, out, lr)
            if log_progress and (i % print_every == 0 or i == 1):
                # `out` predates this epoch's update, so the reported loss
                # is the one measured at the start of the epoch.
                loss = np.mean((y - out)**2)
                print(f"Epoch {i}/{epochs}, MSE={loss:.4f}")

    def predict(self, X):
        """Return binary outputs (0/1) based on 0.5 threshold.

        Runs ``forward``, so the cached activations are overwritten.
        """
        probs = self.forward(X)
        return (probs >= 0.5).astype(int)


In [3]:
# Cell 3: Prepare the XOR dataset and train

# XOR truth table: every 0/1 input pair, with each label derived as a XOR b
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = X[:, :1] ^ X[:, 1:]  # column vector [[0], [1], [1], [0]]

# Build the 2-4-1 network and fit it to the four samples
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
nn.train(X, y, epochs=5000, lr=0.1, print_every=1000)


Epoch 1/5000, MSE=0.2574
Epoch 1000/5000, MSE=0.2491
Epoch 2000/5000, MSE=0.2429
Epoch 3000/5000, MSE=0.2051
Epoch 4000/5000, MSE=0.1337
Epoch 5000/5000, MSE=0.0428


In [4]:
# Cell 4: Test the trained network

# Threshold the trained network's outputs and show them next to the targets
preds = nn.predict(X)
for heading, values in (
    ("Input:\n", X),
    ("Predicted:\n", preds.flatten()),
    ("Expected:\n", y.flatten()),
):
    print(heading, values)


Input:
 [[0 0]
 [0 1]
 [1 0]
 [1 1]]
Predicted:
 [0 1 1 0]
Expected:
 [0 1 1 0]
