# In [None]:
import numpy as np

# Seed NumPy's global RNG so the np.random.randn weight initialisation in
# SimpleNN.__init__ produces the same values on every run.
np.random.seed(42)

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The input is first clamped to [-500, 500] so that ``np.exp`` cannot
    overflow for very large-magnitude values.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(a):
    """Return ``a * (1 - a)``, element-wise.

    When ``a`` is the *output* of the sigmoid, this product equals the
    sigmoid's derivative with respect to its input.
    """
    complement = 1 - a
    return a * complement

def relu(z):
    """Return the rectified linear unit ``max(0, z)``, element-wise."""
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(a):
    """Return 1.0 where ``a`` is strictly positive and 0.0 elsewhere.

    Because ReLU's output is positive exactly where its input is positive,
    the result is the same whether ``a`` holds pre-activations or ReLU
    outputs.
    """
    is_positive = a > 0
    return is_positive.astype(float)

def mse_loss(y_true, y_pred):
    """Return the mean of the squared element-wise differences.

    The mean is taken over every element of the (broadcast) difference
    ``y_true - y_pred``, so the result is a single scalar.
    """
    residual = y_true - y_pred
    return np.mean(residual * residual)

# Neural Network Class
class SimpleNN:
    """Two-layer fully connected network trained by full-batch gradient descent.

    Architecture: ``input -> dense(hidden_size) -> ReLU -> dense(output_size)
    -> ReLU``.  Weights are drawn with ``np.random.randn`` (NumPy's global
    RNG) and scaled by 0.5; biases start at zero.

    ``forward`` caches ``z1, a1, z2, a2`` on the instance and ``backward``
    caches ``dW1, db1, dW2, db2``; ``update_params`` consumes the latter.

    Args:
        input_size: Number of features per sample.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        learning_rate: Step size used by ``update_params``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        # Draw order (W1 first, then W2) matters for reproducibility under a
        # fixed global seed.
        self.W1 = np.random.randn(input_size, hidden_size) * 0.5
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.5
        self.b2 = np.zeros((1, output_size))
        self.lr = learning_rate

    @staticmethod
    def _match_targets(y, shape):
        """Return ``y`` as an array, reshaping a flat vector to ``shape``.

        A 1-D label vector of length ``m`` would otherwise broadcast against
        an ``(m, 1)`` prediction into an ``(m, m)`` matrix, giving a wrong
        loss and shape errors during back-propagation.  Targets that are not
        1-D, or whose size does not match, are returned untouched so normal
        NumPy broadcasting (e.g. a scalar target) still applies.
        """
        y = np.asarray(y)
        if y.ndim == 1 and y.size == int(np.prod(shape)):
            y = y.reshape(shape)
        return y

    def forward(self, X):
        """Run a forward pass and return the output activations.

        Args:
            X: Samples, one per row; lists and single 1-D samples are
                accepted and promoted to a 2-D array.

        Returns:
            Array of shape ``(n_samples, output_size)`` with non-negative
            entries (the output layer is a ReLU).
        """
        X = np.atleast_2d(X)
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = relu(self.z1)            # Hidden layer -> ReLU
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = relu(self.z2)            # Output layer -> ReLU (instead of sigmoid)
        return self.a2

    def backward(self, X, y):
        """Compute parameter gradients for the most recent ``forward`` pass.

        Must be called after ``forward`` on the same ``X``; it reads the
        cached ``a1``/``a2`` and stores ``dW1, db1, dW2, db2`` on the
        instance.

        The output error is ``(a2 - y) / m``: the constant factor that comes
        from differentiating the mean squared error is omitted, which only
        rescales the effective learning rate.  Because the output unit is a
        ReLU, samples whose output is not positive contribute zero gradient.

        Args:
            X: Samples used in the preceding forward pass.
            y: Targets; a flat vector is reshaped to match the predictions.
        """
        X = np.atleast_2d(X)
        y = self._match_targets(y, self.a2.shape)
        m = X.shape[0]

        # Output layer.
        dz2 = (self.a2 - y) * relu_derivative(self.a2) / m
        self.dW2 = np.dot(self.a1.T, dz2)
        self.db2 = np.sum(dz2, axis=0, keepdims=True)

        # Hidden layer: propagate the error back through W2 and the ReLU.
        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * relu_derivative(self.a1)
        self.dW1 = np.dot(X.T, dz1)
        self.db1 = np.sum(dz1, axis=0, keepdims=True)

    def update_params(self):
        """Apply one gradient-descent step using the gradients from ``backward``."""
        self.W1 -= self.lr * self.dW1
        self.b1 -= self.lr * self.db1
        self.W2 -= self.lr * self.dW2
        self.b2 -= self.lr * self.db2

    def train(self, X, y, epochs):
        """Train on the full batch for ``epochs`` iterations.

        Prints the loss every 100 epochs (starting at epoch 0).

        Args:
            X: Training samples, one per row.
            y: Targets; a flat vector is reshaped to match the predictions.
            epochs: Number of forward/backward/update iterations.

        Returns:
            List with the MSE loss measured before each update.
        """
        X = np.atleast_2d(X)
        # Normalise the targets once so loss and gradients see the same shape.
        y = self._match_targets(y, (X.shape[0], self.W2.shape[1]))

        losses = []
        for i in range(epochs):
            pred = self.forward(X)
            loss = mse_loss(y, pred)
            losses.append(loss)
            self.backward(X, y)
            self.update_params()
            if i % 100 == 0:
                print(f"Epoch {i}, Loss: {loss:.6f}")
        return losses

    def predict(self, X):
        """Return binary predictions (1 where the network output exceeds 0.5).

        Note that this calls ``forward`` and therefore overwrites the cached
        activations.
        """
        pred = self.forward(X)
        return (pred > 0.5).astype(int)  # threshold for binary output

if __name__ == "__main__":
    # XOR truth table: four 2-bit inputs and their exclusive-or.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [0],
    ])

    # Small 2-3-1 network trained with full-batch gradient descent.
    nn = SimpleNN(
        input_size=2,
        hidden_size=3,
        output_size=1,
        learning_rate=0.1,
    )
    nn.train(X, y, epochs=1000)

    # Report thresholded predictions next to the expected labels.
    predictions = nn.predict(X)
    print("\nResults:")
    print("Input\t\tPredicted\tTrue")
    for x, pred, true in zip(X, predictions, y):
        print(f"{x}\t\t{pred[0]}\t\t{true[0]}")
#