In [None]:
import numpy as np

# Activation functions and derivatives
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    The textbook formula overflows inside ``np.exp`` (emitting a
    RuntimeWarning) for large negative inputs. Here the exponential is only
    ever taken of a non-positive number, so it cannot overflow:

        x >= 0:  1 / (1 + exp(-x))
        x <  0:  exp(x) / (1 + exp(x))

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp(-|x|) never exceeds 1
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap 0-d results so scalar input still yields a scalar
def sigmoid_deriv(x):
    """Derivative of the logistic sigmoid, evaluated element-wise at ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)), so the sigmoid
    itself is evaluated only once.
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified
def relu_deriv(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere (including x == 0)."""
    is_positive = x > 0
    return is_positive.astype(float)
# Loss function: Binary Cross-Entropy
def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    A small constant is added inside each logarithm so that predictions of
    exactly 0 or 1 do not evaluate ``log(0)``. The mean is taken over every
    element of the inputs.
    """
    epsilon = 1e-8
    positive_term = y_true * np.log(y_pred + epsilon)
    negative_term = (1 - y_true) * np.log(1 - y_pred + epsilon)
    return -np.mean(positive_term + negative_term)

def binary_cross_entropy_deriv(y_true, y_pred):
    """Gradient of the mean binary cross-entropy with respect to ``y_pred``.

    For each element the derivative of
    ``-(y * log(p + eps) + (1 - y) * log(1 - p + eps))`` with respect to ``p``
    is ``-y / (p + eps) + (1 - y) / (1 - p + eps)``.

    The result is divided by the total number of elements, because the loss
    is an ``np.mean`` over every element. Dividing by the number of rows
    (``len(y_true)``) is only equivalent for a single output column; with
    several output columns it overstates the gradient by that column count.

    Args:
        y_true: Array-like of 0/1 labels.
        y_pred: Array-like of predicted probabilities, same shape as ``y_true``.

    Returns:
        Array of per-element gradients with the shape of the inputs.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    epsilon = 1e-8  # same guard as the loss, keeps the denominators non-zero
    per_element = -(y_true / (y_pred + epsilon)) + (1 - y_true) / (1 - y_pred + epsilon)
    return per_element / per_element.size


In [None]:
class NeuralNetwork:
    """Two-layer fully connected network for binary classification.

    Architecture: input -> hidden layer (ReLU or sigmoid) -> sigmoid output.
    Training is full-batch gradient descent on the binary cross-entropy loss,
    with the gradients written out by hand in ``backward``.
    """

    def __init__(self, input_size, hidden_size, output_size, activation='sigmoid', lr=0.1, seed=None):
        """Create the network with standard-normal weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            activation: Hidden-layer activation, ``'relu'`` or ``'sigmoid'``.
            lr: Learning rate applied by ``backward``.
            seed: Optional seed for the weight initialisation. With ``None``
                (the default) weights come from NumPy's global RNG, so
                ``np.random.seed`` still controls them. Otherwise a dedicated
                generator seeded with this value is used, making the initial
                weights reproducible independently of global state.

        Raises:
            ValueError: If ``activation`` is not ``'relu'`` or ``'sigmoid'``.
        """
        self.lr = lr

        # Pick the source of standard-normal samples; both take a shape tuple.
        if seed is None:
            normal = np.random.standard_normal
        else:
            normal = np.random.default_rng(seed).standard_normal

        # Weight initialization
        self.W1 = normal((input_size, hidden_size))
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = normal((hidden_size, output_size))
        self.b2 = np.zeros((1, output_size))

        # Set activation
        if activation == 'relu':
            self.act = relu
            self.act_deriv = relu_deriv
        elif activation == 'sigmoid':
            self.act = sigmoid
            self.act_deriv = sigmoid_deriv
        else:
            raise ValueError("Only 'relu' and 'sigmoid' supported.")

    def forward(self, X):
        """Compute the network output for ``X`` and cache the intermediates.

        ``Z1``, ``A1``, ``Z2`` and ``A2`` are stored on the instance because
        ``backward`` reads them.

        Args:
            X: Array of shape (n_samples, input_size).

        Returns:
            Sigmoid activations of shape (n_samples, output_size).
        """
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = self.act(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = sigmoid(self.Z2)  # output layer uses sigmoid for binary classification
        return self.A2

    def backward(self, X, y, output):
        """Backpropagate the loss and apply one gradient-descent step in place.

        Must be called after ``forward`` on the same ``X``, since it relies on
        the cached ``Z1``, ``A1`` and ``Z2``.

        Args:
            X: Inputs passed to ``forward``, shape (n_samples, input_size).
            y: Labels with the same shape as ``output``.
            output: The value returned by ``forward(X)``.
        """
        # Output layer: chain rule through the loss, then through the sigmoid.
        dA2 = binary_cross_entropy_deriv(y, output)
        dZ2 = dA2 * sigmoid_deriv(self.Z2)
        dW2 = self.A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer: uses W2 *before* it is updated below.
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.act_deriv(self.Z1)
        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        # Update weights
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2

    def train(self, X, y, epochs=1000, print_every=100):
        """Run full-batch gradient descent and report the loss periodically.

        Args:
            X: Inputs of shape (n_samples, input_size).
            y: Labels of shape (n_samples, output_size). A 1-D label vector
                is treated as a single output column.
            epochs: Number of forward/backward passes over ``X``.
            print_every: Print the loss every this many epochs; ``0`` or
                ``None`` disables printing.

        Returns:
            List with the loss measured at the start of each epoch (before
            that epoch's weight update).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            # A flat label vector would broadcast against the (n, 1) output
            # into an (n, n) matrix and silently give a wrong loss.
            y = y.reshape(-1, 1)

        history = []
        for i in range(epochs):
            output = self.forward(X)
            loss = binary_cross_entropy(y, output)
            history.append(float(loss))
            self.backward(X, y, output)
            if print_every and i % print_every == 0:
                print(f"Epoch {i}, Loss: {loss:.4f}")
        return history

In [None]:
# XOR truth table: four 2-bit inputs and their labels as a column vector
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# NeuralNetwork draws its initial weights from NumPy's global RNG, so the
# RNG is re-seeded before each run to make the loss curves reproducible.
# Both networks therefore start from the same random draw.
SEED = 42

# Try both ReLU and Sigmoid hidden layers
print("Training with ReLU activation:")
np.random.seed(SEED)
nn_relu = NeuralNetwork(input_size=2, hidden_size=4, output_size=1, activation='relu', lr=0.1)
nn_relu.train(X, y, epochs=1000)

print("\nTraining with Sigmoid activation:")
np.random.seed(SEED)
nn_sigmoid = NeuralNetwork(input_size=2, hidden_size=4, output_size=1, activation='sigmoid', lr=0.1)
nn_sigmoid.train(X, y, epochs=1000)


Training with ReLU activation:
Epoch 0, Loss: 0.6621
Epoch 100, Loss: 0.5389
Epoch 200, Loss: 0.5039
Epoch 300, Loss: 0.4912
Epoch 400, Loss: 0.4853
Epoch 500, Loss: 0.4830
Epoch 600, Loss: 0.4813
Epoch 700, Loss: 0.4808
Epoch 800, Loss: 0.4800
Epoch 900, Loss: 0.4797

Training with Sigmoid activation:
Epoch 0, Loss: 1.0345
Epoch 100, Loss: 0.7082
Epoch 200, Loss: 0.7011
Epoch 300, Loss: 0.6963
Epoch 400, Loss: 0.6925
Epoch 500, Loss: 0.6889
Epoch 600, Loss: 0.6850
Epoch 700, Loss: 0.6804
Epoch 800, Loss: 0.6745
Epoch 900, Loss: 0.6667


## PyTorch Implementation

In [None]:
import torch
import numpy as np

# Start from a 2x2 float64 NumPy array
a_np = np.array([
    [1.0, 2.0],
    [3.0, 4.0],
])

# Copy it into a tensor tracked by autograd; the NumPy dtype carries over
a_torch = torch.tensor(a_np, requires_grad=True)

print(a_torch)
print(f"Shape: {a_torch.shape}, Dtype: {a_torch.dtype}")

# A length-2 vector is broadcast across both rows, just as in NumPy
b = torch.tensor([1.0, 2.0])
row_sum = a_torch + b
print(row_sum)


tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64, requires_grad=True)
Shape: torch.Size([2, 2]), Dtype: torch.float64
tensor([[2., 4.],
        [4., 6.]], dtype=torch.float64, grad_fn=<AddBackward0>)


In [None]:
# Evaluate y = x^2 + 3x + 1 at x = 2 with gradient tracking switched on
x = torch.tensor([2.0], requires_grad=True)
y = x.pow(2) + 3 * x + 1

# Backpropagate from y; autograd stores dy/dx in x.grad
y.backward()

print(f"dy/dx: {x.grad}")  # analytic derivative 2x + 3 evaluates to 7.0 at x = 2


dy/dx: tensor([7.])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# XOR truth table as tensors: one row per sample, labels as a single column
X = torch.tensor([
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
])
y = torch.tensor([
    [0.],
    [1.],
    [1.],
    [0.],
])


In [None]:
class XORNet(nn.Module):
    """Small feed-forward classifier: 2 inputs -> 4 ReLU units -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(2, 4),  # input -> hidden
            nn.ReLU(),
            nn.Linear(4, 1),  # hidden -> output
            nn.Sigmoid(),     # squash the output into (0, 1)
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        return self.model(x)


In [None]:
model = XORNet()
loss_fn = nn.BCELoss()  # binary cross-entropy on the probabilities the model outputs
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Full-batch gradient descent: every step uses all four XOR samples
for epoch in range(1000):
    # Clear gradients accumulated by the previous iteration
    optimizer.zero_grad()

    y_pred = model(X)
    loss = loss_fn(y_pred, y)

    loss.backward()   # populate .grad on every parameter
    optimizer.step()  # apply the SGD update

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 0.7091
Epoch 100, Loss: 0.6892
Epoch 200, Loss: 0.6690
Epoch 300, Loss: 0.6158
Epoch 400, Loss: 0.5316
Epoch 500, Loss: 0.4412
Epoch 600, Loss: 0.3223
Epoch 700, Loss: 0.2220
Epoch 800, Loss: 0.1544
Epoch 900, Loss: 0.1107


In [None]:
# Inference only, so autograd tracking is switched off
with torch.no_grad():
    probabilities = model(X)
    preds = probabilities.round()  # round each probability to 0. or 1.
    print("Predictions:\n", preds)

Predictions:
 tensor([[0.],
        [1.],
        [1.],
        [0.]])
