In [None]:
import numpy as np


# 1. ACTIVATION FUNCTIONS (Manual)
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)`` so that large-magnitude negative
    inputs do not overflow ``np.exp`` (the naive form evaluates ``exp(+big)``,
    which emits an overflow warning or raises under ``np.errstate``).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``, with values in ``[0, 1]``.
    """
    # exp(-|x|) lies in [0, 1] for every finite or infinite x: no overflow.
    z = np.exp(-np.abs(x))
    non_negative = np.asarray(x) >= 0
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value.
    result = np.where(non_negative, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d array back into a NumPy scalar and is a
    # no-op (view) for arrays with one or more dimensions.
    return result[()]


def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This equals the derivative of the logistic sigmoid only when ``x`` is
    already the sigmoid *output* ``s = sigmoid(z)`` (since ds/dz = s(1 - s)),
    not the pre-activation ``z``.

    Args:
        x: Sigmoid activation value(s).

    Returns:
        The product ``x * (1 - x)`` with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement


def relu(x):
    """Apply the rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        ``x`` with every negative entry replaced by zero.
    """
    floor = 0
    return np.maximum(floor, x)


def relu_derivative(x):
    """Return the ReLU gradient mask: 1.0 where ``x > 0``, else 0.0.

    Args:
        x: NumPy array (the comparison result must support ``.astype``).

    Returns:
        Float array of the same shape as ``x`` holding zeros and ones.
    """
    is_active = x > 0
    return is_active.astype(dtype=float)


def tanh(x):
    """Apply the hyperbolic tangent element-wise (thin wrapper over ``np.tanh``).

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        ``np.tanh(x)``.
    """
    activated = np.tanh(x)
    return activated


def tanh_derivative(x):
    """Return the derivative of tanh evaluated at pre-activation ``x``.

    Unlike ``x * (1 - x)``-style helpers that take an activation output, this
    function applies ``np.tanh`` itself, so ``x`` must be the raw input.

    Args:
        x: Scalar or array of pre-activations.

    Returns:
        ``1 - tanh(x) ** 2`` with the same shape as ``x``.
    """
    squashed = np.tanh(x)
    return 1 - squashed ** 2


def softmax(x, axis=1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating, which
    leaves the result unchanged mathematically but keeps ``np.exp`` from
    overflowing on large inputs.

    Args:
        x: Array of scores.
        axis: Axis along which the outputs sum to 1. Defaults to 1 (one row
            per sample for a 2-D array); pass ``axis=0`` or ``axis=-1`` for a
            single 1-D score vector.

    Returns:
        Array of the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)


# 2. LOSS FUNCTIONS (Manual)
def mse_loss(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The mean over all elements of ``(y_true - y_pred) ** 2``.
    """
    residual = y_true - y_pred
    squared_error = residual ** 2
    return np.mean(squared_error)


def cross_entropy_loss(y_true, y_pred):
    """Return the cross-entropy between ``y_true`` and ``y_pred``.

    The sum of ``y_true * log(y_pred)`` is negated and divided by the size of
    the first axis of ``y_true`` (presumably the batch dimension, with
    ``y_true`` one-hot or a probability distribution per row -- confirm
    against the caller).

    Args:
        y_true: Target array; ``y_true.shape[0]`` is used as the divisor.
        y_pred: Predicted probabilities, same shape as ``y_true``.

    Returns:
        The averaged cross-entropy as a scalar.
    """
    # The small constant keeps log() finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-9)
    weighted_sum = np.sum(y_true * log_probs)
    return -weighted_sum / y_true.shape[0]


# 3. THE MANUAL MLP CLASS
class ManualMLP:
    """One-hidden-layer perceptron with hand-written forward and backward passes.

    The hidden layer uses ReLU and the output layer uses a sigmoid. Parameters
    are stored as NumPy arrays: ``W1`` is ``(in_dim, h_dim)``, ``b1`` is
    ``(1, h_dim)``, ``W2`` is ``(h_dim, out_dim)`` and ``b2`` is
    ``(1, out_dim)``.
    """

    def __init__(self, in_dim, h_dim, out_dim, lr=0.01):
        """Create the parameters.

        Args:
            in_dim: Number of input features.
            h_dim: Number of hidden units.
            out_dim: Number of output units.
            lr: Step size used by ``backward`` when updating parameters.
        """
        self.lr = lr
        # He initialization: standard-normal draws scaled by sqrt(2 / fan_in).
        # W1 is drawn before W2 so a seeded global RNG gives the same weights.
        hidden_scale = np.sqrt(2.0 / in_dim)
        self.W1 = np.random.randn(in_dim, h_dim) * hidden_scale
        self.b1 = np.zeros((1, h_dim))
        output_scale = np.sqrt(2.0 / h_dim)
        self.W2 = np.random.randn(h_dim, out_dim) * output_scale
        self.b2 = np.zeros((1, out_dim))

    def forward(self, X):
        """Run the network on ``X`` and cache the intermediates for ``backward``.

        Args:
            X: Input array whose last dimension matches ``in_dim``
                (presumably ``(batch, in_dim)`` -- confirm against the caller).

        Returns:
            The sigmoid output ``a2``. ``z1``, ``a1``, ``z2`` and ``a2`` are
            also stored on the instance.
        """
        hidden_pre = np.dot(X, self.W1) + self.b1
        hidden_act = relu(hidden_pre)
        output_pre = np.dot(hidden_act, self.W2) + self.b2
        output_act = sigmoid(output_pre)  # Use Softmax if multi-class

        self.z1, self.a1 = hidden_pre, hidden_act
        self.z2, self.a2 = output_pre, output_act
        return output_act

    def backward(self, X, y, output):
        """Backpropagate one batch and apply a gradient-descent step in place.

        Relies on ``self.a1`` cached by the most recent ``forward`` call, so
        ``forward(X)`` must have been run on the same ``X`` first. Gradients
        are summed over the batch (not averaged) before being scaled by
        ``lr``.

        Args:
            X: The inputs passed to ``forward``.
            y: Targets with the same shape as ``output``.
            output: The value returned by ``forward(X)``.
        """
        # Output delta. ``output - y`` matches the gradient of binary
        # cross-entropy taken through the sigmoid output.
        delta_out = output - y
        # Hidden delta must use W2 *before* it is updated below. Because
        # a1 > 0 exactly where z1 > 0, the ReLU mask can be taken from a1.
        delta_hidden = np.dot(delta_out, self.W2.T) * relu_derivative(self.a1)

        grad_W2 = np.dot(self.a1.T, delta_out)
        grad_b2 = delta_out.sum(axis=0, keepdims=True)
        grad_W1 = np.dot(X.T, delta_hidden)
        grad_b1 = delta_hidden.sum(axis=0, keepdims=True)

        # Plain gradient-descent update, applied only after every gradient
        # has been computed from the pre-update parameters.
        step = self.lr
        self.W1 -= step * grad_W1
        self.b1 -= step * grad_b1
        self.W2 -= step * grad_W2
        self.b2 -= step * grad_b2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


# 1. DEFINE THE ARCHITECTURE
class TorchMLP(nn.Module):
    """Fully connected network with two ReLU hidden layers of equal width.

    The final layer is linear, so ``forward`` returns raw scores (no softmax
    or sigmoid is applied inside the module).
    """

    def __init__(self, in_size, hidden_size, out_size, task="classification"):
        """Build the layer stack.

        Args:
            in_size: Number of input features.
            hidden_size: Width of both hidden layers.
            out_size: Number of output units.
            task: Free-form task label stored on the module as ``self.task``.
        """
        super().__init__()
        self.task = task

        widths = (in_size, hidden_size, hidden_size, out_size)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            # Try nn.LeakyReLU(0.1), nn.Tanh(), or nn.Sigmoid()
            stack.append(nn.ReLU())
        stack.pop()  # no activation after the output layer
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network's raw output for input batch ``x``."""
        scores = self.net(x)
        return scores


# 2. SELECT LOSS & OPTIMIZER (SYLLABUS)
# For Regression:
# criterion = nn.MSELoss()
# For Multi-Class:
# criterion = nn.CrossEntropyLoss()
# For Binary Classification:
# criterion = nn.BCEWithLogitsLoss()


# 3. THE COMPLETE TRAINING WORKFLOW
def run_training(model, train_loader, criterion, lr=0.001, epochs=50):
    """Train ``model`` in place with Adam and log progress every 10th epoch.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It is
            iterated once per epoch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        lr: Learning rate passed to Adam.
        epochs: Number of passes over ``train_loader``.

    The logged value is the loss of the last batch of that epoch. If an epoch
    yields no batches (empty loader or an exhausted one-shot iterator), the
    log line for that epoch is skipped instead of failing on an unbound
    variable or reporting a stale loss from an earlier epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)  # or optim.SGD
    model.train()

    for epoch in range(epochs):
        # Reset every epoch so only a loss computed in *this* epoch is logged.
        last_loss = None

        for inputs, labels in train_loader:
            # A. Forward Pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # B. Backpropagation (The "Big Three")
            optimizer.zero_grad()  # 1. Clear gradients
            loss.backward()  # 2. Compute gradients
            optimizer.step()  # 3. Update parameters

            last_loss = loss

        if epoch % 10 == 0 and last_loss is not None:
            print(f"Epoch {epoch}: Loss {last_loss.item():.4f}")


# 4. INFERENCE PROCEDURE
def predict(model, inputs):
    """Run inference and turn the model output into predictions.

    The model is put into eval mode (and left there) and the forward pass
    runs without gradient tracking.

    Args:
        model: Module exposing a ``task`` attribute.
        inputs: Batch passed straight to ``model``.

    Returns:
        If ``model.task == "classification"``: a 1-D integer tensor of class
        indices, one per row of the output. For a single-column output
        (a binary head, e.g. one trained with ``BCEWithLogitsLoss``) the
        class is 1 where the logit is positive, else 0; otherwise it is the
        arg-max over dimension 1.
        For any other task: the raw model output.
    """
    model.eval()
    with torch.no_grad():
        logits = model(inputs)

        if model.task != "classification":
            return logits  # Regression

        if logits.dim() == 2 and logits.size(1) == 1:
            # With a single logit, arg-max over dim 1 is always 0. Threshold
            # instead: sigmoid(z) > 0.5 exactly when z > 0.
            return (logits.squeeze(1) > 0).long()

        # Softmax is monotonic, so the arg-max of the logits is the arg-max
        # of the probabilities; skipping it avoids an unnecessary pass.
        return torch.argmax(logits, dim=1)