# Neural Networks: Foundations

Hands-on code for core neural network ideas: building a small ANN, writing a training loop, and exploring common activations, loss functions, and hyperparameter knobs. Inline comments keep the flow clear for study.

In [None]:
    # If packages are missing, install once (remove the leading '#'):
    # !pip install torch matplotlib scikit-learn
    import matplotlib.pyplot as plt
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from sklearn.datasets import make_moons
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # Seed PyTorch's random number generator so study runs are reproducible.
    torch.manual_seed(42)

## Artificial Neural Networks (ANN)

Build a tiny multilayer perceptron on a toy 2D dataset to visualize decision boundaries.

In [None]:
    # Create a non-linear toy dataset (two interleaving half-moons).
    X, y = make_moons(n_samples=800, noise=0.2, random_state=0)

    # Split BEFORE scaling: the scaler's mean/variance must come from the
    # training rows only, otherwise validation data leaks into preprocessing.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=0
    )

    # Zero-mean / unit-variance inputs help training stability.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_val = scaler.transform(X_val)
    # Keep the full array standardized too (with the training statistics) so
    # any later use of X sees the same scale as the model inputs.
    X = scaler.transform(X)

    # Convert to tensors: float32 features, int64 class labels.
    X_train_t = torch.tensor(X_train, dtype=torch.float32)
    y_train_t = torch.tensor(y_train, dtype=torch.long)
    X_val_t = torch.tensor(X_val, dtype=torch.float32)
    y_val_t = torch.tensor(y_val, dtype=torch.long)

    # Simple ANN: input -> hidden -> hidden -> output with ReLU non-linearities.
    # 2 input features, two hidden layers of 16 units, 2 output logits.
    ann = nn.Sequential(
        nn.Linear(2, 16),
        nn.ReLU(),
        nn.Linear(16, 16),
        nn.ReLU(),
        nn.Linear(16, 2),
    )

    # Quick forward pass on five samples to confirm shapes.
    logits = ann(X_train_t[:5])
    print("Logit sample shape:", logits.shape)

## Training Neural Networks

A minimal training loop: cross-entropy loss, an SGD optimizer, and small-batch loading. The loop is intentionally explicit so you can see each step.

In [None]:
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(ann.parameters(), lr=0.1, momentum=0.9)


    def train_epoch(model, X, y, batch_size=64, opt=None, loss_fn=None):
        """Run one shuffled mini-batch pass over (X, y) and return the mean loss.

        Args:
            model: Module to train; it is switched to training mode.
            X: Input tensor indexed by sample along dim 0.
            y: Target tensor aligned with X along dim 0.
            batch_size: Number of samples per optimizer step.
            opt: Optimizer to step. Defaults to the notebook-level ``optimizer``,
                which only updates ``ann`` -- pass one built from ``model``'s
                parameters when training a different model.
            loss_fn: Loss callable. Defaults to the notebook-level ``criterion``.

        Returns:
            Sample-weighted average of the batch losses for this epoch.

        Raises:
            ValueError: If X contains no samples.
        """
        opt = optimizer if opt is None else opt
        loss_fn = criterion if loss_fn is None else loss_fn

        n_samples = X.size(0)
        if n_samples == 0:
            raise ValueError("train_epoch needs at least one sample")

        model.train()
        perm = torch.randperm(n_samples)  # fresh shuffle every epoch
        total_loss = 0.0
        for start in range(0, n_samples, batch_size):
            idx = perm[start:start + batch_size]
            xb, yb = X[idx], y[idx]

            # Forward pass
            batch_loss = loss_fn(model(xb), yb)

            # Backward pass
            opt.zero_grad()
            batch_loss.backward()
            opt.step()

            # Weight by batch size so a short final batch is not over-counted.
            total_loss += batch_loss.item() * xb.size(0)
        return total_loss / n_samples


    def eval_accuracy(model, X, y):
        """Return the fraction of rows in X whose argmax prediction equals y.

        The model is switched to eval mode (and left there) and gradients are
        disabled while predicting.
        """
        model.eval()
        with torch.no_grad():
            preds = model(X).argmax(dim=1)
            return (preds == y).float().mean().item()


    # Train for 30 epochs, reporting validation accuracy every 10th epoch.
    for epoch in range(1, 31):
        loss = train_epoch(ann, X_train_t, y_train_t)
        if epoch % 10 == 0:
            val_acc = eval_accuracy(ann, X_val_t, y_val_t)
            print(f"Epoch {epoch:02d} | loss={loss:.3f} | val_acc={val_acc:.3f}")

## Activation Functions

Plot common activation functions to compare their slopes (gradients) and saturation regions.

In [None]:
    import numpy as np

    # Shared input grid: 200 evenly spaced points on [-4, 4].
    xs = np.linspace(-4, 4, 200)

    # Each activation as a function of the input array.
    activation_fns = {
        "sigmoid": lambda z: 1 / (1 + np.exp(-z)),
        "tanh": lambda z: np.tanh(z),
        "relu": lambda z: np.maximum(0, z),
        # Negative inputs are scaled by 0.1 instead of being zeroed.
        "leaky_relu": lambda z: np.where(z > 0, z, 0.1 * z),
    }
    activations = {label: fn(xs) for label, fn in activation_fns.items()}

    # One curve per activation on a single set of axes.
    plt.figure(figsize=(6, 4))
    for label, curve in activations.items():
        plt.plot(xs, curve, label=label)
    plt.title("Common Activation Functions")
    plt.xlabel("input")
    plt.ylabel("activation")
    plt.legend()
    plt.grid(True)
    plt.show()

## Loss Functions

Compare mean squared error (MSE) and cross-entropy on a tiny batch to see how each scores the same predictions.

In [None]:
    # Fake logits and targets for a 3-class problem: two rows of raw scores
    # with the true class index of each row.
    fake_logits = torch.tensor([[2.0, 0.5, -1.0], [0.1, 1.0, 0.2]])
    targets = torch.tensor([0, 2])

    # Cross entropy takes the raw logits and integer class targets directly.
    ce_criterion = nn.CrossEntropyLoss()
    ce_loss = ce_criterion(fake_logits, targets)

    # For the MSE demo, compare softmax probabilities against one-hot targets.
    probs = torch.softmax(fake_logits, dim=1)
    one_hot = torch.nn.functional.one_hot(targets, num_classes=3).float()
    mse_loss = (probs - one_hot).pow(2).mean()

    print("Cross Entropy:", ce_loss.item())
    print("MSE on probabilities:", mse_loss.item())

## Hyperparameters

A quick sweep over learning rates to illustrate their impact on convergence speed and stability.

In [None]:
    def run_lr(lr, steps=50, seed=0):
        """Train a fresh small MLP with plain SGD and report its last loss.

        Every call rebuilds the model from the same seed, so runs with different
        learning rates start from identical weights and differ only in ``lr``.

        Args:
            lr: Learning rate handed to ``optim.SGD``.
            steps: Number of full-batch gradient steps (must be >= 1).
            seed: Seed used only while initializing the model's weights.

        Returns:
            The loss from the final iteration's forward pass, i.e. measured
            just before the last parameter update.

        Raises:
            ValueError: If ``steps`` is smaller than 1.
        """
        if steps < 1:
            raise ValueError("steps must be at least 1")

        # Seed inside a forked RNG scope: the initialization becomes repeatable
        # while the notebook's global random stream is restored afterwards.
        with torch.random.fork_rng():
            torch.manual_seed(seed)
            model = nn.Sequential(nn.Linear(2, 8), nn.ReLU(), nn.Linear(8, 2))

        opt = optim.SGD(model.parameters(), lr=lr)
        last_loss = None
        for _ in range(steps):
            # Full-batch step on the training tensors defined earlier.
            loss = criterion(model(X_train_t), y_train_t)
            opt.zero_grad()
            loss.backward()
            opt.step()
            last_loss = loss.item()
        return last_loss


    # Same starting weights for every run, so only the learning rate varies.
    for lr in [0.001, 0.01, 0.1]:
        final_loss = run_lr(lr)
        print(f"LR={lr}: final loss {final_loss:.3f}")