# Neural Network — From Scratch

Fully connected MLP with backpropagation, He initialization, momentum SGD, and L2 regularization.

Architecture: `[input] → ReLU → ReLU → Softmax → [output]`

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from neural_network.neural_net import NeuralNetwork

# Apply matplotlib's bundled seaborn-like dark-grid theme to every figure below.
plt.style.use("seaborn-v0_8-darkgrid")
# Seed NumPy's global RNG for repeatable runs.
# NOTE(review): this only makes training reproducible if NeuralNetwork draws
# its weights / mini-batch order from np.random — confirm against its source.
np.random.seed(42)

# Load the full MNIST dataset from OpenML (70k samples, 784 features each).
print("Loading MNIST...")
mnist = fetch_openml("mnist_784", version=1, as_frame=False, parser="auto")
# Targets arrive as strings, so cast them to int; divide pixels by 255 to
# bring the features into [0, 1].
X, y = mnist.data / 255.0, mnist.target.astype(int)

# Keep a stratified 10k-sample subset for a quick demo; the remainder is
# discarded.
X, _, y, _ = train_test_split(X, y, train_size=10_000, random_state=42, stratify=y)

# Hold out 20% of the subset for testing. Stratify this split as well so the
# train and test sets keep the same class proportions as the subset above.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# One-hot encode labels
def one_hot(y, n_classes=10):
    """Return a one-hot encoding of the integer class labels in ``y``.

    Args:
        y: Sequence of integer class indices, one per sample.
        n_classes: Width of the encoding (number of columns).

    Returns:
        A float array of shape ``(len(y), n_classes)`` that is zero everywhere
        except for a 1 at ``[i, y[i]]`` in each row ``i``.
    """
    n_samples = len(y)
    encoded = np.zeros((n_samples, n_classes))
    row_idx = np.arange(n_samples)
    # Pair each row with its label column and switch that single cell on.
    encoded[row_idx, y] = 1
    return encoded

# Only the training labels are one-hot encoded here; y_test stays as integer
# class indices.
y_train_oh = one_hot(y_train)
# Report the (n_samples, n_features) shape of each split.
print(f"Train: {X_train.shape}  |  Test: {X_test.shape}")

In [None]:
# Architecture: 784 → 256 → 128 → 10
# Two ReLU hidden layers followed by a softmax output layer, trained with
# categorical cross-entropy ("cce").
hyperparams = {
    "layer_sizes": [784, 256, 128, 10],
    "activations": ["relu", "relu", "softmax"],
    "loss": "cce",
    "learning_rate": 0.01,
    "lambda_": 1e-4,
    "momentum": 0.9,
    "batch_size": 64,
    "n_epochs": 30,
}
model = NeuralNetwork(**hyperparams)

# Train on the one-hot encoded targets.
model.fit(X_train, y_train_oh)

# NOTE(review): score() is given integer labels while fit() received one-hot
# targets — confirm that is what NeuralNetwork.score expects.
test_acc = model.score(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# --- Figure 1: training loss ---
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss curve
# NOTE(review): the x-axis is labelled "Epoch", which assumes loss_history
# holds one entry per epoch — confirm against NeuralNetwork.
axes[0].plot(model.loss_history, color="darkorchid", lw=2)
axes[0].set_title("Categorical Cross-Entropy Loss")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("CCE")

# The right-hand panel is not drawn into; hide its frame and ticks.
axes[1].axis("off")

# Lay out this figure explicitly: plt.tight_layout() would only act on the
# most recently created figure, which is the sample grid below.
fig.tight_layout()

# --- Figure 2: sample predictions ---
y_pred = model.predict(X_test)
n_show = min(10, len(X_test))          # never index past the test set
n_cols = 5
n_rows = max(1, -(-n_show // n_cols))  # ceiling division, at least one row
fig2, axs = plt.subplots(
    n_rows, n_cols, figsize=(12, 2.5 * n_rows), squeeze=False
)
for i, ax in enumerate(axs.flat):
    ax.axis("off")
    if i >= n_show:
        # Leftover cell when n_show is not a multiple of n_cols.
        continue
    # Each sample is a flat 784-vector; reshape it to a 28x28 image.
    ax.imshow(X_test[i].reshape(28, 28), cmap="gray")
    # Green title for a correct prediction, red for a mistake.
    color = "green" if y_pred[i] == y_test[i] else "red"
    ax.set_title(f"pred={y_pred[i]}\ntrue={y_test[i]}", color=color, fontsize=9)
fig2.suptitle(f"MNIST Sample Predictions  (Acc={test_acc:.3f})", fontsize=13)
fig2.tight_layout()

plt.show()