# Part 1 — Autoencoder from Scratch (NumPy only)
- Fully-connected AE (>=3 hidden layers in encoder/decoder)
- Backprop, mini-batch SGD, LR scheduling, L2 regularization
- Encode / decode + reconstruction error

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import sys


def find_project_root(start=None, marker="src"):
    """Return the nearest directory at or above *start* that contains *marker*.

    Args:
        start: Directory to begin the search from; defaults to the current
            working directory.
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        Absolute path of the first matching directory, or ``None`` when the
        filesystem root is reached without finding one.
    """
    current = os.path.abspath(os.getcwd() if start is None else start)
    while True:
        if os.path.isdir(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root without a match
            return None
        current = parent


# A bare os.chdir("..") climbs one more level every time this cell is re-run
# and only works when the notebook sits exactly one level below the project
# root. Searching upwards for the `src` package is idempotent and does not
# depend on how deeply the notebook is nested.
PROJECT_ROOT = find_project_root()
if PROJECT_ROOT is not None:
    os.chdir(PROJECT_ROOT)
# If no root was found the working directory is left untouched and the
# `src` imports below will fail with ModuleNotFoundError.
# Register the absolute path once so repeated runs do not pile up duplicates.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

from src.utils.random import set_seed
from src.utils.preprocessing import StandardScaler


ModuleNotFoundError: No module named 'src'

In [None]:
import os
import sys


def find_project_root(start=None, marker="src"):
    """Return the nearest directory at or above *start* that contains *marker*.

    Args:
        start: Directory to begin the search from; defaults to the current
            working directory.
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        Absolute path of the first matching directory, or ``None`` when the
        filesystem root is reached without finding one.
    """
    current = os.path.abspath(os.getcwd() if start is None else start)
    while True:
        if os.path.isdir(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root without a match
            return None
        current = parent


# Unconditionally stepping to the parent directory moves the kernel one level
# further up on every execution, so running this cell after another cell that
# already changed directory (or running it twice) leaves the project root.
# Locating the directory that holds `src` makes the cell safe to re-run.
PROJECT_ROOT = find_project_root()
if PROJECT_ROOT is not None:
    os.chdir(PROJECT_ROOT)
# Use an absolute entry instead of '.', which would silently point somewhere
# else after any later change of working directory; add it only once.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())
from src.dimred.autoencoder import Autoencoder


In [None]:
# Breast Cancer Wisconsin (Diagnostic) data, read with pandas only (no sklearn loader).
# Working offline? Download wdbc.data once and point LOCAL_CSV at the saved copy.

UCI_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
LOCAL_CSV = None  # e.g. "data/wdbc.data"

# The raw file has no header row: an id, the diagnosis letter, then 30 feature columns.
cols = ["id", "diagnosis"] + [f"f{i}" for i in range(30)]

# Read from the local copy when one is configured, otherwise straight from the UCI URL.
data_source = UCI_URL if LOCAL_CSV is None else LOCAL_CSV
df = pd.read_csv(data_source, header=None, names=cols)

# Labels: Malignant=1, Benign=0. They are used ONLY for evaluation, never for training.
is_malignant = df["diagnosis"].values == "M"
y = is_malignant.astype(int)

# Features: every column except the identifier and the label.
features = df.drop(columns=["id","diagnosis"])
X = features.values.astype(float)

# Fit the project's StandardScaler on X and keep the transformed copy as Xs.
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

print("X:", X.shape, "Xs:", Xs.shape, "Malignant%:", y.mean())


In [None]:
# One seed for both the global RNG helper and the model's own `seed` argument.
SEED = 42
set_seed(SEED)

# Hyper-parameters collected in one place before building the model.
ae_config = dict(
    input_dim=Xs.shape[1],
    bottleneck_dim=10,
    hidden_dims=(64, 32, 16),   # 3 hidden layers in encoder + mirrored in decoder
    activation="relu",
    lr=1e-3,
    l2=1e-4,
    lr_decay=0.995,
    seed=SEED,
)
ae = Autoencoder(**ae_config)

# Train on the standardized features, then encode the same data.
ae.fit(Xs, epochs=200, batch_size=64, verbose=1)
Z = ae.transform(Xs)

recon_mse = ae.reconstruction_error(Xs)
print("Z shape:", Z.shape, "Recon MSE:", recon_mse)


In [None]:
# Training curve from the values stored in ae.loss_history
# (presumably one entry per epoch, matching the x-axis label; confirm in Autoencoder.fit).
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(ae.loss_history)
ax.set_title("Autoencoder training loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss (MSE + L2)")
fig.tight_layout()
plt.show()
