# Autoencoders    Study-oriented notebook covering basic autoencoders, non-linear activations, deep variants, outlier detection, and sparse feature learning.

In [None]:
    # !pip install torch matplotlib scikit-learn    import torch    import torch.nn as nn    import torch.optim as optim    import matplotlib.pyplot as plt    import numpy as np    torch.manual_seed(7)    

## Basic Principles    Simple autoencoder on 2D data that lies near a circle; the model learns to reconstruct the inputs.

In [None]:
    # Synthetic 2-D dataset: 400 angles evenly spaced over [0, 2*pi], each placed
    # at radius 1 plus Gaussian jitter (std 0.1), giving a noisy ring of points.
    theta = torch.linspace(0, 2 * torch.pi, steps=400)
    radius = 1.0 + 0.1 * torch.randn(theta.size())
    X = torch.stack((radius * theta.cos(), radius * theta.sin()), dim=1)


    class Autoencoder(nn.Module):
        """Single-hidden-layer autoencoder for 2-D inputs.

        The encoder is ``Linear(2 -> hidden)`` followed by ReLU; the decoder is
        one ``Linear(hidden -> 2)`` with no output activation.
        """

        def __init__(self, hidden=3):
            super().__init__()
            # The encoder is created before the decoder, so parameter
            # initialisation consumes the RNG in a fixed order.
            encoder_layers = [nn.Linear(2, hidden), nn.ReLU()]
            self.encoder = nn.Sequential(*encoder_layers)
            decoder_layers = [nn.Linear(hidden, 2)]
            self.decoder = nn.Sequential(*decoder_layers)

        def forward(self, x):
            """Return the pair ``(reconstruction, code)`` for the batch ``x``."""
            code = self.encoder(x)
            reconstruction = self.decoder(code)
            return reconstruction, code


    # Full-batch training: every step sees all of X.
    ae = Autoencoder(hidden=2)
    opt = optim.Adam(ae.parameters(), lr=0.02)
    mse = nn.MSELoss()

    for _epoch in range(500):
        recon, _ = ae(X)
        loss = mse(recon, X)
        opt.zero_grad()
        loss.backward()
        opt.step()

    # `loss` was measured just before the final optimizer step.
    print("Final reconstruction MSE:", loss.item())

## Non-linear Activations

Swap in different activation functions (ReLU, Tanh, Sigmoid) to see their impact on the model's representational power and reconstruction loss.

In [None]:
    # Train one small 2 -> 4 -> 2 network per activation on the same data and
    # report the loss recorded on the last iteration.
    act_options = [nn.ReLU(), nn.Tanh(), nn.Sigmoid()]

    for act in act_options:
        # A fresh model and optimizer for every activation.
        model = nn.Sequential(
            nn.Linear(2, 4),
            act,
            nn.Linear(4, 2),
        )
        opt = optim.Adam(model.parameters(), lr=0.02)

        for _ in range(200):
            # Clearing gradients up front is equivalent to clearing them
            # right before backward().
            opt.zero_grad()
            pred = model(X)
            loss = mse(pred, X)
            loss.backward()
            opt.step()

        print(f"Activation {act.__class__.__name__}: recon loss {loss.item():.4f}")

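## Deep Autoencoders

Stack more layers in the encoder and decoder to learn a richer non-linear compression; deep autoencoders are useful for denoising and manifold learning. (In the example below the code is still 2-D, so it illustrates added depth rather than a narrower bottleneck.)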

In [None]:
    class DeepAE(nn.Module):
        """Deeper autoencoder for 2-D inputs.

        Encoder widths are 2 -> 8 -> 3 -> 2 and decoder widths are
        2 -> 3 -> 8 -> 2, with ReLU between (but not after) the linear layers.
        """

        @staticmethod
        def _mlp(widths):
            """Chain Linear layers over consecutive widths, ReLU in between."""
            layers = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                if layers:
                    layers.append(nn.ReLU())
                layers.append(nn.Linear(fan_in, fan_out))
            return nn.Sequential(*layers)

        def __init__(self):
            super().__init__()
            # Encoder first, then decoder: keeps layer creation order fixed.
            self.encoder = self._mlp((2, 8, 3, 2))
            self.decoder = self._mlp((2, 3, 8, 2))

        def forward(self, x):
            """Return the pair ``(reconstruction, code)`` for the batch ``x``."""
            code = self.encoder(x)
            return self.decoder(code), code


    deep_ae = DeepAE()
    opt = optim.Adam(deep_ae.parameters(), lr=0.01)

    # Full-batch training on X with the shared MSE criterion.
    for _epoch in range(400):
        recon, _ = deep_ae(X)
        loss = mse(recon, X)
        opt.zero_grad()
        loss.backward()
        opt.step()

    print("Deep AE recon loss:", loss.item())

## Applications: Outlier Detection    Train on normal data, then measure reconstruction error on anomalies.

In [None]:
    # Probe points: 20 samples from a 2-D standard normal scaled by 3. They are
    # spread widely around the origin; an individual sample can still fall near
    # the circle.
    anomalies = 3.0 * torch.randn(20, 2)

    # Inference only, so skip gradient tracking for the forward passes.
    with torch.no_grad():
        recon_normal, _ = ae(X)
        recon_anom, _ = ae(anomalies)

    # Per-sample squared reconstruction error, summed over the two coordinates.
    normal_error = (recon_normal - X).pow(2).sum(dim=1)
    anom_error = (recon_anom - anomalies).pow(2).sum(dim=1)

    print("Normal error mean:", normal_error.mean().item())
    print("Anomaly error mean:", anom_error.mean().item())

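## Sparse Feature Learning

Encourage sparsity in the bottleneck by adding an L1 penalty on the code activations, so that each input is represented by only a few active units; sparse codes are often easier to interpret and can be more disentangled.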

In [None]:
    # Same architecture as the basic autoencoder, with a 3-unit code and an
    # extra L1 term on the code activations added to the reconstruction loss.
    sparse_ae = Autoencoder(hidden=3)
    opt = optim.Adam(sparse_ae.parameters(), lr=0.02)
    l1_weight = 1e-3  # stronger weight => sparser codes

    for _ in range(400):
        opt.zero_grad()
        recon, z = sparse_ae(X)
        # Objective = reconstruction MSE + weighted mean absolute activation.
        loss = mse(recon, X) + l1_weight * z.abs().mean()
        loss.backward()
        opt.step()

    # Both values come from the last forward pass, i.e. before the final update.
    print("Sparse AE loss:", loss.item())
    print("Mean abs activation:", torch.mean(torch.abs(z)).item())