# Autoencoder + MLP para MNIST
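Entrenamos un autoencoder para reducir la dimensionalidad de las imágenes (de 784 píxeles a un espacio latente de 16 dimensiones) y luego usamos esas representaciones para clasificar los dígitos con un perceptrón multicapa. Como referencia, comparamos el tiempo de entrenamiento y la accuracy con un MLP entrenado directamente sobre los píxeles.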


In [1]:

import os
import time
import zipfile
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling are repeatable when the notebook is re-run from a fresh kernel
# (bitwise-identical results on GPU are still not guaranteed).
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when one is available; every model and batch is moved to `device`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', device)


Device: cpu


## Carga y normalización de MNIST


In [2]:

# Folder that holds the four MNIST_*.pt archives. Set the MNIST_DATA_DIR
# environment variable to point somewhere else; when it is unset, the
# original author's local path is used so existing runs keep working.
DATA_DIR = Path(os.environ.get(
    'MNIST_DATA_DIR',
    '/home/camila/Documents/Redes Neuronales/Redes-Neuronales/TP2/ejercicio-6',
))

def load_mnist_images(zip_path: Path) -> np.ndarray:
    """Read flattened MNIST images from a zip-format ``.pt`` archive.

    The archive's top-level folder is picked from the file name:
    ``MNIST_train_data`` when the name contains ``training``, otherwise
    ``MNIST_test_data``. The bytes of member ``<folder>/data/0`` are
    interpreted as uint8 and reshaped into rows of 28 * 28 = 784 values.

    Args:
        zip_path: path of the archive to open.

    Returns:
        uint8 array of shape (n_images, 784).
    """
    if 'training' in zip_path.name:
        folder = 'MNIST_train_data'
    else:
        folder = 'MNIST_test_data'
    member = f'{folder}/data/0'
    with zipfile.ZipFile(zip_path) as archive:
        payload = archive.read(member)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(-1, 28 * 28)

def load_mnist_labels(zip_path: Path) -> np.ndarray:
    """Read MNIST labels from a zip-format ``.pt`` archive.

    The archive's top-level folder is picked from the file name:
    ``MNIST_train_labels`` when the name contains ``training``, otherwise
    ``MNIST_test_labels``. The bytes of member ``<folder>/data/0`` are
    interpreted as little-endian 64-bit signed integers.

    Args:
        zip_path: path of the archive to open.

    Returns:
        1-D array of dtype ``<i8``, one entry per 8 bytes of the member.
    """
    if 'training' in zip_path.name:
        folder = 'MNIST_train_labels'
    else:
        folder = 'MNIST_test_labels'
    member = f'{folder}/data/0'
    with zipfile.ZipFile(zip_path) as archive:
        payload = archive.read(member)
    return np.frombuffer(payload, dtype='<i8')

# Pixels: uint8 arrays -> float32 tensors scaled to [0, 1].
train_images = torch.tensor(
    load_mnist_images(DATA_DIR / 'MNIST_training_data.pt'),
    dtype=torch.float32,
) / 255.0
test_images = torch.tensor(
    load_mnist_images(DATA_DIR / 'MNIST_test_data.pt'),
    dtype=torch.float32,
) / 255.0

# Labels: int64 arrays -> long tensors, the dtype nn.CrossEntropyLoss expects
# for class-index targets.
train_labels = torch.tensor(
    load_mnist_labels(DATA_DIR / 'MNIST_training_labels.pt'),
    dtype=torch.long,
)
test_labels = torch.tensor(
    load_mnist_labels(DATA_DIR / 'MNIST_test_labels.pt'),
    dtype=torch.long,
)

print('Train:', train_images.shape, 'Test:', test_images.shape)


Train: torch.Size([60000, 784]) Test: torch.Size([10000, 784])


## Autoencoder

In [3]:

class Autoencoder(nn.Module):
    """Fully connected autoencoder for flattened 784-value inputs.

    The encoder maps 784 -> 256 -> 64 -> ``latent_dim`` with ReLU between the
    linear layers; the decoder mirrors it back to 784 and ends in a sigmoid,
    so reconstructions lie in [0, 1].

    Args:
        latent_dim: width of the bottleneck produced by ``self.encoder``.
    """

    def __init__(self, latent_dim=16):
        super().__init__()
        # Layers are created encoder-first, in forward order, so parameter
        # initialisation consumes the RNG in a fixed sequence.
        encoder_layers = [
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, latent_dim),
        ]
        decoder_layers = [
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 256),
            nn.ReLU(),
            nn.Linear(256, 784),
            nn.Sigmoid(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same trailing size, 784)."""
        return self.decoder(self.encoder(x))

latent_dim = 16  # width of the bottleneck representation

autoencoder = Autoencoder(latent_dim=latent_dim).to(device)
recon_loss = nn.MSELoss()
ae_optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)

# Each image is paired with itself: the reconstruction target is the input.
ae_loader = DataLoader(
    TensorDataset(train_images, train_images),
    batch_size=256,
    shuffle=True,
)


### Entrenamiento del autoencoder


In [4]:

# Train the autoencoder to reproduce its own input and time the whole run.
ae_epochs = 20
start_time = time.time()
for epoch in range(1, ae_epochs + 1):
    autoencoder.train()
    total_loss = 0.0
    for batch, _ in ae_loader:
        batch = batch.to(device)
        ae_optimizer.zero_grad()
        batch_loss = recon_loss(autoencoder(batch), batch)
        batch_loss.backward()
        ae_optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        total_loss += batch_loss.item() * batch.size(0)
    avg_loss = total_loss / len(ae_loader.dataset)
    # Report the first epoch and every fifth one.
    if epoch == 1 or epoch % 5 == 0:
        print(f"Epoch {epoch}, recon loss: {avg_loss:.4f}")
ae_time = time.time() - start_time
print(f"Tiempo entrenamiento autoencoder: {ae_time:.1f} s")


Epoch 1, recon loss: 0.0688
Epoch 5, recon loss: 0.0219
Epoch 10, recon loss: 0.0162
Epoch 15, recon loss: 0.0134
Epoch 20, recon loss: 0.0119
Tiempo entrenamiento autoencoder: 55.9 s


## Clasificador usando embeddings del autoencoder


In [5]:

# Switch the trained autoencoder to eval mode and encode both splits in one
# pass each, without tracking gradients; results are moved back to the CPU.
autoencoder.eval()
with torch.no_grad():
    train_latent, test_latent = (
        autoencoder.encoder(split.to(device)).cpu()
        for split in (train_images, test_images)
    )

class LatentMLP(nn.Module):
    """One-hidden-layer classifier: ``input_dim`` -> 64 -> 10 scores.

    Args:
        input_dim: size of the last dimension of the inputs.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the raw output of the last linear layer (no softmax applied)."""
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

def train_classifier(model, train_data, train_labels, test_data, test_labels, epochs=20,
                     lr=0.001, batch_size=256):
    """Train ``model`` with Adam and cross-entropy, then score it on the test split.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a notebook-level ``device`` variable.

    Args:
        model: module mapping a batch of inputs to per-class scores; it is
            trained in place.
        train_data: training inputs, indexed along dimension 0.
        train_labels: class indices aligned with ``train_data``.
        test_data: inputs passed to ``evaluate_classifier`` after training.
        test_labels: class indices aligned with ``test_data``.
        epochs: number of full passes over the training set.
        lr: Adam learning rate.
        batch_size: mini-batch size used for training (batches are reshuffled
            every epoch).

    Returns:
        tuple: ``(elapsed, acc)`` where ``elapsed`` is the wall-clock time in
        seconds spent in the training loop (evaluation excluded) and ``acc``
        is whatever ``evaluate_classifier`` returns for the test split.
    """
    criterion = nn.CrossEntropyLoss()
    # Adam rejects an empty parameter list, so the model is guaranteed to have
    # at least one parameter to read the device from below.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    target_device = next(model.parameters()).device
    loader = DataLoader(TensorDataset(train_data, train_labels), batch_size=batch_size, shuffle=True)
    start = time.time()
    for _ in range(epochs):
        model.train()
        for xb, yb in loader:
            xb, yb = xb.to(target_device), yb.to(target_device)
            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()
    elapsed = time.time() - start
    acc = evaluate_classifier(model, test_data, test_labels)
    return elapsed, acc

def evaluate_classifier(model, data, labels, batch_size=512):
    """Return the fraction of samples in ``data`` that ``model`` classifies correctly.

    The model is switched to eval mode and left there. Batches are moved to
    the device of the model's first parameter, so the function does not depend
    on a notebook-level ``device`` variable; a model without parameters is
    evaluated on the device ``data`` already lives on.

    Args:
        model: module whose output has the class scores along dimension 1.
        data: inputs, indexed along dimension 0.
        labels: class indices aligned with ``data``.
        batch_size: number of samples scored per forward pass.

    Returns:
        float: number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data`` contains no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else data.device
    loader = DataLoader(TensorDataset(data, labels), batch_size=batch_size)
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(target_device), yb.to(target_device)
            # The predicted class is the index of the largest score.
            preds = model(xb).argmax(dim=1)
            correct += (preds == yb).sum().item()
            total += yb.size(0)
    return correct / total

# Train and score the classifier that only sees the encoder's latent codes.
latent_mlp = LatentMLP(input_dim=latent_dim).to(device)
time_latent, acc_latent = train_classifier(
    model=latent_mlp,
    train_data=train_latent,
    train_labels=train_labels,
    test_data=test_latent,
    test_labels=test_labels,
)
print(f"Clasificador con embeddings -> tiempo {time_latent:.1f} s, accuracy {acc_latent*100:.2f}%")


Clasificador con embeddings -> tiempo 14.5 s, accuracy 95.62%


## Clasificador MLP directo sobre píxeles


In [6]:

class PixelMLP(nn.Module):
    """Two-hidden-layer classifier over flattened inputs: 784 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the raw output of the last linear layer (no softmax applied)."""
        # Both hidden layers share the same linear + ReLU pattern.
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.fc3(x)

# Baseline: the same training routine applied directly to the 784 pixel values.
pixel_mlp = PixelMLP().to(device)
time_pixels, acc_pixels = train_classifier(
    model=pixel_mlp,
    train_data=train_images,
    train_labels=train_labels,
    test_data=test_images,
    test_labels=test_labels,
)
print(f"Clasificador directo -> tiempo {time_pixels:.1f} s, accuracy {acc_pixels*100:.2f}%")


Clasificador directo -> tiempo 20.2 s, accuracy 97.78%
