# 6. Clasificación de MNIST con arquitecturas mínimas

Buscamos la red convolucional y el perceptrón multicapa más pequeños que alcancen al menos 90% de exactitud en MNIST.


In [3]:

import os
import zipfile
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Seed PyTorch's global RNG so that weight initialisation and the shuffling
# done by DataLoader(shuffle=True) are repeatable across fresh kernel runs.
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when one is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', device)

Device: cpu


## Carga de los tensores MNIST
Leemos los archivos `.pt` provistos (internamente son archivos zip, de los que extraemos los bytes crudos de cada tensor) y dejamos las imágenes normalizadas en `[0, 1]`.


In [5]:

# Folder that holds the four MNIST `.pt` archives. Set the MNIST_DATA_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded location is used.
DATA_DIR = Path(os.environ.get(
    'MNIST_DATA_DIR',
    '/home/camila/Documents/Redes Neuronales/Redes-Neuronales/TP2/ejercicio-6',
))

def _read_first_storage(zip_path: Path, train_root: str, test_root: str) -> bytes:
    """Return the raw bytes of member ``<root>/data/0`` inside the zip at ``zip_path``.

    ``<root>`` is ``train_root`` when the file name contains ``'training'``
    and ``test_root`` otherwise.
    """
    root = train_root if 'training' in zip_path.name else test_root
    with zipfile.ZipFile(zip_path) as archive:
        return archive.read(f'{root}/data/0')


def load_mnist_images(zip_path: Path) -> np.ndarray:
    """Decode the image archive at ``zip_path`` into a ``(N, 28, 28)`` uint8 array.

    The bytes of ``MNIST_train_data/data/0`` (file name contains
    ``'training'``) or ``MNIST_test_data/data/0`` (any other name) are
    interpreted as unsigned bytes and grouped into 28x28 images.

    Args:
        zip_path: Path to the zip-format ``.pt`` file.

    Returns:
        A read-only array (it is a view over an immutable ``bytes`` object).
    """
    raw = _read_first_storage(zip_path, 'MNIST_train_data', 'MNIST_test_data')
    return np.frombuffer(raw, dtype=np.uint8).reshape(-1, 28, 28)


def load_mnist_labels(zip_path: Path) -> np.ndarray:
    """Decode the label archive at ``zip_path`` into a 1-D int64 array.

    The bytes of ``MNIST_train_labels/data/0`` (file name contains
    ``'training'``) or ``MNIST_test_labels/data/0`` (any other name) are
    interpreted as little-endian 64-bit integers.

    Args:
        zip_path: Path to the zip-format ``.pt`` file.

    Returns:
        A read-only array (it is a view over an immutable ``bytes`` object).
    """
    raw = _read_first_storage(zip_path, 'MNIST_train_labels', 'MNIST_test_labels')
    return np.frombuffer(raw, dtype='<i8')

# Images: decode the uint8 pixels, cast to float32, insert a channel axis at
# position 1 and divide by 255 so that every value lies in [0, 1].
train_images = torch.tensor(
    load_mnist_images(DATA_DIR / 'MNIST_training_data.pt'), dtype=torch.float32
).unsqueeze(1) / 255.0
test_images = torch.tensor(
    load_mnist_images(DATA_DIR / 'MNIST_test_data.pt'), dtype=torch.float32
).unsqueeze(1) / 255.0

# Labels: decode the int64 class indices and store them as torch.long tensors.
train_labels = torch.tensor(
    load_mnist_labels(DATA_DIR / 'MNIST_training_labels.pt'), dtype=torch.long
)
test_labels = torch.tensor(
    load_mnist_labels(DATA_DIR / 'MNIST_test_labels.pt'), dtype=torch.long
)

print('Train:', train_images.shape, 'Test:', test_images.shape)


Train: torch.Size([60000, 1, 28, 28]) Test: torch.Size([10000, 1, 28, 28])


In [6]:

batch_size = 128

# The training set is served in shuffled mini-batches of `batch_size`; the
# test set is read in order, in batches of 512.
train_loader = DataLoader(
    dataset=TensorDataset(train_images, train_labels),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=TensorDataset(test_images, test_labels),
    batch_size=512,
    shuffle=False,
)


## Utilidades de entrenamiento


In [7]:

def count_params(model: nn.Module) -> int:
    """Return the total number of scalar entries in ``model``'s trainable parameters.

    Parameters whose ``requires_grad`` flag is False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def evaluate(model: nn.Module, loader: DataLoader) -> float:
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode (and left in it) and gradients are
    disabled while predicting. The predicted class of each sample is the
    argmax of the model output along dimension 1.

    Args:
        model: Classifier whose output has one score per class along dim 1.
        loader: Yields ``(inputs, labels)`` batches.

    Returns:
        ``correct / total`` as a float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    # Send each batch to the device that holds the model's weights, so the
    # function does not depend on a notebook-level `device` variable. A model
    # without parameters leaves the batches where they already are.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else None

    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in loader:
            if target is not None:
                xb, yb = xb.to(target), yb.to(target)
            preds = model(xb).argmax(dim=1)
            correct += (preds == yb).sum().item()
            total += yb.size(0)
    return correct / total

def train_model(model: nn.Module, train_loader: DataLoader, test_loader: DataLoader, epochs: int, lr: float = 0.001):
    """Train ``model`` with Adam and cross-entropy loss, scoring it after every epoch.

    After each full pass over ``train_loader`` the model is scored with
    ``evaluate`` on both loaders and one progress line is printed.

    Args:
        model: Network to optimise in place.
        train_loader: Yields the ``(inputs, labels)`` batches used for the updates.
        test_loader: Yields the batches used for the per-epoch held-out accuracy.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        A list with one ``(train_acc, test_acc)`` tuple per epoch, each
        accuracy being the fraction returned by ``evaluate``.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Batches are sent to the device that holds the model's weights, so the
    # function does not depend on a notebook-level `device` variable. The
    # optimizer above has already rejected a model without parameters.
    target = next(model.parameters()).device
    history = []
    for epoch in range(1, epochs + 1):
        # evaluate() leaves the model in eval mode, so re-enable train mode
        # at the start of every epoch.
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(target), yb.to(target)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
        train_acc = evaluate(model, train_loader)
        test_acc = evaluate(model, test_loader)
        history.append((train_acc, test_acc))
        print(f"Epoch {epoch:02d} - train acc {train_acc*100:.2f}% - eval acc {test_acc*100:.2f}%")
    return history


## Red convolucional mínima

In [8]:

class TinyConvNet(nn.Module):
    """One-convolution classifier: conv -> ReLU -> 2x2 max-pool -> linear (10 outputs).

    The defaults reproduce the original fixed architecture (8 filters of
    size 5x5 on 28x28 inputs); the arguments make it possible to try smaller
    or larger variants without redefining the class.

    Args:
        channels: Number of convolution filters.
        kernel_size: Side of the square convolution kernel.
        image_size: Side of the square, single-channel input image.

    Raises:
        ValueError: If ``channels`` is below 1 or the pooled feature map
            would be empty for the given ``kernel_size`` / ``image_size``.
    """
    def __init__(self, channels: int = 8, kernel_size: int = 5, image_size: int = 28):
        super().__init__()
        # The unpadded, stride-1 convolution shrinks each side to
        # image_size - kernel_size + 1; the 2x2 pooling then halves it,
        # rounding down.
        pooled = (image_size - kernel_size + 1) // 2
        if channels < 1 or pooled < 1:
            raise ValueError(
                f"invalid TinyConvNet configuration: channels={channels}, "
                f"kernel_size={kernel_size}, image_size={image_size}"
            )
        self.conv = nn.Conv2d(1, channels, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc = nn.Linear(channels * pooled * pooled, 10)

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` image tensor to ``(batch, 10)`` class scores."""
        x = self.pool(F.relu(self.conv(x)))
        x = torch.flatten(x, 1)  # keep the batch axis, flatten the rest
        x = self.fc(x)
        return x

# Build the convolutional model, move it to the selected device and report
# how many trainable parameters it has.
conv_net = TinyConvNet()
conv_net = conv_net.to(device)
print('Parámetros conv:', count_params(conv_net))


Parámetros conv: 11738


In [9]:

# Train the convolutional model for 5 epochs, then report its accuracy on the
# test loader.
conv_history = train_model(
    model=conv_net,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=5,
    lr=0.001,
)
conv_test_acc = evaluate(model=conv_net, loader=test_loader)
print(f"Exactitud final (conv): {conv_test_acc*100:.2f}%")


Epoch 01 - train acc 93.88% - eval acc 93.96%
Epoch 02 - train acc 96.30% - eval acc 96.52%
Epoch 03 - train acc 97.26% - eval acc 97.29%
Epoch 04 - train acc 97.75% - eval acc 97.67%
Epoch 05 - train acc 98.06% - eval acc 97.83%
Exactitud final (conv): 97.83%


## Perceptrón multicapa mínimo

In [10]:

class TinyMLP(nn.Module):
    """Two-layer perceptron for 28x28 inputs: linear -> ReLU -> linear (10 outputs).

    The default reproduces the original fixed architecture (64 hidden
    units); ``hidden`` makes it possible to try narrower or wider variants
    without redefining the class.

    Args:
        hidden: Number of units in the hidden layer.

    Raises:
        ValueError: If ``hidden`` is below 1.
    """
    def __init__(self, hidden: int = 64):
        super().__init__()
        if hidden < 1:
            raise ValueError(f"hidden must be at least 1, got {hidden}")
        self.fc1 = nn.Linear(28 * 28, hidden)
        self.fc2 = nn.Linear(hidden, 10)

    def forward(self, x):
        """Map a batch whose non-batch axes hold 784 values to ``(batch, 10)`` class scores."""
        x = torch.flatten(x, 1)  # keep the batch axis, flatten the rest
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Build the perceptron, move it to the selected device and report how many
# trainable parameters it has.
mlp_net = TinyMLP()
mlp_net = mlp_net.to(device)
print('Parámetros MLP:', count_params(mlp_net))


Parámetros MLP: 50890


In [11]:

# Train the perceptron for 8 epochs with a learning rate of 0.0015, then
# report its accuracy on the test loader.
mlp_history = train_model(
    model=mlp_net,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=8,
    lr=0.0015,
)
mlp_test_acc = evaluate(model=mlp_net, loader=test_loader)
print(f"Exactitud final (MLP): {mlp_test_acc*100:.2f}%")


Epoch 01 - train acc 93.62% - eval acc 93.54%
Epoch 02 - train acc 95.58% - eval acc 95.11%
Epoch 03 - train acc 96.59% - eval acc 96.04%
Epoch 04 - train acc 97.23% - eval acc 96.61%
Epoch 05 - train acc 97.50% - eval acc 96.74%
Epoch 06 - train acc 97.96% - eval acc 97.00%
Epoch 07 - train acc 98.30% - eval acc 97.18%
Epoch 08 - train acc 98.45% - eval acc 97.35%
Exactitud final (MLP): 97.35%
