In [3]:
# pip install torch torchvision torchaudio -q
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 10

# ------------------------------
# CNN simples para MNIST
# ------------------------------
class CNN(nn.Module):
    def __init__(self, n_classes=NUM_CLASSES):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, n_classes)
        )

    def forward(self, x): return self.fc(self.conv(x))

# ------------------------------
# Funções auxiliares
# ------------------------------
def soft_cross_entropy(logits, soft_targets):
    log_probs = F.log_softmax(logits, dim=1)
    return -(soft_targets * log_probs).sum(dim=1).mean()

def label_smoothing(one_hot, smoothing=0.1):
    return one_hot * (1 - smoothing) + smoothing / one_hot.size(1)

def to_one_hot(y, num_classes=NUM_CLASSES):
    return F.one_hot(y, num_classes=num_classes).float()

def mixup(x, y_onehot, alpha=0.4):
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0), device=x.device)
    x_mix = lam * x + (1 - lam) * x[index]
    y_mix = lam * y_onehot + (1 - lam) * y_onehot[index]
    return x_mix, y_mix

# ------------------------------
# Dataset MNIST
# ------------------------------
tfm = transforms.Compose([transforms.ToTensor()])
train_ds = datasets.MNIST(root="./data", train=True, download=True, transform=tfm)
test_ds  = datasets.MNIST(root="./data", train=False, download=True, transform=tfm)
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=256)

# ------------------------------
# Função de treino
# ------------------------------
def train_model(mode="hard", epochs=5):
    model = CNN().to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(1, epochs+1):
        model.train()
        loss_total = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            y_onehot = to_one_hot(y)

            # Hard labels (padrão)
            if mode == "hard":
                y_soft = y_onehot
                loss_fn = nn.CrossEntropyLoss()
                loss = loss_fn(model(x), y)

            #  Soft labels (Label Smoothing)
            elif mode == "smooth":
                y_soft = label_smoothing(y_onehot, 0.1)
                loss = soft_cross_entropy(model(x), y_soft)

            # Soft labels + MixUp
            elif mode == "mixup":
                y_soft = label_smoothing(y_onehot, 0.1)
                x, y_soft = mixup(x, y_soft, 0.4)
                loss = soft_cross_entropy(model(x), y_soft)

            opt.zero_grad()
            loss.backward()
            opt.step()
            loss_total += loss.item() * x.size(0)

        # Avaliação
        model.eval()
        correct, total = 0, 0
        with torch.inference_mode():
            for x, y in test_loader:
                x, y = x.to(device), y.to(device)
                preds = model(x).argmax(dim=1)
                correct += (preds == y).sum().item()
                total += y.size(0)
        acc = 100 * correct / total
        print(f"[{mode.upper()}] Epoch {epoch:02d} | Loss {loss_total/len(train_loader.dataset):.4f} | Acc {acc:.2f}%")
    return model, acc

# ------------------------------
# Treinar e comparar
# ------------------------------
modes = ["hard", "smooth", "mixup"]
results = {}
for m in modes:
    model, acc = train_model(m, epochs=5)
    results[m] = acc

print("\n Resultados finais (Acurácia no MNIST):")
for k, v in results.items():
    print(f"{k:>8s}: {v:.2f}%")
    # -*- coding: utf-8 -*-
# Gera gráficos de MNIST: hard vs soft (smoothing e mixup)
import matplotlib.pyplot as plt

epochs = [1, 2, 3, 4, 5]

hard_loss   = [0.2154, 0.0558, 0.0386, 0.0295, 0.0224]
hard_acc    = [98.13, 98.77, 98.60, 98.83, 98.98]

smooth_loss = [0.7168, 0.5681, 0.5502, 0.5408, 0.5343]
smooth_acc  = [98.15, 98.85, 99.08, 99.01, 99.27]

mixup_loss  = [1.1016, 0.9423, 0.9165, 0.9013, 0.8695]
mixup_acc   = [97.99, 98.39, 98.57, 98.66, 98.98]

def save_line(xs, ys, title, ylabel, outpath):
    plt.figure()
    plt.plot(xs, ys, marker='o')                 # (não define cores)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.grid(True, linestyle='--', linewidth=0.5)
    plt.tight_layout()
    plt.savefig(outpath, dpi=160)
    plt.close()

# Um gráfico por método — ACC
save_line(epochs, hard_acc,   'MNIST — Accuracy per Epoch (Hard labels)', 'Accuracy (%)', 'mnist_hard_acc.png')
save_line(epochs, smooth_acc, 'MNIST — Accuracy per Epoch (Soft: Label Smoothing)', 'Accuracy (%)', 'mnist_soft_acc.png')
save_line(epochs, mixup_acc,  'MNIST — Accuracy per Epoch (Soft: MixUp)', 'Accuracy (%)', 'mnist_mixup_acc.png')

# Um gráfico por método — LOSS
save_line(epochs, hard_loss,   'MNIST — Loss per Epoch (Hard labels)', 'Loss', 'mnist_hard_loss.png')
save_line(epochs, smooth_loss, 'MNIST — Loss per Epoch (Soft: Label Smoothing)', 'Loss', 'mnist_soft_loss.png')
save_line(epochs, mixup_loss,  'MNIST — Loss per Epoch (Soft: MixUp)', 'Loss', 'mnist_mixup_loss.png')

# Comparativos (opcionais) — ACC
plt.figure()
plt.plot(epochs, hard_acc,   marker='o', label='Hard')
plt.plot(epochs, smooth_acc, marker='o', label='Soft (Smoothing)')
plt.plot(epochs, mixup_acc,  marker='o', label='Soft (MixUp)')
plt.title('MNIST — Accuracy per Epoch (Comparison)')
plt.xlabel('Epoch'); plt.ylabel('Accuracy (%)'); plt.legend()
plt.grid(True, linestyle='--', linewidth=0.5)
plt.tight_layout(); plt.savefig('mnist_acc_comparison.png', dpi=160); plt.close()

# Comparativos (opcionais) — LOSS
plt.figure()
plt.plot(epochs, hard_loss,   marker='o', label='Hard')
plt.plot(epochs, smooth_loss, marker='o', label='Soft (Smoothing)')
plt.plot(epochs, mixup_loss,  marker='o', label='Soft (MixUp)')
plt.title('MNIST — Loss per Epoch (Comparison)')
plt.xlabel('Epoch'); plt.ylabel('Loss'); plt.legend()
plt.grid(True, linestyle='--', linewidth=0.5)
plt.tight_layout(); plt.savefig('mnist_loss_comparison.png', dpi=160); plt.close()

print("Salvos: mnist_hard_acc.png, mnist_soft_acc.png, mnist_mixup_acc.png,",
      "mnist_hard_loss.png, mnist_soft_loss.png, mnist_mixup_loss.png,",
      "mnist_acc_comparison.png, mnist_loss_comparison.png")



100%|██████████| 9.91M/9.91M [00:00<00:00, 39.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.07MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 10.0MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.18MB/s]


[HARD] Epoch 01 | Loss 0.2519 | Acc 97.90%
[HARD] Epoch 02 | Loss 0.0605 | Acc 98.52%
[HARD] Epoch 03 | Loss 0.0428 | Acc 98.59%
[HARD] Epoch 04 | Loss 0.0320 | Acc 98.88%
[HARD] Epoch 05 | Loss 0.0234 | Acc 98.80%
[SMOOTH] Epoch 01 | Loss 0.6972 | Acc 98.64%
[SMOOTH] Epoch 02 | Loss 0.5656 | Acc 99.01%
[SMOOTH] Epoch 03 | Loss 0.5479 | Acc 99.03%
[SMOOTH] Epoch 04 | Loss 0.5389 | Acc 99.18%
[SMOOTH] Epoch 05 | Loss 0.5328 | Acc 99.26%
[MIXUP] Epoch 01 | Loss 1.0801 | Acc 98.32%
[MIXUP] Epoch 02 | Loss 0.9498 | Acc 98.60%
[MIXUP] Epoch 03 | Loss 0.9204 | Acc 98.75%
[MIXUP] Epoch 04 | Loss 0.9005 | Acc 98.94%
[MIXUP] Epoch 05 | Loss 0.8644 | Acc 98.98%

📊 Resultados finais (Acurácia no MNIST):
    hard: 98.80%
  smooth: 99.26%
   mixup: 98.98%
Salvos: mnist_hard_acc.png, mnist_soft_acc.png, mnist_mixup_acc.png, mnist_hard_loss.png, mnist_soft_loss.png, mnist_mixup_loss.png, mnist_acc_comparison.png, mnist_loss_comparison.png
