<a href="https://colab.research.google.com/github/cauarichard/crud2/blob/main/pesosmnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import os
import copy

# Hiperparâmetros
BATCH_SIZE = 128
MAX_EPOCHS = 50              # limite máximo
PATIENCE = 5                 # quantas épocas sem melhora antes de parar
MIN_DELTA = 0.001            # melhora mínima para ser considerada
LR = 1e-3

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pasta no seu Drive para salvar o melhor modelo
SAVE_DIR = "/content/drive/MyDrive/mnist_model"
os.makedirs(SAVE_DIR, exist_ok=True)
BEST_MODEL_PATH = os.path.join(SAVE_DIR, "mnist_cnn_best.pt")

print("Device:", DEVICE)
print("Modelo será salvo em:", BEST_MODEL_PATH)


Device: cuda
Modelo será salvo em: /content/drive/MyDrive/mnist_model/mnist_cnn_best.pt


In [3]:
# Transformações de treino com aumento de dados leve
train_transform = transforms.Compose([
    transforms.RandomAffine(
        degrees=10,          # rotação até ±10°
        translate=(0.05, 0.05),  # deslocamento até 5% em x e y
        shear=5              # leve shear
    ),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # normalização padrão MNIST
])

# Transform de validação/teste SEM augmentation
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Baixar MNIST
full_train = datasets.MNIST(root="./data", train=True, download=True, transform=train_transform)
test_set   = datasets.MNIST(root="./data", train=False, download=True, transform=test_transform)

# Split train/val (ex: 50k treino, 10k validação)
train_size = int(0.83 * len(full_train))  # ~50.000
val_size   = len(full_train) - train_size
train_set, val_set = random_split(full_train, [train_size, val_size])

# Para validação, usamos o mesmo dataset mas SEM augmentation
# (substituímos o transform na view interna)
val_set.dataset.transform = test_transform

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_set,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
test_loader  = DataLoader(test_set,  batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

len(train_set), len(val_set), len(test_set)


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.3MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 453kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.29MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.93MB/s]


(49800, 10200, 10000)

In [4]:
class MnistCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),          # 14x14

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),          # 7x7

            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64*7*7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = MnistCNN().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

sum(p.numel() for p in model.parameters())  # número de parâmetros


421834

In [5]:
best_val_loss = float("inf")
best_model_state = None
epochs_no_improve = 0

for epoch in range(1, MAX_EPOCHS + 1):
    # ===== Treino =====
    model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0

    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * x.size(0)
        _, preds = torch.max(outputs, 1)
        correct_train += (preds == y).sum().item()
        total_train += y.size(0)

    train_loss /= total_train
    train_acc = correct_train / total_train

    # ===== Validação =====
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            outputs = model(x)
            loss = criterion(outputs, y)
            val_loss += loss.item() * x.size(0)
            _, preds = torch.max(outputs, 1)
            correct_val += (preds == y).sum().item()
            total_val += y.size(0)

    val_loss /= total_val
    val_acc = correct_val / total_val

    print(f"Época {epoch:02d}/{MAX_EPOCHS} | "
          f"Train loss: {train_loss:.4f}, acc: {train_acc*100:.2f}% | "
          f"Val loss: {val_loss:.4f}, acc: {val_acc*100:.2f}%")

    # ===== Early stopping com margem =====
    if val_loss < best_val_loss - MIN_DELTA:
        best_val_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0

        # Salvar peso atual como "melhor até agora" no Drive
        torch.save(best_model_state, BEST_MODEL_PATH)
        print(f"  ↳ Melhorou! Novo best val_loss = {best_val_loss:.4f}. Modelo salvo.")
    else:
        epochs_no_improve += 1
        print(f"  ↳ Sem melhora significativa ({epochs_no_improve}/{PATIENCE}).")

    if epochs_no_improve >= PATIENCE:
        print(f"\nEarly stopping ativado na época {epoch}.")
        break

print("\nTreino finalizado.")
print("Melhor val_loss:", best_val_loss)
print("Pesos salvos em:", BEST_MODEL_PATH)


Época 01/50 | Train loss: 0.1962, acc: 93.89% | Val loss: 0.0573, acc: 98.47%
  ↳ Melhorou! Novo best val_loss = 0.0573. Modelo salvo.
Época 02/50 | Train loss: 0.0806, acc: 97.59% | Val loss: 0.0520, acc: 98.57%
  ↳ Melhorou! Novo best val_loss = 0.0520. Modelo salvo.
Época 03/50 | Train loss: 0.0633, acc: 98.05% | Val loss: 0.0454, acc: 98.75%
  ↳ Melhorou! Novo best val_loss = 0.0454. Modelo salvo.
Época 04/50 | Train loss: 0.0518, acc: 98.37% | Val loss: 0.0455, acc: 98.85%
  ↳ Sem melhora significativa (1/5).
Época 05/50 | Train loss: 0.0469, acc: 98.51% | Val loss: 0.0430, acc: 98.95%
  ↳ Melhorou! Novo best val_loss = 0.0430. Modelo salvo.
Época 06/50 | Train loss: 0.0412, acc: 98.72% | Val loss: 0.0422, acc: 98.92%
  ↳ Sem melhora significativa (1/5).
Época 07/50 | Train loss: 0.0397, acc: 98.69% | Val loss: 0.0425, acc: 98.90%
  ↳ Sem melhora significativa (2/5).
Época 08/50 | Train loss: 0.0382, acc: 98.82% | Val loss: 0.0440, acc: 98.99%
  ↳ Sem melhora significativa (3/5).


In [6]:
# Carregar o melhor modelo salvo
best_model = MnistCNN().to(DEVICE)
best_model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=DEVICE))
best_model.eval()

test_loss = 0.0
correct_test = 0
total_test = 0

with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        outputs = best_model(x)
        loss = criterion(outputs, y)
        test_loss += loss.item() * x.size(0)
        _, preds = torch.max(outputs, 1)
        correct_test += (preds == y).sum().item()
        total_test += y.size(0)

test_loss /= total_test
test_acc = correct_test / total_test

print(f"Melhor modelo | Test loss: {test_loss:.4f} | Test accuracy: {test_acc*100:.2f}%")
print("Pesos estão salvos em:", BEST_MODEL_PATH)


Melhor modelo | Test loss: 0.0312 | Test accuracy: 99.27%
Pesos estão salvos em: /content/drive/MyDrive/mnist_model/mnist_cnn_best.pt
