In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Report the Torch build and GPU availability so later results can be tied
# to the runtime they were produced on. The second label is German for
# "CUDA available"; its umlaut had been corrupted by a wrong text decoding.
print("Torch:", torch.__version__)
print("CUDA verfügbar:", torch.cuda.is_available())
if torch.cuda.is_available():
    # Only device index 0 is queried; further GPUs are not listed.
    print("GPU:", torch.cuda.get_device_name(0))

In [None]:
# models/lenet.py

import torch.nn as nn

class LeNet(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages, then three linear layers.

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of output scores produced by the last layer.
            Defaults to 10.

    The input size of ``fc1`` is fixed at ``16 * 5 * 5``, which corresponds
    to 28x28 inputs (28 -> pool -> 14 -> 5x5 conv -> 10 -> pool -> 5).

    Both pooling layers are built with ``return_indices=True``, so calling
    ``pool1``/``pool2`` directly yields ``(output, indices)``; ``forward``
    itself does not use the indices.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # Convolutional stage 1: padding=2 keeps the spatial size under a 5x5 kernel.
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2, return_indices=True)

        # Convolutional stage 2: unpadded 5x5 kernel shrinks each side by 4.
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2, return_indices=True)

        # Fully connected classifier head.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.relu3 = nn.ReLU()

        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()

        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores.

        Args:
            x: Tensor laid out as (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes). No softmax is applied.
        """
        # The pooling layers return (values, indices); the indices are not needed here.
        x, _ = self.pool1(self.relu1(self.conv1(x)))
        x, _ = self.pool2(self.relu2(self.conv2(x)))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        return self.fc3(x)

In [None]:
# Normalisation constants handed to transforms.Normalize (one value each,
# i.e. a single image channel).
mean = (0.1307,)
std = (0.3081,)

# Train and test share the same steps (tensor conversion, then normalisation)
# but are kept as two independent Compose objects.
transform_train = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)
transform_test = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

# MNIST splits; files are downloaded into ./data when not already present.
trainset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform_test,
)

# Only the training loader shuffles; both use batches of 128 and 2 workers.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

print(f"Train: {len(trainset):,} samples")
print(f"Test:  {len(testset):,} samples")

In [None]:
# Seed the global torch RNG before the model is built so that weight
# initialisation (and later draws from the global RNG) repeat across
# Restart & Run All.
# NOTE(review): some GPU kernels can still be nondeterministic — confirm
# against the PyTorch reproducibility notes if bit-exact runs are required.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LeNet().to(device)

# Cross-entropy on the raw scores, optimised with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Total number of scalar parameters in the model.
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

In [None]:
def train_epoch():
    """Run one optimisation pass over ``trainloader``.

    Operates on the notebook-level ``model``, ``trainloader``, ``criterion``,
    ``optimizer`` and ``device``.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)``. ``mean_loss`` is the
        per-sample average over the whole pass: each batch loss is weighted
        by its batch size, so a smaller final batch is not over-represented.
        This assumes ``criterion`` returns the mean over the batch.
        ``accuracy_percent`` is the share of correct top-1 predictions made
        during the pass, in percent. ``(0.0, 0.0)`` is returned when the
        loader yields no samples.
    """
    model.train()
    loss_sum, correct, total = 0.0, 0, 0

    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Undo the per-batch mean so batches of different size contribute
        # proportionally to the epoch average.
        loss_sum += loss.item() * batch_size
        total += batch_size

        # Predictions come from the same forward pass used for the update.
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0, 0.0

    return loss_sum / total, 100.0 * correct / total


def evaluate():
    """Measure top-1 accuracy of ``model`` over ``testloader``.

    Switches the notebook-level ``model`` to eval mode, runs every batch with
    gradient tracking disabled, prints the accuracy and returns it.

    Returns:
        float: Percentage of samples whose highest-scoring class equals the label.
    """
    model.eval()
    hits = 0
    seen = 0

    with torch.no_grad():
        for batch_x, batch_y in testloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Index of the largest score along the class dimension.
            top_class = model(batch_x).max(1).indices

            seen += batch_y.size(0)
            hits += top_class.eq(batch_y).sum().item()

    acc = 100.0 * hits / seen
    print(f"Test Accuracy: {acc:.2f}%")
    return acc

In [None]:
# Number of full passes over the training data.
epochs = 10

for epoch in range(1, epochs + 1):
    # One optimisation pass; returns the epoch's loss and training accuracy.
    loss, train_acc = train_epoch()
    print(f"Epoch {epoch:03d} | Loss {loss:.3f} | Train Acc {train_acc:.2f}%")

    # Check test accuracy on every fifth epoch.
    if not epoch % 5:
        evaluate()

In [None]:
# Final accuracy on the test loader after training; evaluate() also prints it.
# The value is kept in `test_acc` so it can be stored alongside the weights.
test_acc = evaluate()

In [None]:
# Bundle the trained weights with a little metadata about the run and write
# everything to a single file in the working directory.
checkpoint = dict(
    model="LeNet-MNIST",
    epochs=epochs,
    test_acc=test_acc,
    state_dict=model.state_dict(),
)
torch.save(checkpoint, "lenet_mnist.pt")

print("Saved!")

In [None]:
# Offer the checkpoint as a browser download when running on Google Colab.
# The import is guarded so that Restart & Run All also completes in
# environments where the google.colab package does not exist.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; skipping download of lenet_mnist.pt")
else:
    files.download("lenet_mnist.pt")