In [None]:
!pip install torch scikit-learn

Collecting torch
  Using cached torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)
  Using cached nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
Collecting nvidia-cublas-cu12==12.8.4.1 (from torch)
  Using cached nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cufft-cu12==11.3.3.83 (from torch)
  Using cached nvi

In [1]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

ModuleNotFoundError: No module named 'torchvision'

In [None]:
# Tunable constants for the data pipeline
SEED = 42        # seed for the train/validation split
BATCH_SIZE = 64  # samples per batch for every loader

# Transform that converts images to tensors and normalizes them
transform = transforms.Compose([
    transforms.ToTensor(),  # convert to tensor
    transforms.Normalize((0.1307,), (0.3081,))  # (mean,), (std,) — the values commonly used for MNIST
])

# Download the MNIST training set
full_train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Download the MNIST test set
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Train/validation proportion (90% train, 10% validation)
train_size = int(0.9 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# Split the data. A dedicated, seeded generator makes the split identical on
# every run, so validation numbers are comparable between runs. The shuffle
# order of train_loader still follows torch's global RNG.
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = random_split(
    full_train_dataset, [train_size, val_size], generator=split_generator
)

# Loaders for training, validation and test; only training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Preview a few images from the first training batch
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)

# One row of ten panels, each showing channel 0 of a sample with its label
fig, axes = plt.subplots(nrows=1, ncols=10, figsize=(15, 4))
for i, ax in enumerate(axes):
    ax.imshow(example_data[i][0], cmap='gray')
    ax.set_title(f"Rótulo: {example_targets[i].item()}")
    ax.axis('off')
plt.show()

In [None]:
# Model architecture used throughout the notebook
class Model(nn.Module):
    """Single linear layer that maps a flattened sample to class logits.

    Args:
        in_features: Number of values in one flattened sample
            (default ``28*28``).
        num_classes: Number of output logits (default ``10``).
    """

    def __init__(self, in_features=28 * 28, num_classes=10):
        super().__init__()
        self.layer = nn.Linear(in_features=in_features, out_features=num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for input ``x``.

        Every dimension after the first (batch) dimension is flattened
        before the linear layer is applied.
        """
        # 1. Flatten the input. torch.flatten copies only when it has to, so
        #    non-contiguous tensors and empty batches work; Tensor.view raises
        #    on both.
        x = torch.flatten(x, start_dim=1)

        # 2. Pass the flattened data through the linear layer.
        return self.layer(x)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = Model()
model.to(device)

In [None]:
# Training configuration: epochs, loss function and optimizer

# Number of full passes over the training set
epochs = 10

# Loss comparing the model's logits against the integer class labels
criterion = nn.CrossEntropyLoss()

# Stochastic gradient descent over all model parameters, learning rate 0.01
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)


In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(avg_loss, accuracy_percent)``.

    When ``optimizer`` is given, the model is put in training mode and its
    weights are updated after every batch. Otherwise the model is put in
    evaluation mode and gradient tracking is disabled. If the loader yields
    no samples, both returned values are NaN.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(training):
        for data, targets in loader:
            data, targets = data.to(device), targets.to(device)

            if training:
                # Gradients accumulate across backward() calls, so reset them
                # at the start of every batch.
                optimizer.zero_grad()

            outputs = model(data)
            loss = criterion(outputs, targets)

            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by its size so the epoch average is per
            # sample (assumes the criterion returns a batch mean, which is the
            # default reduction of nn.CrossEntropyLoss).
            loss_sum += loss.item() * data.size(0)
            correct += outputs.argmax(dim=1).eq(targets).sum().item()
            seen += targets.size(0)

    if seen == 0:
        # An empty loader has no meaningful average; report NaN instead of
        # raising ZeroDivisionError.
        return float("nan"), float("nan")
    return loss_sum / seen, correct / seen * 100


def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module to train; it is moved to ``device``.
        train_loader: Iterable of ``(data, targets)`` batches used for the
            weight updates.
        val_loader: Iterable of ``(data, targets)`` batches evaluated without
            gradient tracking after every training pass.
        optimizer: Optimizer bound to the model's parameters.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        device: Device the model and every batch are moved to.
        epochs: Number of passes over ``train_loader``.

    Prints the average loss and accuracy of both phases for each epoch. A
    phase whose loader yields no samples is reported as ``nan``.
    """
    model.to(device)

    for epoch in range(epochs):
        avg_train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        avg_val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        # --- Results ---
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"  Treino     - Loss: {avg_train_loss:.4f} | Acurácia: {train_acc:.2f}%")
        print(f"  Validação  - Loss: {avg_val_loss:.4f} | Acurácia: {val_acc:.2f}%\n")

In [None]:
# Train the model with the loss, optimizer and epoch count configured above
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=epochs,
)

In [None]:
def test_model(model, test_loader, criterion, device):
    model.eval()
    model.to(device)
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = criterion(outputs, targets)

            test_loss += loss.item() * data.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    avg_loss = test_loss / total
    acc = correct / total * 100

    print(f"Teste Loss médio: {avg_loss:.4f} | Acurácia: {acc:.2f}%")

In [None]:
# Report loss and accuracy on the held-out test set
test_model(
    model=model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)

In [None]:
def visualize_predictions(model, data_loader, device, num_images=10):
    """Plot images from the first batch of ``data_loader`` with their predictions.

    Args:
        model: Module used to predict; it is put in eval mode and moved to
            ``device``.
        data_loader: Iterable of ``(data, targets)`` batches; only the first
            batch is used.
        device: Device the model and the batch are moved to.
        num_images: Maximum number of images to show. Fewer are shown when
            the first batch is smaller.

    Nothing is drawn when the loader is empty or ``num_images`` is not
    positive.
    """
    model.eval()
    model.to(device)

    # Only the first batch is visualized.
    first_batch = next(iter(data_loader), None)
    if first_batch is None:
        return

    data, targets = first_batch
    with torch.no_grad():
        data, targets = data.to(device), targets.to(device)
        predicted = model(data).argmax(dim=1)

    # Never index past the end of the batch.
    count = min(num_images, data.size(0))
    if count <= 0:
        return

    # squeeze=False keeps `axes` two-dimensional even for a single panel, so
    # the row can always be iterated (a lone Axes object is not indexable).
    fig, axes = plt.subplots(1, count, figsize=(15, 4), squeeze=False)
    for i, ax in enumerate(axes[0]):
        # Show channel 0 of the image; move it to the CPU for matplotlib.
        ax.imshow(data[i][0].cpu(), cmap='gray')
        ax.set_title(f"Predição: {predicted[i].item()}\nRótulo: {targets[i].item()}")
        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
# Show predictions next to the true labels for the first test batch
visualize_predictions(
    model=model,
    data_loader=test_loader,
    device=device,
)