In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import torchmetrics
import optuna

  if entities is not ():


In [4]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [5]:
# Per-channel normalization statistics shared by both pipelines
# (three channels, mean 0.5 and std 0.5 each).
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)

# Training pipeline: random augmentations (flip, rotation, color jitter)
# followed by tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalization as the training pipeline.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Both CIFAR-10 splits share the same on-disk root and are downloaded if
# missing; they differ only in the split flag and the transform applied.
cifar_common = dict(root="./data", download=True)

# Training split, with the augmenting transform.
train_dataset = datasets.CIFAR10(train=True, transform=train_transform, **cifar_common)

# Held-out split (train=False), with the augmentation-free transform.
test_dataset = datasets.CIFAR10(train=False, transform=val_transform, **cifar_common)

# Loader settings common to both splits: batches of 64, four worker
# processes, and pinned host memory.
loader_common = dict(batch_size=64, num_workers=4, pin_memory=True)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_common)

# Evaluation batches keep the dataset order.
test_loader = DataLoader(test_dataset, shuffle=False, **loader_common)

100%|██████████| 170M/170M [00:03<00:00, 56.0MB/s] 


In [6]:
class FlexibleCNN(nn.Module):
    """CNN with a configurable number of conv blocks and dense layers.

    The network is a stack of ``num_blocks`` blocks
    (LazyConv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2)), followed by
    ``num_fc_units`` dense stages (LazyLinear -> ReLU -> Dropout) and a final
    LazyLinear producing 10 outputs. Lazy layers infer their input size on
    the first forward pass.

    Args:
        num_blocks: number of convolutional blocks to build.
        filters_per_block: output channels of each block, indexed 0..num_blocks-1.
        kernel_sizes: conv kernel size of each block; padding is kernel_size // 2.
        dropouts: dropout probability of each dense stage, indexed 0..num_fc_units-1.
        num_fc_units: number of dense stages to build (a layer count, despite the name).
        fc_units: output width of each dense stage, indexed 0..num_fc_units-1.
    """

    def __init__(self,
                 num_blocks,
                 filters_per_block,
                 kernel_sizes,
                 dropouts,
                 num_fc_units,
                 fc_units):
        super().__init__()

        # --- PART 1: feature extraction (convolutional blocks) ---
        conv_modules = []
        for block_idx in range(num_blocks):
            width = filters_per_block[block_idx]
            kernel = kernel_sizes[block_idx]
            conv_modules += [
                nn.LazyConv2d(out_channels=width, kernel_size=kernel, padding=kernel // 2),
                nn.BatchNorm2d(width),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.feature_extractor = nn.Sequential(*conv_modules)

        # --- PART 2: classifier (dense stages) ---
        dense_modules = []
        for stage_idx in range(num_fc_units):
            dense_modules.extend((
                nn.LazyLinear(fc_units[stage_idx]),
                nn.ReLU(),
                nn.Dropout(dropouts[stage_idx]),
            ))
        self.classifier = nn.Sequential(*dense_modules)

        # Final output layer, fixed at 10 units.
        self.output = nn.LazyLinear(10)

    def forward(self, x):
        """Run conv blocks, flatten all non-batch dims, then the dense head."""
        features = self.feature_extractor(x)
        hidden = self.classifier(torch.flatten(features, 1))
        return self.output(hidden)
        

In [7]:
def objective(trial):
    """Optuna objective: train one sampled FlexibleCNN and return its accuracy.

    Samples an architecture (number of conv blocks, filters and kernel size
    per block, number of dense layers, width and dropout per dense layer),
    trains it for 10 epochs on `train_loader` with Adam and cross-entropy,
    then scores it on `test_loader`.

    Args:
        trial: the Optuna trial used to sample hyperparameters via
            `suggest_int`, `suggest_categorical` and `suggest_float`.

    Returns:
        float: multiclass accuracy accumulated over every batch of
        `test_loader`.
    """
    # --- Search space (parameter names are the keys recorded by the trial) ---
    num_blocks = trial.suggest_int("num_blocks", 1, 3)

    filters_per_block = [
        trial.suggest_int(f"filters_block_{i}", 32, 128)
        for i in range(num_blocks)
    ]

    kernel_sizes = [
        trial.suggest_categorical(f"kernel_size_{i}", [3, 5])
        for i in range(num_blocks)
    ]

    num_fc_layers = trial.suggest_int("num_fc_layers", 1, 3)

    fc_units = [
        trial.suggest_int(f"fc_units_{i}", 64, 256)
        for i in range(num_fc_layers)
    ]

    dropouts = [
        trial.suggest_float(f"dropout_{i}", 0.1, 0.4)
        for i in range(num_fc_layers)
    ]

    model = FlexibleCNN(
        num_blocks=num_blocks,
        filters_per_block=filters_per_block,
        kernel_sizes=kernel_sizes,
        dropouts=dropouts,
        num_fc_units=num_fc_layers,
        fc_units=fc_units,
    ).to(device)

    # Dummy pass to materialize the lazy layers. It runs in eval mode so the
    # random input does not update the BatchNorm running statistics and does
    # not go through active dropout; training mode is re-enabled at the start
    # of every epoch below.
    model.eval()
    with torch.no_grad():
        model(torch.randn(1, 3, 32, 32, device=device))

    # Metric
    acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)

    # The optimizer is created after the dummy pass so that model.parameters()
    # contains the materialized (no longer uninitialized) parameters.
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)
    loss_function = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(10):
        model.train()

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

    # Final evaluation.
    # NOTE(review): the score that drives hyperparameter selection is computed
    # on `test_loader`. If that loader wraps the held-out test split, the best
    # value reported by the study is optimistically biased; consider carving a
    # validation split out of the training data for model selection.
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            acc_metric.update(outputs, labels)

    return acc_metric.compute().item()

In [8]:
# Create the Optuna study; the objective value (accuracy) is maximized.
study = optuna.create_study(direction="maximize")

# Run the search: 10 trials, each one a full call to `objective`.
study.optimize(func=objective, n_trials=10)

# Report the best objective value and the hyperparameters that produced it.
print(
    "\n--- RESULTADO DA OTIMIZAÇÃO ---",
    f"Melhor Acurácia: {study.best_value:.4f}",
    sep="\n",
)
print("Melhores Parâmetros:", study.best_params)

[32m[I 2025-12-27 23:19:13,733][0m A new study created in memory with name: no-name-52071725-4b38-46e4-86ee-4e927997048d[0m
[32m[I 2025-12-27 23:21:33,836][0m Trial 0 finished with value: 0.6427000164985657 and parameters: {'num_blocks': 1, 'filters_block_0': 102, 'kernel_size_0': 5, 'num_fc_layers': 2, 'fc_units_0': 240, 'fc_units_1': 67, 'dropout_0': 0.3134770579926872, 'dropout_1': 0.27150987058799164}. Best is trial 0 with value: 0.6427000164985657.[0m
[32m[I 2025-12-27 23:23:51,581][0m Trial 1 finished with value: 0.7470999956130981 and parameters: {'num_blocks': 2, 'filters_block_0': 54, 'filters_block_1': 59, 'kernel_size_0': 5, 'kernel_size_1': 5, 'num_fc_layers': 1, 'fc_units_0': 102, 'dropout_0': 0.1108779139776807}. Best is trial 1 with value: 0.7470999956130981.[0m
[32m[I 2025-12-27 23:26:17,645][0m Trial 2 finished with value: 0.781499981880188 and parameters: {'num_blocks': 3, 'filters_block_0': 128, 'filters_block_1': 86, 'filters_block_2': 82, 'kernel_size_0'


--- RESULTADO DA OTIMIZAÇÃO ---
Melhor Acurácia: 0.7843
Melhores Parâmetros: {'num_blocks': 3, 'filters_block_0': 82, 'filters_block_1': 63, 'filters_block_2': 120, 'kernel_size_0': 5, 'kernel_size_1': 3, 'kernel_size_2': 3, 'num_fc_layers': 2, 'fc_units_0': 209, 'fc_units_1': 190, 'dropout_0': 0.21287761897250004, 'dropout_1': 0.36566573258198476}


In [19]:
# 1. Pull the hyperparameters of the best trial.
bp = study.best_params

# 2. Rebuild the per-block / per-layer lists the model constructor expects.
best_num_blocks = bp["num_blocks"]

best_filters = [
    bp[f"filters_block_{i}"]
    for i in range(best_num_blocks)
]

# Use the kernel sizes the study actually tuned. The fallback of 3 only
# applies when a study did not record `kernel_size_{i}` (kernel held fixed).
best_kernels = [
    bp.get(f"kernel_size_{i}", 3)
    for i in range(best_num_blocks)
]

best_num_fc = bp["num_fc_layers"]

best_fc_units = [
    bp[f"fc_units_{i}"]
    for i in range(best_num_fc)
]

best_dropouts = [
    bp[f"dropout_{i}"]
    for i in range(best_num_fc)
]

# 3. Instantiate the winning architecture.
best_model = FlexibleCNN(
    num_blocks=best_num_blocks,
    filters_per_block=best_filters,
    kernel_sizes=best_kernels,
    dropouts=best_dropouts,
    num_fc_units=best_num_fc,
    fc_units=best_fc_units
)

# 4. Materialize the lazy layers with a dummy pass. Run it in eval mode so the
#    random input does not update the BatchNorm running statistics, then
#    restore the default training mode.
best_model.eval()
with torch.no_grad():
    best_model(torch.randn(1, 3, 32, 32))
best_model.train()

best_model.to(device)
print(f"Modelo movido com sucesso para: {device}")

Modelo movido com sucesso para: cuda


In [20]:
# Metric, loss and optimizer for the final training run of `best_model`.
acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=0.001, weight_decay=0.0005)

# Train for 50 epochs, printing the mean per-batch training loss of each epoch.
num_train_batches = len(train_loader)
for epoch in range(50):
    best_model.train()
    epoch_loss_sum = 0.0

    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = loss_function(best_model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()
        epoch_loss_sum += batch_loss.item()
    print("loss:", epoch_loss_sum / num_train_batches)

# Evaluate once after training: accumulate accuracy over all of `test_loader`
# with gradients disabled and the model in eval mode.
best_model.eval()
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        predictions = best_model(batch_images.to(device))
        acc_metric.update(predictions, batch_labels.to(device))

print(f"ACC: {acc_metric.compute():.4f}")

loss: 1.4042896690880855
loss: 1.0572756299429842
loss: 0.935168511727277
loss: 0.8681345645271604
loss: 0.8074015582080387
loss: 0.7760817193238022
loss: 0.7366746850788136
loss: 0.7052897689744945
loss: 0.6824829485029211
loss: 0.664602526847054
loss: 0.645231607053286
loss: 0.624837741331981
loss: 0.6125073612803389
loss: 0.60098578878071
loss: 0.5872692742844676
loss: 0.5760582259777561
loss: 0.5680284385028702
loss: 0.5582044903579575
loss: 0.5557828014311583
loss: 0.5481069938034353
loss: 0.5400955655118999
loss: 0.5289791924950412
loss: 0.5272506111684967
loss: 0.5240652632454167
loss: 0.5238816332253043
loss: 0.5158323424742045
loss: 0.5077172040443896
loss: 0.5143864190258334
loss: 0.5020139596003401
loss: 0.49960492911469906
loss: 0.4956856178658088
loss: 0.49824569387661527
loss: 0.49893749735849285
loss: 0.4889123459034564
loss: 0.49418707809332385
loss: 0.48788332098813925
loss: 0.48746526559524217
loss: 0.47838831205121085
loss: 0.4807284845377478
loss: 0.4780052137725493