In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import torchmetrics
import optuna

  if entities is not ():


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [3]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(0.2, 0.2, 0.2),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5)
    )  
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5)
    )
])

train_dataset = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=train_transform
)

test_dataset =datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=val_transform
)

train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

test_loader =DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

100%|██████████| 170M/170M [00:02<00:00, 77.3MB/s]


In [4]:
class FlexibleCNN(nn.Module):
    def __init__(self,
                 num_blocks,
                 filters_per_block,
                 kernel_sizes,
                 dropouts,
                 num_fc_units,
                 fc_units):
        super().__init__()
        
        # --- PARTE 1: Extração de Características (Convoluções) ---
        convolution_layers = []
        for i in range(num_blocks):
            out_channels = filters_per_block[i]
            
            convolution_layers.append(
                nn.LazyConv2d(
                    out_channels=out_channels,
                    kernel_size=kernel_sizes[i],
                    padding=kernel_sizes[i] // 2
                )
            )
            convolution_layers.append(nn.BatchNorm2d(filters_per_block[i]))
            convolution_layers.append(nn.ReLU())
            convolution_layers.append(nn.MaxPool2d(2))
            

        self.feature_extractor = nn.Sequential(*convolution_layers)

        # --- PARTE 2: Classificador (Camadas Densas) ---
        fc_layers = []
        for i in range(num_fc_units):
            fc_layers.append(nn.LazyLinear(fc_units[i]))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropouts[i]))
            
        self.classifier = nn.Sequential(*fc_layers)
        # Camada de Saída Final (Fixa em 10)
        self.output = nn.LazyLinear(10)

    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        x = self.output(x)
        return x
        

In [5]:
def objective(trial):
    num_blocks = trial.suggest_int("num_blocks", 1, 3)

    filters_per_block = [
        trial.suggest_int(f"filters_block_{i}", 32, 128)
        for i in range(num_blocks)
    ]

    kernel_sizes = [
        trial.suggest_categorical(f"kernel_size_{i}", [3, 5])
        for i in range(num_blocks)
    ]

    num_fc_layers = trial.suggest_int("num_fc_layers", 1, 3)

    fc_units = [
        trial.suggest_int(f"fc_units_{i}", 64, 256)
        for i in range(num_fc_layers)
    ]

    dropouts = [
        trial.suggest_float(f"dropout_{i}", 0.1, 0.4)
        for i in range(num_fc_layers)
    ]

    model = FlexibleCNN(num_blocks, filters_per_block, kernel_sizes, dropouts, num_fc_layers, fc_units).to(device)

    # Dummy pass para inicializar Lazy layers
    with torch.no_grad():
        model(torch.randn(1, 3, 32, 32).to(device))

    # Métrica
    acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)
    loss_function = nn.CrossEntropyLoss()
    
    # Loop de Treino
    for epoch in range(10):
        model.train()
    
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
    
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
    
    # Validação Final
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            acc_metric.update(outputs, labels)
        
    return acc_metric.compute().item()

In [6]:
# Criamos o estudo
study = optuna.create_study(direction="maximize")

# Iniciamos as tentativas (ex: 10 vezes)
study.optimize(objective, n_trials=10)

print("\n--- RESULTADO DA OTIMIZAÇÃO ---")
print(f"Melhor Acurácia: {study.best_value:.4f}")
print("Melhores Parâmetros:", study.best_params)

[32m[I 2025-12-28 00:21:25,865][0m A new study created in memory with name: no-name-8dff6cbe-3b2a-4651-af42-e12b8c66b567[0m
[32m[I 2025-12-28 00:23:56,520][0m Trial 0 finished with value: 0.7217000126838684 and parameters: {'num_blocks': 2, 'filters_block_0': 88, 'filters_block_1': 63, 'kernel_size_0': 5, 'kernel_size_1': 5, 'num_fc_layers': 1, 'fc_units_0': 146, 'dropout_0': 0.3320016595886409}. Best is trial 0 with value: 0.7217000126838684.[0m
[32m[I 2025-12-28 00:26:27,585][0m Trial 1 finished with value: 0.7243000268936157 and parameters: {'num_blocks': 2, 'filters_block_0': 107, 'filters_block_1': 42, 'kernel_size_0': 5, 'kernel_size_1': 5, 'num_fc_layers': 2, 'fc_units_0': 182, 'fc_units_1': 66, 'dropout_0': 0.2983268279409548, 'dropout_1': 0.3644703338232236}. Best is trial 1 with value: 0.7243000268936157.[0m
[32m[I 2025-12-28 00:28:56,485][0m Trial 2 finished with value: 0.7228999733924866 and parameters: {'num_blocks': 2, 'filters_block_0': 108, 'filters_block_1':


--- RESULTADO DA OTIMIZAÇÃO ---
Melhor Acurácia: 0.7762
Melhores Parâmetros: {'num_blocks': 3, 'filters_block_0': 56, 'filters_block_1': 110, 'filters_block_2': 64, 'kernel_size_0': 5, 'kernel_size_1': 5, 'kernel_size_2': 5, 'num_fc_layers': 2, 'fc_units_0': 218, 'fc_units_1': 66, 'dropout_0': 0.3226295269253727, 'dropout_1': 0.3953085132876831}


In [7]:
# 1. Extrair os valores do melhor trial
bp = study.best_params

# 2. Reconstruir as listas que o modelo espera
best_num_blocks = bp["num_blocks"]

best_filters = [
    bp[f"filters_block_{i}"]
    for i in range(best_num_blocks)
]

best_kernels = [3 for _ in range(best_num_blocks)]  # se kernel foi fixado

best_num_fc = bp["num_fc_layers"]

best_fc_units = [
    bp[f"fc_units_{i}"]
    for i in range(best_num_fc)
]

best_dropouts = [
    bp[f"dropout_{i}"]
    for i in range(best_num_fc)
]

# 3. Instanciar o modelo vencedor
best_model = FlexibleCNN(
    num_blocks=best_num_blocks,
    filters_per_block=best_filters,
    kernel_sizes=best_kernels,
    dropouts=best_dropouts,
    num_fc_units=best_num_fc,
    fc_units=best_fc_units
)

# 4. Inicializar Lazy layers
with torch.no_grad():
    best_model(torch.randn(1, 3, 32, 32))

best_model.to(device)
print(f"Modelo movido com sucesso para: {device}")

Modelo movido com sucesso para: cuda


In [8]:
acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
    
optimizer = optim.Adam(best_model.parameters(), lr=0.001, weight_decay=0.0005)
loss_function = nn.CrossEntropyLoss()

for epoch in range(50):
    best_model.train()
    running_loss = 0.0
    
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
    
        optimizer.zero_grad()
        outputs = best_model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print("loss:", running_loss/len(train_loader))

best_model.eval()
with torch.no_grad():
    for image, label in test_loader:
        images = image.to(device)
        labels = label.to(device)
        output = best_model(images)
        acc_metric.update(output, labels)

print(f"ACC: {acc_metric.compute():.4f}")

loss: 1.5090262809830248
loss: 1.1555433816007337
loss: 1.0203655666250098
loss: 0.9444394193951736
loss: 0.88624672961357
loss: 0.8434836987949088
loss: 0.8122714911885274
loss: 0.7835405229226403
loss: 0.7592849840441018
loss: 0.7361275147446587
loss: 0.7190457960147687
loss: 0.7029173321202588
loss: 0.697017024392667
loss: 0.6724530140990796
loss: 0.6670725949661201
loss: 0.6527624990400451
loss: 0.6492210154033378
loss: 0.6370125379403839
loss: 0.6304359310651984
loss: 0.6289093867515969
loss: 0.620955795011557
loss: 0.6165946474313126
loss: 0.6089522747127601
loss: 0.6039143307968173
loss: 0.5965780079212335
loss: 0.5931958434221994
loss: 0.5846624901075193
loss: 0.5857863857999177
loss: 0.585570897402056
loss: 0.5820504855507475
loss: 0.5764351774893148
loss: 0.5700470961420737
loss: 0.5660684566439875
loss: 0.5667395225304472
loss: 0.5619666961681508
loss: 0.5606852777473762
loss: 0.5577041324218521
loss: 0.5577937158782159
loss: 0.5512673453716062
loss: 0.549677406792598
loss: 