# Pruebas con Optuna

## Inicializar

In [10]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import os
import sys

# get the directory path of the file
dir_path = os.getcwd()

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from plot_functions import *
from Physics_Loss import *
from utils import *
sys.path.append('../Convolutional_NN')

from Dataset_Class import *

from architectures.convlstm import *
from architectures.generic_spatiotemporal_decoder import *
from architectures.generic_spatiotemporal_regressor import *

In [None]:
# Upper bound on the number of epochs every Optuna trial trains for.
epochs = 500
# Passed as `time_steps_output` when the trimmed datasets are loaded.
sequence_length = 20
# Passed as `max_samples` when loading the train / validation splits.
n_train, n_val = 1000, 200

In [None]:
# Runs once: load the trimmed train / validation datasets from disk.
dataset_train = load_trimmed_dataset(base_path=dir_path, dataset_type='train', max_samples=n_train, time_steps_output=sequence_length)
dataset_val = load_trimmed_dataset(base_path=dir_path, dataset_type='val', max_samples=n_val, time_steps_output=sequence_length)

# get_data_loaders_from_tensors() reads the four module-level tensors below.
# They must be defined in this cell so the notebook survives
# "Restart Kernel -> Run All" instead of relying on leftover kernel state.
# NOTE(review): they are built with prepare_data_for_convlstm, the same helper
# the "Regressor" section applies to these datasets - confirm this is the
# tensor layout the ConvLSTM and Decoder objectives expect.
input_train, output_train = prepare_data_for_convlstm(dataset_train, device='cpu')
input_val, output_val = prepare_data_for_convlstm(dataset_val, device='cpu')


def get_data_loaders_from_tensors(batch_size):
    """Build the train and validation DataLoaders from the pre-loaded tensors.

    Reads the module-level tensors ``input_train``, ``output_train``,
    ``input_val`` and ``output_val`` defined in this cell.

    Args:
        batch_size: Number of samples per batch, used for both loaders.

    Returns:
        Tuple ``(train_loader, val_loader)``. Only the training loader
        shuffles its samples.
    """
    train_loader = DataLoader(TensorDataset(input_train, output_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(input_val, output_val), batch_size=batch_size, shuffle=False)
    return train_loader, val_loader

✅ Cargando dataset train desde: c:\Users\ismael.gallo\Desktop\ia_thermal\ismaelgallo\datasets\PCB_transient_dataset_train.pth
✅ Cargando dataset val desde: c:\Users\ismael.gallo\Desktop\ia_thermal\ismaelgallo\datasets\PCB_transient_dataset_val.pth


## ConvLSTM

In [23]:
class ConvLSTMWrapper(nn.Module):
    """Adapter around ``ConvLSTM`` whose forward pass returns one tensor.

    The wrapped module is always built with ``batch_first=True``,
    ``bias=True`` and ``return_all_layers=False``.

    Args:
        input_dim: Forwarded to ``ConvLSTM`` as ``input_dim``.
        hidden_dim: Forwarded to ``ConvLSTM`` as ``hidden_dim``.
        kernel_size: Forwarded to ``ConvLSTM`` as ``kernel_size``.
        num_layers: Forwarded to ``ConvLSTM`` as ``num_layers``.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers):
        super().__init__()
        convlstm_options = dict(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            kernel_size=kernel_size,
            num_layers=num_layers,
            batch_first=True,
            bias=True,
            return_all_layers=False,
        )
        self.convlstm = ConvLSTM(**convlstm_options)

    def forward(self, x):
        """Run the ConvLSTM on ``x`` and return the first entry of its output list."""
        # The first returned item is a list of layer outputs; the second item
        # is not needed here.
        layer_outputs, _unused = self.convlstm(x)
        # Hand back the tensor itself; per the original note it is shaped
        # (B, T, C, H, W) - TODO confirm against ConvLSTM.
        return layer_outputs[0]


In [24]:
def objective(trial):
    """Optuna objective for the ConvLSTM hyperparameter search.

    Samples the learning rate, batch size and ConvLSTM structure, trains for
    up to ``epochs`` epochs and reports the validation loss after each epoch
    so the study's pruner can stop the trial early.

    Args:
        trial: Optuna trial used to sample values and report progress.

    Returns:
        The lowest validation loss observed during the trial.

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch.
    """
    # Search space
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    channels_per_layer = trial.suggest_categorical("hidden_dim", [16, 32, 64, 128])
    kernel_side = trial.suggest_categorical("kernel_size", [1, 3, 5, 7])
    num_layers = trial.suggest_int("num_layers", 1, 4)

    # ConvLSTM takes one hidden size and one (k, k) kernel per layer, so the
    # sampled scalars are repeated for every layer.
    model = ConvLSTMWrapper(
        input_dim=3,  # or the real number of input channels
        hidden_dim=[channels_per_layer for _ in range(num_layers)],
        kernel_size=[(kernel_side, kernel_side) for _ in range(num_layers)],
        num_layers=num_layers,
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    criterion = torch.nn.MSELoss()

    # Train / validation loaders for the sampled batch size
    train_loader, val_loader = get_data_loaders_from_tensors(batch_size=batch_size)

    lowest_val_loss = float('inf')
    for epoch in range(epochs):
        # The training loss returned here is not used by the search.
        train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, epochs)
        val_loss = evaluate(model, val_loader, criterion, device)
        scheduler.step(val_loss)

        lowest_val_loss = min(lowest_val_loss, val_loss)

        # Report the per-epoch value so the pruner can decide on early stopping.
        trial.report(val_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return lowest_val_loss


In [25]:
# In-memory study that minimises the value returned by `objective`
# (its best validation loss) over 100 trials.
study = optuna.create_study(direction="minimize")
study.optimize(func=objective, n_trials=100)

[I 2025-05-06 07:43:40,265] A new study created in memory with name: no-name-af8927e8-35b2-4c67-bc98-1bcda208eb99
[I 2025-05-06 07:43:40,480] Trial 0 finished with value: 1.8483535051345825 and parameters: {'lr': 0.0007012729240529598, 'batch_size': 8, 'hidden_dim': 32, 'kernel_size': 5, 'num_layers': 4}. Best is trial 0 with value: 1.8483535051345825.
[I 2025-05-06 07:43:40,513] Trial 1 finished with value: 1.851120114326477 and parameters: {'lr': 0.0034799899579761096, 'batch_size': 64, 'hidden_dim': 128, 'kernel_size': 1, 'num_layers': 1}. Best is trial 0 with value: 1.8483535051345825.
[I 2025-05-06 07:43:41,078] Trial 2 finished with value: 1.8512543439865112 and parameters: {'lr': 0.00015652190786062914, 'batch_size': 64, 'hidden_dim': 128, 'kernel_size': 5, 'num_layers': 3}. Best is trial 0 with value: 1.8483535051345825.
[I 2025-05-06 07:43:41,191] Trial 3 finished with value: 1.8415145874023438 and parameters: {'lr': 0.0008459811026152246, 'batch_size': 64, 'hidden_dim': 16, '

In [26]:
# List the hyperparameters of the best trial, one per line.
print("Best trial:")
for param_name, param_value in study.best_trial.params.items():
    print(f"{param_name}: {param_value}")

Best trial:
lr: 0.008797049665540861
batch_size: 16
hidden_dim: 128
kernel_size: 7
num_layers: 3


In [27]:
# Objective value per trial, followed by the hyperparameter importances.
history_figure = optuna.visualization.plot_optimization_history(study)
history_figure.show()
importance_figure = optuna.visualization.plot_param_importances(study)
importance_figure.show()


## Decoder

In [61]:
def objective(trial):
    """Optuna objective for the ``GenericSpatioTemporalDecoder`` search.

    Samples structural and training hyperparameters, swaps the model's
    ``temporal_decoder`` for a variant with the sampled feed-forward width
    and dropout, trains for up to ``epochs`` epochs and reports the
    validation loss after each epoch so the study's pruner can stop early.

    Args:
        trial: Optuna trial used to sample values and report progress.

    Returns:
        The lowest validation loss observed during the trial.

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch.
    """
    # Structural hyperparameters
    embedding_dim = trial.suggest_categorical("embedding_dim", [64, 128, 256, 512])
    num_layers = trial.suggest_int("num_layers", 1, 6)
    nhead = trial.suggest_categorical("nhead", [1, 2, 4, 8])
    ff_multiplier = trial.suggest_int("dim_ff_factor", 2, 6)
    use_temporal_channel = trial.suggest_categorical("use_temporal_channel", [False, True])

    # Training hyperparameters
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.3)

    # Which optimizer to use
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RMSprop"])

    # Data loaders for the sampled batch size
    train_loader, val_loader = get_data_loaders_from_tensors(batch_size)

    class CustomTransformerDecoder(TransformerDecoder):
        """``TransformerDecoder`` whose ``transformer`` attribute is rebuilt
        with an explicit feed-forward width and dropout."""

        def __init__(self, embedding_dim, num_layers, nhead, dim_ff, dropout):
            super().__init__(embedding_dim, num_layers, nhead)
            # Overwrite the stack created by the parent constructor.
            self.transformer = nn.TransformerEncoder(
                nn.TransformerEncoderLayer(
                    d_model=embedding_dim,
                    nhead=nhead,
                    dim_feedforward=dim_ff,
                    dropout=dropout,
                    batch_first=True,
                ),
                num_layers=num_layers,
            )

    # Base model
    model = GenericSpatioTemporalDecoder(
        embedding_dim=embedding_dim,
        num_layers=num_layers,
        nhead=nhead,
        in_channels=3,
        use_temporal_channel=use_temporal_channel,
    ).to(device)

    # Replace its decoder with one using the sampled dropout and FF size.
    # This happens before the optimizer is built, so the optimizer sees the
    # replacement's parameters.
    model.temporal_decoder = CustomTransformerDecoder(
        embedding_dim=embedding_dim,
        num_layers=num_layers,
        nhead=nhead,
        dim_ff=embedding_dim * ff_multiplier,
        dropout=dropout,
    ).to(device)

    # Optimizer, scheduler and loss
    optimizer_classes = {
        "Adam": torch.optim.Adam,
        "AdamW": torch.optim.AdamW,
        "RMSprop": torch.optim.RMSprop,
    }
    optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr, weight_decay=weight_decay)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
    criterion = torch.nn.MSELoss()

    # Training loop
    lowest_val_loss = float("inf")
    for epoch in range(epochs):
        # The training loss returned here is not used by the search.
        train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, epochs)
        val_loss = evaluate(model, val_loader, criterion, device)
        scheduler.step(val_loss)

        lowest_val_loss = min(lowest_val_loss, val_loss)

        # Report the per-epoch value so the pruner can decide on early stopping.
        trial.report(val_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return lowest_val_loss


In [62]:
# Hyperparameter search for the decoder objective defined in this section.
# The study is named after the decoder so it cannot be confused with the
# regressor study, which uses "generic_spatiotemporal_regressor".
study = optuna.create_study(direction="minimize", study_name="generic_spatiotemporal_decoder")
study.optimize(objective, n_trials=50)

# Show the best result
print("🔍 Best trial:")
for k, v in study.best_trial.params.items():
    print(f"{k}: {v}")


[I 2025-05-06 08:01:05,662] A new study created in memory with name: generic_spatiotemporal_regressor
[I 2025-05-06 08:01:05,818] Trial 0 finished with value: 1.4693652391433716 and parameters: {'embedding_dim': 128, 'num_layers': 1, 'nhead': 8, 'dim_ff_factor': 2, 'use_temporal_channel': True, 'batch_size': 32, 'lr': 0.00222884028328163, 'weight_decay': 8.62875912832344e-05, 'dropout': 0.22836201120322436, 'optimizer': 'Adam'}. Best is trial 0 with value: 1.4693652391433716.
[I 2025-05-06 08:01:05,890] Trial 1 finished with value: 1.630523920059204 and parameters: {'embedding_dim': 128, 'num_layers': 4, 'nhead': 8, 'dim_ff_factor': 3, 'use_temporal_channel': True, 'batch_size': 8, 'lr': 0.0020997222473648023, 'weight_decay': 5.885668181198808e-05, 'dropout': 0.22862700679317605, 'optimizer': 'Adam'}. Best is trial 0 with value: 1.4693652391433716.
[I 2025-05-06 08:01:06,135] Trial 2 finished with value: 0.1923268735408783 and parameters: {'embedding_dim': 512, 'num_layers': 5, 'nhead'

🔍 Best trial:
embedding_dim: 512
num_layers: 4
nhead: 4
dim_ff_factor: 4
use_temporal_channel: True
batch_size: 8
lr: 0.009621089712657297
weight_decay: 0.0020484734625875168
dropout: 0.1952609801696341
optimizer: Adam


In [63]:

# Show the best result, indenting each hyperparameter under the header.
print("🎯 Best trial:")
for param_name, param_value in study.best_trial.params.items():
    print(f"  {param_name}: {param_value}")

🎯 Best trial:
  embedding_dim: 512
  num_layers: 4
  nhead: 4
  dim_ff_factor: 4
  use_temporal_channel: True
  batch_size: 8
  lr: 0.009621089712657297
  weight_decay: 0.0020484734625875168
  dropout: 0.1952609801696341
  optimizer: Adam


In [64]:
import optuna.visualization as vis

# Render the optimisation history first, then the parameter importances.
for make_figure in (vis.plot_optimization_history, vis.plot_param_importances):
    make_figure(study).show()


## Regressor

In [58]:
# Load the data a single time, outside `objective`, so every trial reuses it.
shared_load_args = dict(base_path=dir_path, time_steps_output=sequence_length)
dataset_train = load_trimmed_dataset(dataset_type='train', max_samples=n_train, **shared_load_args)
dataset_val = load_trimmed_dataset(dataset_type='val', max_samples=n_val, **shared_load_args)

# Convert each split into an (x, y) pair kept on the CPU.
x_train, y_train = prepare_data_for_convlstm(dataset_train, device='cpu')
x_val, y_val = prepare_data_for_convlstm(dataset_val, device='cpu')

# Dataset objects consumed by the DataLoaders built inside `objective`.
train_dataset = TemporalRegressionDataset(x_train, y_train)
val_dataset = TemporalRegressionDataset(x_val, y_val)


✅ Cargando dataset train desde: c:\Users\ismael.gallo\Desktop\ia_thermal\ismaelgallo\datasets\PCB_transient_dataset_train.pth



You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.



✅ Cargando dataset val desde: c:\Users\ismael.gallo\Desktop\ia_thermal\ismaelgallo\datasets\PCB_transient_dataset_val.pth


In [59]:
def objective(trial):
    """Optuna objective for the ``GenericSpatioTemporalRegressor`` search.

    Samples structural and training hyperparameters, swaps the model's
    ``temporal_decoder`` for a variant with the sampled feed-forward width
    and dropout, trains for up to ``epochs`` epochs and reports the
    validation loss after each epoch so the study's pruner can stop early.

    Args:
        trial: Optuna trial used to sample values and report progress.

    Returns:
        The lowest validation loss observed during the trial.

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch.
    """
    # Search space
    embedding_dim = trial.suggest_categorical("embedding_dim", [64, 128, 256])
    num_layers = trial.suggest_int("num_layers", 1, 4)
    nhead = trial.suggest_categorical("nhead", [1, 2, 4])
    ff_multiplier = trial.suggest_int("dim_ff_factor", 2, 6)
    dropout = trial.suggest_float("dropout", 0.0, 0.3)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RMSprop"])

    # DataLoaders over the datasets prepared once at module level
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Base model
    model = GenericSpatioTemporalRegressor(
        embedding_dim=embedding_dim,
        num_layers=num_layers,
        nhead=nhead,
        in_channels=3,
    ).to(device)

    class CustomTransformerDecoder(TransformerDecoder):
        """``TransformerDecoder`` whose ``transformer`` attribute is rebuilt
        with an explicit feed-forward width and dropout."""

        def __init__(self, embedding_dim, num_layers, nhead, dim_ff, dropout):
            super().__init__(embedding_dim, num_layers, nhead)
            # Overwrite the stack created by the parent constructor.
            self.transformer = nn.TransformerEncoder(
                nn.TransformerEncoderLayer(
                    d_model=embedding_dim,
                    nhead=nhead,
                    dim_feedforward=dim_ff,
                    dropout=dropout,
                    batch_first=True,
                ),
                num_layers=num_layers,
            )

    # Replace the decoder with the customised one (sampled dropout / FF size).
    # NOTE(review): the decoder width is embedding_dim * 2 while dim_ff is
    # scaled from embedding_dim - confirm this asymmetry is intended.
    model.temporal_decoder = CustomTransformerDecoder(
        embedding_dim=embedding_dim * 2,
        num_layers=num_layers,
        nhead=nhead,
        dim_ff=embedding_dim * ff_multiplier,
        dropout=dropout,
    ).to(device)

    # Optimizer, scheduler and loss. The optimizer is built after the swap,
    # so it sees the replacement decoder's parameters.
    optimizer_classes = {
        "Adam": torch.optim.Adam,
        "AdamW": torch.optim.AdamW,
        "RMSprop": torch.optim.RMSprop,
    }
    optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
    criterion = torch.nn.MSELoss()

    lowest_val_loss = float("inf")
    for epoch in range(epochs):
        # The training loss returned here is not used by the search.
        train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, epochs)
        val_loss = evaluate(model, val_loader, criterion, device)
        scheduler.step(val_loss)
        lowest_val_loss = min(lowest_val_loss, val_loss)
        # Report the per-epoch value so the pruner can decide on early stopping.
        trial.report(val_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return lowest_val_loss


In [60]:
# In-memory study that minimises the value returned by `objective`
# (its best validation loss) over 50 trials.
study = optuna.create_study(study_name="generic_spatiotemporal_regressor", direction="minimize")
study.optimize(func=objective, n_trials=50)

# Show the best result, one hyperparameter per line.
print("🔍 Best trial:")
for param_name, param_value in study.best_trial.params.items():
    print(f"{param_name}: {param_value}")


[I 2025-05-06 08:00:38,873] A new study created in memory with name: generic_spatiotemporal_regressor
[I 2025-05-06 08:00:39,116] Trial 0 finished with value: 0.6665566563606262 and parameters: {'embedding_dim': 128, 'num_layers': 4, 'nhead': 4, 'dim_ff_factor': 3, 'dropout': 0.1615458538372023, 'batch_size': 32, 'lr': 0.008281828251217632, 'weight_decay': 1.4218836516651927e-05, 'optimizer': 'Adam'}. Best is trial 0 with value: 0.6665566563606262.
[I 2025-05-06 08:00:39,174] Trial 1 finished with value: 1.7096110582351685 and parameters: {'embedding_dim': 64, 'num_layers': 1, 'nhead': 2, 'dim_ff_factor': 6, 'dropout': 0.2047691127999072, 'batch_size': 8, 'lr': 0.00042873380548295154, 'weight_decay': 0.0015415547820774465, 'optimizer': 'Adam'}. Best is trial 0 with value: 0.6665566563606262.
[I 2025-05-06 08:00:39,240] Trial 2 finished with value: 1.372464656829834 and parameters: {'embedding_dim': 64, 'num_layers': 4, 'nhead': 2, 'dim_ff_factor': 2, 'dropout': 0.008368397434795038, 'b

🔍 Best trial:
embedding_dim: 128
num_layers: 3
nhead: 4
dim_ff_factor: 4
dropout: 0.14970233804347982
batch_size: 32
lr: 0.002579550505334217
weight_decay: 5.687430773390812e-06
optimizer: RMSprop
