<a href="https://colab.research.google.com/github/daycardoso/PredictCost/blob/main/regress%C3%A3o_linear%26MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import cudf
import cupy as cp
from cuml.linear_model import LinearRegression as cuLR
from sklearn.linear_model import LinearRegression as skLR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# =====================
# 1. Carregamento dos Dados
# =====================
try:
    df_cudf = cudf.read_csv('/content/drive/MyDrive/Mestrado/Trabalho ML Mestrado 01-2025/df_unificado.csv')
except:
    df_pd = pd.read_csv('/content/drive/MyDrive/Mestrado/Trabalho ML Mestrado 01-2025/df_unificado.csv')
    df_cudf = cudf.from_pandas(df_pd)

# Conversão para float32
for col in df_cudf.select_dtypes(include=['float64']).columns:
    df_cudf[col] = df_cudf[col].astype(cp.float32)

X = df_cudf.iloc[:, :-1]
y = df_cudf.iloc[:, -1]

# =====================
# 2. Divisão treino/teste
# =====================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# =====================
# 3. Treinamento do modelo
# =====================
try:
    linear_regression_model = cuLR()
    linear_regression_model.fit(X_train, y_train)
    y_pred = linear_regression_model.predict(X_test)
except Exception as e:
    print("cuML falhou, usando scikit-learn:", e)
    X_train_pd = X_train.to_pandas() if hasattr(X_train, "to_pandas") else X_train
    y_train_pd = y_train.to_pandas() if hasattr(y_train, "to_pandas") else y_train
    X_test_pd = X_test.to_pandas() if hasattr(X_test, "to_pandas") else X_test

    linear_regression_model = skLR()
    linear_regression_model.fit(X_train_pd, y_train_pd)
    y_pred = linear_regression_model.predict(X_test_pd)

# =====================
# 4. Avaliação
# =====================
y_test_cpu = y_test.to_pandas() if hasattr(y_test, "to_pandas") else y_test
y_pred_cpu = y_pred.to_pandas() if hasattr(y_pred, "to_pandas") else y_pred

print("R²:", r2_score(y_test_cpu, y_pred_cpu))
print("MSE:", mean_squared_error(y_test_cpu, y_pred_cpu))
print("MAE:", mean_absolute_error(y_test_cpu, y_pred_cpu))




R²: 0.712709550510541
MSE: 0.001094289967592238
MAE: 0.0277332444867178


In [3]:
import numpy as np
# Import garbage collector for memory cleanup
import gc

from sklearn.metrics import (
    mean_squared_error, r2_score, median_absolute_error,
    max_error, explained_variance_score
)
from sklearn.model_selection import KFold
from cuml.linear_model import LinearRegression as cuLR
from sklearn.linear_model import LinearRegression as skLR


def nested_cv_linear_evaluation(X, y, model_class, outer_folds=5, random_state=42):
    # Renamed the function to accept a model_class argument
    outer_cv = KFold(n_splits=outer_folds, shuffle=True, random_state=random_state)

    mse_list = []
    r2_list = []
    medae_list = []
    maxe_list = []
    evs_list = []

    for fold_idx, (train_idx, test_idx) in enumerate(outer_cv.split(X), 1):
        X_tr, X_te = X.iloc[train_idx], X.iloc[test_idx]
        y_tr, y_te = y.iloc[train_idx], y.iloc[test_idx]

        # Instantiate the model using the provided model_class
        model = model_class()
        model.fit(X_tr, y_tr)
        y_pred = model.predict(X_te)

        # Avaliação das métricas
        # Ensure the predictions and true values are on the CPU for sklearn metrics
        y_te_cpu = y_te.to_pandas() if hasattr(y_te, "to_pandas") else y_te
        y_pred_cpu = y_pred.to_pandas() if hasattr(y_pred, "to_pandas") else y_pred

        mse  = float(mean_squared_error(y_te_cpu, y_pred_cpu))
        r2   = float(r2_score(y_te_cpu, y_pred_cpu))
        med  = float(median_absolute_error(y_te_cpu, y_pred_cpu))
        maxe = float(max_error(y_te_cpu, y_pred_cpu))
        evs  = float(explained_variance_score(y_te_cpu, y_pred_cpu))


        # Armazenamento
        mse_list.append(mse)
        r2_list.append(r2)
        medae_list.append(med)
        maxe_list.append(maxe)
        evs_list.append(evs)

        print(
            f"[Linear] Fold {fold_idx}/{outer_folds} - "
            f"MSE: {mse:.5f} | R²: {r2:.4f} | MedAE: {med:.4f} | "
            f"MaxErr: {maxe:.4f} | EVS: {evs:.4f}"
        )

        # Limpeza
        del model, X_tr, X_te, y_tr, y_te, y_pred, y_te_cpu, y_pred_cpu # Added y_te_cpu and y_pred_cpu to cleanup
        gc.collect()
        cp.get_default_memory_pool().free_all_blocks()

    # Resultados médios
    metrics = {
        "MSE": np.mean(mse_list),
        "R2": np.mean(r2_list),
        "Median AE": np.mean(medae_list),
        "Max Error": np.mean(maxe_list),
        "Explained Variance": np.mean(evs_list)
    }

    print("\n[Métricas Médias - Regressão Linear]")
    for k, v in metrics.items():
        print(f"{k}: {v:.4f}")

    return metrics

In [4]:
# === Dados já carregados como X, y ===

# Choose which model class to use based on availability or preference
# You might want to add logic here to choose between cuLR and skLR
# based on whether cuML failed in the previous cell.
# For this example, let's assume cuML is available and works.
model_to_evaluate = cuLR

# Avaliação do modelo linear com Nested CV (sem tuning)
# Pass the model class to the function
linear_metrics = nested_cv_linear_evaluation(X, y, model_to_evaluate)
print(f"\nMSE médio Linear Regression (Nested CV): {linear_metrics['MSE']:.4f}")

# Definição do Random Forest com melhores hiperparâmetros encontrados antes
# rf_cls = lambda: cuRF(random_state=42, n_streams=1, **chosen_rf)

# Comparação estatística: Linear vs RF
# t_stat, p_val = five_two_cv_ttest_gpu(linear_regression_model, rf_cls, X_cudf, y_cudf)
# print(f"\n5×2-cv t-test (Linear vs RF): t = {t_stat:.4f}, p = {p_val:.4f}")



[Linear] Fold 1/5 - MSE: 0.00109 | R²: 0.7127 | MedAE: 0.0256 | MaxErr: 0.3347 | EVS: 0.7127




[Linear] Fold 2/5 - MSE: 0.00110 | R²: 0.7124 | MedAE: 0.0257 | MaxErr: 0.2718 | EVS: 0.7124




[Linear] Fold 3/5 - MSE: 0.00109 | R²: 0.7128 | MedAE: 0.0256 | MaxErr: 0.2082 | EVS: 0.7128




[Linear] Fold 4/5 - MSE: 0.00109 | R²: 0.7123 | MedAE: 0.0256 | MaxErr: 0.2180 | EVS: 0.7123




[Linear] Fold 5/5 - MSE: 0.00109 | R²: 0.7126 | MedAE: 0.0256 | MaxErr: 0.2560 | EVS: 0.7126

[Métricas Médias - Regressão Linear]
MSE: 0.0011
R2: 0.7126
Median AE: 0.0256
Max Error: 0.2577
Explained Variance: 0.7126

MSE médio Linear Regression (Nested CV): 0.0011


In [5]:
import time

def measure_inference_time(model, X_test, n_repeats=5):
    """
    Mede o tempo médio de inferência por amostra.
    Repete a medição 'n_repeats' vezes para reduzir ruído.
    """
    total_times = []
    for _ in range(n_repeats):
        start = time.perf_counter()
        y_pred = model.predict(X_test)
        end = time.perf_counter()
        total_times.append(end - start)

    avg_total_time = sum(total_times) / len(total_times)
    time_per_sample = avg_total_time / len(X_test)
    return time_per_sample


In [6]:
# Após modelo.fit(...)
X_test_eval = X_test.to_pandas() if hasattr(X_test, "to_pandas") else X_test
inference_time = measure_inference_time(linear_regression_model, X_test_eval)
print(f"Tempo médio de inferência por amostra (Linear): {inference_time * 1000:.9f} ms")


Tempo médio de inferência por amostra (Linear): 0.000047986 ms


## MLP simples
- https://arxiv.org/pdf/2209.02681 - funções de ativação ditas com bom desempenho neste artigo foram sine, hyperbolic tangents e swish.


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import cudf
import cupy as cp
import numpy as np



# Conversão para float32
for col in df_cudf.select_dtypes(include=['float64']).columns:
    df_cudf[col] = df_cudf[col].astype(cp.float32)

X = df_cudf.iloc[:, :-1].to_pandas().values.astype(np.float32)
y = df_cudf.iloc[:, -1].to_pandas().values.astype(np.float32).reshape(-1, 1)

# ====================
# 2. Divisão treino/teste
# ====================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tensores para GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train_tensor = torch.from_numpy(X_train).to(device)
y_train_tensor = torch.from_numpy(y_train).to(device)
X_test_tensor  = torch.from_numpy(X_test).to(device)
y_test_tensor  = torch.from_numpy(y_test).to(device)

# ====================
# 3. Definição do MLP
# ====================
class SmallMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.SiLU(),
            nn.Linear(32, 16),
            nn.Tanh(),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.model(x)

model = SmallMLP(X_train.shape[1]).to(device)

# ====================
# 4. Treinamento
# ====================
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()
n_epochs = 50
batch_size = 1024

for epoch in range(n_epochs):
    model.train()
    permutation = torch.randperm(X_train_tensor.size()[0])
    for i in range(0, X_train_tensor.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{n_epochs} - Loss: {loss.item():.6f}")

# ====================
# 5. Avaliação
# ====================
model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)
    y_pred = y_pred_tensor.cpu().numpy()
    y_true = y_test_tensor.cpu().numpy()

print("\n[Métricas - MLP]")
print("MSE:", mean_squared_error(y_true, y_pred))
print("R²:", r2_score(y_true, y_pred))

# ====================
# 6. Tempo de Inferência
# ====================
import time

with torch.no_grad():
    start = time.perf_counter()
    _ = model(X_test_tensor)
    torch.cuda.synchronize()  # garante sincronização GPU
    end = time.perf_counter()

inference_time = (end - start) / len(X_test_tensor)
print(f"Tempo médio de inferência por amostra: {inference_time * 1000:.6f} ms")


Epoch 1/50 - Loss: 0.000702
Epoch 2/50 - Loss: 0.000722
Epoch 3/50 - Loss: 0.000604
Epoch 4/50 - Loss: 0.000518
Epoch 5/50 - Loss: 0.000505
Epoch 6/50 - Loss: 0.000671
Epoch 7/50 - Loss: 0.000466
Epoch 8/50 - Loss: 0.000580
Epoch 9/50 - Loss: 0.000673
Epoch 10/50 - Loss: 0.000540
Epoch 11/50 - Loss: 0.000531
Epoch 12/50 - Loss: 0.000542
Epoch 13/50 - Loss: 0.000580
Epoch 14/50 - Loss: 0.000556
Epoch 15/50 - Loss: 0.000528
Epoch 16/50 - Loss: 0.000543
Epoch 17/50 - Loss: 0.000463
Epoch 18/50 - Loss: 0.000494
Epoch 19/50 - Loss: 0.000439
Epoch 20/50 - Loss: 0.000570
Epoch 21/50 - Loss: 0.000517
Epoch 22/50 - Loss: 0.000456
Epoch 23/50 - Loss: 0.000540
Epoch 24/50 - Loss: 0.000529
Epoch 25/50 - Loss: 0.000439
Epoch 26/50 - Loss: 0.000495
Epoch 27/50 - Loss: 0.000549
Epoch 28/50 - Loss: 0.000512
Epoch 29/50 - Loss: 0.000493
Epoch 30/50 - Loss: 0.000622
Epoch 31/50 - Loss: 0.000577
Epoch 32/50 - Loss: 0.000484
Epoch 33/50 - Loss: 0.000471
Epoch 34/50 - Loss: 0.000511
Epoch 35/50 - Loss: 0.0

In [8]:
# 0. Instalação das dependências no Colab
!pip install --quiet --upgrade pip
!pip install --quiet "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
!pip install --quiet flax optax scikit-learn cudf-cuda11x  # ajuste o cudf-cudaXX para sua versão de CUDA

# 1. Imports e preparação dos dados
import cudf
import numpy as np
import jax
import jax.numpy as jnp
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# — supondo que você já tenha df_cudf carregado —
# converte float64 → float32 em cuDF
for col in df_cudf.select_dtypes(include=['float64']).columns:
    df_cudf[col] = df_cudf[col].astype('float32')

# extrai para NumPy e depois para JAX
X = df_cudf.iloc[:, :-1].to_pandas().values.astype(np.float32)
y = df_cudf.iloc[:, -1].to_pandas().values.astype(np.float32).reshape(-1, 1)

X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# converte para arrays JAX (float32)
X_train = jnp.array(X_train_np)
y_train = jnp.array(y_train_np)
X_test  = jnp.array(X_test_np)
y_test  = jnp.array(y_test_np)

# 2. Definição do modelo MLP com Flax
from flax import linen as nn
from flax.training import train_state
import optax
from typing import Sequence, Callable

class MLP(nn.Module):
    layers: Sequence[int]
    activations: Sequence[Callable[[jnp.ndarray], jnp.ndarray]]

    @nn.compact
    def __call__(self, x):
        for size, act in zip(self.layers, self.activations):
            x = nn.Dense(size)(x)
            x = act(x)
        x = nn.Dense(1)(x)
        return x

# hiper-parâmetros
layer_sizes   = [64, 32, 16]
activations   = [nn.relu, nn.silu, nn.tanh]
learning_rate = 1e-3
n_epochs      = 50
batch_size    = 1024
key           = jax.random.PRNGKey(0)

# função de loss e métrica
def mse_loss(params, batch):
    Xb, yb = batch
    preds = model.apply(params, Xb)
    return jnp.mean((preds - yb) ** 2)

# 3. Setup do estado de treinamento
model = MLP(layers=layer_sizes, activations=activations)
# inicializa parâmetros (shape dummy)
params = model.init(key, jnp.ones([1, X_train.shape[1]]))
tx = optax.adam(learning_rate)
state = train_state.TrainState.create(apply_fn=model.apply, params=params, tx=tx)

# 4. Função de update compilada com jit
@jax.jit
def train_step(state, batch):
    grads = jax.grad(mse_loss)(state.params, batch)
    return state.apply_gradients(grads=grads)

# 5. Loop de treinamento
num_train = X_train.shape[0]
steps_per_epoch = num_train // batch_size

for epoch in range(1, n_epochs + 1):
    # embaralha cada época
    perm_key, key = jax.random.split(key)
    perm = jax.random.permutation(perm_key, num_train)
    X_shuffled = X_train[perm]
    y_shuffled = y_train[perm]

    for i in range(steps_per_epoch):
        start = i * batch_size
        end   = start + batch_size
        batch = (X_shuffled[start:end], y_shuffled[start:end])
        state = train_step(state, batch)

    # calcula loss ao final da época (sobre último batch)
    epoch_loss = mse_loss(state.params, batch)
    print(f"Epoch {epoch:02d}/{n_epochs} — Loss: {epoch_loss:.6f}")

# 6. Avaliação no conjunto de teste
@jax.jit
def predict(params, X):
    return model.apply(params, X)

y_pred = predict(state.params, X_test)
# garante sincronia para trazer ao host
y_pred_np = np.array(y_pred)

print("\n[Métricas — MLP com JAX]")
print("MSE:", mean_squared_error(y_test_np, y_pred_np))
print("R²:",   r2_score(y_test_np, y_pred_np))

# 7. Tempo médio de inferência por amostra
import time
# warm-up
_ = predict(state.params, X_test).block_until_ready()
start = time.perf_counter()
_ = predict(state.params, X_test).block_until_ready()
end   = time.perf_counter()
avg_ms = (end - start) / X_test.shape[0] * 1000
print(f"Tempo médio de inferência por amostra: {avg_ms:.6f} ms")


Epoch 1/50 - Loss: 0.000756
Epoch 2/50 - Loss: 0.001373
Epoch 3/50 - Loss: 0.000691
Epoch 4/50 - Loss: 0.000835
Epoch 5/50 - Loss: 0.000715
Epoch 6/50 - Loss: 0.001023
Epoch 7/50 - Loss: 0.000736
Epoch 8/50 - Loss: 0.000701
Epoch 9/50 - Loss: 0.000548
Epoch 10/50 - Loss: 0.000581
Epoch 11/50 - Loss: 0.000767
Epoch 12/50 - Loss: 0.000593
Epoch 13/50 - Loss: 0.000540
Epoch 14/50 - Loss: 0.000570
Epoch 15/50 - Loss: 0.000639
Epoch 16/50 - Loss: 0.000515
Epoch 17/50 - Loss: 0.000624
Epoch 18/50 - Loss: 0.000558
Epoch 19/50 - Loss: 0.000654
Epoch 20/50 - Loss: 0.000571
Epoch 21/50 - Loss: 0.000518
Epoch 22/50 - Loss: 0.000669
Epoch 23/50 - Loss: 0.000450
Epoch 24/50 - Loss: 0.000544
Epoch 25/50 - Loss: 0.000598
Epoch 26/50 - Loss: 0.000524
Epoch 27/50 - Loss: 0.000685
Epoch 28/50 - Loss: 0.000508
Epoch 29/50 - Loss: 0.000558
Epoch 30/50 - Loss: 0.000664
Epoch 31/50 - Loss: 0.000537
Epoch 32/50 - Loss: 0.000580
Epoch 33/50 - Loss: 0.000625
Epoch 34/50 - Loss: 0.000666
Epoch 35/50 - Loss: 0.0

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error, r2_score # <<< MUDANÇA AQUI: import r2_score
import numpy as np
import time
import itertools

# ==============================================================================
# 1. SETUP E PREPARAÇÃO
# ==============================================================================

# --- ESCOLHA A MÉTRICA DE OTIMIZAÇÃO --- # <<< MUDANÇA AQUI
# Escolha 'MSE' para minimizar o erro ou 'R2' para maximizar o coeficiente de determinação.
OPTIMIZATION_METRIC = 'MSE' # Pode ser 'MSE' ou 'R2'

# Garantir que a GPU está sendo utilizada
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == 'cuda':
    print(f"GPU detectada: {torch.cuda.get_device_name(0)}")
    print(f"Otimizando para o {'menor' if OPTIMIZATION_METRIC == 'MSE' else 'maior'} {OPTIMIZATION_METRIC}.")
else:
    print("Nenhuma GPU detectada, utilizando CPU.")

# --- Funções de Ativação Customizadas ---
class Gaussian(nn.Module):
    def forward(self, x):
        return torch.exp(-x**2)

class Sine(nn.Module):
    def forward(self, x):
        return torch.sin(x)

activation_functions = {
    'ReLU': nn.ReLU, 'SiLU': nn.SiLU, 'Tanh': nn.Tanh,
    'LeakyReLU': nn.LeakyReLU, 'Gaussian': Gaussian, 'Sine': Sine, 'Swish': nn.SiLU
}

# --- Geração de Dados de Exemplo ---
X, y = make_regression(n_samples=5000, n_features=20, noise=25, random_state=42)
y = y.reshape(-1, 1)

# Normalização dos dados
scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y)

# Conversão para tensores PyTorch
X = torch.from_numpy(X.astype(np.float32))
y = torch.from_numpy(y.astype(np.float32))

# ==============================================================================
# 2. DEFINIÇÃO DO ESPAÇO DE HIPERPARÂMETROS
# ==============================================================================

# Aqui você define todas as combinações que deseja testar no laço interno.

# Baseline (como um dos candidatos)
baseline_config = {
    'layers': [64, 32, 16],
    'activations': ['ReLU', 'SiLU', 'Tanh'],
    'optimizer': 'Adam',
    'lr': 0.001
}

# Grade de busca (Grid Search)
param_grid = {
    'layer_configs': [
        [64, 32, 16],          # Baseline
        [128, 64],             # Menos camadas, mais neurônios
        [32, 32, 32, 32],      # Mais camadas, menos neurônios
        [100, 50, 25]          # Outra variação
    ],
    'activation_sequences': [
        ['ReLU', 'SiLU', 'Tanh'], # Baseline
        ['LeakyReLU', 'LeakyReLU'],
        ['Gaussian', 'Sine', 'Tanh'],
        ['SiLU', 'SiLU', 'SiLU', 'SiLU'],
        ['Sine', 'Sine']
    ],
    'optimizers': ['Adam', 'AdamW', 'SGD'],
    'learning_rates': [0.01, 0.001]
}

# Gerar todas as configurações possíveis para a busca
# Nota: Esta lógica assume que o número de ativações corresponde ao número de camadas.
# Vamos criar uma lista de configurações válidas.
search_space = [baseline_config] # Começa com a baseline

# Adiciona outras combinações
# Combinação 1: Duas camadas
for act_seq in [['LeakyReLU', 'LeakyReLU'], ['Sine', 'Sine']]:
    for opt in param_grid['optimizers']:
        for lr in param_grid['learning_rates']:
            search_space.append({
                'layers': [128, 64],
                'activations': act_seq,
                'optimizer': opt,
                'lr': lr
            })

# Combinação 2: Quatro camadas
for opt in param_grid['optimizers']:
    for lr in param_grid['learning_rates']:
        search_space.append({
            'layers': [32, 32, 32, 32],
            'activations': ['SiLU', 'SiLU', 'SiLU', 'SiLU'],
            'optimizer': opt,
            'lr': lr
        })

print(f"Total de {len(search_space)} combinações de hiperparâmetros para testar em cada fold interno.")

# ==============================================================================
# 3. CONSTRUTOR DE MLP DINÂMICO (Sem alterações)
# ==============================================================================
def create_dynamic_mlp(input_dim, output_dim, config):
    layers = []
    current_dim = input_dim
    for i, n_neurons in enumerate(config['layers']):
        layers.append(nn.Linear(current_dim, n_neurons))
        activation_name = config['activations'][i]
        layers.append(activation_functions[activation_name]())
        current_dim = n_neurons
    layers.append(nn.Linear(current_dim, output_dim))
    model = nn.Sequential(*layers)
    return model.to(device)

# ==============================================================================
# 4. FUNÇÃO DE TREINO E AVALIAÇÃO (MODIFICADA)
# ==============================================================================

def train_and_evaluate(config, X_train, y_train, X_val, y_val, n_epochs=50, batch_size=256):
    """
    Cria, treina e avalia um modelo.
    Retorna um dicionário com MSE e R2. # <<< MUDANÇA AQUI
    """
    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1]

    model = create_dynamic_mlp(input_dim, output_dim, config)

    if config['optimizer'] == 'Adam': optimizer = optim.Adam(model.parameters(), lr=config['lr'])
    elif config['optimizer'] == 'AdamW': optimizer = optim.AdamW(model.parameters(), lr=config['lr'])
    elif config['optimizer'] == 'SGD': optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=0.9)

    loss_fn = nn.MSELoss()

    for epoch in range(n_epochs):
        model.train()
        permutation = torch.randperm(X_train.size(0))
        for i in range(0, X_train.size(0), batch_size):
            indices = permutation[i:i+batch_size]
            batch_x, batch_y = X_train[indices], y_train[indices]
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = loss_fn(outputs, batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        y_pred = model(X_val)

        # <<< MUDANÇA AQUI: Calcular MSE e R2
        y_val_np = y_val.cpu().numpy()
        y_pred_np = y_pred.cpu().numpy()

        mse = mean_squared_error(y_val_np, y_pred_np)
        r2 = r2_score(y_val_np, y_pred_np)

    return {'mse': mse, 'r2': r2}

# ==============================================================================
# 5. IMPLEMENTAÇÃO DO NESTED CROSS-VALIDATION (LÓGICA CORRIGIDA)
# ==============================================================================

N_OUTER_FOLDS = 5
N_INNER_FOLDS = 3

outer_kf = KFold(n_splits=N_OUTER_FOLDS, shuffle=True, random_state=42)
inner_kf = KFold(n_splits=N_INNER_FOLDS, shuffle=True, random_state=42)

outer_loop_scores = []
best_configs_per_fold = []

print("\nIniciando Nested Cross-Validation...")
start_time = time.time()

for i_outer, (outer_train_idx, outer_test_idx) in enumerate(outer_kf.split(X, y)):
    print(f"\n{'='*20} FOLD EXTERNO {i_outer+1}/{N_OUTER_FOLDS} {'='*20}")

    X_outer_train, y_outer_train = X[outer_train_idx], y[outer_train_idx]
    X_outer_test, y_outer_test = X[outer_test_idx], y[outer_test_idx]

    # <<< MUDANÇA AQUI: Inicialização da pontuação baseada na métrica escolhida
    if OPTIMIZATION_METRIC == 'MSE':
        best_inner_score = float('inf')  # Queremos o menor MSE
    else: # 'R2'
        best_inner_score = float('-inf') # Queremos o maior R2
    best_hyperparams = None

    for i_config, config in enumerate(search_space):
        print(f"  Testando config {i_config+1}/{len(search_space)}...", end='\r')

        inner_loop_scores = []
        for inner_train_idx, inner_val_idx in inner_kf.split(X_outer_train, y_outer_train):
            X_inner_train, y_inner_train = X_outer_train[inner_train_idx], y_outer_train[inner_train_idx]
            X_inner_val, y_inner_val = X_outer_train[inner_val_idx], y_outer_train[inner_val_idx]

            scores = train_and_evaluate(
                config,
                X_inner_train.to(device), y_inner_train.to(device),
                X_inner_val.to(device), y_inner_val.to(device)
            )
            # <<< MUDANÇA AQUI: Adiciona a pontuação da métrica relevante
            inner_loop_scores.append(scores[OPTIMIZATION_METRIC.lower()])

        avg_inner_score = np.mean(inner_loop_scores)

        # <<< MUDANÇA AQUI: Lógica de comparação correta
        is_better = False
        if OPTIMIZATION_METRIC == 'MSE':
            if avg_inner_score < best_inner_score:
                is_better = True
        else: # 'R2'
            if avg_inner_score > best_inner_score:
                is_better = True

        if is_better:
            best_inner_score = avg_inner_score
            best_hyperparams = config

    print(f"\n  Melhor config encontrada no laço interno: {best_hyperparams}")
    print(f"  Melhor score ({OPTIMIZATION_METRIC}) médio interno: {best_inner_score:.6f}")
    best_configs_per_fold.append(best_hyperparams)

    # --- Treinamento Final e Avaliação no Fold Externo ---
    print("  Treinando modelo final do fold com os melhores hiperparâmetros...")
    final_model = create_dynamic_mlp(X.shape[1], y.shape[1], best_hyperparams)
    # ... (código de treinamento final é o mesmo)
    optimizer = optim.Adam(final_model.parameters(), lr=best_hyperparams['lr'])
    loss_fn = nn.MSELoss()
    n_epochs_final = 100
    batch_size = 256
    for epoch in range(n_epochs_final):
        final_model.train()
        permutation = torch.randperm(X_outer_train.size(0))
        for i in range(0, X_outer_train.size(0), batch_size):
            indices = permutation[i:i+batch_size]
            # Mover para GPU dentro do laço
            batch_x, batch_y = X_outer_train[indices].to(device), y_outer_train[indices].to(device)
            optimizer.zero_grad()
            outputs = final_model(batch_x)
            loss = loss_fn(outputs, batch_y)
            loss.backward()
            optimizer.step()

    final_model.eval()
    with torch.no_grad():
        y_pred_final = final_model(X_outer_test.to(device))
        y_pred_final_rescaled = scaler_y.inverse_transform(y_pred_final.cpu().numpy())
        y_outer_test_rescaled = scaler_y.inverse_transform(y_outer_test.cpu().numpy())

        final_mse = mean_squared_error(y_outer_test_rescaled, y_pred_final_rescaled)
        final_r2 = r2_score(y_outer_test_rescaled, y_pred_final_rescaled)
        outer_loop_scores.append({'mse': final_mse, 'r2': final_r2}) # <<< MUDANÇA AQUI
        print(f"  Score no fold de teste externo -> MSE: {final_mse:.4f} | R²: {final_r2:.4f}")

# ==============================================================================
# 6. RESULTADOS FINAIS (MODIFICADO)
# ==============================================================================

total_time = time.time() - start_time
print(f"\n\n{'='*25} RESULTADO FINAL {'='*25}")
print(f"Otimização realizada com base na métrica: {OPTIMIZATION_METRIC}")
print(f"Tempo total do processo: {total_time / 60:.2f} minutos")

# <<< MUDANÇA AQUI: Relatório final mais detalhado
print("\nScores Finais (MSE e R²) em cada Fold de Teste Externo:")
for i, scores in enumerate(outer_loop_scores):
    print(f"  Fold {i+1}: MSE = {scores['mse']:.4f}, R² = {scores['r2']:.4f} (Melhor config: {best_configs_per_fold[i]})")

# Extrair listas de scores para cálculo da média e desvio padrão
final_mses = [s['mse'] for s in outer_loop_scores]
final_r2s = [s['r2'] for s in outer_loop_scores]

mean_mse = np.mean(final_mses)
std_mse = np.std(final_mses)
mean_r2 = np.mean(final_r2s)
std_r2 = np.std(final_r2s)

print(f"\nEstimativa de Performance Generalizada:")
print(f"  MSE: {mean_mse:.4f} ± {std_mse:.4f}")
print(f"  R² : {mean_r2:.4f} ± {std_r2:.4f}")

GPU detectada: NVIDIA L4
Otimizando para o menor MSE.
Total de 19 combinações de hiperparâmetros para testar em cada fold interno.

Iniciando Nested Cross-Validation...

