# Otimização de Hiperparâmetros e Validação Cruzada

Este notebook demonstra técnicas avançadas de otimização:

- **Grid Search**: Busca exaustiva em grade de hiperparâmetros
- **Random Search**: Busca aleatória mais eficiente
- **Bayesian Optimization**: Otimização inteligente
- **Time Series Cross-Validation**: Validação temporal apropriada
- **Análise de Learning Curves**: Diagnóstico de overfitting/underfitting
- **Feature Engineering Automática**: Criação de features otimizadas

In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath('../../src'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit, ParameterGrid
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
# Silence every warning category for the rest of the session. This keeps the
# notebook output compact, but it also hides deprecation warnings and any
# warnings emitted while models are being fitted.
warnings.filterwarnings('ignore')

# Global plot styling: a matplotlib style sheet plus a seaborn colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
%matplotlib inline

## 1. Preparação dos Dados

In [None]:
def gerar_dados_realistas(n_periodos=180, seed=42):
    """Generate a synthetic month-end macroeconomic dataset.

    The series are built in a fixed order (GDP, policy rate, inflation,
    unemployment, credit, consumer confidence, target index), each with its
    own Gaussian noise, so a given ``seed`` always yields the same frame.

    Args:
        n_periodos: Number of month-end observations, starting at 2010-01-31.
        seed: Seed for the private random generator used for all noise terms.

    Returns:
        pandas.DataFrame indexed by ``data`` (month-end timestamps) with the
        columns ``pib_real``, ``taxa_selic``, ``ipca``, ``taxa_desemprego``,
        ``credito_imobiliario``, ``confianca_consumidor`` and ``IDCI_VIX``.
    """
    # A private legacy generator produces the same stream as
    # np.random.seed(seed) followed by module-level draws, but does not
    # reseed the global NumPy RNG as a side effect of building the data.
    rng = np.random.RandomState(seed)

    # An explicit MonthEnd offset gives the same month-end index as the 'M'
    # alias without depending on an alias that newer pandas deprecates.
    datas = pd.date_range(start='2010-01-01', periods=n_periodos,
                          freq=pd.offsets.MonthEnd())
    t = np.arange(n_periodos)

    # GDP: linear trend + 48-month cycle + noise.
    pib = 2000 + 15*t + 200*np.sin(2*np.pi*t/48) + rng.normal(0, 50, n_periodos)

    # Policy rate: clipped random walk with an extra +/-2 jump every 30 steps.
    selic = np.zeros(n_periodos)
    selic[0] = 10.0
    for i in range(1, n_periodos):
        shock = rng.normal(0, 0.3)
        if i % 30 == 0:
            shock += rng.choice([-2, 2])
        selic[i] = np.clip(selic[i-1] + shock, 2.0, 20.0)

    # Inflation: clipped AR(1) process.
    ipca = np.zeros(n_periodos)
    ipca[0] = 0.5
    for i in range(1, n_periodos):
        ipca[i] = 0.6*ipca[i-1] + 0.3 + rng.normal(0, 0.2)
        ipca[i] = np.clip(ipca[i], -1.0, 2.5)

    # Unemployment moves against standardised GDP.
    pib_norm = (pib - pib.mean()) / pib.std()
    desemprego = 10.0 - 2*pib_norm + rng.normal(0, 0.5, n_periodos)
    desemprego = np.clip(desemprego, 4.0, 16.0)

    # Centred series and their spreads, reused by the formulas below.
    selic_centrada = selic - selic.mean()
    selic_std = selic.std()
    desemprego_centrado = desemprego - desemprego.mean()
    desemprego_std = desemprego.std()

    # Credit: trend, pro-cyclical with GDP, counter-cyclical with the rate.
    credito = 50000 + 400*t + 5000*pib_norm - 2000*selic_centrada/selic_std
    credito += rng.normal(0, 2000, n_periodos)

    # Consumer confidence: rises with GDP, falls with unemployment.
    confianca = 100 + 15*pib_norm - 10*desemprego_centrado/desemprego_std
    confianca += rng.normal(0, 5, n_periodos)

    # Target index: weighted blend of the drivers, rescaled and clipped to [0, 10].
    idci_raw = (0.3*pib_norm - 0.2*selic_centrada/selic_std +
                0.2*confianca/20 - 0.15*desemprego_centrado/desemprego_std +
                0.15*(credito - credito.mean())/credito.std())
    idci_vix = 5 + 2*idci_raw + rng.normal(0, 0.3, n_periodos)
    idci_vix = np.clip(idci_vix, 0, 10)

    df = pd.DataFrame({
        'data': datas,
        'pib_real': pib,
        'taxa_selic': selic,
        'ipca': ipca,
        'taxa_desemprego': desemprego,
        'credito_imobiliario': credito,
        'confianca_consumidor': confianca,
        'IDCI_VIX': idci_vix
    })

    df.set_index('data', inplace=True)
    return df

# Build the 180-month dataset and preview its first rows
df = gerar_dados_realistas(180)
print(f"Dataset: {df.shape[0]} observa√ß√µes")
df.head()

In [None]:
# Assemble the feature matrix and the target vector
feature_cols = [
    'pib_real',
    'taxa_selic',
    'ipca',
    'taxa_desemprego',
    'credito_imobiliario',
    'confianca_consumidor',
]

X = df.loc[:, feature_cols].to_numpy()
y = df['IDCI_VIX'].to_numpy()

# Chronological hold-out: first 80% of the rows for training, the rest for testing
train_size = int(0.8 * len(df))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

print(f"Treino: {len(X_train)} | Teste: {len(X_test)}")

## 2. Time Series Cross-Validation

Validação cruzada apropriada para séries temporais:

In [None]:
# Time-series splitter shared by every search below
n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

# Draw one horizontal bar pair (train / validation) per split
fig, ax = plt.subplots(figsize=(14, 6))

for row, (fold_train, fold_val) in enumerate(tscv.split(X_train)):
    # Only the first row contributes legend entries
    is_first_row = row == 0
    segments = (
        (fold_train, 'blue', 'Treino'),
        (fold_val, 'orange', 'Valida√ß√£o'),
    )
    for indices, colour, legend_name in segments:
        ax.barh(row, len(indices), left=indices[0], height=0.4,
                color=colour, alpha=0.6,
                label=legend_name if is_first_row else '')

    # Size annotations placed just right of each bar
    ax.text(fold_train[-1] + 2, row, f'Train: {len(fold_train)}',
            va='center', fontsize=9)
    ax.text(fold_val[-1] + 2, row, f'Val: {len(fold_val)}',
            va='center', fontsize=9, color='orange')

ax.set_yticks(range(n_splits))
ax.set_yticklabels([f'Split {k}' for k in range(1, n_splits + 1)])
ax.set_xlabel('√çndice de Observa√ß√£o', fontsize=12)
ax.set_title('Time Series Cross-Validation - Splits', fontsize=14, fontweight='bold')
ax.legend(loc='upper left', fontsize=11)
ax.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

print(f"\nConfigura√ß√£o: {n_splits} splits para valida√ß√£o cruzada temporal")

## 3. Grid Search - ARIMA

Busca exaustiva dos melhores parâmetros ARIMA:

In [None]:
from forecasting.arima import ARIMAForecaster
from evaluation.metrics import calculate_metrics

# Candidate values for each ARIMA order component (p, d, q)
arima_param_grid = dict(
    p=[1, 2, 3],
    d=[0, 1],
    q=[1, 2, 3],
)

# Grid search with time-series cross-validation
def grid_search_arima(X, y, param_grid, cv):
    """Score every ARIMA (p, d, q) combination with time-series CV.

    Args:
        X: Array handed to ``cv.split`` only to produce fold indices; the
            forecaster itself is fitted on ``y`` alone.
        y: Target series, indexable by the index arrays yielded by ``cv``.
        param_grid: Mapping with the keys ``'p'``, ``'d'`` and ``'q'``, each
            an iterable of candidate values.
        cv: Splitter exposing ``split(X)`` that yields
            ``(train_idx, val_idx)`` pairs.

    Returns:
        pandas.DataFrame with one row per combination and the columns
        ``order``, ``p``, ``d``, ``q``, ``mean_rmse``, ``std_rmse`` and
        ``cv_scores`` (the per-fold RMSE list). A fold that fails is scored
        ``np.inf``, which makes the mean of that configuration infinite.
    """
    from itertools import product

    results = []

    # Every (p, d, q) combination, in p-major order
    combinations = list(product(param_grid['p'], param_grid['d'], param_grid['q']))

    print(f"Testando {len(combinations)} combina√ß√µes de par√¢metros...\n")

    # The folds depend only on X, so split once instead of once per candidate.
    folds = list(cv.split(X))

    for p, d, q in combinations:
        order = (p, d, q)
        cv_scores = []

        for train_idx, val_idx in folds:
            y_train_cv = y[train_idx]
            y_val_cv = y[val_idx]

            try:
                model = ARIMAForecaster(order=order)
                model.fit(y_train_cv)
                y_pred = model.forecast(len(y_val_cv))
                rmse = np.sqrt(mean_squared_error(y_val_cv, y_pred))
            except Exception:
                # Best effort: a configuration that cannot be fitted or scored
                # on this fold is penalised rather than aborting the search.
                # Catching Exception (not a bare except) lets Ctrl-C through.
                rmse = np.inf
            cv_scores.append(rmse)

        results.append({
            'order': order,
            'p': p, 'd': d, 'q': q,
            'mean_rmse': np.mean(cv_scores),
            'std_rmse': np.std(cv_scores),
            'cv_scores': cv_scores
        })

    return pd.DataFrame(results)

# Run the ARIMA grid search and rank the configurations by mean RMSE
print("Executando Grid Search para ARIMA...\n")
arima_results = grid_search_arima(
    X_train, y_train, arima_param_grid, tscv
).sort_values('mean_rmse')

print("\n" + 80 * "=")
print("Top 10 Melhores Configura√ß√µes ARIMA:")
print(80 * "=")
print(arima_results[['order', 'mean_rmse', 'std_rmse']].head(10))
print("\n" + 80 * "=")

# After sorting, the first row is the winner
best_arima_order = arima_results['order'].iloc[0]
print(f"\nüèÜ Melhor configura√ß√£o ARIMA: {best_arima_order}")
print(f"   RMSE m√©dio: {arima_results['mean_rmse'].iloc[0]:.4f} ¬± {arima_results['std_rmse'].iloc[0]:.4f}")

In [None]:
# Visualise the grid-search results
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Heatmap: p vs q, with mean RMSE averaged over d
pivot_data = arima_results.groupby(['p', 'q'])['mean_rmse'].mean().unstack()
sns.heatmap(pivot_data, annot=True, fmt='.3f', cmap='YlOrRd',
            ax=axes[0], cbar_kws={'label': 'RMSE M√©dio'})
axes[0].set_title('Grid Search ARIMA: p vs q\n(m√©dia sobre d)',
                  fontsize=12, fontweight='bold')
axes[0].set_xlabel('q (MA order)', fontsize=11)
axes[0].set_ylabel('p (AR order)', fontsize=11)

# Bar plot: best configurations (at most 10)
top10 = arima_results.head(10).copy()
top10['config'] = top10['order'].astype(str)
# Bar positions follow the number of rows actually available, so a grid with
# fewer than 10 combinations no longer mismatches positions and values.
axes[1].barh(range(len(top10)), top10['mean_rmse'].values,
             xerr=top10['std_rmse'].values, alpha=0.7, color='steelblue')
axes[1].set_yticks(range(len(top10)))
axes[1].set_yticklabels(top10['config'].values)
axes[1].set_xlabel('RMSE (mean ¬± std)', fontsize=11)
axes[1].set_title('Top 10 Configura√ß√µes ARIMA', fontsize=12, fontweight='bold')
axes[1].invert_yaxis()  # best configuration on top
axes[1].grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.show()

## 4. Random Search - Random Forest

Busca aleatória mais eficiente para modelos com muitos hiperparâmetros:

In [None]:
from forecasting.random_forest import RandomForestForecaster

# Candidate values sampled uniformly for each Random Forest hyperparameter
rf_param_distributions = dict(
    n_estimators=[50, 100, 150, 200, 300],
    max_depth=[5, 10, 15, 20, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2', None],
    lags=[3, 5, 7, 10],
)

def random_search_rf(X, y, param_dist, cv, n_iter=50, random_state=42):
    """Randomly sample Random Forest hyperparameters and score them with CV.

    Args:
        X: Feature array, indexable by the index arrays yielded by ``cv``.
        y: Target array aligned with ``X``.
        param_dist: Mapping from hyperparameter name to a list of candidate
            values. The keys ``n_estimators``, ``max_depth``,
            ``min_samples_split``, ``min_samples_leaf``, ``max_features`` and
            ``lags`` are read.
        cv: Splitter exposing ``split(X)`` that yields
            ``(train_idx, val_idx)`` pairs.
        n_iter: Number of random configurations to evaluate.
        random_state: Seed for the sampling generator; also forwarded to the
            forecaster.

    Returns:
        pandas.DataFrame with one row per iteration: the sampled parameters
        plus ``mean_rmse``, ``std_rmse`` and ``iteration``. A fold that fails
        is scored ``np.inf``.
    """
    # Private legacy generator: same draws as np.random.seed(random_state)
    # followed by np.random.choice, without reseeding the global RNG.
    rng = np.random.RandomState(random_state)
    results = []
    best_rmse = np.inf

    print(f"Testando {n_iter} combina√ß√µes aleat√≥rias...\n")

    # The folds depend only on X, so split once instead of once per iteration.
    folds = list(cv.split(X))

    for i in range(n_iter):
        # One uniform draw per hyperparameter
        params = {k: rng.choice(v) for k, v in param_dist.items()}

        cv_scores = []

        for train_idx, val_idx in folds:
            X_train_cv = X[train_idx]
            y_train_cv = y[train_idx]
            X_val_cv = X[val_idx]
            y_val_cv = y[val_idx]

            try:
                model = RandomForestForecaster(
                    n_estimators=params['n_estimators'],
                    max_depth=params['max_depth'],
                    min_samples_split=params['min_samples_split'],
                    min_samples_leaf=params['min_samples_leaf'],
                    max_features=params['max_features'],
                    lags=params['lags'],
                    random_state=random_state
                )
                model.fit(X_train_cv, y_train_cv)
                y_pred = model.forecast(X_val_cv)
                rmse = np.sqrt(mean_squared_error(y_val_cv, y_pred))
            except Exception:
                # Best effort: penalise a configuration that fails on this
                # fold instead of aborting the whole search.
                rmse = np.inf
            cv_scores.append(rmse)

        mean_score = np.mean(cv_scores)

        result = params.copy()
        result['mean_rmse'] = mean_score
        result['std_rmse'] = np.std(cv_scores)
        result['iteration'] = i
        results.append(result)

        # Running best, so the progress line does not rescan all results.
        best_rmse = min(best_rmse, mean_score)

        if (i + 1) % 10 == 0:
            print(f"Itera√ß√£o {i+1}/{n_iter} conclu√≠da | Melhor RMSE: {best_rmse:.4f}")

    return pd.DataFrame(results)

# Run the Random Forest random search and rank the configurations by mean RMSE
print("Executando Random Search para Random Forest...\n")
rf_results = random_search_rf(
    X_train, y_train, rf_param_distributions, tscv, n_iter=50
).sort_values('mean_rmse')

print("\n" + 80 * "=")
print("Top 5 Melhores Configura√ß√µes Random Forest:")
print(80 * "=")
print(rf_results[['n_estimators', 'max_depth', 'lags', 'mean_rmse', 'std_rmse']].head())
print("\n" + 80 * "=")

# After sorting, the first row is the winner; keep it as a plain dict
best_rf_params = rf_results.iloc[0].to_dict()
print(f"\nüèÜ Melhor configura√ß√£o Random Forest:")
print("\n".join(
    f"   {name}: {best_rf_params[name]}"
    for name in ('n_estimators', 'max_depth', 'min_samples_split',
                 'min_samples_leaf', 'max_features', 'lags')
))
print(f"   RMSE: {best_rf_params['mean_rmse']:.4f} ¬± {best_rf_params['std_rmse']:.4f}")

In [None]:
# Visualise how the random search progressed and how each parameter relates to RMSE
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Panel 1: score of every iteration plus the running minimum
rf_results_sorted = rf_results.sort_values('iteration')
best_so_far = rf_results_sorted['mean_rmse'].cummin()
iterations = rf_results_sorted['iteration']
convergence_ax = axes[0, 0]
convergence_ax.plot(iterations, rf_results_sorted['mean_rmse'],
                    'o', alpha=0.4, label='Cada itera√ß√£o')
convergence_ax.plot(iterations, best_so_far,
                    '-r', linewidth=2, label='Melhor at√© o momento')
convergence_ax.set_xlabel('Itera√ß√£o', fontsize=11)
convergence_ax.set_ylabel('RMSE', fontsize=11)
convergence_ax.set_title('Converg√™ncia do Random Search', fontsize=12, fontweight='bold')
convergence_ax.legend()
convergence_ax.grid(True, alpha=0.3)

# For plotting only: show the unlimited-depth entries (missing values) at x = 30
rf_results_depth = rf_results.copy()
rf_results_depth['max_depth'] = rf_results_depth['max_depth'].fillna(30)

# Panels 2-4: one scatter of RMSE against each hyperparameter
scatter_panels = (
    (axes[0, 1], rf_results, 'n_estimators'),
    (axes[1, 0], rf_results_depth, 'max_depth'),
    (axes[1, 1], rf_results, 'lags'),
)
for panel, frame, column in scatter_panels:
    panel.scatter(frame[column], frame['mean_rmse'], alpha=0.6, s=50)
    panel.set_xlabel(column, fontsize=11)
    panel.set_ylabel('RMSE', fontsize=11)
    panel.set_title(f'Impacto de {column}', fontsize=12, fontweight='bold')
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Learning Curves - Diagnóstico de Overfitting

Avaliar se o modelo está com overfitting ou underfitting:

In [None]:
def plot_learning_curve(model_class, model_params, X, y, cv, train_sizes=None):
    """Compute learning-curve statistics for a forecaster.

    Despite its name, this function draws nothing: it returns the numbers
    the caller plots. For each fraction in ``train_sizes`` and each CV fold,
    the model is fitted on the leading part of the fold's training indices
    and scored (RMSE) on that training subset and on the validation indices.

    Args:
        model_class: Callable building a model from ``**model_params``; the
            model must expose ``fit(X, y)`` and ``forecast(X)``.
        model_params: Keyword arguments for ``model_class``.
        X: Feature array, indexable by the index arrays yielded by ``cv``.
        y: Target array aligned with ``X``.
        cv: Splitter exposing ``split(X)`` that yields
            ``(train_idx, val_idx)`` pairs.
        train_sizes: Fractions of each fold's training set to use; defaults
            to seven values evenly spaced from 0.3 to 1.0.

    Returns:
        Tuple ``(actual_sizes, train_mean, train_std, val_mean, val_std)`` of
        equally long lists. Fractions for which every fold failed are left
        out of all five lists.
    """
    if train_sizes is None:
        train_sizes = np.linspace(0.3, 1.0, 7)

    # The folds depend only on X, so split once instead of once per fraction.
    folds = list(cv.split(X))

    kept_sizes = []
    train_scores = []
    val_scores = []

    for size in train_sizes:
        train_scores_size = []
        val_scores_size = []

        for train_idx, val_idx in folds:
            # Keep only the leading fraction of this fold's training indices.
            # NOTE(review): this drops the most recent observations, leaving a
            # gap before the validation window - confirm this is intended.
            n_samples = int(len(train_idx) * size)
            train_idx_sub = train_idx[:n_samples]

            X_train_cv = X[train_idx_sub]
            y_train_cv = y[train_idx_sub]
            X_val_cv = X[val_idx]
            y_val_cv = y[val_idx]

            try:
                model = model_class(**model_params)
                model.fit(X_train_cv, y_train_cv)

                # In-sample score
                y_train_pred = model.forecast(X_train_cv)
                train_rmse = np.sqrt(mean_squared_error(y_train_cv, y_train_pred))

                # Out-of-sample score
                y_val_pred = model.forecast(X_val_cv)
                val_rmse = np.sqrt(mean_squared_error(y_val_cv, y_val_pred))
            except Exception:
                # Best effort: skip a fold that cannot be fitted or scored.
                # Catching Exception (not a bare except) lets Ctrl-C through.
                continue

            train_scores_size.append(train_rmse)
            val_scores_size.append(val_rmse)

        if train_scores_size:
            # Record the fraction with its scores so sizes and scores stay
            # aligned even when an intermediate fraction fails on every fold.
            kept_sizes.append(size)
            train_scores.append(train_scores_size)
            val_scores.append(val_scores_size)

    # Mean and spread across folds for each surviving fraction
    train_mean = [np.mean(scores) for scores in train_scores]
    train_std = [np.std(scores) for scores in train_scores]
    val_mean = [np.mean(scores) for scores in val_scores]
    val_std = [np.std(scores) for scores in val_scores]

    # Nominal x-axis value: fraction of 80% of the data, not the exact number
    # of rows each fold trained on.
    actual_sizes = [int(len(X) * size * 0.8) for size in kept_sizes]

    return actual_sizes, train_mean, train_std, val_mean, val_std

# Compute and plot learning curves for the best Random Forest configuration
print("Calculando learning curves...\n")

# The best row was read back from a DataFrame, where a sampled None can come
# back as NaN and integer depths as floats. Convert explicitly to None / int
# so the estimator receives valid values, and forward every tuned
# hyperparameter so the curve describes the configuration that actually won.
best_max_depth = best_rf_params['max_depth']
best_max_features = best_rf_params['max_features']
rf_params_best = {
    'n_estimators': int(best_rf_params['n_estimators']),
    'max_depth': None if pd.isna(best_max_depth) else int(best_max_depth),
    'min_samples_split': int(best_rf_params['min_samples_split']),
    'min_samples_leaf': int(best_rf_params['min_samples_leaf']),
    'max_features': None if pd.isna(best_max_features) else best_max_features,
    'lags': int(best_rf_params['lags']),
    'random_state': 42
}

sizes, train_mean, train_std, val_mean, val_std = plot_learning_curve(
    RandomForestForecaster, rf_params_best, X_train, y_train, tscv
)

# Fail with a clear message instead of an IndexError further down.
if not train_mean:
    raise RuntimeError("Nenhuma learning curve foi calculada: todos os folds falharam.")

# Plot mean curves with a one-standard-deviation band
fig, ax = plt.subplots(figsize=(12, 7))

ax.plot(sizes, train_mean, 'o-', label='Treino', linewidth=2, markersize=8, color='blue')
ax.fill_between(sizes,
                np.array(train_mean) - np.array(train_std),
                np.array(train_mean) + np.array(train_std),
                alpha=0.2, color='blue')

ax.plot(sizes, val_mean, 's-', label='Valida√ß√£o', linewidth=2, markersize=8, color='orange')
ax.fill_between(sizes,
                np.array(val_mean) - np.array(val_std),
                np.array(val_mean) + np.array(val_std),
                alpha=0.2, color='orange')

ax.set_xlabel('Tamanho do Conjunto de Treino', fontsize=13)
ax.set_ylabel('RMSE', fontsize=13)
ax.set_title('Learning Curve - Random Forest', fontsize=15, fontweight='bold')
ax.legend(loc='best', fontsize=12)
ax.grid(True, alpha=0.3)

# Rule-of-thumb diagnosis at the largest training size: a validation/train
# gap above 0.5 is reported as overfitting, a training RMSE above 1.0 as
# underfitting.
gap = val_mean[-1] - train_mean[-1]
if gap > 0.5:
    diagnosis = "‚ö†Ô∏è OVERFITTING: Grande gap entre treino e valida√ß√£o"
elif train_mean[-1] > 1.0:
    diagnosis = "‚ö†Ô∏è UNDERFITTING: Erro alto em treino e valida√ß√£o"
else:
    diagnosis = "‚úì BOM AJUSTE: Modelo bem calibrado"

ax.text(0.02, 0.98, diagnosis, transform=ax.transAxes,
        fontsize=11, verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.show()

print(f"\nDiagn√≥stico: {diagnosis}")
print(f"Gap treino-valida√ß√£o: {gap:.4f}")

## 6. Comparação Final: Modelos Otimizados vs Baseline

In [None]:
# Fit tuned and baseline models on the training split, then score them on the
# held-out test split
print("Treinando modelos otimizados...\n")

# Tuned ARIMA
arima_opt = ARIMAForecaster(order=best_arima_order)
arima_opt.fit(y_train)
pred_arima_opt = arima_opt.forecast(len(y_test))

# Baseline ARIMA
arima_baseline = ARIMAForecaster(order=(1, 1, 1))
arima_baseline.fit(y_train)
pred_arima_base = arima_baseline.forecast(len(y_test))

# Tuned Random Forest.
# The best row was read back from a DataFrame, where a sampled None can come
# back as NaN and integer depths as floats. Convert explicitly to None / int,
# and pass every tuned hyperparameter so this really is the configuration the
# random search selected.
best_max_depth = best_rf_params['max_depth']
best_max_features = best_rf_params['max_features']
rf_opt = RandomForestForecaster(
    n_estimators=int(best_rf_params['n_estimators']),
    max_depth=None if pd.isna(best_max_depth) else int(best_max_depth),
    min_samples_split=int(best_rf_params['min_samples_split']),
    min_samples_leaf=int(best_rf_params['min_samples_leaf']),
    max_features=None if pd.isna(best_max_features) else best_max_features,
    lags=int(best_rf_params['lags']),
    random_state=42
)
rf_opt.fit(X_train, y_train)
pred_rf_opt = rf_opt.forecast(X_test)

# Baseline Random Forest
rf_baseline = RandomForestForecaster(n_estimators=100, max_depth=10, lags=3, random_state=42)
rf_baseline.fit(X_train, y_train)
pred_rf_base = rf_baseline.forecast(X_test)

# Test-set metrics for the four models
results_comparison = {
    'ARIMA Baseline (1,1,1)': calculate_metrics(y_test, pred_arima_base),
    f'ARIMA Otimizado {best_arima_order}': calculate_metrics(y_test, pred_arima_opt),
    'RF Baseline': calculate_metrics(y_test, pred_rf_base),
    'RF Otimizado': calculate_metrics(y_test, pred_rf_opt)
}

df_comparison = pd.DataFrame(results_comparison).T

print("\nCompara√ß√£o de Performance: Baseline vs Otimizado")
print("="*80)
print(df_comparison.round(4))
print("\n" + "="*80)

# Relative RMSE reduction of each tuned model versus its baseline, in percent
# (positive means the tuned model is better)
print("\nMelhorias (RMSE):")
arima_improvement = (df_comparison.loc['ARIMA Baseline (1,1,1)', 'rmse'] -
                    df_comparison.loc[f'ARIMA Otimizado {best_arima_order}', 'rmse']) / \
                    df_comparison.loc['ARIMA Baseline (1,1,1)', 'rmse'] * 100
rf_improvement = (df_comparison.loc['RF Baseline', 'rmse'] -
                 df_comparison.loc['RF Otimizado', 'rmse']) / \
                 df_comparison.loc['RF Baseline', 'rmse'] * 100

print(f"  ARIMA: {arima_improvement:+.2f}%")
print(f"  Random Forest: {rf_improvement:+.2f}%")

In [None]:
# Plot test-period forecasts: baseline vs tuned, one panel per model family
fig, axes = plt.subplots(2, 1, figsize=(16, 10))

test_dates = df.index[train_size:]

# (axis, baseline forecast, baseline label, tuned forecast, tuned label, title)
comparison_panels = (
    (axes[0], pred_arima_base, 'ARIMA Baseline (1,1,1)',
     pred_arima_opt, f'ARIMA Otimizado {best_arima_order}',
     f'ARIMA: Melhoria de {arima_improvement:.1f}%'),
    (axes[1], pred_rf_base, 'RF Baseline',
     pred_rf_opt, 'RF Otimizado',
     f'Random Forest: Melhoria de {rf_improvement:.1f}%'),
)

for ax, base_pred, base_label, opt_pred, opt_label, panel_title in comparison_panels:
    ax.plot(test_dates, y_test, 'o-', label='Real', linewidth=3, markersize=7, color='black')
    ax.plot(test_dates, base_pred, 's--', label=base_label,
            linewidth=2, markersize=5, alpha=0.7)
    ax.plot(test_dates, opt_pred, '^-', label=opt_label,
            linewidth=2, markersize=5, alpha=0.7)
    ax.set_title(panel_title, fontsize=13, fontweight='bold')
    ax.set_ylabel('IDCI-VIX', fontsize=11)
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

# Only the bottom panel carries the x-axis label
axes[1].set_xlabel('Data', fontsize=11)

plt.suptitle('Impacto da Otimiza√ß√£o de Hiperpar√¢metros',
             fontsize=16, fontweight='bold', y=1.00)
plt.tight_layout()
plt.show()

## 7. Resumo e Recomendações

### Principais Resultados:

1. **Grid Search (ARIMA)**: Busca exaustiva encontrou configuração ótima
2. **Random Search (RF)**: Exploração eficiente de espaço de hiperparâmetros
3. **Cross-Validation Temporal**: Validação apropriada para séries temporais
4. **Learning Curves**: Diagnóstico de overfitting/underfitting
5. **Melhorias Significativas**: Otimização trouxe ganhos mensuráveis

### Melhores Práticas:

- Use **Time Series CV** em vez de CV aleatório
- **Grid Search** para poucos parâmetros, **Random Search** para muitos
- Monitore **learning curves** para detectar problemas
- Compare sempre com **baseline simples**
- Documente **hiperparâmetros finais** para reprodutibilidade