# Hyperparameter Tuning

Este notebook apresenta técnicas para otimização de hiperparâmetros.


In [None]:
# Imports b√°sicos
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Plotting configuration: apply the "seaborn-v0_8" matplotlib style and set
# the default figure size used by every plot in the notebook.
plt.style.use("seaborn-v0_8")
plt.rcParams["figure.figsize"] = (10, 6)

## Objetivos da Aula

- Grid Search
- Random Search
- Nested Cross-Validation
- Estratégias de otimização


In [None]:
# Build the synthetic classification dataset used by every demonstration below.
# random_state is fixed so the generated data is the same on each run.
X, y = make_classification(
    n_samples=1000, n_features=10, n_redundant=0, n_informative=8, n_clusters_per_class=2, random_state=42
)

# Summary of the generated data: sample count, feature count, per-class counts
print(f"Dataset criado:")
print(f"- N√∫mero de amostras: {X.shape[0]}")
print(f"- N√∫mero de features: {X.shape[1]}")
print(f"- Distribui√ß√£o de classes: {np.bincount(y)}")

# Seed NumPy's global RNG for reproducibility
np.random.seed(42)

print("\n‚úÖ Ambiente configurado com sucesso!")

## 1. O que são Hiperparâmetros?

**Hiperparâmetros** são configurações do modelo que **não são aprendidas** durante o treinamento, mas devem ser definidas **antes** do processo de aprendizado.

### Exemplos de Hiperparâmetros:

- **Random Forest**: `n_estimators`, `max_depth`, `min_samples_split`
- **SVM**: `C`, `kernel`, `gamma`
- **Neural Networks**: `learning_rate`, `batch_size`, `hidden_layers`

### Por que otimizar?

A escolha inadequada de hiperparâmetros pode levar a:

- **Underfitting**: Modelo muito simples
- **Overfitting**: Modelo muito complexo
- **Performance subótima**: Não explorar o potencial máximo do modelo


In [None]:
# Example: how a single hyperparameter (max_depth) changes model performance
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Stratified 80/20 hold-out split, reused by the later cells
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Candidate max_depth values for the Random Forest
depths = [3, 5, 10, 15, None]
scores = []

print("üå≥ Impacto do max_depth no Random Forest:")
print("=" * 45)

for depth in depths:
    # Same forest size and seed every time, so only max_depth varies
    rf = RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=42).fit(X_train, y_train)
    score = accuracy_score(y_test, rf.predict(X_test))
    scores.append(score)
    print(f"max_depth={str(depth):4} ‚Üí Accuracy: {score:.3f}")

# Plot accuracy against max_depth; labels are strings so None gets its own tick
depth_labels = list(map(str, depths))
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(depth_labels, scores, "o-", linewidth=2, markersize=8)
ax.set_title("Impacto do max_depth na Performance")
ax.set_xlabel("max_depth")
ax.set_ylabel("Accuracy")
ax.grid(True, alpha=0.3)
plt.show()

# Report the depth with the highest test accuracy (first one on ties)
best_idx = np.argmax(scores)
print(f"\nüéØ Melhor max_depth: {depths[best_idx]} (Accuracy: {scores[best_idx]:.3f})")

## 2. Grid Search

**Grid Search** é uma técnica que testa **todas as combinações** possíveis de hiperparâmetros em uma grade (grid) pré-definida.

### Vantagens:

- ✅ Garante encontrar a melhor combinação dentro da grade
- ✅ Fácil de implementar e entender

### Desvantagens:

- ❌ Computacionalmente custoso (crescimento exponencial)
- ❌ Limitado às combinações pré-definidas


In [None]:
# Grid Search with a Random Forest
import time

# Hyperparameter grid: every combination of these values is evaluated
param_grid = {"n_estimators": [50, 100, 200], "max_depth": [5, 10, 15], "min_samples_split": [2, 5, 10]}

# Derive the number of candidates from the grid itself so the message cannot
# go stale when the grid is edited (the old text was hard-coded to 27).
grid_sizes = [len(values) for values in param_grid.values()]
n_combinations = int(np.prod(grid_sizes))

print("üîç Grid Search - Random Forest")
print("=" * 40)
print(f"Par√¢metros a testar: {param_grid}")
print(f"Total de combina√ß√µes: {' x '.join(map(str, grid_sizes))} = {n_combinations}")

# Grid Search with cross-validation; perf_counter is a monotonic clock, which
# makes it the right tool for measuring elapsed time.
start_time = time.perf_counter()

grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,  # 5-fold cross-validation
    scoring="accuracy",
    n_jobs=-1,  # use every available core
    verbose=1,
)

grid_search.fit(X_train, y_train)

end_time = time.perf_counter()

print(f"\n‚è±Ô∏è Tempo de execu√ß√£o: {end_time - start_time:.2f} segundos")
print(f"üèÜ Melhor score: {grid_search.best_score_:.3f}")
print(f"üéØ Melhores par√¢metros: {grid_search.best_params_}")

# Evaluate the selected model on the held-out test split
best_model = grid_search.best_estimator_
test_score = accuracy_score(y_test, best_model.predict(X_test))
print(f"üìä Score no teste: {test_score:.3f}")

In [None]:
# Analysis of the Grid Search results
results_df = pd.DataFrame(grid_search.cv_results_)

# Five best combinations by mean cross-validated score
print("üèÖ TOP 5 COMBINA√á√ïES")
print("=" * 50)
top_5 = results_df.nlargest(5, "mean_test_score")[["params", "mean_test_score", "std_test_score"]]

for rank, (_, row) in enumerate(top_5.iterrows(), start=1):
    print(f"{rank}. Score: {row['mean_test_score']:.3f} (¬±{row['std_test_score']:.3f})")
    print(f"   Params: {row['params']}")
    print()

# One bar chart per tuned hyperparameter. Each bar is the mean CV score
# averaged over all combinations that share that value of the hyperparameter.
# A single loop replaces three copy-pasted plotting stanzas.
tuned_params = ["n_estimators", "max_depth", "min_samples_split"]
fig, axes = plt.subplots(1, len(tuned_params), figsize=(15, 4))

for ax, param_name in zip(axes, tuned_params):
    mean_scores = results_df.groupby(f"param_{param_name}")["mean_test_score"].mean()
    positions = range(len(mean_scores))
    ax.bar(positions, mean_scores.values)
    ax.set_title(f"Impacto do {param_name}")
    ax.set_xlabel(param_name)
    ax.set_ylabel("Mean CV Score")
    # Bars sit at integer positions; label them with the actual parameter values
    ax.set_xticks(positions)
    ax.set_xticklabels(mean_scores.index)

plt.tight_layout()
plt.show()

## 3. Random Search

**Random Search** amostra **aleatoriamente** combinações de hiperparâmetros de distribuições definidas.

### Vantagens:

- ✅ Mais eficiente que Grid Search para espaços grandes
- ✅ Pode encontrar combinações não previstas
- ✅ Controle do tempo de execução (número de iterações)

### Desvantagens:

- ❌ Não garante encontrar o ótimo global
- ❌ Resultados podem variar entre execuções


In [None]:
# Random Search sampling hyperparameters from distributions
from scipy.stats import randint, uniform

# Distributions to sample from. scipy's randint(low, high) excludes `high`;
# uniform(loc, scale) covers [loc, loc + scale].
param_distributions = {
    "n_estimators": randint(50, 300),  # integers 50..299
    "max_depth": randint(3, 20),  # integers 3..19
    "min_samples_split": randint(2, 20),  # integers 2..19
    "min_samples_leaf": randint(1, 10),  # integers 1..9
    "max_features": uniform(0.1, 0.9),  # floats in [0.1, 1.0]
}

print("üé≤ Random Search - Random Forest")
print("=" * 40)
print("Distribui√ß√µes dos par√¢metros:")
for param, dist in param_distributions.items():
    # A frozen scipy distribution's default repr is an object address, which is
    # unreadable and changes on every run; print the family name and its
    # arguments instead, e.g. "randint(50, 300)".
    print(f"- {param}: {dist.dist.name}{dist.args}")

# Random Search; perf_counter is a monotonic clock suited to elapsed-time measurement
start_time = time.perf_counter()

random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_distributions,
    n_iter=50,  # 50 randomly sampled candidates
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
    random_state=42,
    verbose=1,
)

random_search.fit(X_train, y_train)

end_time = time.perf_counter()

print(f"\n‚è±Ô∏è Tempo de execu√ß√£o: {end_time - start_time:.2f} segundos")
print(f"üèÜ Melhor score: {random_search.best_score_:.3f}")
print(f"üéØ Melhores par√¢metros: {random_search.best_params_}")

# Cross-validated score side by side with the earlier Grid Search
print(f"\nüìä COMPARA√á√ÉO")
print("=" * 30)
print(f"Grid Search:   {grid_search.best_score_:.3f}")
print(f"Random Search: {random_search.best_score_:.3f}")

# Held-out test accuracy of both tuned models
random_test_score = accuracy_score(y_test, random_search.best_estimator_.predict(X_test))
print(f"\nScore no teste:")
print(f"Grid Search:   {test_score:.3f}")
print(f"Random Search: {random_test_score:.3f}")

## 4. Nested Cross-Validation

**Nested CV** é a forma **correta** de avaliar a performance de um modelo quando fazemos hyperparameter tuning.

### Por que usar?

- **Problema**: Usar o mesmo conjunto para tuning e avaliação leva a **overfitting**
- **Solução**: CV externo para avaliação + CV interno para tuning

### Estrutura:

```
Outer CV (avaliação):
├── Fold 1: Inner CV (tuning) → Melhor modelo → Avaliação
├── Fold 2: Inner CV (tuning) → Melhor modelo → Avaliação
└── Fold 3: Inner CV (tuning) → Melhor modelo → Avaliação
```


In [None]:
# Nested Cross-Validation, implemented by hand
from sklearn.model_selection import StratifiedKFold

# Outer folds estimate performance; inner folds choose the hyperparameters
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Smaller grid to keep the demonstration fast
simple_param_grid = {"n_estimators": [50, 100], "max_depth": [5, 10, None]}

print("üîÑ Nested Cross-Validation")
print("=" * 40)
print(f"Outer CV: {outer_cv.n_splits} folds")
print(f"Inner CV: {inner_cv.n_splits} folds")

# Manual nested CV loop
nested_scores = []
best_params_per_fold = []

for fold, (train_idx, val_idx) in enumerate(outer_cv.split(X, y)):
    print(f"\nüìÅ Outer Fold {fold + 1}")

    # Data for this outer fold
    X_train_outer, X_val_outer = X[train_idx], X[val_idx]
    y_train_outer, y_val_outer = y[train_idx], y[val_idx]

    # Inner CV: tune hyperparameters using only the outer-training portion
    inner_grid = GridSearchCV(
        RandomForestClassifier(random_state=42), simple_param_grid, cv=inner_cv, scoring="accuracy", n_jobs=-1
    )

    inner_grid.fit(X_train_outer, y_train_outer)

    # Score the tuned model on the outer validation fold, which tuning never saw.
    # Fold-specific names keep this loop from overwriting `best_model` / `score`
    # defined by earlier cells.
    fold_best_model = inner_grid.best_estimator_
    fold_score = accuracy_score(y_val_outer, fold_best_model.predict(X_val_outer))

    nested_scores.append(fold_score)
    best_params_per_fold.append(inner_grid.best_params_)

    print(f"   Melhores params: {inner_grid.best_params_}")
    print(f"   Score: {fold_score:.3f}")

# Final result
mean_score = np.mean(nested_scores)
std_score = np.std(nested_scores)

print(f"\nüéØ RESULTADO FINAL")
print("=" * 30)
print(f"Nested CV Score: {mean_score:.3f} ¬± {std_score:.3f}")
print(f"Scores por fold: {[f'{s:.3f}' for s in nested_scores]}")

# "Naive" baseline: tune with the SAME grid, data and outer folds, then report
# the tuning score itself. Reusing the earlier grid_search.best_score_ would
# compare a different grid fitted on different data, so the gap would not
# isolate the optimism introduced by tuning.
naive_search = GridSearchCV(
    RandomForestClassifier(random_state=42), simple_param_grid, cv=outer_cv, scoring="accuracy", n_jobs=-1
)
naive_search.fit(X, y)
naive_score = naive_search.best_score_

print(f"\nüìä COMPARA√á√ÉO")
print("=" * 20)
print(f"Valida√ß√£o 'ing√™nua': {naive_score:.3f}")
print(f"Nested CV:          {mean_score:.3f} ¬± {std_score:.3f}")
print("\n‚ö†Ô∏è  A diferen√ßa mostra o overfitting do tuning!")

## 5. Estratégias Práticas de Otimização

### 🎯 Boas Práticas:

1. **Comece simples**: Teste ranges amplos primeiro
2. **Use Random Search**: Para exploração inicial
3. **Refine com Grid Search**: Em regiões promissoras
4. **Monitore overfitting**: Use Nested CV para avaliação final
5. **Considere o custo computacional**: Balance precisão vs tempo

### 📊 Quando usar cada método:

| Método                    | Situação                  | Vantagem            |
| ------------------------- | ------------------------- | ------------------- |
| **Manual**                | Poucos hiperparâmetros    | Controle total      |
| **Grid Search**           | Espaço pequeno e discreto | Garantia de ótimo   |
| **Random Search**         | Espaço grande ou contínuo | Eficiência          |
| **Bayesian Optimization** | Função cara de avaliar    | Menor n° avaliações |


In [None]:
# Example: hybrid strategy (Random Search exploration + Grid Search refinement)
print("üöÄ ESTRAT√âGIA H√çBRIDA")
print("=" * 30)

# Step 1: Random Search for broad exploration
print("Passo 1: Explora√ß√£o com Random Search...")

broad_distributions = {
    "n_estimators": randint(10, 500),
    "max_depth": randint(1, 30),
    "min_samples_split": randint(2, 50),
    "min_samples_leaf": randint(1, 20),
}

# Randomized search over the broad space (30 sampled candidates, 3-fold CV)
exploration = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    broad_distributions,
    n_iter=30,
    cv=3,
    scoring="accuracy",
    n_jobs=-1,
    random_state=42,
)

exploration.fit(X_train, y_train)
best_broad = exploration.best_params_

print(f"Melhor regi√£o encontrada: {best_broad}")

# Step 2: Grid Search refinement around the promising region
print("\nPasso 2: Refinamento com Grid Search...")

best_n_estimators = best_broad["n_estimators"]
best_depth = best_broad["max_depth"]
best_split = best_broad["min_samples_split"]

# Grid centred on the best values found. Two fixes over a naive +/- grid:
#   * a set removes duplicates created when clamping collapses two candidates
#     (e.g. max_depth=1 would give [1, 1, 3]), which would only waste fits;
#   * min_samples_leaf is pinned to the explored value, because omitting it
#     silently resets it to the estimator default and moves the refinement
#     out of the region that exploration found.
refined_grid = {
    "n_estimators": sorted({max(10, best_n_estimators - 50), best_n_estimators, best_n_estimators + 50}),
    "max_depth": sorted({max(1, best_depth - 2), best_depth, best_depth + 2}),
    "min_samples_split": sorted({max(2, best_split - 2), best_split, min(50, best_split + 2)}),
    "min_samples_leaf": [best_broad["min_samples_leaf"]],
}

refinement = GridSearchCV(RandomForestClassifier(random_state=42), refined_grid, cv=5, scoring="accuracy", n_jobs=-1)

refinement.fit(X_train, y_train)

print(f"Melhores par√¢metros refinados: {refinement.best_params_}")
print(f"Score final: {refinement.best_score_:.3f}")

# Final comparison on the held-out test split
final_test_score = accuracy_score(y_test, refinement.best_estimator_.predict(X_test))

print(f"\nüìà COMPARA√á√ÉO FINAL NO TESTE")
print("=" * 35)
print(f"Grid Search simples:    {test_score:.3f}")
print(f"Random Search:          {random_test_score:.3f}")
print(f"Estrat√©gia h√≠brida:     {final_test_score:.3f}")

# Name the approach that actually scored best instead of a generic fallback.
# The hybrid entry comes first so it still wins ties, as before.
# NOTE: this ranking uses test scores for illustration only; choosing a method
# this way in real work would leak test information into model selection.
test_scores_by_approach = {
    "H√≠brida": final_test_score,
    "Grid Search": test_score,
    "Random Search": random_test_score,
}
best_approach = max(test_scores_by_approach, key=test_scores_by_approach.get)

print(f"\nüéØ Melhor abordagem: {best_approach}")

## 6. Resumo e Boas Práticas

### 🎯 Principais Takeaways:

1. **Hiperparâmetros são cruciais** para performance dos modelos
2. **Grid Search**: Exaustivo mas garantido (espaços pequenos)
3. **Random Search**: Eficiente para exploração (espaços grandes)
4. **Nested CV**: Avaliação não enviesada com tuning
5. **Estratégia híbrida**: Exploração + refinamento

### ⚠️ Armadilhas Comuns:

- **Data leakage**: Não usar dados de teste para tuning
- **Overfitting do tuning**: Muitas iterações sem validação externa
- **Ignorar o custo computacional**: Balance eficiência vs precisão
- **Grid muito denso**: Pode não melhorar significativamente

### 🚀 Próximos Passos:

- **Bayesian Optimization**: Para otimização mais inteligente
- **Multi-objective optimization**: Balance entre métricas
- **Early stopping**: Para modelos iterativos
- **AutoML**: Automatização completa do processo


In [None]:
# Fun√ß√£o utilit√°ria para hyperparameter tuning
def optimize_hyperparameters(
    estimator, param_grid, X, y, method="grid", n_iter=50, cv=5, scoring="accuracy", random_state=42
):
    """
    Run a grid or randomized hyperparameter search and return the best model.

    Parameters
    ----------
    estimator : sklearn estimator
        Model to tune.
    param_grid : dict
        Parameter grid (for ``method="grid"``) or parameter distributions
        (for ``method="random"``).
    X, y : array-like
        Training data.
    method : str, default='grid'
        Search method: 'grid' or 'random'.
    n_iter : int, default=50
        Number of sampled candidates; only used by the random search.
    cv : int, default=5
        Cross-validation setting forwarded to the search object.
    scoring : str, default='accuracy'
        Evaluation metric.
    random_state : int, default=42
        Seed for the random search; ignored by the grid search.

    Returns
    -------
    The ``best_estimator_`` of the fitted search.

    Raises
    ------
    ValueError
        If ``method`` is neither 'grid' nor 'random'.
    """
    # Reject unknown methods up front, before any search object is built
    if method not in ("grid", "random"):
        raise ValueError("method deve ser 'grid' ou 'random'")

    # Arguments common to both search classes
    shared_kwargs = {"estimator": estimator, "cv": cv, "scoring": scoring, "n_jobs": -1}

    if method == "grid":
        search = GridSearchCV(param_grid=param_grid, **shared_kwargs)
    else:
        search = RandomizedSearchCV(
            param_distributions=param_grid,
            n_iter=n_iter,
            random_state=random_state,
            **shared_kwargs,
        )

    search.fit(X, y)

    print(f"üèÜ Melhor score ({method}): {search.best_score_:.3f}")
    print(f"üéØ Melhores par√¢metros: {search.best_params_}")

    return search.best_estimator_


# Example usage of optimize_hyperparameters
print("üõ†Ô∏è FUN√á√ÉO UTILIT√ÅRIA")
print("=" * 25)

# Quick test: a small 2x2 grid tuned on the training split
quick_grid = {"n_estimators": [50, 100], "max_depth": [5, 10]}
best_rf = optimize_hyperparameters(
    RandomForestClassifier(random_state=42), quick_grid, X_train, y_train, method="grid"
)

# Closing messages for the lesson
print(f"\n‚úÖ Li√ß√£o 04 - Hyperparameter Tuning conclu√≠da!")
print("M√≥dulo de Valida√ß√£o e Otimiza√ß√£o finalizado!")