In [None]:
import pandas as pd
import numpy as np
import optuna
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import utils

%load_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', None)

# Otimização de hiperparâmetros

* Nada mais é do que um **problema de otimização**, onde você busca encontrar um **mínimo ou um máximo** para uma função
* No contexto de Machine Learning, a ideia é encontrar a **melhor configuração de hiperparâmetros** de modo a maximizar o **desempenho** do seu modelo

## O que vamos ver hoje

1. Primeiro iremos entender o **funcionamento** dos otimizadores com uma **função simples**
2. Depois iremos ver uma aplicação **prática em um modelo**
3. Iremos ver também uma maneira de **lidar com overfitting** através dos hiperparâmetros
4. Por último iremos ver **funcionalidades avançadas** que facilitam muito o dia a dia

## 1. Usando os otimizadores para achar o mínimo de uma função matemática

In [None]:
def function(x, y):
    """Quadratic test function used as the toy optimisation target.

    Its minimum is at x ≈ 8.33, y ≈ -6.47, where the value is ≈ -26.07.
    Accepts scalars or NumPy arrays (evaluated element-wise).
    """
    shifted_x = x - 5.1
    shifted_y = y + 2.3
    return shifted_x ** 2 + shifted_y ** 2 + x * y

# Visualise `function` with the project helper (rendering details live in utils, not shown here).
# The trailing ";" suppresses the repr of the returned object in the cell output.
utils.plot_function(function);

In [3]:
# Coarse search grid: even integers from -10 to 10 on each axis (11 x 11 = 121 points).
x_options = np.arange(start=-10, stop=12, step=2)
y_options = np.arange(start=-10, stop=12, step=2)
# grid[0] holds the x coordinate of every grid point, grid[1] the y coordinate.
grid = np.meshgrid(x_options, y_options)

In [None]:
# Plot the function again and overlay every grid point as a black cross,
# to show which candidates the grid search evaluates.
ax = utils.plot_function(function)
ax.scatter(grid[0], grid[1], marker="x", color="black");

In [None]:
# Smallest function value found on the grid. ndarray.min() performs the
# reduction inside NumPy instead of iterating the flattened array element by
# element with the Python builtin min().
function(grid[0], grid[1]).min()

In [None]:
# Position of the smallest value in the flattened grid evaluation ...
argmin = np.argmin(function(*grid).flatten())
# ... and the (x, y) coordinates stored at that same flat position.
tuple(axis_values.flatten()[argmin] for axis_values in grid)

### Usando optuna

In [None]:
# Study that minimises the objective. The sampler is seeded so that
# "Restart Kernel -> Run All" reproduces the same sequence of trials
# (an unseeded sampler draws different points on every run).
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=23),
)

In [8]:
def objective(trial):
    """Optuna objective for the toy problem.

    Asks the trial for one float per axis ("x" then "y"), each in [-10, 10],
    and returns the value of `function` at that point.
    """
    point = {axis: trial.suggest_float(axis, -10, 10) for axis in ("x", "y")}
    return function(point["x"], point["y"])

In [None]:
# Evaluate the objective for 50 trials on the current study.
study.optimize(objective, n_trials=50)

In [None]:
# Build the trials table once and reuse it for both coordinates, instead of
# calling study.trials_dataframe() twice.
trials_df = study.trials_dataframe()

ax = utils.plot_function(function)
# Black crosses: every point the study sampled.
ax.scatter(trials_df["params_x"], trials_df["params_y"], marker="x", color="black")
# Yellow cross: the best point. best_params has the keys "x" and "y" (the names
# suggested in the objective), which map directly onto scatter's x/y keywords.
ax.scatter(**study.best_params, marker="x", color="y");

In [None]:
# Best parameters found by the study and the objective value reached there.
study.best_params, study.best_value

In [None]:
# Contour plot of the objective value over the parameters explored by the study.
optuna.visualization.plot_contour(study)

## 2. Otimizando um modelo

#### Carregando os dados

In [33]:
# Load the credit-card dataset from the local data folder, using the "ID" column as the row index.
df = pd.read_csv("../data/UCI_Credit_Card.csv", index_col="ID")

In [None]:
# Quick look at the first rows.
df.head()

In [None]:
# (rows, columns) of the raw dataset.
df.shape

In [None]:
# Hold out 20% of the rows as the test set, then 20% of the remainder as the
# validation set: roughly 64% train / 16% validation / 20% test.
# The fixed random_state makes both splits reproducible.
df_train_val, df_test = train_test_split(df, test_size=0.2, random_state=23)
df_train, df_val = train_test_split(df_train_val, test_size=0.2, random_state=23)
df_train.shape, df_val.shape, df_test.shape

In [37]:
target = "default.payment.next.month"
# Take the feature list from the raw frame rather than from df_train: later
# cells add a "pred" column to df_train, so deriving the list from df_train
# would silently turn "pred" into a feature if this cell were re-run.
features = [col for col in df.columns if col != target]

#### Treinando um modelo sem tuning

In [None]:
# Baseline: LightGBM with its default hyperparameters, trained on the training split.
# verbose=-1 keeps LightGBM's logging quiet.
model = LGBMClassifier(verbose=-1)
model.fit(df_train[features], df_train[target])

In [39]:
# Attach the predicted probability of the positive class (column 1 of predict_proba).
# assign() returns a new frame instead of writing into the slices produced by
# train_test_split, which avoids pandas' SettingWithCopyWarning and keeps the
# cell safe to re-run.
df_train = df_train.assign(pred=model.predict_proba(df_train[features])[:, 1])
df_test = df_test.assign(pred=model.predict_proba(df_test[features])[:, 1])

In [None]:
# ROC AUC on the training split vs. the test split for the untuned model.
roc_auc_score(df_train[target], df_train["pred"]), roc_auc_score(df_test[target], df_test["pred"])

#### Tuning

In [None]:
# Study that maximises the objective (validation ROC AUC). The sampler is
# seeded so that re-running the notebook reproduces the same trials.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=23),
)

In [22]:
def objective(trial):
    """Score one LightGBM configuration by its validation ROC AUC.

    The trial proposes max_depth, learning_rate (log scale), n_estimators,
    num_leaves and max_bin; random_state is fixed at 23. The model is fitted
    on df_train and the returned value is the ROC AUC measured on df_val.
    """
    candidate = dict(
        random_state=23,
        max_depth=trial.suggest_int("max_depth", 2, 100),
        learning_rate=trial.suggest_float("learning_rate", 0.00001, 10, log=True),
        n_estimators=trial.suggest_int("n_estimators", 10, 100),
        num_leaves=trial.suggest_int("num_leaves", 2, 256),
        max_bin=trial.suggest_int("max_bin", 32, 256),
    )
    clf = LGBMClassifier(**candidate, verbose=-1)
    clf.fit(df_train[features], df_train[target])

    # Column 1 of predict_proba is the probability of the positive class.
    val_scores = clf.predict_proba(df_val[features])[:, 1]
    return roc_auc_score(df_val[target], val_scores)

In [None]:
# Run 50 tuning trials; each one trains a model and scores it on the validation split.
study.optimize(objective, n_trials=50)

In [None]:
# Parallel-coordinate view of the trials: one line per trial across the hyperparameters and the objective value.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# Refit with the best hyperparameters. best_params only contains the values
# suggested through the trial, so the fixed random_state used inside the
# objective is passed again to make the refit match the configuration that
# was actually scored.
model = LGBMClassifier(**study.best_params, random_state=23, verbose=-1)
model.fit(df_train[features], df_train[target])

In [26]:
# Replace the "pred" column with the tuned model's positive-class probability.
# assign() builds a new frame rather than writing into the train_test_split
# slices, avoiding pandas' SettingWithCopyWarning and keeping the cell re-runnable.
df_train = df_train.assign(pred=model.predict_proba(df_train[features])[:, 1])
df_test = df_test.assign(pred=model.predict_proba(df_test[features])[:, 1])

In [None]:
# ROC AUC on train vs. test for the tuned model; compare with the untuned baseline.
roc_auc_score(df_train[target], df_train["pred"]), roc_auc_score(df_test[target], df_test["pred"])

## Extra: Reduzindo overfitting

In [None]:
# Fresh study for the overfitting-penalised metric. The sampler is seeded so
# that re-running the notebook reproduces the same trials.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=23),
)

Criando uma métrica personalizada para reduzir overfitting
$$metric = \frac{1}{1 + |AUC_V - AUC_T|} \cdot AUC_V$$
onde $AUC_V$ é a AUC na validação e $AUC_T$ é a AUC no treino.

In [None]:
def objective(trial):
    """Score one LightGBM configuration with an overfitting-penalised AUC.

    Same search space as the plain tuning objective. The model is fitted on
    df_train, then the validation AUC is scaled by 1 / (1 + |AUC_val - AUC_train|),
    so a large train/validation gap lowers the returned score.
    """
    max_depth = trial.suggest_int("max_depth", 2, 100)
    learning_rate = trial.suggest_float("learning_rate", 0.00001, 10, log=True)
    n_estimators = trial.suggest_int("n_estimators", 10, 100)
    num_leaves = trial.suggest_int("num_leaves", 2, 256)
    max_bin = trial.suggest_int("max_bin", 32, 256)

    clf = LGBMClassifier(
        random_state=23,
        max_depth=max_depth,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        num_leaves=num_leaves,
        max_bin=max_bin,
        verbose=-1,
    )
    clf.fit(df_train[features], df_train[target])

    def auc_on(frame):
        # ROC AUC of the positive-class probability (column 1) on the given split.
        return roc_auc_score(frame[target], clf.predict_proba(frame[features])[:, 1])

    auc_val = auc_on(df_val)
    auc_train = auc_on(df_train)
    gap = abs(auc_val - auc_train)

    return (1 / (1 + gap)) * auc_val

In [None]:
# Run 50 trials using the overfitting-penalised objective.
study.optimize(objective, n_trials=50)

In [None]:
# Parallel-coordinate view of the trials for the penalised metric.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# Refit with the best hyperparameters under the penalised metric. best_params
# only holds the suggested values, so the fixed random_state from the objective
# is passed again to reproduce the configuration that was scored.
model = LGBMClassifier(**study.best_params, random_state=23, verbose=-1)
model.fit(df_train[features], df_train[target])

In [33]:
# Replace the "pred" column with this model's positive-class probability.
# assign() builds a new frame rather than writing into the train_test_split
# slices, avoiding pandas' SettingWithCopyWarning and keeping the cell re-runnable.
df_train = df_train.assign(pred=model.predict_proba(df_train[features])[:, 1])
df_test = df_test.assign(pred=model.predict_proba(df_test[features])[:, 1])

In [None]:
# ROC AUC on train vs. test; the gap between the two is what the penalised metric aims to shrink.
roc_auc_score(df_train[target], df_train["pred"]), roc_auc_score(df_test[target], df_test["pred"])

## Extra 2: Tópicos avançados

In [35]:
from sklearn.model_selection import KFold

In [None]:
# Study with two extras:
#   - storage: trials are persisted to the local SQLite file optuna.db;
#   - pruner: Hyperband decides, from the intermediate values a trial reports,
#     whether it should be stopped early.
# The sampler is seeded so the suggested hyperparameters do not change from run to run.
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.db",
    sampler=optuna.samplers.TPESampler(seed=23),
    pruner=optuna.pruners.HyperbandPruner(),
)

In [37]:
def objective(trial):
    """Score one LightGBM configuration with 3-fold cross-validation and pruning.

    For each fold of df_train_val a model is trained on the fitting rows and
    its ROC AUC on the held-out rows is reported to the trial as an
    intermediate value (step = fold index). If the pruner asks to stop, the
    trial is aborted with optuna.TrialPruned; otherwise the mean AUC over the
    folds is returned.
    """
    params = dict(
        random_state=23,
        max_depth=trial.suggest_int("max_depth", 2, 100),
        learning_rate=trial.suggest_float("learning_rate", 0.00001, 10, log=True),
        n_estimators=trial.suggest_int("n_estimators", 10, 100),
        num_leaves=trial.suggest_int("num_leaves", 2, 256),
        max_bin=trial.suggest_int("max_bin", 32, 256),
    )

    fold_scores = []
    splitter = KFold(n_splits=3)
    for step, (fit_rows, eval_rows) in enumerate(splitter.split(df_train_val)):
        fit_part = df_train_val.iloc[fit_rows]
        eval_part = df_train_val.iloc[eval_rows]

        clf = LGBMClassifier(**params, verbose=-1)
        clf.fit(fit_part[features], fit_part[target])

        # Column 1 of predict_proba is the probability of the positive class.
        eval_scores = clf.predict_proba(eval_part[features])[:, 1]
        fold_auc = roc_auc_score(eval_part[target], eval_scores)
        fold_scores.append(fold_auc)

        # Let the pruner see this fold's score before spending time on the next fold.
        trial.report(fold_auc, step)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return np.mean(fold_scores)

In [None]:
# Run 50 trials of the cross-validated objective; trials the pruner stops early end as pruned.
study.optimize(objective, n_trials=50)