In [None]:
import optuna
from catboost import CatBoostClassifier
from catboost import datasets
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the Amazon dataset shipped with catboost.datasets as a (train, test) pair.
train_df, test_df = datasets.amazon()

# Split the training frame into the feature matrix and the ACTION target column.
X_train = train_df.drop(columns='ACTION')
y_train = train_df['ACTION']

# Test features: every column of the test frame except 'id'.
X_test = test_df.drop(columns='id')

In [None]:
# Hold out 30% of the labelled data for validation. The random_state is fixed
# so that a Restart & Run All reproduces the same split (and therefore the
# same tuning results) instead of reshuffling on every run.
train_x, valid_x, train_y, valid_y = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)

In [None]:
def objective(trial):
    """Optuna objective: fit one CatBoost classifier and return its validation ROC AUC.

    Hyperparameters are sampled from ``trial``; the model is trained on the
    module-level ``train_x``/``train_y`` and evaluated on ``valid_x``/``valid_y``.

    Args:
        trial: the Optuna trial object used to sample hyperparameters
            (``suggest_categorical``, ``suggest_float``, ``suggest_int``).

    Returns:
        ROC AUC on the validation set, computed from the predicted probability
        of the positive class.
    """
    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        # Fixed (not tuned) memory cap passed straight to CatBoost.
        "used_ram_limit": "3gb",
    }

    # These two parameters are only sampled when the matching bootstrap type
    # was selected, so they appear in the trial only for that bootstrap type.
    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    model = CatBoostClassifier(**param)

    model.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)

    # ROC AUC is a ranking metric and needs continuous scores. Hard labels from
    # predict() would reduce the ROC curve to a single threshold point, so use
    # the probability of the positive class (second column) instead.
    positive_proba = model.predict_proba(valid_x)[:, 1]
    return roc_auc_score(valid_y, positive_proba)

In [None]:
# Maximise the value returned by `objective`. The TPE sampler is created
# explicitly with a fixed seed so the sequence of sampled hyperparameters is
# reproducible across kernel restarts.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# The search is bounded by both n_trials=5 and timeout=600 (seconds).
study.optimize(objective, n_trials=5, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Display the hyperparameters of the best trial as the cell's output.
study.best_params

In [None]:
# Plot the objective value of each trial in the study over the course of the search.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Plot Optuna's estimate of how much each hyperparameter influenced the objective.
optuna.visualization.plot_param_importances(study)

In [None]:
# Plot the empirical distribution function of the objective values in the study.
optuna.visualization.plot_edf(study)

In [None]:
# Build the final classifier from the best hyperparameters found by the study.
# The iteration budget is large (10000) and relies on early stopping (100
# rounds) to cut training short; Logloss is the eval metric, and both Logloss
# and AUC are tracked as custom metrics.
model = CatBoostClassifier(
    eval_metric='Logloss',
    custom_metric=['Logloss', 'AUC'],
    early_stopping_rounds=100,
    iterations=10000,
    **study.best_params,
)

In [None]:
# Fit on the training split only. valid_x/valid_y were carved out of
# X_train/y_train, so fitting on the full X_train would put every eval row in
# the training data, making early stopping and the plotted metrics optimistic.
model.fit(train_x, train_y, eval_set=(valid_x, valid_y),
          verbose=False,
          plot=True)