In [4]:
import numpy as np
import optuna
import lightgbm as lgb
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

In [5]:
# Binary classification task: load the breast-cancer dataset as a
# (features, labels) pair of arrays.
data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Show the dimensions of the feature matrix.
print(np.shape(data))

(569, 30)


In [6]:
def objective(trial, data, target, random_state=42):
    """Optuna objective: fit LightGBM with sampled hyperparameters and score it.

    The data is split 75/25 into train/validation parts, a binary LightGBM
    model is trained on the training part while its validation AUC is reported
    to Optuna for pruning, and the validation accuracy is returned.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters and to report intermediate values.
    data : array-like
        Feature matrix, forwarded to ``train_test_split``.
    target : array-like
        Binary labels aligned with ``data``.
    random_state : int or None, default 42
        Seed for the train/validation split. A fixed seed scores every trial
        on the same validation rows, so trial values are comparable and the
        study is repeatable. Pass ``None`` for a fresh random split per call.

    Returns
    -------
    float
        Accuracy on the validation split, with predicted probabilities
        rounded to the nearest integer label.

    Notes
    -----
    The pruning callback stops unpromising trials by raising
    ``optuna.TrialPruned``; Optuna handles that exception itself.
    """
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.25, random_state=random_state
    )
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    # Callback for LightGBM to prune unpromising trials.
    # `metric` is the evaluation metric watched for pruning; it must be one of
    # the metrics LightGBM evaluates on the validation set (here param["metric"]).
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, metric="auc")

    # Keep per-iteration evaluation logging off on every LightGBM version:
    # the `verbose_eval` keyword was removed in LightGBM 4.0 (passing it raises
    # TypeError), and `log_evaluation` is its callback replacement. Releases
    # that predate the callback still need the keyword.
    callbacks = [pruning_callback]
    train_kwargs = {}
    if hasattr(lgb, "log_evaluation"):
        callbacks.append(lgb.log_evaluation(period=0))
    else:
        train_kwargs["verbose_eval"] = False

    # Cross-validation could also be performed here instead of a single split.
    gbm = lgb.train(
        param, dtrain, valid_sets=[dvalid], callbacks=callbacks, **train_kwargs
    )

    # predict() yields probabilities for the binary objective; round to labels.
    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

In [7]:
# Seed for the hyperparameter sampler so that its suggestions are repeatable
# from run to run (full repeatability also needs a deterministic objective).
SAMPLER_SEED = 42

study = optuna.create_study(
    # TPESampler is the sampler Optuna uses by default; it is passed
    # explicitly here only so that it can be seeded.
    sampler=optuna.samplers.TPESampler(seed=SAMPLER_SEED),
    # Pruner using the median stopping rule: prune if the trial's best
    # intermediate result is worse than the median of intermediate results of
    # previous trials at the same step.
    pruner=optuna.pruners.MedianPruner(
        # n_startup_trials - pruning is disabled until the given number of
        # trials finish in the same study (5 is also the default).
        n_startup_trials=5,
        # n_warmup_steps - pruning is disabled until the trial exceeds the
        # given number of steps (the default is 0).
        n_warmup_steps=10,
    ),
    # The objective returns an accuracy, so larger is better.
    direction="maximize",
)


def func(trial):
    """Bind the dataset to `objective` so Optuna can call it with only a trial."""
    return objective(trial, data, target)


study.optimize(func, n_trials=100)

[32m[I 2021-09-18 18:16:54,622][0m A new study created in memory with name: no-name-91656330-2cd2-494f-b8f4-76d7229bc82e[0m
[32m[I 2021-09-18 18:16:54,648][0m Trial 0 finished with value: 0.951048951048951 and parameters: {'lambda_l1': 0.00042281538890241954, 'lambda_l2': 0.13408528834551092, 'num_leaves': 2, 'feature_fraction': 0.9100359925351242, 'bagging_fraction': 0.6020221115814497, 'bagging_freq': 3, 'min_child_samples': 83}. Best is trial 0 with value: 0.951048951048951.[0m
[32m[I 2021-09-18 18:16:54,697][0m Trial 1 finished with value: 0.965034965034965 and parameters: {'lambda_l1': 7.997944230061634e-07, 'lambda_l2': 8.564839081699405e-05, 'num_leaves': 248, 'feature_fraction': 0.7656811624516968, 'bagging_fraction': 0.6077217497535875, 'bagging_freq': 6, 'min_child_samples': 23}. Best is trial 1 with value: 0.965034965034965.[0m
[32m[I 2021-09-18 18:16:54,732][0m Trial 2 finished with value: 0.9440559440559441 and parameters: {'lambda_l1': 2.116978327079616e-06, 'l

In [8]:
# Report the outcome of the study, one item per line:
#   1. the best objective value reached by any trial,
#   2. the hyperparameters of the trial that reached it.
print(study.best_value, study.best_params, sep="\n")


0.993006993006993
{'lambda_l1': 5.993215913610334e-06, 'lambda_l2': 4.4362869026817765e-05, 'num_leaves': 235, 'feature_fraction': 0.7072297993919034, 'bagging_fraction': 0.6510799386579774, 'bagging_freq': 1, 'min_child_samples': 5}
