In [87]:
import numpy as np
import optuna
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [88]:
# Binary classification problem: load the breast-cancer dataset as a
# (features, labels) pair.
data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)

# Show the feature-matrix dimensions as (rows, columns).
n_samples, n_features = data.shape
print((n_samples, n_features))

(569, 30)


In [89]:
# The first positional parameter must be the trial object.
def objective(trial, data, target, random_state=42):
    """Train one XGBoost model with trial-suggested hyperparameters and score it.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_float`` and
            ``suggest_int`` (an Optuna trial).
        data: Feature matrix handed to ``train_test_split``.
        target: Binary labels aligned with ``data``.
        random_state: Seed forwarded to ``train_test_split``. The fixed default
            scores every trial on the same hold-out set, so trial values are
            directly comparable. Pass ``None`` to draw a fresh random split on
            every call (the previous behaviour).

    Returns:
        Accuracy of the trained booster on the 25% validation split.
    """
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.25, random_state=random_state
    )
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "tree_method": "exact",
        # Categorical hyperparameter: `name` is the parameter name,
        # `choices` the candidate values.
        "booster": trial.suggest_categorical(
            name="booster", choices=["gbtree", "gblinear", "dart"]
        ),
        # Float hyperparameter searched between `low` and `high`.
        # With log=True the value is sampled in the log domain, otherwise in
        # the linear domain.
        # NOTE(review): the original note says `high` is excluded; endpoint
        # inclusion may differ between Optuna versions -- confirm.
        "lambda": trial.suggest_float(name="lambda", low=1e-8, high=1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }

    # Tree-specific hyperparameters are only suggested for tree boosters.
    if param["booster"] in ["gbtree", "dart"]:
        # `step` discretizes the integer range: 3, 5, 7, 9.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        # NOTE(review): XGBoost documents grow_policy as supported only for
        # tree_method "hist"/"approx"; with "exact" this search dimension is
        # presumably a no-op -- confirm before relying on it.
        param["grow_policy"] = trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"]
        )

    # DART adds its own dropout-related hyperparameters on top.
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    # Cross-validation could be performed here instead of a single split.
    bst = xgb.train(param, dtrain)
    preds = bst.predict(dvalid)
    # Round predicted probabilities to the nearest class label (0 or 1).
    pred_labels = np.rint(preds)
    # Return a single evaluation metric for the study to optimize.
    return sklearn.metrics.accuracy_score(valid_y, pred_labels)

In [90]:
# The default is direction="minimize" (the objective is minimized); accuracy
# should be maximized instead.
study = optuna.create_study(direction="maximize")


def func(trial):
    """Bind the dataset so the study can call the objective with only `trial`."""
    return objective(trial, data, target)


study.optimize(
    func,
    # NOTE(review): -1 presumably means "use all available CPU cores" -- confirm.
    n_jobs=-1,
    # Number of trials to run.
    n_trials=100,
    # Stop the optimization after this many seconds; the default timeout=None
    # means no time limit.
    timeout=600,
)

[32m[I 2021-09-18 18:46:47,294][0m A new study created in memory with name: no-name-2af3af7d-a418-4ebf-91c5-9b58da0d81c0[0m
[32m[I 2021-09-18 18:46:47,372][0m Trial 0 finished with value: 0.8601398601398601 and parameters: {'booster': 'gblinear', 'lambda': 4.902955253031039e-05, 'alpha': 0.0029160557364711266, 'subsample': 0.3481567275053239, 'colsample_bytree': 0.7766045626335871}. Best is trial 0 with value: 0.8601398601398601.[0m
[32m[I 2021-09-18 18:46:47,386][0m Trial 5 finished with value: 0.9090909090909091 and parameters: {'booster': 'gblinear', 'lambda': 2.5246689512293522e-06, 'alpha': 1.2919241074245777e-07, 'subsample': 0.9607942965364187, 'colsample_bytree': 0.9983025282598896}. Best is trial 5 with value: 0.9090909090909091.[0m
[32m[I 2021-09-18 18:46:47,392][0m Trial 3 finished with value: 0.9090909090909091 and parameters: {'booster': 'gblinear', 'lambda': 6.212058435568517e-07, 'alpha': 1.218025692017841e-05, 'subsample': 0.9627615544362584, 'colsample_bytre

In [91]:
# Report the best objective value first, then the hyperparameters that
# produced it, one per line.
print(study.best_value, study.best_params, sep="\n")

0.993006993006993
{'booster': 'gbtree', 'lambda': 5.7018439180325875e-06, 'alpha': 0.000276768329263099, 'subsample': 0.7929088020078279, 'colsample_bytree': 0.2502794583397271, 'max_depth': 5, 'min_child_weight': 2, 'eta': 0.18853333134518047, 'gamma': 4.7803845381287927e-08, 'grow_policy': 'lossguide'}
