In [8]:
import numpy as np
import optuna
import lightgbm as lgb
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

In [9]:
# Binary classification problem: load the breast-cancer dataset as a
# (features, labels) pair.
data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
# Show the dimensions of the feature matrix.
print(data.shape)

(569, 30)


In [10]:
# The first positional argument must be the Optuna trial.
def objective(trial, data, target, random_state=42):
    """Train one LightGBM binary classifier and return its hold-out accuracy.

    Args:
        trial: Optuna trial used to sample the hyperparameters below.
        data: Feature matrix handed to ``train_test_split``.
        target: Labels aligned with ``data``.
        random_state: Seed forwarded to ``train_test_split``. Defaults to a
            fixed value so every trial is scored on the same 75/25 split;
            pass ``None`` to draw a fresh random split on each call.

    Returns:
        Accuracy of the rounded predictions on the 25% validation split.
    """
    # Keep the split identical across trials: with an unseeded split, score
    # differences between trials mix hyperparameter quality with split luck.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.25, random_state=random_state
    )
    dtrain = lgb.Dataset(train_x, label=train_y)

    # Fixed settings plus the search space sampled from the trial.
    # Note: "metric" is only a LightGBM setting here; the value returned to
    # Optuna is the accuracy computed at the end of this function.
    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        # Regularisation strengths are searched on a log scale.
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    # Cross-validation could also be performed here instead of a single split.
    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(valid_x)
    # Round the model output to the nearest integer to obtain 0/1 labels.
    pred_labels = np.around(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

In [11]:
# Maximise the objective (accuracy). The TPE sampler is seeded so that the
# sampler's own random draws are repeatable from one run to the next.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)


def func(trial):
    """Bind the dataset so Optuna can call the objective with only a trial."""
    return objective(trial, data, target)


study.optimize(func, n_trials=100)

[32m[I 2021-09-18 18:08:47,506][0m A new study created in memory with name: no-name-c10caea5-1dd3-4bbb-ae6c-34942de0e17f[0m
[32m[I 2021-09-18 18:08:47,520][0m Trial 0 finished with value: 0.9300699300699301 and parameters: {'lambda_l1': 5.675834596634304e-07, 'lambda_l2': 1.0011856405278337, 'num_leaves': 67, 'feature_fraction': 0.8310439132276943, 'bagging_fraction': 0.4275888039757475, 'bagging_freq': 7, 'min_child_samples': 67}. Best is trial 0 with value: 0.9300699300699301.[0m
[32m[I 2021-09-18 18:08:47,550][0m Trial 1 finished with value: 0.972027972027972 and parameters: {'lambda_l1': 1.8543911147991013e-07, 'lambda_l2': 1.2505699236880075, 'num_leaves': 237, 'feature_fraction': 0.7446835580561992, 'bagging_fraction': 0.7219321158928171, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 1 with value: 0.972027972027972.[0m
[32m[I 2021-09-18 18:08:47,573][0m Trial 2 finished with value: 0.9440559440559441 and parameters: {'lambda_l1': 0.005629420531695931, 'lamb

In [12]:
# Best objective value reached by any trial
print(study.best_value)

# Hyperparameters of the trial that produced the best value
print(study.best_params)

0.993006993006993
{'lambda_l1': 0.0003941552022533914, 'lambda_l2': 1.1321651347121915e-06, 'num_leaves': 31, 'feature_fraction': 0.7523348778238667, 'bagging_fraction': 0.5169975361622172, 'bagging_freq': 7, 'min_child_samples': 81}
