In [1]:
import warnings
import pprint
import logging
from collections import namedtuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import roc_auc_score

import lightgbm as lgb
import optuna

%matplotlib inline

logging.getLogger("lightgbm").setLevel(logging.DEBUG)   # turn off lightgbm messages

In [2]:
# load data
X, y = load_breast_cancer(return_X_y=True)

# # convert y to binary (predict 2 as the positive target)
# y = np.where(y == 1, 0, y)
# y = np.where(y == 2, 1, y)

In [3]:
# split data for early stopping demonstration
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.30,
    random_state=17,
    stratify=y,
)

In [4]:
# create param space
STATIC_PARAMS = {
    "boosting_type": "goss",
    "objective": "binary",
    "random_state": 13,
    "subsample_freq": 1,
    "metric": ["auc", "binary_logloss"],
    "first_metric_only": True,
    "num_boost_round": 200,
    "early_stopping_rounds": 10,
    "verbose": -1,   # turns off lgb logging messages durng BHO
    "n_jobs": None,
}

In [5]:
# define objective function to be minimize (can also choose maximized) for optuna
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

def objective(trial):
    # suggested params for trial
    params = {
        "num_leaves": trial.suggest_int("num_leaves", 2, 16),
        "max_depth": trial.suggest_int("max_depth", 1, 5),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.0001, 0.1),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        "extra_trees": trial.suggest_categorical("extra_trees", [True, False]),
    }
    params.update(**STATIC_PARAMS)
    
    results_dict = dict()
    model = lgb.train(
        params,
        train_data,
        valid_sets=test_data,
        evals_result=results_dict,
        verbose_eval=False,
    )
    return -results_dict["valid_0"]["auc"][-1]

In [6]:
# create study and train - note, can choose different types of hyperparameter optimization here
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=50)

[32m[I 2022-03-01 11:38:46,592][0m A new study created in memory with name: no-name-7db7a67a-cf00-4bee-a5ef-a7a01a46a943[0m
[32m[I 2022-03-01 11:38:46,629][0m Trial 0 finished with value: -0.984375 and parameters: {'num_leaves': 12, 'max_depth': 1, 'learning_rate': 0.020870964219869344, 'colsample_bytree': 0.4615740598521809, 'min_child_weight': 15, 'extra_trees': True}. Best is trial 0 with value: -0.984375.[0m
[32m[I 2022-03-01 11:38:46,666][0m Trial 1 finished with value: -0.9912383177570093 and parameters: {'num_leaves': 13, 'max_depth': 1, 'learning_rate': 0.00768544316582429, 'colsample_bytree': 0.7944838385987152, 'min_child_weight': 15, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:46,705][0m Trial 2 finished with value: -0.9890478971962616 and parameters: {'num_leaves': 15, 'max_depth': 3, 'learning_rate': 0.005005753289798976, 'colsample_bytree': 0.20345836675517914, 'min_child_weight': 8, 'extra_trees': True}. Be

[32m[I 2022-03-01 11:38:46,891][0m Trial 6 finished with value: -0.9869304906542056 and parameters: {'num_leaves': 6, 'max_depth': 2, 'learning_rate': 0.05580834148417595, 'colsample_bytree': 0.2768464061695515, 'min_child_weight': 4, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:46,946][0m Trial 7 finished with value: -0.9759783878504673 and parameters: {'num_leaves': 4, 'max_depth': 5, 'learning_rate': 0.00032896748198309476, 'colsample_bytree': 0.5566642949954341, 'min_child_weight': 15, 'extra_trees': False}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:46,988][0m Trial 8 finished with value: -0.9334112149532711 and parameters: {'num_leaves': 10, 'max_depth': 1, 'learning_rate': 0.00015204062887271474, 'colsample_bytree': 0.8526741042690991, 'min_child_weight': 10, 'extra_trees': False}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,046][0m Trial 9 finished

[32m[I 2022-03-01 11:38:47,289][0m Trial 13 finished with value: -0.9878796728971962 and parameters: {'num_leaves': 14, 'max_depth': 1, 'learning_rate': 0.0015522945717215634, 'colsample_bytree': 0.7259856474358685, 'min_child_weight': 13, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,349][0m Trial 14 finished with value: -0.9832067757009346 and parameters: {'num_leaves': 12, 'max_depth': 5, 'learning_rate': 0.0017471679377186627, 'colsample_bytree': 0.10055354348561818, 'min_child_weight': 7, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,446][0m Trial 15 finished with value: -0.9836448598130841 and parameters: {'num_leaves': 8, 'max_depth': 2, 'learning_rate': 0.010006661920077823, 'colsample_bytree': 0.7257957957929726, 'min_child_weight': 13, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,510][0m Trial 16 finis

[32m[I 2022-03-01 11:38:47,787][0m Trial 20 finished with value: -0.983498831775701 and parameters: {'num_leaves': 9, 'max_depth': 2, 'learning_rate': 0.002681716628653119, 'colsample_bytree': 0.8341866424399089, 'min_child_weight': 17, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,859][0m Trial 21 finished with value: -0.9899240654205608 and parameters: {'num_leaves': 11, 'max_depth': 1, 'learning_rate': 0.001699500965993359, 'colsample_bytree': 0.9990824017649456, 'min_child_weight': 12, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:47,937][0m Trial 22 finished with value: -0.990946261682243 and parameters: {'num_leaves': 12, 'max_depth': 1, 'learning_rate': 0.0011134629968892497, 'colsample_bytree': 0.9177775048966019, 'min_child_weight': 13, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:48,009][0m Trial 23 finished

[32m[I 2022-03-01 11:38:48,194][0m Trial 26 finished with value: -0.9718165887850467 and parameters: {'num_leaves': 13, 'max_depth': 2, 'learning_rate': 0.0010155321561135244, 'colsample_bytree': 0.9159380514384292, 'min_child_weight': 17, 'extra_trees': False}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:48,264][0m Trial 27 finished with value: -0.9891939252336449 and parameters: {'num_leaves': 8, 'max_depth': 1, 'learning_rate': 0.0028583965352231033, 'colsample_bytree': 0.8032739792967529, 'min_child_weight': 11, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:48,328][0m Trial 28 finished with value: -0.9902161214953271 and parameters: {'num_leaves': 11, 'max_depth': 1, 'learning_rate': 0.023785494859939332, 'colsample_bytree': 0.936467293476001, 'min_child_weight': 17, 'extra_trees': True}. Best is trial 1 with value: -0.9912383177570093.[0m
[32m[I 2022-03-01 11:38:48,399][0m Trial 29 finis

[32m[I 2022-03-01 11:38:48,673][0m Trial 33 finished with value: -0.9872225467289719 and parameters: {'num_leaves': 15, 'max_depth': 1, 'learning_rate': 0.0005032985882303097, 'colsample_bytree': 0.7700627211469836, 'min_child_weight': 9, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:48,733][0m Trial 34 finished with value: -0.9881717289719626 and parameters: {'num_leaves': 14, 'max_depth': 1, 'learning_rate': 0.005566255072264108, 'colsample_bytree': 0.947451215897313, 'min_child_weight': 16, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:48,810][0m Trial 35 finished with value: -0.9823306074766355 and parameters: {'num_leaves': 12, 'max_depth': 2, 'learning_rate': 0.0010921071534417456, 'colsample_bytree': 0.6607124266189637, 'min_child_weight': 15, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:48,871][0m Trial 36 finish

[32m[I 2022-03-01 11:38:49,073][0m Trial 39 finished with value: -0.9872225467289719 and parameters: {'num_leaves': 9, 'max_depth': 1, 'learning_rate': 0.0007443600863611142, 'colsample_bytree': 0.7626841621245805, 'min_child_weight': 11, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:49,144][0m Trial 40 finished with value: -0.9836448598130841 and parameters: {'num_leaves': 7, 'max_depth': 2, 'learning_rate': 0.002343530995503057, 'colsample_bytree': 0.8497773331454559, 'min_child_weight': 9, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:49,209][0m Trial 41 finished with value: -0.9906542056074766 and parameters: {'num_leaves': 13, 'max_depth': 1, 'learning_rate': 0.0003986815638916639, 'colsample_bytree': 0.90612030367298, 'min_child_weight': 14, 'extra_trees': True}. Best is trial 32 with value: -0.991311331775701.[0m
[32m[I 2022-03-01 11:38:49,281][0m Trial 42 finished 

[32m[I 2022-03-01 11:38:49,522][0m Trial 46 finished with value: -0.9891939252336449 and parameters: {'num_leaves': 12, 'max_depth': 1, 'learning_rate': 0.00015684715541355605, 'colsample_bytree': 0.8036155316273024, 'min_child_weight': 12, 'extra_trees': True}. Best is trial 44 with value: -0.991822429906542.[0m
[32m[I 2022-03-01 11:38:49,587][0m Trial 47 finished with value: -0.9877336448598131 and parameters: {'num_leaves': 5, 'max_depth': 3, 'learning_rate': 0.0011669181914929277, 'colsample_bytree': 0.6975501189961641, 'min_child_weight': 5, 'extra_trees': True}. Best is trial 44 with value: -0.991822429906542.[0m
[32m[I 2022-03-01 11:38:49,649][0m Trial 48 finished with value: -0.9883177570093458 and parameters: {'num_leaves': 10, 'max_depth': 1, 'learning_rate': 0.004199670473525619, 'colsample_bytree': 0.5543344222984388, 'min_child_weight': 13, 'extra_trees': True}. Best is trial 44 with value: -0.991822429906542.[0m
[32m[I 2022-03-01 11:38:49,714][0m Trial 49 finis

In [7]:
study.best_params

{'num_leaves': 10,
 'max_depth': 1,
 'learning_rate': 0.00014610484805477867,
 'colsample_bytree': 0.999727693942414,
 'min_child_weight': 12,
 'extra_trees': True}

In [8]:
study.best_trial

FrozenTrial(number=44, values=[-0.991822429906542], datetime_start=datetime.datetime(2022, 3, 1, 11, 38, 49, 351874), datetime_complete=datetime.datetime(2022, 3, 1, 11, 38, 49, 406772), params={'num_leaves': 10, 'max_depth': 1, 'learning_rate': 0.00014610484805477867, 'colsample_bytree': 0.999727693942414, 'min_child_weight': 12, 'extra_trees': True}, distributions={'num_leaves': IntUniformDistribution(high=16, low=2, step=1), 'max_depth': IntUniformDistribution(high=5, low=1, step=1), 'learning_rate': LogUniformDistribution(high=0.1, low=0.0001), 'colsample_bytree': UniformDistribution(high=1.0, low=0.1), 'min_child_weight': IntUniformDistribution(high=20, low=1, step=1), 'extra_trees': CategoricalDistribution(choices=(True, False))}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=44, state=TrialState.COMPLETE, value=None)

In [9]:
# from stack overflow for gpus

# differing script files (optimize.py)
# at top of script after imports
# DEVICE = sys.argv[1]    # "cuda: " + sys.argv[1] for pytorch
# def objective:
#     ...

# if __name__ == "__main__":
#     study = optuna.load_study(study_name="distributed-example", storage="sqlite:///example.db")
#.    study.optimize(objective, n_trials=100)

# in terminal
# optuna create-study --study-name "distributed-example" --storage "sqlite:///example.db"

# then for every gpu
# python optimize.py 0
# python optimize.py 1
# etc.

# see optuna docs for visualizations

# also see use of dask in github issues with shared queue (or try multiprocessing) to manage gpu ids