# About:
- this notebooks:
    - optimizes parameters with Optuna with a pruner that observes intermediate results and stop unpromising trials.
    - Approach:
        1. Fix at a high-ish learning rate and at a fixed iteration
        2. perform Hyperparameter Search
        3. Using the best parameters found, reduce learning rate and find the best iteration using early stopping
- insights:
    - **the above approach is effective** - gains are usually seen in step 3

In [1]:
import lightgbm as lgb
import optuna 
from lightgbm import Dataset
from optuna import Trial, visualization
from optuna.samplers import TPESampler

### prepare data

In [2]:
label_colnames = ['h1n1_vaccine', 'seasonal_vaccine']
X = pd.read_csv(r"C:\Users\tanch\Documents\Coding Competitions\DataDriven\Flu Shot Learning\local\data\cleaned_train_set.csv",index_col = "respondent_id")
y = X[label_colnames[1]]
X = X.drop(label_colnames,axis=1)

<IPython.core.display.Javascript object>

In [3]:
# boosting libraries have Dataset class that has memory and speed optimizations
dtrain = Dataset(X, label=y)

# Objective Function
- should take an input Trial instance and return a score

In [4]:
def objective(trial):

    # n_estimators and learning rate are closely tight and will be tuned afterwards
    param = {
        "objective": "binary",
        "learning_rate":0.1,
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "max_depth" : trial.suggest_int("max_depth", 4, 10),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
        "subsample": trial.suggest_float("subsample", 0.4, 1.0),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
    }

    # Add a callback for pruning.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
    
    cv_scores = lgb.cv(param, dtrain, 1000, nfold  = 3, early_stopping_rounds = 20,verbose_eval=False, 
                       callbacks=[pruning_callback])
    
    return cv_scores['auc-mean'][-1]      # last recorded cv score is the best score from early stopping


# create_study

In [5]:
study = optuna.create_study(
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="maximize"
)
study.optimize(objective, n_trials=1000)

[32m[I 2021-07-29 19:31:26,678][0m A new study created in memory with name: no-name-61e871c1-26dd-4068-aca6-d64731bf1d3e[0m
[32m[I 2021-07-29 19:31:27,722][0m Trial 0 finished with value: 0.8625741011202436 and parameters: {'max_depth': 6, 'colsample_bytree': 0.8448705189311658, 'subsample': 0.6665039870461622, 'min_child_samples': 91, 'lambda_l1': 0.0022255494064781878, 'lambda_l2': 3.987015787570781e-06}. Best is trial 0 with value: 0.8625741011202436.[0m
[32m[I 2021-07-29 19:31:28,705][0m Trial 1 finished with value: 0.8624438614770368 and parameters: {'max_depth': 8, 'colsample_bytree': 0.7743214679944873, 'subsample': 0.7322971164747668, 'min_child_samples': 93, 'lambda_l1': 0.29129123918541483, 'lambda_l2': 1.3069648221242424e-06}. Best is trial 0 with value: 0.8625741011202436.[0m
[32m[I 2021-07-29 19:31:29,656][0m Trial 2 finished with value: 0.8619219108269903 and parameters: {'max_depth': 10, 'colsample_bytree': 0.9943979148083687, 'subsample': 0.593235949279995, '

[32m[I 2021-07-29 19:31:55,361][0m Trial 59 finished with value: 0.863295636220638 and parameters: {'max_depth': 9, 'colsample_bytree': 0.5787318392516824, 'subsample': 0.576141312951802, 'min_child_samples': 44, 'lambda_l1': 0.006558765857258209, 'lambda_l2': 0.005703296631966593}. Best is trial 11 with value: 0.8634175138582297.[0m
[32m[I 2021-07-29 19:31:56,227][0m Trial 60 pruned. Trial was pruned at iteration 91.[0m
[32m[I 2021-07-29 19:31:56,729][0m Trial 61 pruned. Trial was pruned at iteration 40.[0m
[32m[I 2021-07-29 19:31:57,360][0m Trial 62 pruned. Trial was pruned at iteration 57.[0m
[32m[I 2021-07-29 19:31:57,889][0m Trial 63 pruned. Trial was pruned at iteration 41.[0m
[32m[I 2021-07-29 19:31:58,164][0m Trial 64 pruned. Trial was pruned at iteration 16.[0m
[32m[I 2021-07-29 19:31:58,679][0m Trial 65 pruned. Trial was pruned at iteration 40.[0m
[32m[I 2021-07-29 19:31:58,933][0m Trial 66 pruned. Trial was pruned at iteration 13.[0m
[32m[I 2021-07-2

[32m[I 2021-07-29 19:32:27,308][0m Trial 143 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:32:27,552][0m Trial 144 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:32:28,028][0m Trial 145 pruned. Trial was pruned at iteration 36.[0m
[32m[I 2021-07-29 19:32:28,583][0m Trial 146 pruned. Trial was pruned at iteration 40.[0m
[32m[I 2021-07-29 19:32:28,825][0m Trial 147 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:32:29,112][0m Trial 148 pruned. Trial was pruned at iteration 12.[0m
[32m[I 2021-07-29 19:32:29,489][0m Trial 149 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:32:29,739][0m Trial 150 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:32:30,294][0m Trial 151 pruned. Trial was pruned at iteration 40.[0m
[32m[I 2021-07-29 19:32:30,548][0m Trial 152 pruned. Trial was pruned at iteration 12.[0m
[32m[I 2021-07-29 19:32:31,059][0m Trial 153 pruned. Trial was prune

[32m[I 2021-07-29 19:33:00,828][0m Trial 223 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:33:01,985][0m Trial 224 finished with value: 0.8632381046599916 and parameters: {'max_depth': 10, 'colsample_bytree': 0.5439958219028861, 'subsample': 0.9448290421059372, 'min_child_samples': 41, 'lambda_l1': 0.017943148817174612, 'lambda_l2': 0.007468743581016311}. Best is trial 162 with value: 0.8634888365505201.[0m
[32m[I 2021-07-29 19:33:02,321][0m Trial 225 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:33:02,720][0m Trial 226 pruned. Trial was pruned at iteration 27.[0m
[32m[I 2021-07-29 19:33:02,977][0m Trial 227 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:33:03,365][0m Trial 228 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:33:04,056][0m Trial 229 pruned. Trial was pruned at iteration 57.[0m
[32m[I 2021-07-29 19:33:04,476][0m Trial 230 pruned. Trial was pruned at iteration 26.[0m
[32m

[32m[I 2021-07-29 19:33:33,025][0m Trial 309 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:33:33,402][0m Trial 310 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:33:33,855][0m Trial 311 pruned. Trial was pruned at iteration 26.[0m
[32m[I 2021-07-29 19:33:34,151][0m Trial 312 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:33:34,431][0m Trial 313 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:33:34,786][0m Trial 314 pruned. Trial was pruned at iteration 16.[0m
[32m[I 2021-07-29 19:33:35,008][0m Trial 315 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:33:35,299][0m Trial 316 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:33:35,675][0m Trial 317 pruned. Trial was pruned at iteration 19.[0m
[32m[I 2021-07-29 19:33:36,039][0m Trial 318 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:33:36,291][0m Trial 319 pruned. Trial was prune

[32m[I 2021-07-29 19:34:04,823][0m Trial 398 pruned. Trial was pruned at iteration 12.[0m
[32m[I 2021-07-29 19:34:05,122][0m Trial 399 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:05,397][0m Trial 400 pruned. Trial was pruned at iteration 12.[0m
[32m[I 2021-07-29 19:34:05,861][0m Trial 401 pruned. Trial was pruned at iteration 30.[0m
[32m[I 2021-07-29 19:34:06,227][0m Trial 402 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:34:06,488][0m Trial 403 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:06,747][0m Trial 404 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:07,041][0m Trial 405 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:07,351][0m Trial 406 pruned. Trial was pruned at iteration 16.[0m
[32m[I 2021-07-29 19:34:07,977][0m Trial 407 pruned. Trial was pruned at iteration 44.[0m
[32m[I 2021-07-29 19:34:08,248][0m Trial 408 pruned. Trial was prune

[32m[I 2021-07-29 19:34:36,305][0m Trial 484 finished with value: 0.8632408956784854 and parameters: {'max_depth': 8, 'colsample_bytree': 0.5423135772889808, 'subsample': 0.754786212313595, 'min_child_samples': 36, 'lambda_l1': 0.000504841388522611, 'lambda_l2': 0.0005671425238979671}. Best is trial 162 with value: 0.8634888365505201.[0m
[32m[I 2021-07-29 19:34:36,549][0m Trial 485 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:36,951][0m Trial 486 pruned. Trial was pruned at iteration 26.[0m
[32m[I 2021-07-29 19:34:37,285][0m Trial 487 pruned. Trial was pruned at iteration 14.[0m
[32m[I 2021-07-29 19:34:37,631][0m Trial 488 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:34:37,897][0m Trial 489 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:38,158][0m Trial 490 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:34:38,464][0m Trial 491 pruned. Trial was pruned at iteration 13.[0m
[32m[

[32m[I 2021-07-29 19:35:05,417][0m Trial 567 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:05,798][0m Trial 568 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:35:06,167][0m Trial 569 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:06,474][0m Trial 570 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:06,933][0m Trial 571 pruned. Trial was pruned at iteration 30.[0m
[32m[I 2021-07-29 19:35:07,378][0m Trial 572 pruned. Trial was pruned at iteration 22.[0m
[32m[I 2021-07-29 19:35:07,658][0m Trial 573 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:07,945][0m Trial 574 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:08,213][0m Trial 575 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:08,573][0m Trial 576 pruned. Trial was pruned at iteration 17.[0m
[32m[I 2021-07-29 19:35:08,877][0m Trial 577 pruned. Trial was prune

[32m[I 2021-07-29 19:35:35,176][0m Trial 653 pruned. Trial was pruned at iteration 19.[0m
[32m[I 2021-07-29 19:35:35,494][0m Trial 654 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:35,784][0m Trial 655 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:36,112][0m Trial 656 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:35:36,388][0m Trial 657 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:36,715][0m Trial 658 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:37,004][0m Trial 659 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:37,280][0m Trial 660 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:35:37,696][0m Trial 661 pruned. Trial was pruned at iteration 19.[0m
[32m[I 2021-07-29 19:35:38,023][0m Trial 662 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:35:38,411][0m Trial 663 pruned. Trial was prune

[32m[I 2021-07-29 19:36:05,189][0m Trial 742 pruned. Trial was pruned at iteration 13.[0m
[32m[I 2021-07-29 19:36:05,526][0m Trial 743 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:36:05,851][0m Trial 744 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:06,209][0m Trial 745 pruned. Trial was pruned at iteration 17.[0m
[32m[I 2021-07-29 19:36:06,476][0m Trial 746 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:06,809][0m Trial 747 pruned. Trial was pruned at iteration 14.[0m
[32m[I 2021-07-29 19:36:07,130][0m Trial 748 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:07,514][0m Trial 749 pruned. Trial was pruned at iteration 19.[0m
[32m[I 2021-07-29 19:36:07,817][0m Trial 750 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:08,152][0m Trial 751 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:08,543][0m Trial 752 pruned. Trial was prune

[32m[I 2021-07-29 19:36:36,600][0m Trial 831 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:37,009][0m Trial 832 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:36:37,329][0m Trial 833 pruned. Trial was pruned at iteration 13.[0m
[32m[I 2021-07-29 19:36:37,670][0m Trial 834 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:37,954][0m Trial 835 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:38,267][0m Trial 836 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:38,564][0m Trial 837 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:36:39,051][0m Trial 838 pruned. Trial was pruned at iteration 19.[0m
[32m[I 2021-07-29 19:36:39,474][0m Trial 839 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 19:36:39,907][0m Trial 840 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:36:40,172][0m Trial 841 pruned. Trial was prune

[32m[I 2021-07-29 19:37:07,086][0m Trial 917 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:07,457][0m Trial 918 pruned. Trial was pruned at iteration 18.[0m
[32m[I 2021-07-29 19:37:07,901][0m Trial 919 pruned. Trial was pruned at iteration 20.[0m
[32m[I 2021-07-29 19:37:08,202][0m Trial 920 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:08,505][0m Trial 921 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:08,895][0m Trial 922 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:09,215][0m Trial 923 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:09,575][0m Trial 924 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 19:37:09,927][0m Trial 925 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 19:37:10,300][0m Trial 926 pruned. Trial was pruned at iteration 17.[0m
[32m[I 2021-07-29 19:37:10,588][0m Trial 927 pruned. Trial was prune

In [6]:
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 1000
Best trial:
  Value: 0.8634888365505201
  Params: 
    max_depth: 8
    colsample_bytree: 0.5408815427763269
    subsample: 0.8466638436024809
    min_child_samples: 37
    lambda_l1: 0.0001634213220638466
    lambda_l2: 0.0019856133446647076


## Optimize n_estimators and learning rate
- set to high iterations and low learning rate

In [10]:
param = trial.params
param.update({"learning_rate":0.01,
              "objective": "binary",
              "metric": "auc"})
param

{'max_depth': 8,
 'colsample_bytree': 0.5408815427763269,
 'subsample': 0.8466638436024809,
 'min_child_samples': 37,
 'lambda_l1': 0.0001634213220638466,
 'lambda_l2': 0.0019856133446647076,
 'learning_rate': 0.01,
 'objective': 'binary',
 'metric': 'auc'}

In [11]:
cv_scores = lgb.cv(param, dtrain, 10000, nfold  = 5, early_stopping_rounds = 50,verbose_eval=False)['auc-mean']

In [12]:
print(f"Best Iteration {len(cv_scores)} at {cv_scores[-1]}")

Best Iteration 896 at 0.8644408064138457


# Train and export

In [16]:
best_param = {
    'max_depth': 8,
    'colsample_bytree': 0.5408815427763269,
    'subsample': 0.8466638436024809,
    'min_child_samples': 37,
    'lambda_l1': 0.0001634213220638466,
    'lambda_l2': 0.0019856133446647076,
    'learning_rate': 0.01,
    'objective': 'binary',
    'metric': 'auc',
    "num_iterations":896
}

In [17]:
model = lgb.LGBMClassifier()
model.set_params(**best_param)
model.fit(X,y)

LGBMClassifier(boosting_type='gbdt', class_weight=None,
               colsample_bytree=0.5408815427763269, importance_type='split',
               lambda_l1=0.0001634213220638466, lambda_l2=0.0019856133446647076,
               learning_rate=0.01, max_depth=8, metric='auc',
               min_child_samples=37, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_iterations=896, num_leaves=31,
               objective='binary', random_state=None, reg_alpha=0.0,
               reg_lambda=0.0, silent=True, subsample=0.8466638436024809,
               subsample_for_bin=200000, subsample_freq=0)

In [18]:
import joblib
joblib.dump(model, r"C:\Users\tanch\Documents\Coding Competitions\DataDriven\Flu Shot Learning\local\model\lightgbm seasonsal_vaccine 0.pkl")
# gbm_pickle = joblib.load(r"C:\Users\tanch\Documents\Coding Competitions\DataDriven\Flu Shot Learning\local\model\lightgbm 0.pkl")

['C:\\Users\\tanch\\Documents\\Coding Competitions\\DataDriven\\Flu Shot Learning\\local\\model\\lightgbm seasonsal_vaccine 0.pkl']