# About:
- this notebook:
    - optimizes XGBoost hyperparameters with Optuna, using a pruner that observes intermediate results and stops unpromising trials early.
    - Approach:
        1. Fix the learning rate at a high-ish value and cap the number of boosting iterations
        2. Perform the hyperparameter search
        3. Using the best parameters found, reduce the learning rate and find the best iteration using early stopping
- insights:
    - **the above approach is effective** - gains are usually seen in step 3

In [1]:
import xgboost as xgb

In [2]:
import optuna
import pandas as pd
from optuna import Trial, visualization
from optuna.samplers import TPESampler

### prepare data
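- for catboost, we specify nominal features as cat_features as it was found to improve performance
- note: catboost expects these cat_features to be strings!
- NOTE: the two points above describe CatBoost; this notebook trains XGBoost and does not set any cat_features below (TODO: confirm whether these notes still apply here)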

In [2]:
label_colnames = ['h1n1_vaccine', 'seasonal_vaccine']

# Read the cleaned training set into its own frame (pandas is imported
# explicitly in the imports cell) so that the features and the target are
# both derived from it instead of overwriting the loaded data.
train_df = pd.read_csv(
    r"C:\Users\tanch\Documents\Coding Competitions\DataDriven\Flu Shot Learning\local\data\cleaned_train_set.csv",
    index_col="respondent_id",
)

# Only the first label (h1n1_vaccine) is modelled in this notebook.
y = train_df[label_colnames[0]]
# Both label columns are removed so that neither target ends up in the features.
X = train_df.drop(columns=label_colnames)

<IPython.core.display.Javascript object>

In [3]:
# Wrap the features and the target in XGBoost's own DMatrix container; boosting
# libraries ship such Dataset classes for their memory and speed optimizations.
dtrain = xgb.DMatrix(data=X, label=y)

# Objective Function
- the objective function takes a Trial instance as input and returns a score

In [7]:
def objective(trial):
    """Score one Optuna trial by its 3-fold cross-validated AUC.

    Samples the tree, sampling and regularisation hyperparameters from
    ``trial`` and cross-validates them with ``xgb.cv`` on the notebook-level
    ``dtrain``. The learning rate is held at 0.1 here.

    Args:
        trial: the Optuna trial used to suggest hyperparameter values and to
            receive the intermediate ``test-auc`` values for pruning.

    Returns:
        The maximum of the ``test-auc-mean`` column of the CV history.
    """
    # n_estimators and learning rate are tightly coupled and are tuned afterwards
    param = {
        'learning_rate': 0.1,
        "verbosity": 0,
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'gamma': trial.suggest_int('gamma', 0, 5),
        'min_child_weight': trial.suggest_int('min_child_weight', 0, 20),
        # Lower bound is 1 (XGBoost's default): a value of 0 would give the
        # positive class zero weight, so such a trial could only be wasted.
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 20),
        'subsample': trial.suggest_float('subsample', 0.4, 1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
    }

    # Hands the running "test-auc" to Optuna so the study's pruner can stop
    # this trial before all boosting rounds have been run.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "test-auc")

    # Up to 1000 rounds, stopping early after 20 rounds without improvement.
    cv_scores = xgb.cv(
        param,
        dtrain,
        1000,
        nfold=3,
        metrics='auc',
        early_stopping_rounds=20,
        verbose_eval=False,
        callbacks=[pruning_callback],
    )
    return cv_scores['test-auc-mean'].max()

# create_study

In [8]:
# Maximise the cross-validated AUC returned by `objective`.
# The TPE sampler is seeded so that the sequence of suggested hyperparameters
# can be reproduced on a fresh kernel instead of changing on every run.
study = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42),
    # Median pruning with the first 10 reported steps of each trial as warm-up.
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),
)
study.optimize(objective, n_trials=300)

[32m[I 2021-07-29 21:29:43,058][0m A new study created in memory with name: no-name-f7606bf2-216d-4980-af43-8a0445c1c8c0[0m
[32m[I 2021-07-29 21:29:49,315][0m Trial 0 finished with value: 0.8605349999999999 and parameters: {'max_depth': 8, 'gamma': 5, 'min_child_weight': 5, 'scale_pos_weight': 11, 'subsample': 0.8017705808006281, 'colsample_bytree': 0.48834021416929474, 'reg_alpha': 0.01980933036171584, 'reg_lambda': 0.03484180859432258}. Best is trial 0 with value: 0.8605349999999999.[0m
[32m[I 2021-07-29 21:29:52,555][0m Trial 1 finished with value: 0.8583776666666667 and parameters: {'max_depth': 8, 'gamma': 1, 'min_child_weight': 11, 'scale_pos_weight': 5, 'subsample': 0.6355431286866906, 'colsample_bytree': 0.5678040885414659, 'reg_alpha': 0.05101788417864492, 'reg_lambda': 0.1472851049923175}. Best is trial 0 with value: 0.8605349999999999.[0m
[32m[I 2021-07-29 21:30:06,416][0m Trial 2 finished with value: 0.8651923333333333 and parameters: {'max_depth': 7, 'gamma': 5,

[32m[I 2021-07-29 21:31:38,710][0m Trial 55 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:31:41,901][0m Trial 56 pruned. Trial was pruned at iteration 64.[0m
[32m[I 2021-07-29 21:31:42,230][0m Trial 57 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:31:42,736][0m Trial 58 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:31:43,332][0m Trial 59 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:31:48,213][0m Trial 60 finished with value: 0.8655076666666668 and parameters: {'max_depth': 7, 'gamma': 2, 'min_child_weight': 11, 'scale_pos_weight': 3, 'subsample': 0.8920453341042848, 'colsample_bytree': 0.7566292668734416, 'reg_alpha': 4.971163084932676e-07, 'reg_lambda': 2.368713842364587}. Best is trial 24 with value: 0.86679.[0m
[32m[I 2021-07-29 21:31:54,299][0m Trial 61 pruned. Trial was pruned at iteration 115.[0m
[32m[I 2021-07-29 21:31:55,021][0m Trial 62 pruned. Trial was pruned at iteration 

[32m[I 2021-07-29 21:33:17,243][0m Trial 114 finished with value: 0.8661533333333332 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 12, 'scale_pos_weight': 2, 'subsample': 0.8681427553499368, 'colsample_bytree': 0.5923471907660818, 'reg_alpha': 1.678413658597575e-06, 'reg_lambda': 6.44333640622851}. Best is trial 24 with value: 0.86679.[0m
[32m[I 2021-07-29 21:33:17,878][0m Trial 115 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:33:21,796][0m Trial 116 finished with value: 0.865476 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 12, 'scale_pos_weight': 2, 'subsample': 0.8168999654009725, 'colsample_bytree': 0.5791406516774498, 'reg_alpha': 4.304446321535472e-07, 'reg_lambda': 5.758675145185064}. Best is trial 24 with value: 0.86679.[0m
[32m[I 2021-07-29 21:33:22,113][0m Trial 117 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:33:26,426][0m Trial 118 finished with value: 0.8657863333333333 and parame

[32m[I 2021-07-29 21:35:00,343][0m Trial 166 finished with value: 0.8660896666666668 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 10, 'scale_pos_weight': 2, 'subsample': 0.8325148295111899, 'colsample_bytree': 0.5880521169345708, 'reg_alpha': 0.005346150770493082, 'reg_lambda': 6.9889658096644105}. Best is trial 24 with value: 0.86679.[0m
[32m[I 2021-07-29 21:35:01,028][0m Trial 167 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:35:02,687][0m Trial 168 pruned. Trial was pruned at iteration 29.[0m
[32m[I 2021-07-29 21:35:07,065][0m Trial 169 finished with value: 0.8665453333333334 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 10, 'scale_pos_weight': 3, 'subsample': 0.8414944133586016, 'colsample_bytree': 0.5704911936062269, 'reg_alpha': 0.00666972737299255, 'reg_lambda': 9.94179371913655}. Best is trial 24 with value: 0.86679.[0m
[32m[I 2021-07-29 21:35:08,901][0m Trial 170 pruned. Trial was pruned at iteration 33.

[32m[I 2021-07-29 21:36:31,019][0m Trial 214 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:36:32,404][0m Trial 215 pruned. Trial was pruned at iteration 24.[0m
[32m[I 2021-07-29 21:36:33,078][0m Trial 216 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:36:35,348][0m Trial 217 pruned. Trial was pruned at iteration 44.[0m
[32m[I 2021-07-29 21:36:36,079][0m Trial 218 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:36:36,840][0m Trial 219 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:36:38,133][0m Trial 220 pruned. Trial was pruned at iteration 21.[0m
[32m[I 2021-07-29 21:36:41,007][0m Trial 221 pruned. Trial was pruned at iteration 54.[0m
[32m[I 2021-07-29 21:36:41,893][0m Trial 222 pruned. Trial was pruned at iteration 15.[0m
[32m[I 2021-07-29 21:36:46,660][0m Trial 223 finished with value: 0.8663963333333333 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 9, 'scale_p

[32m[I 2021-07-29 21:38:04,811][0m Trial 282 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:38:05,531][0m Trial 283 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:38:10,427][0m Trial 284 finished with value: 0.8665913333333334 and parameters: {'max_depth': 7, 'gamma': 1, 'min_child_weight': 10, 'scale_pos_weight': 2, 'subsample': 0.8231814669924657, 'colsample_bytree': 0.589112310701525, 'reg_alpha': 0.4169196643164091, 'reg_lambda': 9.71719876809718}. Best is trial 192 with value: 0.8671760000000001.[0m
[32m[I 2021-07-29 21:38:11,074][0m Trial 285 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:38:11,887][0m Trial 286 pruned. Trial was pruned at iteration 13.[0m
[32m[I 2021-07-29 21:38:12,549][0m Trial 287 pruned. Trial was pruned at iteration 10.[0m
[32m[I 2021-07-29 21:38:13,849][0m Trial 288 pruned. Trial was pruned at iteration 23.[0m
[32m[I 2021-07-29 21:38:18,306][0m Trial 289 finished with value: 0.8

In [15]:
# Summarise the study: how many trials it holds and what the best one looked like.
print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")

# Kept under the name `trial` because later cells read `trial.params`.
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for name, setting in trial.params.items():
    print(f"    {name}: {setting}")

Number of finished trials: 300
Best trial:
  Value: 0.8671760000000001
  Params: 
    max_depth: 7
    gamma: 1
    min_child_weight: 9
    scale_pos_weight: 3
    subsample: 0.8738492762052684
    colsample_bytree: 0.5555672546387187
    reg_alpha: 1.6530812668946973
    reg_lambda: 9.599740027835166


## Optimize n_estimators and learning rate
- set a high number of iterations and a low learning rate

In [16]:
# Build a fresh dict from the best trial's hyperparameters instead of updating
# `trial.params` in place, so the trial object keeps exactly what was searched.
# The learning rate is lowered to 0.01 for the final iteration search.
param = {
    **trial.params,
    "learning_rate": 0.01,
    "eval_metric": "auc",
    "use_label_encoder": False,
}
param

{'max_depth': 7,
 'gamma': 1,
 'min_child_weight': 9,
 'scale_pos_weight': 3,
 'subsample': 0.8738492762052684,
 'colsample_bytree': 0.5555672546387187,
 'reg_alpha': 1.6530812668946973,
 'reg_lambda': 9.599740027835166,
 'learning_rate': 0.01,
 'eval_metric': 'auc',
 'use_label_encoder': False}

In [11]:
# 5-fold CV with a budget of 10000 rounds and early stopping after 50 rounds;
# only the mean test AUC per round is kept.
cv_history = xgb.cv(
    param,
    dtrain,
    num_boost_round=10000,
    nfold=5,
    early_stopping_rounds=50,
    verbose_eval=False,
)
cv_scores = cv_history['test-auc-mean']

In [12]:
# The length of the CV history is reported as the best iteration, next to the
# highest mean test AUC it contains.
print("Best Iteration {} at {}".format(len(cv_scores), cv_scores.max()))

Best Iteration 808 at 0.8692228


# Train with best param and export

In [13]:
# Build a new dict rather than aliasing `param`: adding n_estimators to an
# alias would also add it to `param`, which the xgb.cv cell above passes to
# the native API on a re-run.
best_param = {**param, "n_estimators": len(cv_scores)}
best_param

{'max_depth': 7,
 'gamma': 1,
 'min_child_weight': 9,
 'scale_pos_weight': 3,
 'subsample': 0.8738492762052684,
 'colsample_bytree': 0.5555672546387187,
 'reg_alpha': 1.6530812668946973,
 'reg_lambda': 9.599740027835166,
 'learning_rate': 0.01,
 'eval_metric': 'auc',
 'n_estimators': 808}

In [14]:
# Build the classifier directly from the tuned parameters and fit it on the
# full training set.
model = xgb.XGBClassifier(**best_param)
model.fit(X, y)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.5555672546387187,
              eval_metric='auc', gamma=1, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.01, max_delta_step=0,
              max_depth=7, min_child_weight=9, missing=nan,
              monotone_constraints='()', n_estimators=808, n_jobs=8,
              num_parallel_tree=1, objective='binary:logistic', random_state=0,
              reg_alpha=1.6530812668946973, reg_lambda=9.599740027835166,
              scale_pos_weight=3, subsample=0.8738492762052684,
              tree_method='exact', use_label_encoder=True,
              validate_parameters=1, verbosity=None)

In [17]:
import joblib

# Persist the fitted classifier; joblib.dump is the last expression so the
# cell still displays what it returns.
model_path = r"C:\Users\tanch\Documents\Coding Competitions\DataDriven\Flu Shot Learning\local\model\xgbboost h1n1_vaccine 0.pkl"
joblib.dump(model, model_path)

['C:\\Users\\tanch\\Documents\\Coding Competitions\\DataDriven\\Flu Shot Learning\\local\\model\\xgbboost h1n1_vaccine 0.pkl']