In [7]:
import pandas as pd
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

import lightgbm as lgb

from ray import tune
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining

train_processed_df = pd.read_pickle("inputs/train_processed.pkl")

def LightGBMCallback(env):
    """Assumes that `valid_0` is the target validation score."""
    _, metric, score, _ = env.evaluation_result_list[0]
    tune.report(**{metric: score})


def train_diabetes(config):
    data = train_processed_df.drop('diabetes_mellitus', axis=1)
    target = train_processed_df.diabetes_mellitus
    #data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.20)
    train_set = lgb.Dataset(train_x, label=train_y)
    test_set = lgb.Dataset(test_x, label=test_y)
    gbm = lgb.train(
        config,
        train_set,
        valid_sets=[test_set],
        verbose_eval=False,
        callbacks=[LightGBMCallback])
    preds = gbm.predict(test_x)
    pred_labels = np.rint(preds)
    tune.report(
        binary_error=sklearn.metrics.roc_auc_score(test_y, pred_labels),
        done=True)


config = {
        "objective": "binary",
        "metric": "binary_error",
        "verbose": -1,
        #"boosting_type": tune.grid_search(["gbdt", "dart"]),
        "boosting_type": "gbdt",
        "num_leaves": tune.randint(10, 40),
        #'min_child_samples': tune.randint(100, 400),
        #'min_child_weight': tune.grid_search([1e-7,1e-6,1e-5, 1e-4, 1e-3, 1e-2,1e-1]),
        #'subsample': tune.uniform(0.4, 0.6),
        #'colsample_bytree': tune.uniform(0.2, 0.6),
        #'reg_alpha': tune.grid_search([0, 1, 2, 5, 7, 10, 50]),
        #'reg_lambda': tune.grid_search([0, 1, 5, 10, 20, 50]),
        'scale_pos_weight': tune.grid_search([1, 2, 3]),
        "learning_rate": 0.01
    }

analysis = tune.run(
        train_diabetes,
        metric="binary_error",
        #mode="min",
        mode="max",
        resources_per_trial={"cpu": 4, "gpu": 0},
        config=config,
        num_samples=1,
        #scheduler=ASHAScheduler())
        scheduler=PopulationBasedTraining(),
        local_dir="logging")





Trial name,status,loc,num_leaves,scale_pos_weight
train_diabetes_d4e8e_00000,RUNNING,,33,1


Result for train_diabetes_d4e8e_00000:
  binary_error: 0.21339121081745543
  date: 2021-03-19_09-52-44
  done: false
  experiment_id: d8a25c00733c4a6da1bbe1a319290b61
  hostname: MacBook-Pro-3
  iterations_since_restore: 1
  node_ip: 192.168.0.4
  pid: 22409
  time_since_restore: 7.334596157073975
  time_this_iter_s: 7.334596157073975
  time_total_s: 7.334596157073975
  timestamp: 1616147564
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: d4e8e_00000
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00000,RUNNING,192.168.0.4:22409,33,1,1.0,7.3346,0.213391
train_diabetes_d4e8e_00001,RUNNING,,17,2,,,
train_diabetes_d4e8e_00002,PENDING,,39,3,,,


Result for train_diabetes_d4e8e_00001:
  binary_error: 0.213660110633067
  date: 2021-03-19_09-52-45
  done: false
  experiment_id: e8a5a8a11c7847789552eb6fbb328b12
  hostname: MacBook-Pro-3
  iterations_since_restore: 1
  node_ip: 192.168.0.4
  pid: 22406
  time_since_restore: 7.379410028457642
  time_this_iter_s: 7.379410028457642
  time_total_s: 7.379410028457642
  timestamp: 1616147565
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: d4e8e_00001
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00000,RUNNING,192.168.0.4:22409,33,1,47.0,12.3296,0.213391
train_diabetes_d4e8e_00001,RUNNING,192.168.0.4:22406,17,2,50.0,11.4991,0.21366
train_diabetes_d4e8e_00002,PENDING,,39,3,,,


Result for train_diabetes_d4e8e_00000:
  binary_error: 0.21339121081745543
  date: 2021-03-19_09-52-50
  done: false
  experiment_id: d8a25c00733c4a6da1bbe1a319290b61
  hostname: MacBook-Pro-3
  iterations_since_restore: 48
  node_ip: 192.168.0.4
  pid: 22409
  time_since_restore: 12.438672065734863
  time_this_iter_s: 0.1090538501739502
  time_total_s: 12.438672065734863
  timestamp: 1616147570
  timesteps_since_restore: 0
  training_iteration: 48
  trial_id: d4e8e_00000
  
Result for train_diabetes_d4e8e_00001:
  binary_error: 0.213660110633067
  date: 2021-03-19_09-52-50
  done: false
  experiment_id: e8a5a8a11c7847789552eb6fbb328b12
  hostname: MacBook-Pro-3
  iterations_since_restore: 60
  node_ip: 192.168.0.4
  pid: 22406
  time_since_restore: 12.419089794158936
  time_this_iter_s: 0.08419299125671387
  time_total_s: 12.419089794158936
  timestamp: 1616147570
  timesteps_since_restore: 0
  training_iteration: 60
  trial_id: d4e8e_00001
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00000,RUNNING,192.168.0.4:22409,33,1,93.0,17.4208,0.205401
train_diabetes_d4e8e_00001,RUNNING,192.168.0.4:22406,17,2,100.0,15.8359,0.180662
train_diabetes_d4e8e_00002,PENDING,,39,3,,,


Result for train_diabetes_d4e8e_00001:
  binary_error: 0.6552603445074549
  date: 2021-03-19_09-52-55
  done: true
  experiment_id: e8a5a8a11c7847789552eb6fbb328b12
  hostname: MacBook-Pro-3
  iterations_since_restore: 101
  node_ip: 192.168.0.4
  pid: 22406
  time_since_restore: 16.569145917892456
  time_this_iter_s: 0.7332251071929932
  time_total_s: 16.569145917892456
  timestamp: 1616147575
  timesteps_since_restore: 0
  training_iteration: 101
  trial_id: d4e8e_00001
  
Result for train_diabetes_d4e8e_00000:
  binary_error: 0.20463275968039335
  date: 2021-03-19_09-52-55
  done: false
  experiment_id: d8a25c00733c4a6da1bbe1a319290b61
  hostname: MacBook-Pro-3
  iterations_since_restore: 94
  node_ip: 192.168.0.4
  pid: 22409
  time_since_restore: 17.540767192840576
  time_this_iter_s: 0.11999821662902832
  time_total_s: 17.540767192840576
  timestamp: 1616147575
  timesteps_since_restore: 0
  training_iteration: 94
  trial_id: d4e8e_00000
  
Result for train_diabetes_d4e8e_00000:


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00002,RUNNING,192.168.0.4:22495,39,3,1,7.28567,0.213276
train_diabetes_d4e8e_00000,TERMINATED,,33,1,101,19.6081,0.54789
train_diabetes_d4e8e_00001,TERMINATED,,17,2,101,16.5691,0.65526


Result for train_diabetes_d4e8e_00002:
  binary_error: 0.2132759680393362
  date: 2021-03-19_09-53-09
  done: false
  experiment_id: f0f633ddeb044f838a9d1798d4aa3acd
  hostname: MacBook-Pro-3
  iterations_since_restore: 43
  node_ip: 192.168.0.4
  pid: 22495
  time_since_restore: 12.3503258228302
  time_this_iter_s: 0.12150073051452637
  time_total_s: 12.3503258228302
  timestamp: 1616147589
  timesteps_since_restore: 0
  training_iteration: 43
  trial_id: d4e8e_00002
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00002,RUNNING,192.168.0.4:22495,39,3,43,12.3503,0.213276
train_diabetes_d4e8e_00000,TERMINATED,,33,1,101,19.6081,0.54789
train_diabetes_d4e8e_00001,TERMINATED,,17,2,101,16.5691,0.65526


Result for train_diabetes_d4e8e_00002:
  binary_error: 0.17682083589428396
  date: 2021-03-19_09-53-14
  done: false
  experiment_id: f0f633ddeb044f838a9d1798d4aa3acd
  hostname: MacBook-Pro-3
  iterations_since_restore: 85
  node_ip: 192.168.0.4
  pid: 22495
  time_since_restore: 17.423748016357422
  time_this_iter_s: 0.11935114860534668
  time_total_s: 17.423748016357422
  timestamp: 1616147594
  timesteps_since_restore: 0
  training_iteration: 85
  trial_id: d4e8e_00002
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00002,RUNNING,192.168.0.4:22495,39,3,85,17.4237,0.176821
train_diabetes_d4e8e_00000,TERMINATED,,33,1,101,19.6081,0.54789
train_diabetes_d4e8e_00001,TERMINATED,,17,2,101,16.5691,0.65526


Result for train_diabetes_d4e8e_00002:
  binary_error: 0.7195581969335375
  date: 2021-03-19_09-53-17
  done: true
  experiment_id: f0f633ddeb044f838a9d1798d4aa3acd
  hostname: MacBook-Pro-3
  iterations_since_restore: 101
  node_ip: 192.168.0.4
  pid: 22495
  time_since_restore: 20.003094911575317
  time_this_iter_s: 0.7850649356842041
  time_total_s: 20.003094911575317
  timestamp: 1616147597
  timesteps_since_restore: 0
  training_iteration: 101
  trial_id: d4e8e_00002
  


Trial name,status,loc,num_leaves,scale_pos_weight,iter,total time (s),binary_error
train_diabetes_d4e8e_00000,TERMINATED,,33,1,101,19.6081,0.54789
train_diabetes_d4e8e_00001,TERMINATED,,17,2,101,16.5691,0.65526
train_diabetes_d4e8e_00002,TERMINATED,,39,3,101,20.0031,0.719558


2021-03-19 09:53:17,115	INFO tune.py:450 -- Total run time: 42.23 seconds (41.77 seconds for the tuning loop).


In [8]:
print("Best hyperparameters found were: ", analysis.best_config)

Best hyperparameters found were:  {'objective': 'binary', 'metric': 'binary_error', 'verbose': -1, 'boosting_type': 'gbdt', 'num_leaves': 39, 'scale_pos_weight': 3, 'learning_rate': 0.01}
