<a href="https://colab.research.google.com/github/dandichristiawan/machine-learning/blob/main/Distributed%20XGBoost%20with%20Ray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Distributed XGBoost with Ray**

Installing Ray (no explicit start-up step is needed; Ray is initialized automatically on first use)

In [None]:
pip install ray

Installing XGBoost-Ray

In [None]:
pip install xgboost_ray

Installing scikit-learn

In [None]:
pip install sklearn

## Simple training example

In [4]:
from xgboost_ray import RayDMatrix, RayParams, train
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset and wrap it in a RayDMatrix for training.
train_x, train_y = load_breast_cancer(return_X_y=True)
train_set = RayDMatrix(train_x, train_y)

# Booster settings: binary logistic objective, tracking log-loss and error.
xgb_params = {
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
}
# Two training actors with one CPU each.
train_ray_params = RayParams(num_actors=2, cpus_per_actor=1)

# Read back below to report the last recorded error on the "train" eval set.
evals_result = {}
bst = train(
    xgb_params,
    train_set,
    evals=[(train_set, "train")],
    evals_result=evals_result,
    verbose_eval=False,
    ray_params=train_ray_params,
)

# Persist the booster so the prediction cell can reload it from disk.
bst.save_model("model.xgb")
final_error = evals_result["train"]["error"][-1]
print("Final training error: {:.4f}".format(final_error))

2021-11-10 12:37:51,031	INFO main.py:971 -- [RayXGBoost] Created 2 new actors (2 total actors). Waiting until actors are ready for training.


[2m[36m(_EventActor pid=1123)[0m 


2021-11-10 12:37:53,266	INFO main.py:1016 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(pid=1185)[0m [12:37:53] Tree method is automatically selected to be 'approx' for distributed training.
[2m[36m(pid=1209)[0m [12:37:53] Tree method is automatically selected to be 'approx' for distributed training.
2021-11-10 12:37:54,860	INFO main.py:1498 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 4.76 seconds (1.58 pure XGBoost training time).


Final training error: 0.0053


## Simple prediction example

In [5]:
from xgboost_ray import RayDMatrix, RayParams, predict
from sklearn.datasets import load_breast_cancer
import xgboost as xgb

# Load the breast-cancer dataset again and wrap it for distributed prediction.
data, labels = load_breast_cancer(return_X_y=True)
dpred = RayDMatrix(data, labels)

# Restore the booster from "model.xgb" and predict with two actors.
bst = xgb.Booster(model_file="model.xgb")
predict_ray_params = RayParams(num_actors=2)
pred_ray = predict(bst, dpred, ray_params=predict_ray_params)

print(pred_ray)

2021-11-10 12:37:54,962	INFO main.py:1535 -- [RayXGBoost] Created 2 remote actors.
2021-11-10 12:37:57,515	INFO main.py:1552 -- [RayXGBoost] Starting XGBoost prediction.


[0.07511145 0.04715708 0.02642454 0.11464816 0.08779665 0.19609766
 0.02642454 0.03200296 0.03968306 0.06930447 0.09290799 0.02642454
 0.03476055 0.05490047 0.05012454 0.02642454 0.03337562 0.02642454
 0.02642454 0.9528658  0.9751126  0.9751126  0.07511145 0.02642454
 0.02642454 0.04102892 0.03118352 0.02642454 0.02642454 0.08779665
 0.02642454 0.03476055 0.02642454 0.02642454 0.02642454 0.02642454
 0.0665335  0.9690314  0.33619487 0.0696831  0.5729184  0.15662025
 0.03451282 0.03200296 0.12301622 0.04102892 0.9751126  0.04096693
 0.97315884 0.9234187  0.9751126  0.9751126  0.9751126  0.03059936
 0.09855346 0.9751126  0.02642454 0.03755146 0.9751126  0.97315884
 0.9751126  0.97315884 0.02642454 0.9751126  0.03120249 0.03120249
 0.97315884 0.9751126  0.8630876  0.9751126  0.02642454 0.9751126
 0.02642454 0.18486539 0.9751126  0.02642454 0.9684864  0.07511145
 0.04528174 0.9751126  0.97315884 0.8423582  0.02642454 0.02642454
 0.9751126  0.02642454 0.18525673 0.02642454 0.9610139  0.91992

## Hyperparameter optimization

In [6]:
from xgboost_ray import RayDMatrix, RayParams, train
from sklearn.datasets import load_breast_cancer

# Each training run uses a single actor with a single CPU.
num_actors, num_cpus_per_actor = 1, 1

ray_params = RayParams(
    cpus_per_actor=num_cpus_per_actor,
    num_actors=num_actors,
)

def train_model(config):
    """Train one XGBoost-Ray model on the breast-cancer data.

    `config` is handed to `train` as the booster parameters. Metrics are
    evaluated on the training data itself under the name "train", using the
    module-level `ray_params`. The fitted booster is written to "model.xgb";
    nothing is returned.
    """
    features, labels = load_breast_cancer(return_X_y=True)
    dtrain = RayDMatrix(features, labels)

    metrics_history = {}
    booster = train(
        config,
        dtrain,
        evals=[(dtrain, "train")],
        evals_result=metrics_history,
        verbose_eval=False,
        ray_params=ray_params,
    )
    booster.save_model("model.xgb")

from ray import tune

# Booster settings that stay the same for every trial.
fixed_params = {
    "tree_method": "approx",
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
}
# Hyperparameters that Tune samples for each trial.
search_space = {
    "eta": tune.loguniform(1e-4, 1e-1),
    "subsample": tune.uniform(0.5, 1.0),
    "max_depth": tune.randint(1, 9),
}
config = {**fixed_params, **search_space}

# Derive `resources_per_trial` from `ray_params` via `get_tune_resources`.
trial_resources = ray_params.get_tune_resources()
analysis = tune.run(
    train_model,
    config=config,
    metric="train-error",
    mode="min",
    num_samples=4,
    resources_per_trial=trial_resources,
)
print("Best hyperparameters", analysis.best_config)

2021-11-10 12:37:57,652	INFO logger.py:606 -- pip install 'ray[tune]' to see TensorBoard files.


Trial name,status,loc,eta,max_depth,subsample
train_model_088ee_00000,PENDING,,0.00623427,7,0.943351
train_model_088ee_00001,PENDING,,0.0714974,3,0.737249
train_model_088ee_00002,PENDING,,0.000523812,8,0.548698
train_model_088ee_00003,PENDING,,0.0190608,2,0.793031


[2m[36m(pid=1289)[0m   f"`num_actors` in `ray_params` is smaller than 2 "
[2m[36m(pid=1289)[0m 2021-11-10 12:38:00,421	INFO main.py:971 -- [RayXGBoost] Created 1 new actors (1 total actors). Waiting until actors are ready for training.


Trial name,status,loc,eta,max_depth,subsample
train_model_088ee_00000,RUNNING,172.28.0.2:1289,0.00623427,7,0.943351
train_model_088ee_00001,PENDING,,0.0714974,3,0.737249
train_model_088ee_00002,PENDING,,0.000523812,8,0.548698
train_model_088ee_00003,PENDING,,0.0190608,2,0.793031


[2m[36m(pid=1289)[0m 2021-11-10 12:38:03,138	INFO main.py:1016 -- [RayXGBoost] Starting XGBoost training.


Result for train_model_088ee_00000:
  date: 2021-11-10_12-38-04
  done: false
  experiment_id: 851c9bef561b47a0988e12f3cf879984
  hostname: 9cb2033ddaeb
  iterations_since_restore: 1
  node_ip: 172.28.0.2
  pid: 1289
  time_since_restore: 3.821312427520752
  time_this_iter_s: 3.821312427520752
  time_total_s: 3.821312427520752
  timestamp: 1636547884
  timesteps_since_restore: 0
  train-error: 0.029877
  train-logloss: 0.687657
  training_iteration: 1
  trial_id: 088ee_00000
  
Result for train_model_088ee_00000:
  date: 2021-11-10_12-38-04
  done: true
  experiment_id: 851c9bef561b47a0988e12f3cf879984
  experiment_tag: 0_eta=0.0062343,max_depth=7,subsample=0.94335
  hostname: 9cb2033ddaeb
  iterations_since_restore: 10
  node_ip: 172.28.0.2
  pid: 1289
  time_since_restore: 3.979233980178833
  time_this_iter_s: 0.006816864013671875
  time_total_s: 3.979233980178833
  timestamp: 1636547884
  timesteps_since_restore: 0
  train-error: 0.008787
  train-logloss: 0.640539
  training_iterati

[2m[36m(pid=1289)[0m 2021-11-10 12:38:04,335	INFO main.py:1498 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 3.98 seconds (1.19 pure XGBoost training time).
[2m[36m(pid=1395)[0m   f"`num_actors` in `ray_params` is smaller than 2 "
[2m[36m(pid=1395)[0m 2021-11-10 12:38:06,442	INFO main.py:971 -- [RayXGBoost] Created 1 new actors (1 total actors). Waiting until actors are ready for training.


Trial name,status,loc,eta,max_depth,subsample,iter,total time (s),train-logloss,train-error
train_model_088ee_00001,RUNNING,172.28.0.2:1395,0.0714974,3,0.737249,,,,
train_model_088ee_00002,PENDING,,0.000523812,8,0.548698,,,,
train_model_088ee_00003,PENDING,,0.0190608,2,0.793031,,,,
train_model_088ee_00000,TERMINATED,172.28.0.2:1289,0.00623427,7,0.943351,10.0,3.97923,0.640539,0.008787


[2m[36m(pid=1395)[0m 2021-11-10 12:38:09,165	INFO main.py:1016 -- [RayXGBoost] Starting XGBoost training.


Result for train_model_088ee_00001:
  date: 2021-11-10_12-38-10
  done: false
  experiment_id: 7091f78a326b4b86b5324c6a8d0a4522
  hostname: 9cb2033ddaeb
  iterations_since_restore: 1
  node_ip: 172.28.0.2
  pid: 1395
  time_since_restore: 3.8300869464874268
  time_this_iter_s: 3.8300869464874268
  time_total_s: 3.8300869464874268
  timestamp: 1636547890
  timesteps_since_restore: 0
  train-error: 0.045694
  train-logloss: 0.635229
  training_iteration: 1
  trial_id: 088ee_00001
  
Result for train_model_088ee_00001:
  date: 2021-11-10_12-38-10
  done: true
  experiment_id: 7091f78a326b4b86b5324c6a8d0a4522
  experiment_tag: 1_eta=0.071497,max_depth=3,subsample=0.73725
  hostname: 9cb2033ddaeb
  iterations_since_restore: 10
  node_ip: 172.28.0.2
  pid: 1395
  time_since_restore: 3.9176175594329834
  time_this_iter_s: 0.0059545040130615234
  time_total_s: 3.9176175594329834
  timestamp: 1636547890
  timesteps_since_restore: 0
  train-error: 0.024605
  train-logloss: 0.327906
  training_it

[2m[36m(pid=1395)[0m 2021-11-10 12:38:10,329	INFO main.py:1498 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 3.96 seconds (1.16 pure XGBoost training time).
[2m[36m(pid=1501)[0m   f"`num_actors` in `ray_params` is smaller than 2 "
[2m[36m(pid=1501)[0m 2021-11-10 12:38:12,411	INFO main.py:971 -- [RayXGBoost] Created 1 new actors (1 total actors). Waiting until actors are ready for training.


Trial name,status,loc,eta,max_depth,subsample,iter,total time (s),train-logloss,train-error
train_model_088ee_00002,RUNNING,172.28.0.2:1501,0.000523812,8,0.548698,,,,
train_model_088ee_00003,PENDING,,0.0190608,2,0.793031,,,,
train_model_088ee_00000,TERMINATED,172.28.0.2:1289,0.00623427,7,0.943351,10.0,3.97923,0.640539,0.008787
train_model_088ee_00001,TERMINATED,172.28.0.2:1395,0.0714974,3,0.737249,10.0,3.91762,0.327906,0.024605


[2m[36m(pid=1501)[0m 2021-11-10 12:38:15,129	INFO main.py:1016 -- [RayXGBoost] Starting XGBoost training.


Result for train_model_088ee_00002:
  date: 2021-11-10_12-38-16
  done: false
  experiment_id: 3f15cc14f91b450b8570f5a69e1045cf
  hostname: 9cb2033ddaeb
  iterations_since_restore: 1
  node_ip: 172.28.0.2
  pid: 1501
  time_since_restore: 3.8104159832000732
  time_this_iter_s: 3.8104159832000732
  time_total_s: 3.8104159832000732
  timestamp: 1636547896
  timesteps_since_restore: 0
  train-error: 0.035149
  train-logloss: 0.692683
  training_iteration: 1
  trial_id: 088ee_00002
  
Result for train_model_088ee_00002:
  date: 2021-11-10_12-38-16
  done: true
  experiment_id: 3f15cc14f91b450b8570f5a69e1045cf
  experiment_tag: 2_eta=0.00052381,max_depth=8,subsample=0.5487
  hostname: 9cb2033ddaeb
  iterations_since_restore: 10
  node_ip: 172.28.0.2
  pid: 1501
  time_since_restore: 3.8944265842437744
  time_this_iter_s: 0.006228208541870117
  time_total_s: 3.8944265842437744
  timestamp: 1636547896
  timesteps_since_restore: 0
  train-error: 0.022847
  train-logloss: 0.688733
  training_it

[2m[36m(pid=1501)[0m 2021-11-10 12:38:16,291	INFO main.py:1498 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 3.94 seconds (1.16 pure XGBoost training time).
[2m[36m(pid=1605)[0m   f"`num_actors` in `ray_params` is smaller than 2 "
[2m[36m(pid=1605)[0m 2021-11-10 12:38:18,376	INFO main.py:971 -- [RayXGBoost] Created 1 new actors (1 total actors). Waiting until actors are ready for training.


Trial name,status,loc,eta,max_depth,subsample,iter,total time (s),train-logloss,train-error
train_model_088ee_00003,RUNNING,172.28.0.2:1605,0.0190608,2,0.793031,,,,
train_model_088ee_00000,TERMINATED,172.28.0.2:1289,0.00623427,7,0.943351,10.0,3.97923,0.640539,0.008787
train_model_088ee_00001,TERMINATED,172.28.0.2:1395,0.0714974,3,0.737249,10.0,3.91762,0.327906,0.024605
train_model_088ee_00002,TERMINATED,172.28.0.2:1501,0.000523812,8,0.548698,10.0,3.89443,0.688733,0.022847


[2m[36m(pid=1605)[0m 2021-11-10 12:38:21,113	INFO main.py:1016 -- [RayXGBoost] Starting XGBoost training.


Result for train_model_088ee_00003:
  date: 2021-11-10_12-38-22
  done: false
  experiment_id: dcf834acd8a94a3a9a0d95882998bdaa
  hostname: 9cb2033ddaeb
  iterations_since_restore: 1
  node_ip: 172.28.0.2
  pid: 1605
  time_since_restore: 3.843522787094116
  time_this_iter_s: 3.843522787094116
  time_total_s: 3.843522787094116
  timestamp: 1636547902
  timesteps_since_restore: 0
  train-error: 0.050967
  train-logloss: 0.677723
  training_iteration: 1
  trial_id: 088ee_00003
  
Result for train_model_088ee_00003:
  date: 2021-11-10_12-38-22
  done: true
  experiment_id: dcf834acd8a94a3a9a0d95882998bdaa
  experiment_tag: 3_eta=0.019061,max_depth=2,subsample=0.79303
  hostname: 9cb2033ddaeb
  iterations_since_restore: 10
  node_ip: 172.28.0.2
  pid: 1605
  time_since_restore: 3.9192097187042236
  time_this_iter_s: 0.005998134613037109
  time_total_s: 3.9192097187042236
  timestamp: 1636547902
  timesteps_since_restore: 0
  train-error: 0.038664
  train-logloss: 0.561658
  training_iterat

[2m[36m(pid=1605)[0m 2021-11-10 12:38:22,277	INFO main.py:1498 -- [RayXGBoost] Finished XGBoost training on training data with total N=569 in 3.97 seconds (1.16 pure XGBoost training time).


Trial name,status,loc,eta,max_depth,subsample,iter,total time (s),train-logloss,train-error
train_model_088ee_00000,TERMINATED,172.28.0.2:1289,0.00623427,7,0.943351,10,3.97923,0.640539,0.008787
train_model_088ee_00001,TERMINATED,172.28.0.2:1395,0.0714974,3,0.737249,10,3.91762,0.327906,0.024605
train_model_088ee_00002,TERMINATED,172.28.0.2:1501,0.000523812,8,0.548698,10,3.89443,0.688733,0.022847
train_model_088ee_00003,TERMINATED,172.28.0.2:1605,0.0190608,2,0.793031,10,3.91921,0.561658,0.038664


2021-11-10 12:38:22,446	INFO tune.py:630 -- Total run time: 24.82 seconds (24.62 seconds for the tuning loop).


Best hyperparameters {'tree_method': 'approx', 'objective': 'binary:logistic', 'eval_metric': ['logloss', 'error'], 'eta': 0.006234271492854062, 'subsample': 0.9433507825155405, 'max_depth': 7}
