In [1]:
import xgboost as xgb
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
import time

In [2]:
%%time
# Load the covertype dataset through scikit-learn, then separate the
# feature matrix (X) from the label vector (y).
cov = fetch_covtype()
X, y = cov.data, cov.target

CPU times: user 480 ms, sys: 28 ms, total: 508 ms
Wall time: 507 ms


In [3]:
%%time
# Keep 75% of the samples for training and hold out 25% for evaluation.
# The fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

CPU times: user 80 ms, sys: 28 ms, total: 108 ms
Wall time: 107 ms


In [4]:
%%time
# Settings read by the training cells: number of boosting rounds and tree depth.
num_round = 3
maxdepth = 6

# Wrap the numpy train/test splits in XGBoost's DMatrix format, labels attached.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

CPU times: user 84 ms, sys: 32 ms, total: 116 ms
Wall time: 116 ms


In [5]:
%%time
# GPU histogram run with depth-wise growth. The number of boosting rounds
# comes from `num_round`, which is set in an earlier cell.

# Most parameters are spelled out explicitly, one key per line.
param = {
    # NOTE(review): num_class is 1 while the objective below is a regression
    # objective -- confirm this key is intended.
    'num_class': 1,
    'tree_method': 'gpu_hist',  # Use GPU accelerated algorithm
    'grow_policy': 'depthwise',
    'max_depth': maxdepth,
    'random_state': 1234,
    'base_score': 0.5,
    'booster': 'gbtree',
    'colsample_bylevel': 1,
    'colsample_bytree': 1,
    'gamma': 0,
    'learning_rate': 0.1,
    'max_delta_step': 0,
    'min_child_weight': 1,
    'missing': None,
    'n_estimators': 3,
    'n_jobs': 1,
    'objective': 'reg:linear',
    'reg_alpha': 0,
    'reg_lambda': 1,
    'scale_pos_weight': 1,
    'silent': True,
    'subsample': 1,
    'verbose': True,
}
gpu_res = {}  # receives the evaluation results recorded during training
tmp = time.time()
# Train, evaluating on the held-out DMatrix under the name 'test'.
xgb.train(
    param,
    dtrain,
    num_boost_round=num_round,
    evals=[(dtest, 'test')],
    evals_result=gpu_res,
)
print("GPU Training Time: %s seconds" % str(time.time() - tmp))

[0]	test-rmse:1.93335
[1]	test-rmse:1.79929
[2]	test-rmse:1.68219
GPU Training Time: 1.0427639484405518 seconds
CPU times: user 3.5 s, sys: 404 ms, total: 3.91 s
Wall time: 1.04 s


In [11]:
%%time
# GPU histogram run with depth-wise growth and no num_class key.
# Replace 'gpu_hist' with 'hist' below to try the other tree method.
param = {
    'tree_method': 'gpu_hist',  # Use GPU accelerated algorithm
    'grow_policy': 'depthwise',
    'max_depth': maxdepth,
    'random_state': 1234,
    'base_score': 0.5,
    'booster': 'gbtree',
    'colsample_bylevel': 1,
    'colsample_bytree': 1,
    'gamma': 0,
    'learning_rate': 0.1,
    'max_delta_step': 0,
    'min_child_weight': 1,
    'missing': None,
    'n_estimators': 3,
    'n_jobs': 1,
    'objective': 'reg:linear',
    'reg_alpha': 0,
    'reg_lambda': 1,
    'scale_pos_weight': 1,
    'silent': True,
    'subsample': 1,
    'verbose': True,
}
gpu_res = {}  # receives the evaluation results recorded during training
tmp = time.time()
# Train, evaluating on the held-out DMatrix under the name 'test'.
xgb.train(
    param,
    dtrain,
    num_boost_round=num_round,
    evals=[(dtest, 'test')],
    evals_result=gpu_res,
)
print("GPU Training Time: %s seconds" % str(time.time() - tmp))

[0]	test-rmse:1.93335
[1]	test-rmse:1.79929
[2]	test-rmse:1.68219
GPU Training Time: 1.3707866668701172 seconds
CPU times: user 1.2 s, sys: 148 ms, total: 1.35 s
Wall time: 1.37 s


In [12]:
%%time
# Non-GPU run: 'hist' tree method with loss-guided growth. max_leaves is set
# to 2**maxdepth, the leaf count of a full binary tree of depth `maxdepth`.
# Replace 'hist' with 'gpu_hist' below to run the GPU accelerated algorithm.
param = {
    'tree_method': 'hist',
    'grow_policy': 'lossguide',
    'max_leaves': np.power(2, maxdepth),
    'max_depth': maxdepth,
    'random_state': 1234,
    'base_score': 0.5,
    'booster': 'gbtree',
    'colsample_bylevel': 1,
    'colsample_bytree': 1,
    'gamma': 0,
    'learning_rate': 0.1,
    'max_delta_step': 0,
    'min_child_weight': 1,
    'missing': None,
    'n_estimators': 3,
    'n_jobs': 1,
    'objective': 'reg:linear',
    'reg_alpha': 0,
    'reg_lambda': 1,
    'scale_pos_weight': 1,
    'silent': True,
    'subsample': 1,
    'verbose': True,
}
# NOTE: the name `gpu_res` is kept so the notebook's globals are unchanged,
# even though this cell does not select the GPU tree method.
gpu_res = {}  # receives the evaluation results recorded during training
tmp = time.time()
# Train, evaluating on the held-out DMatrix under the name 'test'.
xgb.train(
    param,
    dtrain,
    num_boost_round=num_round,
    evals=[(dtest, 'test')],
    evals_result=gpu_res,
)
# This cell uses 'hist', not 'gpu_hist', so the timing is labelled as CPU.
print("CPU Training Time: %s seconds" % str(time.time() - tmp))

[0]	test-rmse:1.93335
[1]	test-rmse:1.7993
[2]	test-rmse:1.68222
GPU Training Time: 1.6107001304626465 seconds
CPU times: user 1.58 s, sys: 28 ms, total: 1.61 s
Wall time: 1.61 s


In [13]:
# Alternate names for the existing train/test splits, as used by the
# XGBRegressor cells.
train_X, train_y = X_train, y_train
test_X, test_y = X_test, y_test

In [14]:
# Scikit-learn style run using the 'hist' tree method.
# `grow_policy` is assigned here but not forwarded to the regressor: the
# keyword that would pass it (and the gpu_hist / n_gpus alternative) is disabled.
grow_policy = "depthwise"
eval_metric = "rmse"

# Constructor keywords stay in their original order.
model = xgb.XGBRegressor(
    n_estimators=num_round,
    tree_method="hist",
    max_depth=maxdepth,
    n_jobs=1,
    random_state=1234,
    verbose=True,
)

# Fit while reporting `eval_metric` on the held-out split each round.
model.fit(
    X=train_X,
    y=train_y,
    eval_set=[(test_X, test_y)],
    eval_metric=eval_metric,
    verbose=True,
)
print(model.get_params())

[0]	validation_0-rmse:1.93335
[1]	validation_0-rmse:1.7993
[2]	validation_0-rmse:1.68222
{'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_delta_step': 0, 'max_depth': 6, 'min_child_weight': 1, 'missing': None, 'n_estimators': 3, 'n_jobs': 1, 'nthread': None, 'objective': 'reg:linear', 'random_state': 1234, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'seed': None, 'silent': True, 'subsample': 1, 'tree_method': 'hist', 'verbose': True}


In [15]:
# Scikit-learn style run using the 'hist' tree method with loss-guided growth,
# capped at 2**maxdepth leaves. The gpu_hist / n_gpus alternative is disabled.
grow_policy = "lossguide"
eval_metric = "rmse"

# Constructor keywords stay in their original order.
model = xgb.XGBRegressor(
    n_estimators=num_round,
    tree_method="hist",
    grow_policy=grow_policy,
    max_leaves=np.power(2, maxdepth),
    max_depth=maxdepth,
    n_jobs=1,
    random_state=1234,
    verbose=True,
)

# Fit while reporting `eval_metric` on the held-out split each round.
model.fit(
    X=train_X,
    y=train_y,
    eval_set=[(test_X, test_y)],
    eval_metric=eval_metric,
    verbose=True,
)
print(model.get_params())

[0]	validation_0-rmse:1.93335
[1]	validation_0-rmse:1.7993
[2]	validation_0-rmse:1.68222
{'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_delta_step': 0, 'max_depth': 6, 'min_child_weight': 1, 'missing': None, 'n_estimators': 3, 'n_jobs': 1, 'nthread': None, 'objective': 'reg:linear', 'random_state': 1234, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'seed': None, 'silent': True, 'subsample': 1, 'tree_method': 'hist', 'grow_policy': 'lossguide', 'max_leaves': 64, 'verbose': True}
