In [1]:
import xgboost as xgb
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
import time

In [2]:
%%time
# Fetch dataset using sklearn
cov = fetch_covtype()
X = cov.data
y = cov.target

CPU times: user 576 ms, sys: 64 ms, total: 640 ms
Wall time: 1.11 s


In [3]:
%%time
# Create 0.75/0.25 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
                                                    random_state=42)

CPU times: user 136 ms, sys: 40 ms, total: 176 ms
Wall time: 173 ms


In [16]:
%%time
# Convert input data from numpy to XGBoost format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# boost parameters to hold constant
num_round = 10
maxdepth = 8

CPU times: user 140 ms, sys: 60 ms, total: 200 ms
Wall time: 200 ms


In [17]:
%%time
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
         'num_class': 8, # Number of possible output classes
         'tree_method': 'gpu_hist', # Use GPU accelerated algorithm
         'grow_policy': 'depthwise',
         'max_depth': maxdepth
         }
gpu_res = {} # Store accuracy result
tmp = time.time()
# Train model
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

[0]	test-merror:0.222935
[1]	test-merror:0.211066
[2]	test-merror:0.202309
[3]	test-merror:0.196678
[4]	test-merror:0.193304
[5]	test-merror:0.189442
[6]	test-merror:0.186771
[7]	test-merror:0.185056
[8]	test-merror:0.183046
[9]	test-merror:0.180382
GPU Training Time: 5.804930925369263 seconds
CPU times: user 3.31 s, sys: 2.51 s, total: 5.82 s
Wall time: 5.81 s


In [19]:
%%time
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
         'num_class': 8, # Number of possible output classes
         'tree_method': 'gpu_hist_experimental', # Use GPU accelerated algorithm
         'grow_policy': 'depthwise',
         'max_depth': maxdepth
         }
gpu_res = {} # Store accuracy result
tmp = time.time()
# Train model
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

[0]	test-merror:0.220877
[1]	test-merror:0.207934
[2]	test-merror:0.201724
[3]	test-merror:0.19559
[4]	test-merror:0.191549
[5]	test-merror:0.188265
[6]	test-merror:0.185655
[7]	test-merror:0.184051
[8]	test-merror:0.181958
[9]	test-merror:0.180423
GPU Training Time: 329.18328404426575 seconds
CPU times: user 1min 40s, sys: 3min 48s, total: 5min 29s
Wall time: 5min 29s


In [20]:
%%time
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
         'num_class': 8, # Number of possible output classes
         'tree_method': 'gpu_hist_experimental', # Use GPU accelerated algorithm
         'grow_policy': 'lossguide',
         'max_leaves': np.power(2,maxdepth)
         }
gpu_res = {} # Store accuracy result
tmp = time.time()
# Train model
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

[0]	test-merror:0.254817
[1]	test-merror:0.247733
[2]	test-merror:0.244208
[3]	test-merror:0.240367
[4]	test-merror:0.23888
[5]	test-merror:0.234453
[6]	test-merror:0.232932
[7]	test-merror:0.231252
[8]	test-merror:0.227665
[9]	test-merror:0.226178
GPU Training Time: 305.76941204071045 seconds
CPU times: user 1min 33s, sys: 3min 32s, total: 5min 5s
Wall time: 5min 5s


In [21]:
%%time
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
         'num_class': 8, # Number of possible output classes
         'tree_method': 'hist', # Use CPU accelerated algorithm
         'grow_policy': 'depthwise',
         'max_depth': maxdepth
         }
# Repeat for CPU algorithm
tmp = time.time()
cpu_res = {}
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))

[0]	test-merror:0.219149
[1]	test-merror:0.208009
[2]	test-merror:0.202027
[3]	test-merror:0.195404
[4]	test-merror:0.191514
[5]	test-merror:0.188609
[6]	test-merror:0.187239
[7]	test-merror:0.18474
[8]	test-merror:0.18242
[9]	test-merror:0.180871
CPU Training Time: 28.019179344177246 seconds
CPU times: user 28 s, sys: 20 ms, total: 28 s
Wall time: 28 s


In [22]:
%%time
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
         'num_class': 8, # Number of possible output classes
         'tree_method': 'hist', # Use CPU accelerated algorithm
         'grow_policy': 'lossguide',
         'max_leaves': np.power(2,maxdepth)
          }
# Repeat for CPU algorithm
tmp = time.time()
cpu_res = {}
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))

[0]	test-merror:0.254831
[1]	test-merror:0.247912
[2]	test-merror:0.244298
[3]	test-merror:0.24069
[4]	test-merror:0.238536
[5]	test-merror:0.234804
[6]	test-merror:0.232229
[7]	test-merror:0.229703
[8]	test-merror:0.227162
[9]	test-merror:0.224519
CPU Training Time: 26.534437656402588 seconds
CPU times: user 26.5 s, sys: 52 ms, total: 26.6 s
Wall time: 26.5 s
