In [1]:
import numpy as np
import xgboost as xgb
from hummingbird.ml import convert, load

In [2]:
# Load the memory_profiler IPython extension; it provides the %%memit cell
# magic used by the conversion / model-loading cells further down.
%load_ext memory_profiler

In [3]:
# Create some random data for binary classification:
# 100000 rows x 28 random features, with integer labels drawn from
# {0, ..., num_classes - 1}.
# Seed NumPy's global RNG first so that Restart & Run All regenerates exactly
# the same X and y (and therefore the same trained model and predictions).
np.random.seed(0)
num_classes = 2
X = np.random.rand(100000, 28)
y = np.random.randint(num_classes, size=100000)

In [4]:
# Number of boosted trees; passed to XGBRegressor as n_estimators and embedded
# in the saved model's file name.
estimators = 25

In [5]:
# File the trained model is saved to and later reloaded from
# (e.g. "humming_bird-25" when estimators is 25).
model_path = f"humming_bird-{estimators}"

## Train the xgb model

In [6]:
# Build the (still unfitted) XGBoost regressor: `estimators` boosted trees,
# each limited to a depth of 10.
model = xgb.XGBRegressor(
    max_depth=10,
    n_estimators=estimators,
)

In [7]:
# Train the model on the synthetic data.
# fit() is the cell's last expression, so the fitted estimator's repr
# (with all resolved hyper-parameters) is displayed as the cell output.
model.fit(X, y)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=10,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=25, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [8]:
# Save the trained model to `model_path`; the same file is reloaded into a
# fresh XGBRegressor below and is also the file handed to ForestInference.load.
model.save_model(model_path)

## Load the xgb model

In [9]:
# Start from a fresh, default-constructed estimator; the trained state is
# filled in by load_model(model_path) in the next cell.
model_loaded = xgb.XGBRegressor()

In [10]:
# Restore the model that was written to `model_path` by save_model above.
model_loaded.load_model(model_path)

In [11]:
# Sanity check: run the reloaded XGBoost model on X and display its predictions.
model_loaded.predict(X)

array([0.67077154, 0.42272907, 0.6180591 , ..., 0.45412898, 0.48937115,
       0.51849025], dtype=float32)

## Converting the XGBoost model to a PyTorch model with Hummingbird

In [12]:
%%time
%%memit
# Use Hummingbird to convert the reloaded XGBoost model to PyTorch.
# %%time and %%memit report the time and peak memory of the conversion itself.
# Note that XGBRegressor requires us to pass it some sample data;
# a single row (X[0:1]) is supplied here.
hb_model = convert(model_loaded, 'torch', X[0:1])

peak memory: 385.48 MiB, increment: 8.27 MiB
CPU times: user 1.82 s, sys: 12.1 ms, total: 1.83 s
Wall time: 1.69 s


#### Hummingbird on CPU

In [13]:
%%timeit -r 3

# Run Hummingbird on CPU - By default CPU execution is used in Hummingbird.
# Timed over the full X; -r 3 limits the measurement to 3 repeats.
hb_model.predict(X)

101 ms ± 68.4 µs per loop (mean ± std. dev. of 3 runs, 10 loops each)


#### Hummingbird on GPU

In [14]:
%%time
%%memit

# Run Hummingbird on GPU (Note that you must have a GPU-enabled machine).
# The return value of .to() is not captured, so this relies on the call
# updating hb_model itself; the GPU timing cell that follows reuses hb_model.
# %%time and %%memit report the cost of the device move.
hb_model.to('cuda')


peak memory: 3062.85 MiB, increment: 2422.35 MiB
CPU times: user 2.21 s, sys: 1.19 s, total: 3.41 s
Wall time: 3.52 s


In [15]:
%%timeit
hb_model.predict(X)

14.5 ms ± 1.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Predict using FIL on GPU

In [16]:
from cuml import ForestInference

#### Using the NAIVE algorithm

In [17]:
%%time
%%memit
fil_model_naive = ForestInference.load(
    filename=model_path,
    algo='NAIVE',
    output_class=True,
    threshold=0.50,
    model_type='xgboost'
)

peak memory: 4797.70 MiB, increment: 1561.10 MiB
CPU times: user 1.19 s, sys: 569 ms, total: 1.76 s
Wall time: 1.85 s


In [18]:
%%timeit -r 3
# perform prediction on the model loaded from path (NAIVE algorithm), timed over the full X
# NOTE(review): names assigned inside a %%timeit cell are presumably not kept
# in the notebook namespace -- confirm before using fil_preds_naive elsewhere.
fil_preds_naive = fil_model_naive.predict(X)

11.8 ms ± 38.4 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


#### Using the BATCH_TREE_REORG algorithm

In [19]:
%%time
%%memit
fil_model_btr = ForestInference.load(
    filename=model_path,
    algo='BATCH_TREE_REORG',
    output_class=True,
    threshold=0.50,
    model_type='xgboost'
)

peak memory: 4825.12 MiB, increment: 0.12 MiB
CPU times: user 199 ms, sys: 137 ms, total: 336 ms
Wall time: 404 ms


In [20]:
%%timeit -r 3
# perform prediction on the model loaded from path (BATCH_TREE_REORG algorithm), timed over the full X
# NOTE(review): names assigned inside a %%timeit cell are presumably not kept
# in the notebook namespace -- confirm before using fil_preds_btr elsewhere.
fil_preds_btr = fil_model_btr.predict(X)

10.9 ms ± 39.8 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)
