In [1]:
import numpy as np
import xgboost as xgb
from hummingbird.ml import convert, load

In [2]:
# Load the memory_profiler IPython extension; the %%memit cell magic used in
# the conversion / model-loading cells further down comes from it.
%load_ext memory_profiler

In [3]:
# Create some random data for binary classification.
# The generator is seeded so that "Restart Kernel -> Run All" rebuilds exactly
# the same dataset, and therefore the same trained model and comparable timings.
SEED = 0
num_classes = 2
n_samples = 100000   # number of rows in the synthetic dataset
n_features = 28      # number of feature columns

rng = np.random.default_rng(SEED)
X = rng.random((n_samples, n_features))          # uniform floats in [0, 1)
y = rng.integers(num_classes, size=n_samples)    # integer labels in {0, ..., num_classes - 1}

In [4]:
# Number of boosted trees; passed to the regressor as n_estimators and
# embedded in the saved model's file name.
estimators = 100

In [5]:
# File the trained model is written to and later reloaded from;
# the tree count is part of the name.
model_path = f"humming_bird-{estimators}"

## Train the xgb model

In [6]:
# Build the (not yet fitted) XGBoost regressor that is benchmarked below.
# NOTE(review): the labels are binary class ids but a regressor is used here,
# while the FIL cells later request thresholded class output - confirm intended.
model = xgb.XGBRegressor(
    max_depth=8,
    n_estimators=estimators,
)

In [7]:
# Train the model on the synthetic features X and labels y.
# fit() is the last expression of the cell, so the fitted estimator's repr
# (with all resolved hyper-parameters) is displayed as the cell output.
model.fit(X, y)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=8,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [8]:
# Save the model to `model_path`; the same file is reloaded into a fresh
# XGBRegressor below and is also read directly by ForestInference.load.
# NOTE(review): the path has no file extension - XGBoost's on-disk format may
# depend on it; confirm it matches what FIL's model_type='xgboost' expects.
model.save_model(model_path)

## Load the xgb model

In [9]:
# Start from an empty estimator; its trained state is filled in by
# load_model() in the next cell.
model_loaded = xgb.XGBRegressor()

In [10]:
# Restore the trained model from the file written by model.save_model(model_path).
model_loaded.load_model(model_path)

In [11]:
# Sanity check: predict on the full dataset with the reloaded model using
# native XGBoost (the displayed output is a float32 array of raw scores).
model_loaded.predict(X)

array([0.428404  , 0.54306775, 0.4893642 , ..., 0.399043  , 0.5021067 ,
       0.5078474 ], dtype=float32)

In [23]:
# Number of rows sent to each backend per timed predict() call.
batch_size = 1_000

In [24]:
# The first `batch_size` rows of X; every benchmark below predicts on this slice.
X_batch = X[:batch_size]

## Converting the model to PyTorch with Hummingbird

In [25]:
%%time
%%memit
# Use Hummingbird to convert the model to PyTorch
# Note that XGBRegressor requires us to pass it some sample data.
hb_model = convert(model_loaded, 'torch', X_batch[0:1])

peak memory: 4818.18 MiB, increment: 13.82 MiB
CPU times: user 1.37 s, sys: 59.9 ms, total: 1.43 s
Wall time: 1.47 s


#### Hummingbird on CPU

In [26]:
%%timeit -r 3

# Run Hummingbird on CPU - By default CPU execution is used in Hummingbird.
# `-r 3` limits timeit to three repeats; the number of loops per repeat is
# chosen automatically by timeit.
hb_model.predict(X_batch)

4.03 ms ± 3.55 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


#### Hummingbird on GPU

In [27]:
%%time
%%memit

# Run Hummingbird on GPU (Note that you must have a GPU-enabled machine).
# The result of .to('cuda') is not reassigned, so the following benchmark cell
# relies on `hb_model` itself now targeting the CUDA device; re-running the CPU
# benchmark cell after this one would presumably time the GPU instead.
hb_model.to('cuda')


peak memory: 4816.00 MiB, increment: 0.00 MiB
CPU times: user 98.9 ms, sys: 255 ms, total: 354 ms
Wall time: 513 ms


In [28]:
%%timeit
hb_model.predict(X_batch)

959 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Predict using cuML's Forest Inference Library (FIL) on GPU

In [29]:
from cuml import ForestInference

#### Using the `NAIVE` algorithm

In [30]:
%%time
%%memit
fil_model_naive = ForestInference.load(
    filename=model_path,
    algo='NAIVE',
    output_class=True,
    threshold=0.50,
    model_type='xgboost'
)

peak memory: 4816.00 MiB, increment: 0.00 MiB
CPU times: user 206 ms, sys: 132 ms, total: 338 ms
Wall time: 403 ms


In [31]:
%%timeit -r 3
# perform prediction on the model loaded from path
fil_preds_naive = fil_model_naive.predict(X_batch)

581 µs ± 1.95 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)


#### Using the `BATCH_TREE_REORG` algorithm

In [32]:
%%time
%%memit
fil_model_btr = ForestInference.load(
    filename=model_path,
    algo='BATCH_TREE_REORG',
    output_class=True,
    threshold=0.50,
    model_type='xgboost'
)

peak memory: 4815.96 MiB, increment: 0.00 MiB
CPU times: user 192 ms, sys: 156 ms, total: 348 ms
Wall time: 402 ms


In [33]:
%%timeit -r 3
# perform prediction on the model loaded from path
fil_preds_btr = fil_model_btr.predict(X_batch)

565 µs ± 1.29 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
