# Booster RF Regression Example

## Imports

In [1]:
import wallaroo
from wallaroo.pipeline import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
import pyarrow as pa
import pandas as pd
import numpy as np
from wallaroo.framework import Framework

import pickle
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from xgboost import train, DMatrix

# Create the Wallaroo client used by every later cell
# (SSO auth type, interactive mode enabled).
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

Load data:

In [5]:
# Load the scikit-learn diabetes dataset and pull out features and targets.
dataset = load_diabetes()
X = dataset.data
y = dataset.target

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
# NOTE(review): the pickled model is assumed to have been trained on the
# `dtrain` matrix produced by this exact split -- confirm against the
# training script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap both partitions (features + labels) in XGBoost DMatrix objects.
dtrain = DMatrix(X_train, label=y_train)
dtest = DMatrix(X_test, label=y_test)

Load raw model:

In [16]:
# Load the original (un-converted) model from the local pickle file so its
# predictions can be compared with the deployed pipeline's output later on.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling --
# only load this file if it comes from a source you trust.
with open("booster_rf_regression.pkl", "rb") as fp:
    raw_model = pickle.load(fp)

## Configure & Upload Model

### Configure PyArrow Schema

In [6]:
# Input: a single "inputs" column holding fixed-size lists of 10 float32 values.
input_schema = pa.schema(
    [pa.field("inputs", pa.list_(pa.float32(), list_size=10))]
)

# Output: a single float32 "predictions" column.
output_schema = pa.schema(
    [pa.field("predictions", pa.float32())]
)

### Upload model

The model file is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_booster_rf_regression.pkl). Note that the cells in this notebook expect it to be saved locally as `booster_rf_regression.pkl`.

In [7]:
# Upload the pickle file as an XGBoost-framework model, passing the PyArrow
# input/output schemas, then display the returned model object.
model = wl.upload_model(
    "booster-rf-regression",
    "booster_rf_regression.pkl",
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,booster-rf-regression
Version,2a10bc32-1fa9-46f6-93d5-6ae06a3d1617
File Name,booster_rf_regression.pkl
SHA,b58b410a1eb4690dcf1bdcd08157f37253d8316cafd406a165b484ceb47408b3
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 11:32:27


In [8]:
# Display the runtime recorded in the uploaded model's configuration.
model.config().runtime()

'onnx'

## Deploy Pipeline

In [9]:
# Build the deployment configuration: 0.25 CPUs and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory("1Gi")
    .build()
)

In [10]:
# Plain string literal: the name has no placeholders, so no f-prefix is needed.
pipeline_name = "booster-rf-regression-pipeline"

# Build the pipeline and add the uploaded model as a step.
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration built above, then display the
# deployment status as the cell output.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s ............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.192',
   'name': 'engine-8457f47df7-hxtwr',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'booster-rf-regression-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 9,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 3,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAAsAAABwcmVkaWN0aW9ucwAAAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'onnx',
       'sidekick_uri': None,
       'tensor_fields': Non

## Run Inference

In [11]:
# Take (at most) the first 100 test rows as a dense array for inference.
# The sparse matrix is sliced *before* densifying so only the needed rows are
# materialised, and toarray() yields a plain ndarray instead of the
# discouraged numpy.matrix that todense() returns. `.tolist()` on the result
# produces the same list of per-row feature lists as before.
data = dtest.get_data()[:100].toarray()

In [12]:
# Build the inference input: one row per sample, with the sample's feature
# values stored as a list in the "inputs" column (the column name used by the
# input schema). pandas is already imported as `pd` in the import cell at the
# top of the notebook, so it is not re-imported here.
dataframe = pd.DataFrame({"inputs": data.tolist()})
dataframe

Unnamed: 0,inputs
0,"[0.04534098505973816, -0.044641636312007904, -..."
1,"[0.09256398677825928, -0.044641636312007904, 0..."
2,"[0.0635036751627922, 0.050680119544267654, -0...."
3,"[0.09619652479887009, -0.044641636312007904, 0..."
4,"[0.012648137286305428, 0.050680119544267654, -..."
...,...
84,"[0.0017505219439044595, -0.044641636312007904,..."
85,"[0.012648137286305428, -0.044641636312007904, ..."
86,"[-0.027309786528348923, -0.044641636312007904,..."
87,"[-0.023677246645092964, -0.044641636312007904,..."


Get results from the pipeline:

In [13]:
%%time
# Send the DataFrame through the deployed pipeline and display the result;
# the %%time cell magic reports the CPU and wall time of this cell.
pipeline.infer(dataframe)

CPU times: user 14.4 ms, sys: 1.94 ms, total: 16.3 ms
Wall time: 26.1 ms


Unnamed: 0,time,in.inputs,out.predictions,anomaly.count
0,2024-03-18 11:33:08.368,"[0.0453409851, -0.0446416363, -0.0062059541, -...",154.345660,0
1,2024-03-18 11:33:08.368,"[0.0925639868, -0.0446416363, 0.0369065292, 0....",161.982850,0
2,2024-03-18 11:33:08.368,"[0.0635036752, 0.0506801195, -0.0040503298, -0...",154.345660,0
3,2024-03-18 11:33:08.368,"[0.0961965248, -0.0446416363, 0.0519958995, 0....",244.706510,0
4,2024-03-18 11:33:08.368,"[0.0126481373, 0.0506801195, -0.0202175118, -0...",108.613180,0
...,...,...,...,...
84,2024-03-18 11:33:08.368,"[0.0017505219, -0.0446416363, -0.065485619, -0...",104.921776,0
85,2024-03-18 11:33:08.368,"[0.0126481373, -0.0446416363, -0.0256065708, -...",78.300160,0
86,2024-03-18 11:33:08.368,"[-0.0273097865, -0.0446416363, -0.0633299947, ...",82.259480,0
87,2024-03-18 11:33:08.368,"[-0.0236772466, -0.0446416363, -0.0697968677, ...",68.962540,0


Compare these results with the predictions of the original (raw) model:

In [18]:
# Predict with the locally loaded model on the same test DMatrix and show
# the first 100 results for comparison with the pipeline's predictions.
raw_model.predict(dtest)[:100]

array([154.34566 , 161.98285 , 154.34566 , 244.70651 , 108.61318 ,
        90.13367 , 222.60936 , 216.11751 ,  95.64049 , 141.68506 ,
        85.97687 , 145.99286 ,  80.62069 , 208.578   , 106.19375 ,
        88.29169 , 222.60936 , 244.70651 , 205.60529 , 244.70651 ,
       190.86652 ,  93.20975 ,  77.53956 , 224.3846  , 131.42479 ,
       148.30319 , 200.4963  , 143.51114 ,  76.604965, 112.95469 ,
       147.85834 ,  81.30711 , 204.29251 , 165.45825 , 188.92328 ,
       217.28133 , 136.13403 , 136.13403 , 161.14886 ,  70.106415,
        83.257484,  90.17679 , 116.94887 , 161.14886 , 154.78029 ,
        64.749084,  68.96254 , 109.63051 ,  80.62069 , 144.93239 ,
       107.90619 ,  74.991714, 147.85834 , 103.40725 , 122.28518 ,
       152.96732 ,  83.16392 , 228.43729 ,  76.18227 ,  91.63138 ,
       210.28957 , 210.28957 , 126.3298  , 116.10301 , 115.68012 ,
       232.38673 , 143.91437 , 148.65825 , 101.39177 , 116.99875 ,
       116.99875 , 224.3846  , 191.23105 , 105.35338 ,  68.411

## Undeploy Pipeline

In [141]:
# Undeploy the pipeline now that the comparison is done.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok


0,1
name,xgb-booster-rf-classification-pipeline-123
created,2023-11-17 13:33:28.878254+00:00
last_updated,2023-11-17 13:33:28.909970+00:00
deployed,False
arch,
tags,
versions,"5e1ddb2d-1c60-4f7b-865e-3b3eb0282ff6, 6041516c-7113-42f0-97bf-902945718ece"
steps,xgb-booster-rf-classification
published,False
