# Booster Regression Example

## Imports

In [11]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline
from xgboost import train, DMatrix

# Create the Wallaroo client using SSO authentication in interactive mode.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

Load data:

In [12]:
# Load the scikit-learn diabetes dataset and pull out features and targets.
dataset = load_diabetes()
X = dataset.data
y = dataset.target

# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.
# The pickled model is assumed to have been trained on the DMatrix built
# from the training portion of this split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap each split (features plus labels) in an XGBoost DMatrix.
dtest = DMatrix(X_test, label=y_test)
dtrain = DMatrix(X_train, label=y_train)

Load raw model:

In [15]:
# Deserialize the previously trained model from the local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open("booster_regression.pkl", "rb") as model_file:
    raw_model = pickle.load(model_file)

## Configure & Upload Model

### Configure PyArrow Schema

In [16]:
# Input: one 'inputs' field holding a fixed-size list of 10 float32 values per row.
inputs_field = pa.field('inputs', pa.list_(pa.float32(), list_size=10))
input_schema = pa.schema([inputs_field])

# Output: one float32 'predictions' value per row.
predictions_field = pa.field('predictions', pa.float32())
output_schema = pa.schema([predictions_field])

### Upload model

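The model uploaded below is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_booster_regression.pkl?authuser=0) as `xgb_booster_regression.pkl`; this notebook expects a local copy named `booster_regression.pkl`.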

In [17]:
# Upload the pickled model as an XGBoost-framework model, attaching the
# PyArrow input/output schemas, then display the resulting model record.
model = wl.upload_model(
    'booster-regression',
    'booster_regression.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,booster-regression
Version,5bf402d3-fe8a-429f-8ab6-e5092ba61266
File Name,booster_regression.pkl
SHA,4d1c91595d7c89f0e67227813f818eee50667ce04116841f1c4d8d817f610b7b
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 11:56:23


In [18]:
# Show which runtime is recorded in the uploaded model's configuration.
model_config = model.config()
model_config.runtime()

'onnx'

## Deploy Pipeline

In [19]:
# Deployment resources: 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [20]:
# Plain string literal: the name has no placeholders, so no f-prefix is needed.
pipeline_name = "booster-regression-pipeline"

# Build the pipeline and add the uploaded model as its step.
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration, then display the deployment status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s .............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.226',
   'name': 'engine-6c7fc94bc7-m8m7m',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'booster-regression-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 15,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 5,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAAsAAABwcmVkaWN0aW9ucwAAAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'onnx',
       'sidekick_uri': None,
       'tensor_fields': None}

## Run Inference

In [21]:
# Convert the test features held by the DMatrix to a dense array and keep at
# most the first 100 rows. `toarray()` yields a regular ndarray, whereas
# `todense()` returns the discouraged `np.matrix` type; `.tolist()` on either
# gives the same nested list.
data = dtest.get_data().toarray()[:100]

In [22]:
# One row per sample: the single "inputs" column holds each feature vector as
# a list, matching the 'inputs' field name used in the model's input schema.
dataframe = pd.DataFrame({"inputs": data.tolist()})
dataframe

Unnamed: 0,inputs
0,"[0.04534098505973816, -0.044641636312007904, -..."
1,"[0.09256398677825928, -0.044641636312007904, 0..."
2,"[0.0635036751627922, 0.050680119544267654, -0...."
3,"[0.09619652479887009, -0.044641636312007904, 0..."
4,"[0.012648137286305428, 0.050680119544267654, -..."
...,...
84,"[0.0017505219439044595, -0.044641636312007904,..."
85,"[0.012648137286305428, -0.044641636312007904, ..."
86,"[-0.027309786528348923, -0.044641636312007904,..."
87,"[-0.023677246645092964, -0.044641636312007904,..."


Getting results from the pipeline:

In [23]:
%%time
# Send the DataFrame to the deployed pipeline for inference; the %%time cell
# magic (which must remain the first line of the cell) reports the timing.
pipeline.infer(dataframe)

CPU times: user 16.8 ms, sys: 0 ns, total: 16.8 ms
Wall time: 29 ms


Unnamed: 0,time,in.inputs,out.predictions,anomaly.count
0,2024-03-18 11:56:55.525,"[0.0453409851, -0.0446416363, -0.0062059541, -...",121.672900,0
1,2024-03-18 11:56:55.525,"[0.0925639868, -0.0446416363, 0.0369065292, 0....",190.484830,0
2,2024-03-18 11:56:55.525,"[0.0635036752, 0.0506801195, -0.0040503298, -0...",111.782380,0
3,2024-03-18 11:56:55.525,"[0.0961965248, -0.0446416363, 0.0519958995, 0....",109.612880,0
4,2024-03-18 11:56:55.525,"[0.0126481373, 0.0506801195, -0.0202175118, -0...",116.345750,0
...,...,...,...,...
84,2024-03-18 11:56:55.525,"[0.0017505219, -0.0446416363, -0.065485619, -0...",71.996124,0
85,2024-03-18 11:56:55.525,"[0.0126481373, -0.0446416363, -0.0256065708, -...",46.890617,0
86,2024-03-18 11:56:55.525,"[-0.0273097865, -0.0446416363, -0.0633299947, ...",63.577980,0
87,2024-03-18 11:56:55.525,"[-0.0236772466, -0.0446416363, -0.0697968677, ...",55.595460,0


Comparing them to the original model:

In [24]:
# Predict locally with the unpickled model on the same test DMatrix so the
# values can be compared against the pipeline's output.
raw_predictions = raw_model.predict(dtest)
raw_predictions[:100]

array([121.6729  , 190.48483 , 111.78238 , 109.61288 , 116.34575 ,
        85.216545, 213.15    , 207.94493 , 101.29846 , 170.69362 ,
        66.96854 , 145.21446 ,  73.83197 , 163.54646 ,  70.32    ,
        74.63077 , 201.35542 , 186.59804 , 130.75244 , 183.10846 ,
        98.62304 ,  87.72134 ,  59.893726, 171.65509 , 155.49603 ,
       160.44809 , 173.86526 , 115.04234 ,  56.607304,  86.4588  ,
       120.462746,  73.96513 , 115.0956  , 148.43173 , 100.05525 ,
       102.38193 ,  88.871796,  88.871796, 134.7709  ,  51.302593,
        53.15803 ,  83.91626 , 128.96738 , 120.48001 , 136.86176 ,
        52.778484,  46.68677 ,  71.15934 ,  58.29168 , 133.73485 ,
        35.829025,  66.652855, 121.18785 ,  74.08378 , 114.03221 ,
       128.89253 ,  70.77263 , 186.60004 ,  86.5795  ,  74.0783  ,
       151.58614 , 167.9001  ,  90.85109 ,  46.70047 ,  94.22118 ,
       174.81976 , 140.16185 , 121.38359 , 112.195694,  93.37313 ,
       147.53513 , 163.0744  , 122.94733 ,  85.41721 ,  65.301

## Undeploy Pipelines

In [54]:
# Undeploy the pipeline now that the inference comparison is done.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s .................................... ok


0,1
name,xgb-core-regression-pipeline-test-123
created,2023-11-17 11:03:01.946102+00:00
last_updated,2023-11-17 11:03:01.975551+00:00
deployed,False
arch,
tags,
versions,"0f68e5f3-07db-4eff-93fe-9b2c1806bce8, a84f6b11-095d-4420-a40b-c8d5bff0ad51"
steps,xgb-core-regression
published,False
