# XGBRegressor Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_diabetes
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline

# Create the Wallaroo client (SSO auth, interactive mode); it is used below to
# upload the model and to build the pipeline.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

In [2]:
# Load the diabetes dataset as pandas objects (as_frame=True).
data = load_diabetes(as_frame=True)

# Keep only the first 100 rows: features as a NumPy array, targets as a Series.
features_frame, target_series = data['data'], data['target']
X = features_frame.to_numpy()[:100]
y = target_series.iloc[:100]

## Configure & Upload Model

### Configure PyArrow Schema

In [3]:
# Input: a single 'inputs' column holding a fixed-size list of 10 float32 values.
inputs_type = pa.list_(pa.float32(), list_size=10)
inputs_field = pa.field('inputs', inputs_type)
input_schema = pa.schema([inputs_field])

# Output: a single 'predictions' column holding one float32 value.
predictions_field = pa.field('predictions', pa.float32())
output_schema = pa.schema([predictions_field])

### Upload model

The model file used here, `xgb_regressor.pkl`, is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_regressor.pkl?authuser=0).

In [4]:
# Upload the model file, declaring its framework and the Arrow schemas for its
# inputs and outputs, then display the returned model object.
model = wl.upload_model(
    'xgb-regressor',
    'xgb_regressor.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,xgb-regressor
Version,cd9dfd0c-1def-4bcc-a0ef-2e3b29f04ce8
File Name,xgb_regressor.pkl
SHA,8b2bc0c86d0baac42807fa069b0798bcf1ef61b19b8cd23c1bfca0b27d3defd7
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 11:19:45


In [5]:
# Display the runtime reported by the uploaded model's configuration.
model.config().runtime()

'onnx'

## Deploy Pipeline

In [6]:
# Resource settings for the deployment: 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [7]:
# Build a single-step pipeline around the uploaded model, deploy it with the
# resource settings in `deployment_config`, then display its status.
# The name is a plain string: it has no placeholders, so no f-string is needed.
pipeline_name = "xgb-regression-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s .............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.175',
   'name': 'engine-5b7b897556-7t5zw',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-regression-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 6,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 2,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAAsAAABwcmVkaWN0aW9ucwAAAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'onnx',
       'sidekick_uri': None,
       'tensor_fields': None},
   

## Run Inference

In [8]:
# One row per sample: the 'inputs' column holds each sample's feature values as
# a Python list. `X` was already cut to the first 100 rows when it was created,
# so it is used as-is here.
dataframe = pd.DataFrame({"inputs": X.tolist()})
dataframe

Unnamed: 0,inputs
0,"[0.038075906433423026, 0.05068011873981862, 0...."
1,"[-0.0018820165277906047, -0.044641636506989144..."
2,"[0.08529890629667548, 0.05068011873981862, 0.0..."
3,"[-0.0890629393522567, -0.044641636506989144, -..."
4,"[0.005383060374248237, -0.044641636506989144, ..."
...,...
95,"[-0.07090024709715959, -0.044641636506989144, ..."
96,"[0.056238598688520124, 0.05068011873981862, 0...."
97,"[-0.027309785684926546, -0.044641636506989144,..."
98,"[0.001750521923228816, 0.05068011873981862, -0..."


Getting results from the pipeline:

In [9]:
%%time
# Send every row of `dataframe` through the deployed pipeline and display the
# result; %%time reports how long the call takes.
pipeline.infer(dataframe)

CPU times: user 17.5 ms, sys: 1.51 ms, total: 19 ms
Wall time: 37.5 ms


Unnamed: 0,time,in.inputs,out.predictions,anomaly.count
0,2024-03-18 11:20:43.551,"[0.0380759064, 0.0506801187, 0.0616962065, 0.0...",151.232500,0
1,2024-03-18 11:20:43.551,"[-0.0018820165, -0.0446416365, -0.0514740612, ...",75.027435,0
2,2024-03-18 11:20:43.551,"[0.0852989063, 0.0506801187, 0.0444512133, -0....",141.047700,0
3,2024-03-18 11:20:43.551,"[-0.0890629394, -0.0446416365, -0.0115950145, ...",205.850620,0
4,2024-03-18 11:20:43.551,"[0.0053830604, -0.0446416365, -0.0363846922, 0...",135.084580,0
...,...,...,...,...
95,2024-03-18 11:20:43.551,"[-0.0709002471, -0.0446416365, -0.0579409337, ...",162.006600,0
96,2024-03-18 11:20:43.551,"[0.0562385987, 0.0506801187, 0.009961227, 0.04...",149.929400,0
97,2024-03-18 11:20:43.551,"[-0.0273097857, -0.0446416365, 0.0886415084, -...",278.562560,0
98,2024-03-18 11:20:43.551,"[0.0017505219, 0.0506801187, -0.0051281421, -0...",91.701310,0


Comparing them to the original model:

In [10]:
# SECURITY: pickle.load can execute arbitrary code while deserializing; only
# load a file obtained from a trusted source.
# NOTE: this rebinds `model`, which until now held the value returned by
# wl.upload_model(). Re-run the upload cell before re-running any cell that
# needs the uploaded model (e.g. pipeline.add_model_step).
with open("xgb_regressor.pkl", "rb") as fp:
    model = pickle.load(fp)

In [11]:
# Predict with the locally loaded model on the first two samples, for
# comparison with the pipeline's output for the same rows.
first_two_samples = X[:2]
model.predict(first_two_samples)

array([151.23256 ,  75.027435], dtype=float32)

## Undeploy Pipelines

In [12]:
# Undeploy the pipeline now that the comparison is done.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok


0,1
name,xgb-regression-pipeline
created,2024-03-18 11:20:03.590129+00:00
last_updated,2024-03-18 11:20:03.637267+00:00
deployed,False
arch,
accel,
tags,
versions,"5bcd61a8-1f1c-4157-96f8-6c72138545fe, 6543a14c-0a48-4ce9-ac9b-c5704a4178ec"
steps,xgb-regressor
published,False
