# XGBoost RF Regressor Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_diabetes
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline

# Open a Wallaroo client session using interactive SSO authentication.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

In [2]:
# Load the scikit-learn diabetes dataset as pandas objects.
data = load_diabetes(as_frame=True)

# Keep only the first 100 samples: features as a NumPy array, targets as a Series.
X = data.data.values[:100]
y = data.target.iloc[:100]

## Configure & Upload Model

### Configure PyArrow Schema

In [3]:
# Each input row is a fixed-size list of 10 float32 feature values.
input_schema = pa.schema([("inputs", pa.list_(pa.float32(), 10))])

# The model returns a single float32 prediction per row.
output_schema = pa.schema([("predictions", pa.float32())])

### Upload model

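The model used in this example is available for download from the Wallaroo [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/xgboost/xgb_rf_regressor.pkl?authuser=0). Save it as `xgb_rf_regressor.pkl` in the same directory as this notebook before running the upload cell.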

In [4]:
# Upload the pickled regressor to Wallaroo with the schemas defined above,
# then display the resulting model record.
model = wl.upload_model(
    "xgb-rf-regressor",
    "xgb_rf_regressor.pkl",
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Model is attempting loading to a native runtime................incompatible

Model is pending loading to a container runtime..
Model is attempting loading to a container runtime........................successful

Ready


0,1
Name,xgb-rf-regressor
Version,83b4dd8f-c3d1-49d8-a84b-7df92eb9f91f
File Name,xgb_rf_regressor.pkl
SHA,ead81583402e6b17f14402b848c6ae6eb7854578f607ea9baf63379a6f60539c
Status,ready
Image Path,proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mac-deploy:v2024.1.0-main-4756
Architecture,
Acceleration,
Updated At,2024-20-Mar 10:47:00


In [5]:
# Show the runtime recorded in the uploaded model's configuration.
model.config().runtime()

'flight'

## Deploy Pipeline

In [6]:
# Resource allocation for the deployment: a quarter of a CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory("1Gi")
    .build()
)

In [7]:
# Plain string literal: the name has no placeholders, so no f-string is needed.
pipeline_name = "xgb-rf-regressor-pipeline"

# Build the pipeline and add the uploaded model as its only step.
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration defined above, then display the
# deployment status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s .............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.124.3.134',
   'name': 'engine-5bd8d496d6-mmk28',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-rf-regressor-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 214,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 132,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAAsAAABwcmVkaWN0aW9ucwAAAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'flight',
       'sidekick_uri': None,
       'tensor_fields': None

## Run Inference

In [8]:
# X already holds only the first 100 samples, so no further slicing is needed.
# One row per sample; the "inputs" column matches the field name in input_schema.
dataframe = pd.DataFrame({"inputs": X.tolist()})
dataframe

Unnamed: 0,inputs
0,"[0.038075906433423026, 0.05068011873981862, 0...."
1,"[-0.0018820165277906047, -0.044641636506989144..."
2,"[0.08529890629667548, 0.05068011873981862, 0.0..."
3,"[-0.0890629393522567, -0.044641636506989144, -..."
4,"[0.005383060374248237, -0.044641636506989144, ..."
...,...
95,"[-0.07090024709715959, -0.044641636506989144, ..."
96,"[0.056238598688520124, 0.05068011873981862, 0...."
97,"[-0.027309785684926546, -0.044641636506989144,..."
98,"[0.001750521923228816, 0.05068011873981862, -0..."


Getting results from the pipeline:

In [9]:
%%time
# Send the whole DataFrame through the deployed pipeline in a single request;
# the %%time cell magic above reports how long the call takes.
pipeline.infer(dataframe)

CPU times: user 23.5 ms, sys: 2.73 ms, total: 26.3 ms
Wall time: 56.4 ms


Unnamed: 0,time,in.inputs,out.predictions,anomaly.count
0,2024-03-20 10:47:26.132,"[0.0380759064, 0.0506801187, 0.0616962065, 0.0...",222.492130,0
1,2024-03-20 10:47:26.132,"[-0.0018820165, -0.0446416365, -0.0514740612, ...",85.170210,0
2,2024-03-20 10:47:26.132,"[0.0852989063, 0.0506801187, 0.0444512133, -0....",156.588300,0
3,2024-03-20 10:47:26.132,"[-0.0890629394, -0.0446416365, -0.0115950145, ...",180.512100,0
4,2024-03-20 10:47:26.132,"[0.0053830604, -0.0446416365, -0.0363846922, 0...",105.263480,0
...,...,...,...,...
95,2024-03-20 10:47:26.132,"[-0.0709002471, -0.0446416365, -0.0579409337, ...",139.580600,0
96,2024-03-20 10:47:26.132,"[0.0562385987, 0.0506801187, 0.009961227, 0.04...",207.162200,0
97,2024-03-20 10:47:26.132,"[-0.0273097857, -0.0446416365, 0.0886415084, -...",166.432820,0
98,2024-03-20 10:47:26.132,"[0.0017505219, 0.0506801187, -0.0051281421, -0...",124.094310,0


Comparing them to the original model:

In [10]:
import pickle

with open("xgb_rf_regressor.pkl", "rb") as fp:
    model = pickle.load(fp)

In [12]:
model

In [11]:
model.predict(X[:2])

array([222.49213,  85.17021], dtype=float32)

## Undeploy Pipelines

In [13]:
# Undeploy the pipeline now that the example is finished.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s .................................... ok


0,1
name,xgb-rf-regressor-pipeline
created,2024-03-20 10:47:10.328621+00:00
last_updated,2024-03-20 10:47:10.410037+00:00
deployed,False
arch,
accel,
tags,
versions,"0e942c7c-0bc8-4153-bf4e-7aa66e238b7f, 98904e0b-daff-462e-8b6f-fec1448ba076"
steps,xgb-rf-regressor
published,False
