# XGBRFClassifier Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_iris
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline
from xgboost import XGBClassifier

# Create the Wallaroo client used by every later cell; with these settings the
# recorded run prompted for a browser-based login.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

Please log into the following URL in a web browser:

	https://keycloak.autoscale-uat-gcp.wallaroo.dev/auth/realms/master/device?user_code=FNFK-JIPA

Login successful!


## Data

In [3]:
# Load the iris dataset with pandas containers (as_frame=True).
data = load_iris(as_frame=True)

# Keep only the first 100 rows: features as a NumPy array, labels as a Series.
first_100 = slice(None, 100)
X = data.data.values[first_100]
y = data.target[first_100]

## Configure & Upload Model

### Configure PyArrow Schema

In [9]:
# Input: one column, 'inputs', holding a fixed-size list of 4 float32 values.
inputs_field = pa.field('inputs', pa.list_(pa.float32(), list_size=4))
input_schema = pa.schema([inputs_field])

# Output: an int64 'predictions' column plus a fixed-size list of 3 float32
# values in 'probabilities'.
predictions_field = pa.field('predictions', pa.int64())
probabilities_field = pa.field('probabilities', pa.list_(pa.float32(), list_size=3))
output_schema = pa.schema([predictions_field, probabilities_field])

### Upload model

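The pickled model file (`xgb_rf_classifier.pkl`) is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/xgboost/xgb_rf_classifier.pkl?authuser=0). Download it into the notebook's working directory before running the cells below, which reference it by relative path.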

In [12]:
# Upload the pickled model file, declaring its framework and the Arrow schemas
# for its inputs and outputs. The bare `model` on the last line displays the
# returned object.
model = wl.upload_model(
    'xgb-rf-classifier',
    'xgb_rf_classifier.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Model is attempting loading to a native runtime...incompatible

Model is pending loading to a container runtime........
Model is attempting loading to a container runtime......................successful

Ready


0,1
Name,xgb-rf-classifier
Version,88f5c580-3146-495e-a855-3b27764f2603
File Name,xgb_rf_classifier.pkl
SHA,1e5ecfd77cb3bb75eab7aaf5b3975d037191c2d9e825805bff027fbec76be296
Status,ready
Image Path,proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mac-deploy:v2024.1.0-main-4756
Architecture,
Acceleration,
Updated At,2024-20-Mar 10:37:04


In [14]:
# Ask the uploaded model's configuration which runtime it was assigned.
model_config = model.config()
model_config.runtime()

'flight'

## Deploy Pipeline

In [15]:
# Resource request for the deployment: a quarter of a CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [16]:
# Build a single-step pipeline around the uploaded model.
# (Plain string literal: the name has no placeholders, so no f-string is needed.)
pipeline_name = "xgb-rf-classifier-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource settings built earlier, then display the status.
# NOTE(review): in the recorded run this call raised WaitForDeployError while
# the reported status was still 'Starting' (a sidekick container was not yet
# ready); a later status() call showed 'Running'. If the same error appears,
# re-check pipeline.status() before assuming the deployment failed.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s .........................................

WaitForDeployError: Deployment failed. See status for details.
Status: {'status': 'Starting', 'details': [], 'engines': [{'ip': '10.124.3.133', 'name': 'engine-577f45c9b9-zc9x8', 'status': 'Running', 'reason': None, 'details': [], 'pipeline_statuses': {'pipelines': [{'id': 'xgb-rf-classifier-pipeline', 'status': 'Running'}]}, 'model_statuses': {'models': [{'config': {'batch_config': None, 'filter_threshold': None, 'id': 213, 'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==', 'model_version_id': 131, 'output_schema': '//////AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAIAAACEAAAABAAAAJT///8AAAEQFAAAACgAAAAEAAAAAQAAACQAAAANAAAAcHJvYmFiaWxpdGllcwAGAAgABAAGAAAAAwAAAND///8AAAEDEAAAABwAAAAEAAAAAAAAAAQAAABpdGVtAAAGAAgABgAGAAAAAAABABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAhAAAAAkAAAABAAAAAAAAAALAAAAcHJlZGljdGlvbnMACAAMAAgABwAIAAAAAAAAAUAAAAA=', 'runtime': 'flight', 'sidekick_uri': None, 'tensor_fields': None}, 'model_version': {'conversion': {'framework': 'xgboost', 'python_version': '3.8', 'requirements': []}, 'created_on_version': '2024.1.0', 'file_info': {'file_name': 'xgb_rf_classifier.pkl', 'sha': '1e5ecfd77cb3bb75eab7aaf5b3975d037191c2d9e825805bff027fbec76be296', 'version': '88f5c580-3146-495e-a855-3b27764f2603'}, 'id': 131, 'image_path': 'proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mac-deploy:v2024.1.0-main-4756', 'name': 'xgb-rf-classifier', 'status': 'ready', 'task_id': '1485a503-c15a-425a-b7f1-f2662be0b448', 'visibility': 'private', 'workspace_id': 33}, 'status': 'Running'}]}}], 'engine_lbs': [{'ip': '10.124.2.6', 'name': 'engine-lb-d7cc8fc9c-4nvgk', 'status': 'Running', 'reason': None, 'details': []}], 'sidekicks': [{'ip': None, 'name': 'engine-sidekick-xgb-rf-classifier-131-688ff59849-gqq2k', 'status': 'Pending', 'reason': None, 'details': ['containers with unready status: 
[engine-sidekick-xgb-rf-classifier-131]', 'containers with unready status: [engine-sidekick-xgb-rf-classifier-131]'], 'statuses': None}]}

In [17]:
# Re-check the deployment status; continue once the top-level 'status' is 'Running'.
pipeline.status()

{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.124.3.133',
   'name': 'engine-577f45c9b9-zc9x8',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-rf-classifier-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 213,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 131,
       'output_schema': '//////AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAIAAACEAAAABAAAAJT///8AAAEQFAAAACgAAAAEAAAAAQAAACQAAAANAAAAcHJvYmFiaWxpdGllcwAGAAgABAAGAAAAAwAAAND///8AAAEDEAAAABwAAAAEAAAAAAAAAAQAAABpdGVtAAAGAAgABgAGAAAAAAABABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAhAAAAAkAAA

## Run Inference

In [18]:
# Build the inference input: one 'inputs' column (the field name declared in
# input_schema) whose cells are the per-row feature lists.
# X was already limited to its first 100 rows when it was created, so it is
# not sliced again here. pandas (`pd`) is imported in the notebook's import cell.
dataframe = pd.DataFrame({"inputs": X.tolist()})
dataframe

Unnamed: 0,inputs
0,"[5.1, 3.5, 1.4, 0.2]"
1,"[4.9, 3.0, 1.4, 0.2]"
2,"[4.7, 3.2, 1.3, 0.2]"
3,"[4.6, 3.1, 1.5, 0.2]"
4,"[5.0, 3.6, 1.4, 0.2]"
...,...
95,"[5.7, 3.0, 4.2, 1.2]"
96,"[5.7, 2.9, 4.2, 1.3]"
97,"[6.2, 2.9, 4.3, 1.3]"
98,"[5.1, 2.5, 3.0, 1.1]"


Getting results from the pipeline:

In [19]:
%%time
# Send the DataFrame through the deployed pipeline; %%time reports how long the call takes.
pipeline.infer(dataframe)

CPU times: user 30.1 ms, sys: 168 µs, total: 30.3 ms
Wall time: 69.8 ms


Unnamed: 0,time,in.inputs,out.predictions,out.probabilities,anomaly.count
0,2024-03-20 10:41:13.218,"[5.1, 3.5, 1.4, 0.2]",0,"[0.82590103, 0.087049514, 0.08704949]",0
1,2024-03-20 10:41:13.218,"[4.9, 3.0, 1.4, 0.2]",0,"[0.82590103, 0.087049514, 0.08704949]",0
2,2024-03-20 10:41:13.218,"[4.7, 3.2, 1.3, 0.2]",0,"[0.82590103, 0.087049514, 0.08704949]",0
3,2024-03-20 10:41:13.218,"[4.6, 3.1, 1.5, 0.2]",0,"[0.82590103, 0.087049514, 0.08704949]",0
4,2024-03-20 10:41:13.218,"[5.0, 3.6, 1.4, 0.2]",0,"[0.82590103, 0.087049514, 0.08704949]",0
...,...,...,...,...,...
95,2024-03-20 10:41:13.218,"[5.7, 3.0, 4.2, 1.2]",1,"[0.08704949, 0.82590103, 0.08704949]",0
96,2024-03-20 10:41:13.218,"[5.7, 2.9, 4.2, 1.3]",1,"[0.08704949, 0.82590103, 0.08704949]",0
97,2024-03-20 10:41:13.218,"[6.2, 2.9, 4.3, 1.3]",1,"[0.08704949, 0.82590103, 0.08704949]",0
98,2024-03-20 10:41:13.218,"[5.1, 2.5, 3.0, 1.1]",1,"[0.088133186, 0.82373357, 0.088133186]",0


Comparing the pipeline's results with predictions from the original model, loaded locally from the pickle file:

In [20]:
# `pickle` is already imported in the notebook's import cell, so it is not
# imported again here.
# NOTE(security): pickle.load can execute arbitrary code during deserialization;
# only load this file if it was obtained from a trusted source.
# NOTE(review): this rebinds `model`, which until now referred to the object
# returned by wl.upload_model(...). Re-running the pipeline-building cell after
# this one would pass the unpickled object to add_model_step. Consider a
# distinct name (updating the cells that follow) if cells may be re-run.
with open("xgb_rf_classifier.pkl", "rb") as fp:
    model = pickle.load(fp)

In [21]:
# Display the object loaded from the pickle file.
model

In [22]:
# Local predictions for the first two rows, to compare with the pipeline's 'out.predictions'.
model.predict(X[:2])

array([0, 0])

In [23]:
# Local class probabilities for the first ten rows, to compare with the pipeline's 'out.probabilities'.
model.predict_proba(X[:10])

array([[0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82590103, 0.08704951, 0.08704949],
       [0.82204384, 0.09131323, 0.08664294],
       [0.82590103, 0.08704951, 0.08704949]], dtype=float32)

## Undeploy Pipelines

In [24]:
# Undeploy the pipeline now that the comparison is done; the displayed summary should show deployed = False.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok


0,1
name,xgb-rf-classifier-pipeline
created,2024-03-20 10:37:45.186721+00:00
last_updated,2024-03-20 10:37:45.262804+00:00
deployed,False
arch,
accel,
tags,
versions,"4ea75cf6-0f08-4963-acca-0cb811e2819e, ae0981fc-de2b-4bc4-9b21-3ebd98479ab6"
steps,xgb-rf-classifier
published,False
