# XGBClassifier Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_iris
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline
from xgboost import XGBClassifier

# Connect to Wallaroo using SSO; in this run the client printed a login URL to open in a browser.
wl = wallaroo.Client(auth_type="sso", interactive=True)

Please log into the following URL in a web browser:

	https://curly-plum-3868.keycloak.wallaroo.dev/auth/realms/master/device?user_code=SVLX-DAYF

Login successful!


## Data

In [2]:
# Load the iris dataset as pandas objects and keep only the first 100 samples.
data = load_iris(as_frame=True)

X = data.data.values[:100]   # features as an array
y = data.target[:100]        # matching target column

## Configure & Upload Model

### Configure PyArrow Schema

In [3]:
# Input: a single 'inputs' column holding fixed-size lists of four float32 values.
inputs_type = pa.list_(pa.float32(), list_size=4)
input_schema = pa.schema([pa.field('inputs', inputs_type)])

# Output: an int64 'predictions' column plus a 'probabilities' column of
# fixed-size lists of three float32 values.
probabilities_type = pa.list_(pa.float32(), list_size=3)
output_schema = pa.schema([
    pa.field('predictions', pa.int64()),
    pa.field('probabilities', probabilities_type),
])

### Upload model

The model file uploaded below (`xgb_classifier.pkl`) is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_classifier.pkl?authuser=0).

In [4]:
# Upload the pickled classifier as an XGBoost-framework model, passing the
# input and output schemas so they are registered with it.
model = wl.upload_model(
    'xgb-classification',
    'xgb_classifier.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime.....
Ready


0,1
Name,xgb-classification
Version,01702cf6-d77f-4274-a767-4bff73377b7d
File Name,xgb_classifier.pkl
SHA,0f9b98ba3b779f428b44d7e02049bb3720447c4d0d60d41cd02770adc9e5618d
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 11:09:00


In [5]:
# Show the runtime reported by the uploaded model's configuration.
model.config().runtime()

'onnx'

## Deploy Pipeline

In [6]:
# Build a deployment configuration with cpus=0.25 and memory='1Gi'.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [7]:
# Build a pipeline with the uploaded model as its only step.
pipeline_name = "xgb-classifier-pipeline"  # plain literal: no placeholders, so no f-prefix
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the configuration built earlier, then display the deployment status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s ............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.160',
   'name': 'engine-75744cb476-fj8l2',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-classifier-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 3,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 1,
       'output_schema': '//////AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAIAAACEAAAABAAAAJT///8AAAEQFAAAACgAAAAEAAAAAQAAACQAAAANAAAAcHJvYmFiaWxpdGllcwAGAAgABAAGAAAAAwAAAND///8AAAEDEAAAABwAAAAEAAAAAAAAAAQAAABpdGVtAAAGAAgABgAGAAAAAAABABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAhAAAAAkAAAABAAA

## Run Inference

In [8]:
# Build the inference input: one row per sample, with each sample's feature
# values stored as a list under the 'inputs' column.
dataframe = pd.DataFrame({"inputs": X[:100].tolist()})
dataframe

Unnamed: 0,inputs
0,"[5.1, 3.5, 1.4, 0.2]"
1,"[4.9, 3.0, 1.4, 0.2]"
2,"[4.7, 3.2, 1.3, 0.2]"
3,"[4.6, 3.1, 1.5, 0.2]"
4,"[5.0, 3.6, 1.4, 0.2]"
...,...
95,"[5.7, 3.0, 4.2, 1.2]"
96,"[5.7, 2.9, 4.2, 1.3]"
97,"[6.2, 2.9, 4.3, 1.3]"
98,"[5.1, 2.5, 3.0, 1.1]"


Getting results from the pipeline:

In [9]:
%%time
# Run inference on the deployed pipeline with the input frame; %%time reports the cell's CPU and wall time.
pipeline.infer(dataframe)

CPU times: user 17.5 ms, sys: 1.1 ms, total: 18.7 ms
Wall time: 29.5 ms


Unnamed: 0,time,in.inputs,out.predictions,out.probabilities,anomaly.count
0,2024-03-18 11:09:40.884,"[5.1, 3.5, 1.4, 0.2]",0,"[0.9968028, 0.0023831066, 0.0008141332]",0
1,2024-03-18 11:09:40.884,"[4.9, 3.0, 1.4, 0.2]",0,"[0.99636227, 0.0023820533, 0.00125574]",0
2,2024-03-18 11:09:40.884,"[4.7, 3.2, 1.3, 0.2]",0,"[0.9968028, 0.0023831066, 0.0008141332]",0
3,2024-03-18 11:09:40.884,"[4.6, 3.1, 1.5, 0.2]",0,"[0.9967945, 0.0023830866, 0.00082237856]",0
4,2024-03-18 11:09:40.884,"[5.0, 3.6, 1.4, 0.2]",0,"[0.9968028, 0.0023831066, 0.0008141332]",0
...,...,...,...,...,...
95,2024-03-18 11:09:40.884,"[5.7, 3.0, 4.2, 1.2]",1,"[0.0045280554, 0.9935835, 0.0018885169]",0
96,2024-03-18 11:09:40.884,"[5.7, 2.9, 4.2, 1.3]",1,"[0.0045280554, 0.9935835, 0.0018885169]",0
97,2024-03-18 11:09:40.884,"[6.2, 2.9, 4.3, 1.3]",1,"[0.0016663194, 0.9956091, 0.0027245525]",0
98,2024-03-18 11:09:40.884,"[5.1, 2.5, 3.0, 1.1]",1,"[0.009020356, 0.98142415, 0.009555613]",0


Comparing the pipeline results to the original model:

In [10]:
# Load the original classifier from the same file that was uploaded, so its
# predictions can be compared with the pipeline's output.
# SECURITY: pickle.load can execute arbitrary code; only unpickle files from a trusted source.
# NOTE: this rebinds `model` from the uploaded Wallaroo model to the local classifier.
with open("xgb_classifier.pkl", "rb") as fp:
    model = pickle.load(fp)

In [11]:
# Class predictions from the unpickled model for the first two samples
# (compare with out.predictions for rows 0 and 1 of the pipeline output).
model.predict(X[:2])

array([0, 0])

In [12]:
# Class probabilities from the unpickled model for the first two samples
# (compare with out.probabilities for rows 0 and 1 of the pipeline output).
model.predict_proba(X[:2])

array([[9.9680281e-01, 2.3831066e-03, 8.1413286e-04],
       [9.9636227e-01, 2.3820533e-03, 1.2557388e-03]], dtype=float32)

## Undeploy Pipelines

In [22]:
# Undeploy the pipeline once the comparison is finished.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s .................................... ok


0,1
name,xgb-classifier-pipeline-test
created,2023-11-17 10:05:33.958599+00:00
last_updated,2023-11-17 10:54:17.135286+00:00
deployed,False
arch,
tags,
versions,"ae268b47-095e-40e6-873b-a64e459c5007, 1403c792-86ed-4012-9d3b-c56cd3b01c5d, ea05c344-d283-40f6-a79c-386e9068d715"
steps,xgb-classification
published,False
