# Booster Multiclass Classification Softprob Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
import wallaroo
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline
from xgboost import train, DMatrix

# Create the Wallaroo client used by every later cell (SSO auth type, interactive mode enabled).
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

Load data:

In [2]:
# Iris dataset: feature matrix and integer class labels.
dataset = load_iris()
X = dataset.data
y = dataset.target

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap both splits, together with their labels, as XGBoost DMatrix objects.
dtrain = DMatrix(X_train, label=y_train)
dtest = DMatrix(X_test, label=y_test)

Load raw model:

In [13]:
# NOTE(security): unpickling can execute arbitrary code embedded in the file,
# so only load this pickle if it comes from a source you trust.
with open("booster_multi_classification_softprob.pkl", "rb") as model_file:
    raw_model = pickle.load(model_file)

## Configure & Upload Model

### Configure PyArrow Schema

In [4]:
# Input: one column, 'inputs', holding a fixed-size list of 4 float32 values per row.
input_schema = pa.schema(
    [
        ('inputs', pa.list_(pa.float32(), list_size=4)),
    ]
)

# Output: one column, 'probabilities', holding a fixed-size list of 3 float32 values per row.
output_schema = pa.schema(
    [
        ('probabilities', pa.list_(pa.float32(), list_size=3)),
    ]
)

### Upload model

The model file uploaded below is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_booster_multi_classification_softprob.pkl?authuser=0).

In [5]:
# Upload the pickled booster as an XGBoost-framework model together with its
# PyArrow input/output schemas, then display the resulting model summary.
model = wl.upload_model(
    'booster-multi-classification-softprob',
    'booster_multi_classification_softprob.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,booster-multi-classification-softprob
Version,746c5fe1-a92c-4fbf-b929-e2af35f40da6
File Name,booster_multi_classification_softprob.pkl
SHA,eefed02b648166eda12c0d617c160d9aa8c560fdf54a5c42fe9934d473151034
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 12:20:10


In [6]:
# Display the runtime recorded in the uploaded model's configuration.
model.config().runtime()

'onnx'

## Deploy Pipeline

In [7]:
# Resource settings for the deployment: 0.25 CPUs and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [8]:
# Plain string literal: the name has no placeholders, so no f-string prefix is needed.
pipeline_name = "xgb-booster-multi-classification-softprob-pipeline"

# Build the pipeline and add the uploaded model as a step.
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration defined above, then display the deployment status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s ............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.24',
   'name': 'engine-7d755445f9-vkjcm',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-booster-multi-classification-softprob-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 24,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 8,
       'output_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAxP///wAAARAUAAAAKAAAAAQAAAABAAAANAAAAA0AAABwcm9iYWJpbGl0aWVzAAYACAAEAAYAAAADAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAABwAAAAEAAAAAAAAAAQAAABpdGVtAAAGAAgABgAGAAAAAAABAA=='

## Run Inference

In [9]:
# Pull the test features back out of the DMatrix and densify them, keeping at most
# the first 100 rows. `toarray()` yields a plain ndarray rather than the
# no-longer-recommended `numpy.matrix` that `todense()` returns; `data.tolist()`
# produces the same nested lists either way.
data = dtest.get_data().toarray()[:100]

In [10]:
# One row per sample; each row's feature values are stored as a list in the
# "inputs" column, the field name declared in the input schema.
# (pandas is imported as `pd` in the imports cell at the top of the notebook.)
dataframe = pd.DataFrame({"inputs": data.tolist()})
dataframe

Unnamed: 0,inputs
0,"[6.099999904632568, 2.799999952316284, 4.69999..."
1,"[5.699999809265137, 3.799999952316284, 1.70000..."
2,"[7.699999809265137, 2.5999999046325684, 6.9000..."
3,"[6.0, 2.9000000953674316, 4.5, 1.5]"
4,"[6.800000190734863, 2.799999952316284, 4.80000..."
5,"[5.400000095367432, 3.4000000953674316, 1.5, 0..."
6,"[5.599999904632568, 2.9000000953674316, 3.5999..."
7,"[6.900000095367432, 3.0999999046325684, 5.0999..."
8,"[6.199999809265137, 2.200000047683716, 4.5, 1.5]"
9,"[5.800000190734863, 2.700000047683716, 3.90000..."


Getting results from pipeline:

In [11]:
%%time
# Send the test rows to the deployed pipeline and display the returned results;
# the cell magic above reports how long the call took.
pipeline.infer(dataframe)

CPU times: user 14 ms, sys: 2 ms, total: 16 ms
Wall time: 25.2 ms


Unnamed: 0,time,in.inputs,out.probabilities,anomaly.count
0,2024-03-18 12:20:39.544,"[6.0999999046, 2.7999999523, 4.6999998093, 1.2...","[0.0860216, 0.82712775, 0.08685072]",0
1,2024-03-18 12:20:39.544,"[5.6999998093, 3.7999999523, 1.7000000477, 0.3...","[0.8245585, 0.08896249, 0.08647902]",0
2,2024-03-18 12:20:39.544,"[7.6999998093, 2.5999999046, 6.9000000954, 2.2...","[0.087060094, 0.0922827, 0.82065725]",0
3,2024-03-18 12:20:39.544,"[6.0, 2.9000000954, 4.5, 1.5]","[0.083474845, 0.8026398, 0.11388545]",0
4,2024-03-18 12:20:39.544,"[6.8000001907, 2.7999999523, 4.8000001907, 1.3...","[0.074739285, 0.7186443, 0.20661643]",0
5,2024-03-18 12:20:39.544,"[5.4000000954, 3.4000000954, 1.5, 0.400000006]","[0.8245585, 0.08896249, 0.08647902]",0
6,2024-03-18 12:20:39.544,"[5.5999999046, 2.9000000954, 3.5999999046, 1.2...","[0.0860216, 0.82712775, 0.08685072]",0
7,2024-03-18 12:20:39.544,"[6.9000000954, 3.0999999046, 5.0999999046, 2.2...","[0.087060094, 0.0922827, 0.82065725]",0
8,2024-03-18 12:20:39.544,"[6.1999998093, 2.2000000477, 4.5, 1.5]","[0.083474845, 0.8026398, 0.11388545]",0
9,2024-03-18 12:20:39.544,"[5.8000001907, 2.7000000477, 3.9000000954, 1.2...","[0.0860216, 0.82712775, 0.08685072]",0


Comparing them to the original model:

In [14]:
# Predict with the locally unpickled model on the same test DMatrix so the values can be
# compared with the pipeline output; the slice keeps at most the first 100 rows.
raw_model.predict(dtest)[:100]

array([[0.08602159, 0.82712764, 0.08685071],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.08706011, 0.09228271, 0.82065713],
       [0.08347484, 0.8026398 , 0.11388545],
       [0.07473928, 0.7186443 , 0.20661643],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.08602159, 0.82712764, 0.08685071],
       [0.08706011, 0.09228271, 0.82065713],
       [0.08347484, 0.8026398 , 0.11388545],
       [0.08602159, 0.82712764, 0.08685071],
       [0.08706011, 0.09228271, 0.82065713],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.08347484, 0.8026398 , 0.11388545],
       [0.08706011, 0.09228271, 0.82065713],
       [0.08602159, 0.82712764, 0.08685071],
       [0.08602159, 0.82712764, 0.08685071],
       [0.08706011, 0.09228271, 0.82065713],
       [0.8245585 , 0.08896249, 0.08647902],
       [0.07847062, 0.18183963, 0.73968977],
       [0.

## Undeploy Pipeline

In [35]:
# Undeploy the pipeline that was deployed earlier in this notebook.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok


0,1
name,xgb-booster-binary-classification-softprob-pipeline-123
created,2023-11-17 11:18:25.623392+00:00
last_updated,2023-11-17 11:18:25.654893+00:00
deployed,False
arch,
tags,
versions,"0c66611d-0696-4edc-8c35-e5bd0be69b98, 4f0274c0-90f9-4512-afed-e1150ec3206c"
steps,xgb-booster-binary-classification-softprob
published,False
