# Booster Multiclass Classification Softmax Example

## Imports

In [1]:
import pickle

import pandas as pd
import pyarrow as pa
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from xgboost import train, DMatrix

import wallaroo
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.pipeline import Pipeline

# Connect to the Wallaroo instance; SSO auth in interactive mode.
wl = wallaroo.Client(
    auth_type="sso",
    interactive=True,
)

## Data

Load the Iris dataset and split it into training and test sets:

In [2]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
TEST_SIZE = 0.2
RANDOM_STATE = 42

dataset = load_iris()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Wrap both splits as xgboost DMatrix objects (features + labels).
dtrain = DMatrix(X_train, label=y_train)
dtest = DMatrix(X_test, label=y_test)

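Load the raw model from the local pickle file, so its predictions can later be compared with the deployed pipeline's output: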

In [13]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only unpickle a model file obtained from a source you trust.
with open("booster_multi_classification_softmax.pkl", "rb") as model_file:
    raw_model = pickle.load(model_file)

## Configure & Upload Model

### Configure PyArrow Schema

In [4]:
# Input: one fixed-size list of 4 float32 values per row.
inputs_field = pa.field('inputs', pa.list_(pa.float32(), list_size=4))
# Output: a single float32 prediction per row.
predictions_field = pa.field('predictions', pa.float32())

input_schema = pa.schema([inputs_field])
output_schema = pa.schema([predictions_field])

### Upload model

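The model file can be downloaded from the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_booster_multi_classification_softmax.pkl?authuser=0). Note that it is stored there as `xgb_booster_multi_classification_softmax.pkl`; save it locally as `booster_multi_classification_softmax.pkl` to match the path used in this notebook.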

In [5]:
# Upload the pickled booster together with the PyArrow input/output schemas,
# then display the resulting model record.
model = wl.upload_model(
    'booster-multi-classification-softmax',
    'booster_multi_classification_softmax.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,booster-multi-classification-softmax
Version,c105d1a7-7894-4c2d-9786-d3ab6063f893
File Name,booster_multi_classification_softmax.pkl
SHA,6ace49bb2a3514724937de8ec023a14c94ac71dde7dce220ad27e44d296b4b25
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 12:15:01


In [6]:
# Show which runtime the uploaded model was assigned.
model_config = model.config()
model_config.runtime()

'onnx'

## Deploy Pipeline

In [7]:
# Resource allocation for the deployment: a quarter CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [8]:
# NOTE(review): the name says "binary" although this notebook deploys the
# multiclass softmax model (likely a copy-paste leftover). It is kept
# unchanged here so it still matches the recorded deployment status output;
# consider renaming it on the next full re-run.
pipeline_name = "booster-binary-classification-softmax-pipeline"

# Build the pipeline and attach the uploaded model as its only step.
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration built above, then show the status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s ...................... ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.12',
   'name': 'engine-74ccd7ffc4-lf2lf',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'booster-binary-classification-softmax-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 21,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 7,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAAsAAABwcmVkaWN0aW9ucwAAAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'onnx',
       'sidekick_uri': None,
       'ten

## Run Inference

In [9]:
# Convert the sparse test features to a dense array and keep at most the
# first 100 rows. `.toarray()` yields a plain ndarray rather than the
# `np.matrix` returned by `.todense()`, which NumPy no longer recommends;
# `.tolist()` on either produces the same list of rows.
data = dtest.get_data().toarray()[:100]

In [10]:
# One row per sample; the "inputs" column holds each sample's feature list,
# matching the field name declared in the input schema.
# (pandas is imported with the other dependencies at the top of the notebook.)
dataframe = pd.DataFrame({"inputs": data.tolist()})
dataframe

Unnamed: 0,inputs
0,"[6.099999904632568, 2.799999952316284, 4.69999..."
1,"[5.699999809265137, 3.799999952316284, 1.70000..."
2,"[7.699999809265137, 2.5999999046325684, 6.9000..."
3,"[6.0, 2.9000000953674316, 4.5, 1.5]"
4,"[6.800000190734863, 2.799999952316284, 4.80000..."
5,"[5.400000095367432, 3.4000000953674316, 1.5, 0..."
6,"[5.599999904632568, 2.9000000953674316, 3.5999..."
7,"[6.900000095367432, 3.0999999046325684, 5.0999..."
8,"[6.199999809265137, 2.200000047683716, 4.5, 1.5]"
9,"[5.800000190734863, 2.700000047683716, 3.90000..."


Getting results from pipeline:

In [11]:
%%time
pipeline.infer(dataframe)

CPU times: user 16.4 ms, sys: 2.41 ms, total: 18.8 ms
Wall time: 35.3 ms


Unnamed: 0,time,in.inputs,out.predictions,anomaly.count
0,2024-03-18 12:15:57.227,"[6.0999999046, 2.7999999523, 4.6999998093, 1.2...",1.0,0
1,2024-03-18 12:15:57.227,"[5.6999998093, 3.7999999523, 1.7000000477, 0.3...",0.0,0
2,2024-03-18 12:15:57.227,"[7.6999998093, 2.5999999046, 6.9000000954, 2.2...",2.0,0
3,2024-03-18 12:15:57.227,"[6.0, 2.9000000954, 4.5, 1.5]",1.0,0
4,2024-03-18 12:15:57.227,"[6.8000001907, 2.7999999523, 4.8000001907, 1.3...",1.0,0
5,2024-03-18 12:15:57.227,"[5.4000000954, 3.4000000954, 1.5, 0.400000006]",0.0,0
6,2024-03-18 12:15:57.227,"[5.5999999046, 2.9000000954, 3.5999999046, 1.2...",1.0,0
7,2024-03-18 12:15:57.227,"[6.9000000954, 3.0999999046, 5.0999999046, 2.2...",2.0,0
8,2024-03-18 12:15:57.227,"[6.1999998093, 2.2000000477, 4.5, 1.5]",1.0,0
9,2024-03-18 12:15:57.227,"[5.8000001907, 2.7000000477, 3.9000000954, 1.2...",1.0,0


Comparing them to the original model:

In [14]:
# Predict with the local, un-deployed model on the same test set so the
# values can be compared with the pipeline's `out.predictions` column.
raw_predictions = raw_model.predict(dtest)
raw_predictions[:100]

array([1., 0., 2., 1., 1., 0., 1., 2., 1., 1., 2., 0., 0., 0., 0., 1., 2.,
       1., 1., 2., 0., 2., 0., 2., 2., 2., 2., 2., 0., 0.], dtype=float32)

## Undeploy Pipelines

In [15]:
# Release the pipeline's resources. In the recorded run below this call
# raised RuntimeError because undeployment did not finish within 45s.
# NOTE(review): confirm the pipeline actually undeployed after the timeout.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ............................................

RuntimeError: Undeployment did not finish within 45s.
Status: None