# Booster RF Classification Example

## Imports

In [1]:
import wallaroo
from wallaroo.pipeline import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
import pyarrow as pa
import pandas as pd
import numpy as np
from wallaroo.framework import Framework

import pickle
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from xgboost import train, DMatrix

# Create the Wallaroo client used by every later cell (upload, pipeline build).
# NOTE(review): auth_type="sso" with interactive=True presumably triggers an
# interactive login prompt — confirm for your environment.
wl = wallaroo.Client(auth_type="sso", interactive=True)

## Data

Load the breast cancer dataset and split it into training and test sets:

In [2]:
# Pull the feature matrix and the target labels out of the bundled dataset.
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap both splits in XGBoost DMatrix containers (dtest feeds the inference cells below).
dtrain = DMatrix(X_train, label=y_train)
dtest = DMatrix(X_test, label=y_test)

Load the raw model from the local pickle file:

In [3]:
# Unpickle the raw model so its predictions can be compared with the deployed
# pipeline's output further down.
# NOTE: pickle.load can execute arbitrary code during deserialization — only
# load model files obtained from a source you trust.
with open("booster_rf_classification.pkl", "rb") as fp:
    raw_model = pickle.load(fp)

## Configure & Upload Model

### Configure PyArrow Schema

In [4]:
# Each input row has a single "inputs" column: a fixed-size list of 30 float32 values.
inputs_field = pa.field("inputs", pa.list_(pa.float32(), list_size=30))
input_schema = pa.schema([inputs_field])

# Each output row has a single float32 "probabilities" column.
probabilities_field = pa.field("probabilities", pa.float32())
output_schema = pa.schema([probabilities_field])

### Upload model

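The model file is available in the [model zoo](https://storage.cloud.google.com/wallaroo-model-zoo/model-auto-conversion/convert-non-native-frameworks/xgboost/xgb_booster_rf_classification.pkl). Note that the file there is named `xgb_booster_rf_classification.pkl`, while this notebook expects it to be saved locally as `booster_rf_classification.pkl`.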

In [5]:
# Upload the pickled booster along with the schemas defined above, then show
# the returned model handle as the cell output.
model = wl.upload_model(
    "booster-rf-classification",
    "booster_rf_classification.pkl",
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a native runtime..
Ready


0,1
Name,booster-rf-classification
Version,533d860a-1135-4e15-87dc-740f6ff1a758
File Name,booster_rf_classification.pkl
SHA,141eeb77f04e16b500b5f0e60864be208586af4d61030de4fa1de8488b54fda4
Status,ready
Image Path,
Architecture,
Acceleration,
Updated At,2024-18-Mar 12:24:33


In [6]:
# Show the runtime recorded in the uploaded model's configuration
# (the saved output of this cell is 'onnx').
model.config().runtime()

'onnx'

## Deploy Pipeline

In [7]:
# Deployment resources passed to pipeline.deploy(): 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory("1Gi")
    .build()
)

In [8]:
# Build a pipeline with the uploaded model as its step, deploy it with the
# resource configuration defined above, and display the deployment status.
# The name is a plain string literal: it has no placeholders, so no f-prefix.
pipeline_name = "booster-rf-classification-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 45s ............ ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.223.128.33',
   'name': 'engine-5546c764c7-cbgwn',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'booster-rf-classification-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'config': {'batch_config': None,
       'filter_threshold': None,
       'id': 12,
       'input_schema': '/////7AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAEAAAAyP///wAAARAUAAAAJAAAAAQAAAABAAAAMAAAAAYAAABpbnB1dHMAAAAABgAIAAQABgAAAB4AAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAABAAAAGl0ZW0AAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'model_version_id': 4,
       'output_schema': '/////4AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEDEAAAACQAAAAEAAAAAAAAAA0AAABwcm9iYWJpbGl0aWVzAAYACAAGAAYAAAAAAAEAAAAAAA==',
       'runtime': 'onnx',
       'sidekick_uri': None,
       'tensor_fields':

## Run Inference

In [9]:
# Take the first 100 test rows as a dense array for the inference request.
# Slicing the sparse matrix before densifying avoids materializing rows that
# are discarded, and toarray() yields a regular ndarray instead of the
# discouraged np.matrix that todense() returns; data.tolist() is unchanged.
data = dtest.get_data()[:100].toarray()

In [10]:
# Build the request frame: one "inputs" column holding each row's feature list,
# matching the field name declared in input_schema.
# (pandas is already imported as pd in the imports cell at the top.)
dataframe = pd.DataFrame({"inputs": data.tolist()})
dataframe

Unnamed: 0,inputs
0,"[12.470000267028809, 18.600000381469727, 81.08..."
1,"[18.940000534057617, 21.309999465942383, 123.5..."
2,"[15.460000038146973, 19.479999542236328, 101.6..."
3,"[12.399999618530273, 17.68000030517578, 81.470..."
4,"[11.539999961853027, 14.4399995803833, 74.6500..."
...,...
95,"[9.777000427246094, 16.989999771118164, 62.5, ..."
96,"[20.1299991607666, 28.25, 131.1999969482422, 1..."
97,"[13.8100004196167, 23.75, 91.55999755859375, 5..."
98,"[11.149999618530273, 13.079999923706055, 70.87..."


Get results from the pipeline:

In [11]:
%%time
# Send the request frame through the deployed pipeline; %%time reports how long
# the call took. The cell magic must remain the first line of the cell.
pipeline.infer(dataframe)

CPU times: user 14.8 ms, sys: 950 µs, total: 15.7 ms
Wall time: 26.4 ms


Unnamed: 0,time,in.inputs,out.probabilities,anomaly.count
0,2024-03-18 12:24:48.171,"[12.470000267, 18.6000003815, 81.0899963379, 4...",0.872063,0
1,2024-03-18 12:24:48.171,"[18.9400005341, 21.3099994659, 123.5999984741,...",0.126989,0
2,2024-03-18 12:24:48.171,"[15.4600000381, 19.4799995422, 101.6999969482,...",0.126989,0
3,2024-03-18 12:24:48.171,"[12.3999996185, 17.6800003052, 81.4700012207, ...",0.872063,0
4,2024-03-18 12:24:48.171,"[11.5399999619, 14.4399995804, 74.6500015259, ...",0.872063,0
...,...,...,...,...
95,2024-03-18 12:24:48.171,"[9.7770004272, 16.9899997711, 62.5, 290.200012...",0.872063,0
96,2024-03-18 12:24:48.171,"[20.1299991608, 28.25, 131.1999969482, 1261.0,...",0.126989,0
97,2024-03-18 12:24:48.171,"[13.8100004196, 23.75, 91.5599975586, 597.7999...",0.126989,0
98,2024-03-18 12:24:48.171,"[11.1499996185, 13.0799999237, 70.8700027466, ...",0.872063,0


Compare them to the original model's predictions:

In [12]:
# Predict with the raw model on the same test DMatrix and keep the first 100
# values so they line up with the 100 rows that were sent to the pipeline.
raw_model.predict(dtest)[:100]

array([0.87206346, 0.12698936, 0.12698936, 0.87206346, 0.87206346,
       0.12698936, 0.12698936, 0.63091993, 0.5152838 , 0.87206346,
       0.87206346, 0.16085385, 0.87206346, 0.24110334, 0.87206346,
       0.12698936, 0.87206346, 0.87206346, 0.87206346, 0.12698936,
       0.87206346, 0.87206346, 0.12698936, 0.87206346, 0.87206346,
       0.84158796, 0.87206346, 0.87206346, 0.87206346, 0.12698936,
       0.87206346, 0.87206346, 0.54983395, 0.87206346, 0.87206346,
       0.87206346, 0.33302268, 0.8227614 , 0.12698936, 0.87206346,
       0.87206346, 0.12698936, 0.87206346, 0.87206346, 0.7319315 ,
       0.87206346, 0.87206346, 0.8227614 , 0.87206346, 0.87206346,
       0.12698936, 0.12698936, 0.72419685, 0.84158796, 0.87206346,
       0.87206346, 0.87206346, 0.12698936, 0.21343476, 0.87206346,
       0.87206346, 0.12698936, 0.12698936, 0.87206346, 0.87206346,
       0.83846515, 0.12698936, 0.1621926 , 0.87206346, 0.87206346,
       0.20617768, 0.12698936, 0.87206346, 0.12698936, 0.87206

## Undeploy Pipelines

In [13]:
# Undeploy the pipeline now that the comparison is done.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s .................................... ok


0,1
name,booster-rf-classification-pipeline
created,2024-03-18 11:46:43.465032+00:00
last_updated,2024-03-18 12:24:35.249571+00:00
deployed,False
arch,
accel,
tags,
versions,"7a10bfa8-0198-49c0-9a34-0ddbe86316e3, 47008751-baa9-4ebc-ae96-b04a3c9d670d, 5af2a874-6087-463b-b911-0e416a9889b5, d0263370-5985-4df2-b2c9-629461c68321"
steps,booster-rf-classification
published,False
