## XGBoost Ranker Upload Tutorial

The following example will:

* Set the input and output schemas.
* Upload an XGBoost ranker model to Wallaroo.
* Deploy a pipeline with the uploaded XGBoost model as a pipeline step.
* Perform a test inference.
* Undeploy the pipeline.

In [1]:
import json
import os
import pickle

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework

import pyarrow as pa
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from xgboost import XGBClassifier

In [2]:
# Create the Wallaroo client used by every later cell.
# NOTE(review): auth_type="sso" with interactive=True presumably triggers an
# interactive single-sign-on login — confirm against the Wallaroo SDK docs.
wl = wallaroo.Client(auth_type="sso", interactive=True)

In [3]:
def get_workspace(name):
    """Return the workspace called ``name``, creating it if none exists.

    Scans ``wl.list_workspaces()`` for workspaces whose ``name()`` equals
    ``name``. If several match, the last one listed is returned; if none
    match, a new workspace is created with ``wl.create_workspace(name)``.

    Args:
        name: Workspace name to look up or create.

    Returns:
        The matching workspace, or the newly created one.
    """
    matches = [ws for ws in wl.list_workspaces() if ws.name() == name]
    # Decide on list emptiness instead of comparing a workspace to None with
    # `==`: an equality test can be answered by the object's own __eq__,
    # which could wrongly report "no workspace" and create a duplicate.
    if matches:
        return matches[-1]
    return wl.create_workspace(name)

# Base name reused for the workspace (f"{prefix}-jch"), the uploaded model,
# and the pipeline (f"{prefix}-pipeline") in the cells below.
prefix = "xgb-ranker"

In [4]:
# Look up (or create) the tutorial workspace named "<prefix>-jch".
workspace = get_workspace(f"{prefix}-jch")
# Make it the client's current workspace; as the last expression, the call's
# result is shown as the cell output.
wl.set_current_workspace(workspace)

{'name': 'xgb-ranker-jch', 'id': 53, 'archived': False, 'created_by': '3cc9e92a-fa3c-4371-a7a7-487884df059e', 'created_at': '2023-06-16T18:26:15.008546+00:00', 'models': [{'name': 'xgb-ranker', 'versions': 3, 'owner_id': '""', 'last_update_time': datetime.datetime(2023, 6, 16, 18, 32, 41, 553693, tzinfo=tzutc()), 'created_at': datetime.datetime(2023, 6, 16, 18, 26, 17, 933009, tzinfo=tzutc())}], 'pipelines': [{'name': 'xgb-ranker-pipeline', 'create_time': datetime.datetime(2023, 6, 16, 18, 27, 38, 223760, tzinfo=tzutc()), 'definition': '[]'}]}

## Data Schema

In [6]:
# Arrow schema describing the columns sent to the model, in order.
# Each entry is a (column name, Arrow type) pair.
input_schema = pa.schema([
    ('SecuritiesCode', pa.int64()),
    ('Open', pa.float64()),
    ('High', pa.float64()),
    ('Low', pa.float64()),
    ('Close', pa.float64()),
    ('Volume', pa.int64()),
    ('AdjustmentFactor', pa.float64()),
    ('ExpectedDividend', pa.bool_()),
    ('SupervisionFlag', pa.bool_()),
])
display(input_schema)

# Arrow schema for the model's result: one float64 column named 'output'.
output_schema = pa.schema([('output', pa.float64())])

SecuritiesCode: int64
Open: double
High: double
Low: double
Close: double
Volume: int64
AdjustmentFactor: double
ExpectedDividend: bool
SupervisionFlag: bool

## Upload model

In [7]:
# Upload the pickled model file under the shared prefix name, declaring it as
# an XGBoost model together with the input/output schemas defined earlier.
model = wl.upload_model(
    f"{prefix}",
    'models/model-auto-conversion_xgboost_xgb_ranker_model.pkl',
    framework=Framework.XGBOOST,
    input_schema=input_schema,
    output_schema=output_schema,
)
# Display the uploaded model's details as the cell output.
model

Waiting for model conversion... It may take up to 10.0min.
Model is Pending conversion...Converting..Pending conversion.Converting.........Ready.


{'name': 'xgb-ranker', 'version': 'c53c6a84-9f56-41c6-bb2f-049ef6b067e8', 'file_name': 'model-auto-conversion_xgboost_xgb_ranker_model.pkl', 'image_path': 'proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mlflow-deploy:v2023.3.0-main-3367', 'last_update_time': datetime.datetime(2023, 6, 16, 18, 51, 15, 27969, tzinfo=tzutc())}

## Configure model and pipeline

In [8]:
# Resource settings passed to pipeline.deploy(): 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [9]:
# Name the pipeline after the shared prefix and build it through the client.
pipeline_name = f"{prefix}-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
# NOTE(review): clear() presumably removes steps left over from earlier runs
# so re-running this cell does not stack duplicate steps — confirm in the SDK docs.
pipeline.clear()
# Add the uploaded model as a pipeline step; as the last expression, the
# call's result is shown as the cell output.
pipeline.add_model_step(model)

0,1
name,xgb-ranker-pipeline
created,2023-06-16 18:27:38.223760+00:00
last_updated,2023-06-16 18:51:16.088452+00:00
deployed,False
tags,
versions,"5e705c54-0c59-4a00-bed0-6b23404dc0cf, d47b225e-84aa-4317-ac0b-3e486a4d3b58, ec9ba7b2-1070-40d5-9027-387c813eed15, a52871bf-dbc6-4a79-b067-d4e2dcaf1e39, a309493e-eeaf-49a8-8049-d297e5ea1c1c, 90d257e7-ad44-451d-aaff-64d8fe555adb, 4664d189-ae0c-4c32-a645-a57650c10573"
steps,xgb-ranker


In [10]:
# Deploy the pipeline using the deployment configuration built above.
pipeline.deploy(deployment_config=deployment_config)
# Show the deployment status as the cell output.
pipeline.status()

Waiting for deployment - this will take up to 90s ................ ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.4.66',
   'name': 'engine-7bcfdd8f68-6gs8p',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'xgb-ranker-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'xgb-ranker',
      'version': 'c53c6a84-9f56-41c6-bb2f-049ef6b067e8',
      'sha': 'f03bf4a595b9fe6c09fa1f38c3340d1c47da57ba6dc729efee36b84eb51c8941',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.4.67',
   'name': 'engine-lb-584f54c899-78qp7',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.244.4.68',
   'name': 'engine-sidekick-xgb-ranker-50-597db44c97-xmx82',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

## Inference

In [11]:
# Run a test inference on the records in the sample JSON file; the returned
# results are shown as the cell output.
pipeline.infer_from_file('./data/test_xgb_ranker_data.json')

Unnamed: 0,time,in.AdjustmentFactor,in.Close,in.ExpectedDividend,in.High,in.Low,in.Open,in.SecuritiesCode,in.SupervisionFlag,in.Volume,out.output,check_failures
0,2023-06-16 18:51:32.812,1,451,False,452,443,445,3656,False,322400,0.478053,0
1,2023-06-16 18:51:32.812,1,1936,False,1942,1905,1933,9719,False,475600,0.478053,0
2,2023-06-16 18:51:32.812,1,2086,False,2091,2040,2048,8098,False,124600,0.478053,0
3,2023-06-16 18:51:32.812,1,2093,False,2102,2066,2067,6923,False,929800,0.478053,0
4,2023-06-16 18:51:32.812,1,2445,False,2445,2418,2437,9828,False,13700,0.478053,0
5,2023-06-16 18:51:32.812,1,921,False,926,921,924,1780,False,14700,0.478053,0
6,2023-06-16 18:51:32.812,1,2550,False,2556,2504,2532,6823,False,17700,0.49472,0
7,2023-06-16 18:51:32.812,1,267,False,271,263,268,4571,False,232600,0.478053,0
8,2023-06-16 18:51:32.812,1,898,False,909,881,909,9273,False,56100,0.478053,0
9,2023-06-16 18:51:32.812,1,2142,False,2171,2141,2164,5999,False,5400,0.478053,0


In [None]:
# Undeploy the pipeline now that the test inference is done.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s .