### Import Libraries

The first step is to import the libraries that we will need.

In [1]:
import datetime
import io
import json
import os

import wallaroo
from wallaroo.object import EntityNotFoundError

# used to display dataframe information without truncating
from IPython.display import display
import pandas as pd
pd.set_option('display.max_colwidth', None)

# for Big Query connections
# from google.cloud import bigquery
# from google.oauth2 import service_account
# import db_dtypes


In [2]:
# Show the version string of the installed wallaroo package.
wallaroo.__version__

'2023.2.1'

### Initialize connection

Start a connection to the Wallaroo instance and save the connection into the variable `wl`.

In [3]:
# Connect to the Wallaroo instance through its external API and Keycloak
# endpoints, authenticating with SSO.
#
# The client is created exactly once: every `wallaroo.Client(...)` call builds
# a new client object, so any client created before this one would simply be
# overwritten by the assignment below.
#
# NOTE: to log in through a local Wallaroo instance instead, replace the call
# below with `wallaroo.Client()` (no arguments).

wallarooPrefix = "doc-test."
wallarooSuffix = "wallaroocommunity.ninja"

wl = wallaroo.Client(api_endpoint=f"https://{wallarooPrefix}api.{wallarooSuffix}",
                    auth_endpoint=f"https://{wallarooPrefix}keycloak.{wallarooSuffix}",
                    auth_type="sso")

### Set Configurations

The following will set the workspace, model name, and pipeline that will be used for this example. If the workspace or pipeline already exist, then they will be assigned for use in this example. If they do not exist, they will be created based on the names listed below.

In [4]:
# Names for the workspace, pipeline, and model referenced by this example.
workspace_name = 'bikerental-nbz'
pipeline_name = 'bikeforecast-pipe'
model_name = 'bikeforecast-arima'

## Set the Workspace and Pipeline


In [5]:
def get_workspace(name):
    """Return the workspace called `name`, creating it when it does not exist.

    Scans `wl.list_workspaces()` and returns the first workspace whose
    `name()` equals `name`. If none matches (including when the list is
    empty), a new workspace is created with `wl.create_workspace(name)`.

    Parameters
    ----------
    name : str
        Name of the workspace to look up or create.

    Returns
    -------
    The matching workspace object, or the newly created one.
    """
    for ws in wl.list_workspaces():
        if ws.name() == name:
            # Return as soon as a match is found; no `== None` sentinel test is
            # needed, so the result never depends on the workspace's __eq__.
            return ws
    return wl.create_workspace(name)

# get a pipeline by name in the workspace
def get_pipeline(pname, create_if_absent=False):
    """Look up the pipeline called `pname` in the current workspace.

    Parameters
    ----------
    pname : str
        Pipeline name to search for.
    create_if_absent : bool, default False
        When no pipeline matches, build one with `wl.build_pipeline(pname)`
        instead of raising.

    Returns
    -------
    The first pipeline in the workspace whose `name()` equals `pname`, or the
    newly built pipeline.

    Raises
    ------
    KeyError
        If no pipeline matches and `create_if_absent` is false.
    """
    candidates = wl.get_current_workspace().pipelines()
    matches = list(filter(lambda p: p.name() == pname, candidates))

    if matches:
        # the first listed pipeline with this name wins
        return matches[0]
    if not create_if_absent:
        raise KeyError(f"pipeline {pname} not found in this workspace")
    return wl.build_pipeline(pname)

# Get the most recent version of a model in the workspace
# Assumes that the most recent version is the first in the list of versions.
# wl.get_current_workspace().models() returns a list of models in the current workspace

def get_model(mname):
    """Return a version of the model called `mname` from the current workspace.

    For every model in the current workspace whose `name()` equals `mname`,
    the first entry of its `versions()` list is collected; the first collected
    entry is returned.

    Raises
    ------
    KeyError
        If no model in the current workspace is called `mname`.
    """
    found_versions = []
    for candidate in wl.get_current_workspace().models():
        if candidate.name() == mname:
            # index 0 is taken to be the most recent version
            found_versions.append(candidate.versions()[0])

    if not found_versions:
        raise KeyError(f"model {mname} not found in this workspace")
    return found_versions[0]


# Look up the example workspace by name; get_workspace creates it if it is missing.
workspace = get_workspace(workspace_name)

# Make it the current workspace; get_pipeline and get_model read from
# wl.get_current_workspace().
wl.set_current_workspace(workspace)


{'name': 'bikerental-nbz', 'id': 18, 'archived': False, 'created_by': 'c3a45eb6-37ff-4020-8d59-7166c3e153d0', 'created_at': '2023-07-26T19:42:24.57956+00:00', 'models': [{'name': 'bikeforecast-arima', 'versions': 1, 'owner_id': '""', 'last_update_time': datetime.datetime(2023, 7, 26, 19, 42, 55, 544718, tzinfo=tzutc()), 'created_at': datetime.datetime(2023, 7, 26, 19, 42, 55, 544718, tzinfo=tzutc())}], 'pipelines': [{'name': 'bikeforecast-pipe', 'create_time': datetime.datetime(2023, 7, 26, 19, 42, 26, 35307, tzinfo=tzutc()), 'definition': '[]'}]}

### Fetch champion and upload challenger

* Champion: ARIMA(1,0,1), hard-coded to forecast seven days out.
* Challenger: ARIMA(2,0,2).


In [6]:
# champion model: fetched by name from the current workspace with get_model,
# then displayed as the cell output
champion = get_model(model_name)
champion

0,1
Name,bikeforecast-arima
Version,d3ba8a8d-dcbf-477f-85e3-3670fdf81b85
File Name,forecast.py
SHA,7ed0600d8b754ff9d901633cf78de7f825c9d2d0f79a2647d271d2b04f4befd8
Status,ready
Image Path,
Updated At,2023-26-Jul 19:42:55


In [8]:
# Name to register the challenger model under, and the local Python file that implements it.
challenger_name = 'bikeforecast-arima202'
challenger_file_name = './models/forecast-arma2.py'

from wallaroo.framework import Framework

# Upload the challenger file as a model, declaring it with the PYTHON framework.
challenger = wl.upload_model(challenger_name, challenger_file_name,  framework=Framework.PYTHON)

## Create a Shadow Pipeline

In [9]:
# Build a pipeline named `shadowpipe_name` and add a shadow-deploy step, passing
# `champion` as the first argument and `[challenger]` as the list of additional models.
shadowpipe_name = 'bikeforecast-shadow'
pipeline = wl.build_pipeline(shadowpipe_name).add_shadow_deploy(champion, [challenger])

# deploy the pipeline; the returned value is displayed as the cell output
pipeline.deploy()

0,1
name,bikeforecast-shadow
created,2023-07-26 19:47:56.429162+00:00
last_updated,2023-07-26 19:47:57.696964+00:00
deployed,True
tags,
versions,"d83a90b0-e490-4fa1-a322-60b41c0b7b8d, 9217d555-6c3b-4b0a-b866-3d8b82c56b45"
steps,bikeforecast-arima


### Run Inference

Note that for a Python runtime model, the inference results returned are only from one model (the champion).


In [10]:
# show the input data: load the test JSON file into a DataFrame for display
pd.read_json('./data/testdata_dict.json')


Unnamed: 0,dteday,site_id,cnt,season,holiday,weekday,workingday
0,2011-02-02,site0001,1240,1,0,3,1
1,2011-02-03,site0001,1551,1,0,4,1
2,2011-02-04,site0001,2324,1,0,5,1
3,2011-02-05,site0001,805,1,0,6,0
4,2011-02-06,site0001,1948,1,0,0,0
5,2011-02-07,site0001,1650,1,0,1,1
6,2011-02-08,site0001,913,1,0,2,1
7,2011-02-09,site0001,931,1,0,3,1
8,2011-02-10,site0001,1256,1,0,4,1
9,2011-02-11,site0001,1614,1,0,5,1


In [11]:
# simple test: run one inference through the pipeline using the test JSON file,
# submitted with the "custom-json" data format, and display what comes back
results = pipeline.infer_from_file('./data/testdata_dict.json', data_format="custom-json")
results

[{'dteday': {'0': '2011-03-02',
   '1': '2011-03-03',
   '2': '2011-03-04',
   '3': '2011-03-05',
   '4': '2011-03-06',
   '5': '2011-03-07',
   '6': '2011-03-08'},
  'site_id': {'0': 'site0001',
   '1': 'site0001',
   '2': 'site0001',
   '3': 'site0001',
   '4': 'site0001',
   '5': 'site0001',
   '6': 'site0001'},
  'forecast': {'0': 2269,
   '1': 1712,
   '2': 1795,
   '3': 1371,
   '4': 1819,
   '5': 2045,
   '6': 1974}}]

In [12]:
# Turn the first element of the inference results (a dict of columns) into a DataFrame.
resultframe = pd.DataFrame(results[0])
resultframe

Unnamed: 0,dteday,site_id,forecast
0,2011-03-02,site0001,2269
1,2011-03-03,site0001,1712
2,2011-03-04,site0001,1795
3,2011-03-05,site0001,1371
4,2011-03-06,site0001,1819
5,2011-03-07,site0001,2045
6,2011-03-08,site0001,1974


To see the forecasts from both the champion and the challenger, you must look at the logs.

In [13]:
# Fetch the pipeline logs with limit=1 (presumably restricting the result to a
# single log record -- confirm against the SDK docs) and display them.
logs = pipeline.logs(limit=1)
logs



Unnamed: 0,time,in.json,out.json,check_failures,out_bikeforecast-arima202.json
0,2023-07-26 19:48:13.313,"{""dteday"":[""2011-02-02"",""2011-02-03"",""2011-02-04"",""2011-02-05"",""2011-02-06"",""2011-02-07"",""2011-02-08"",""2011-02-09"",""2011-02-10"",""2011-02-11"",""2011-02-12"",""2011-02-13"",""2011-02-14"",""2011-02-15"",""2011-02-16"",""2011-02-17"",""2011-02-18"",""2011-02-19"",""2011-02-20"",""2011-02-21"",""2011-02-22"",""2011-02-23"",""2011-02-24"",""2011-02-25"",""2011-02-26"",""2011-02-27"",""2011-02-28"",""2011-03-01"",""2011-03-02"",""2011-03-03"",""2011-03-04"",""2011-03-05"",""2011-03-06"",""2011-03-07"",""2011-03-08""],""site_id"":[""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001""],""cnt"":[1240,1551,2324,805,1948,1650,913,931,1256,1614,1000,1883,1964,2036,2586,3219,3947,1826,1418,723,1281,2564,2181,1539,2059,2428,836,1235,-1,-1,-1,-1,-1,-1,-1],""season"":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],""holiday"":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],""weekday"":[3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2],""workingday"":[1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1]}","{""dteday"":{""0"":""2011-03-02"",""1"":""2011-03-03"",""2"":""2011-03-04"",""3"":""2011-03-05"",""4"":""2011-03-06"",""5"":""2011-03-07"",""6"":""2011-03-08""},""site_id"":{""0"":""site0001"",""1"":""site0001"",""2"":""site0001"",""3"":""site0001"",""4"":""site0001"",""5"":""site0001"",""6"":""site0001""},""forecast"":{""0"":2269,""1"":1712,""2"":1795,""3"":1371,""4"":1819,""5"":2045,""6"":1974}}",0,"{""dteday"":{""0"":""2011-03-02"",""1"":""2011-03-03"
",""2"":""2011-03-04"",""3"":""2011-03-05"",""4"":""2011-03-06"",""5"":""2011-03-07"",""6"":""2011-03-08""},""site_id"":{""0"":""site0001"",""1"":""site0001"",""2"":""site0001"",""3"":""site0001"",""4"":""site0001"",""5"":""site0001"",""6"":""site0001""},""forecast"":{""0"":1880,""1"":1454,""2"":1721,""3"":1385,""4"":1617,""5"":1706,""6"":1725}}"


In [14]:
# List the log columns to find the names of the columns that hold model outputs.
logs.columns

Index(['time', 'in.json', 'out.json', 'check_failures',
       'out_bikeforecast-arima202.json'],
      dtype='object')

In [15]:
# convenience function to retrieve output from custom json inference result 
# assume that the result is a dictionary representation of a data frame
def get_output(inf_result):
    """Build a DataFrame from a custom-JSON inference result.

    Reads `inf_result.raw['outputs'][0]['Json']['data'][0]`, which is expected
    to be the dictionary representation of a data frame, and converts it with
    `pd.DataFrame.from_dict`.
    """
    first_output = inf_result.raw['outputs'][0]
    frame_dict = first_output['Json']['data'][0]
    return pd.DataFrame.from_dict(frame_dict)

In [16]:
# Parse the champion's logged output (the 'out.json' column of the first log
# row, a JSON string) into a DataFrame.
# A dedicated name is used so the `champion` model handle is not overwritten;
# rebinding it to a string would break re-running the shadow-deploy cell.
# The string is wrapped in StringIO because pandas deprecates passing literal
# JSON strings directly to read_json.
champion_json = logs['out.json'][0]
pd.read_json(io.StringIO(champion_json))

Unnamed: 0,dteday,site_id,forecast
0,2011-03-02,site0001,2269
1,2011-03-03,site0001,1712
2,2011-03-04,site0001,1795
3,2011-03-05,site0001,1371
4,2011-03-06,site0001,1819
5,2011-03-07,site0001,2045
6,2011-03-08,site0001,1974


In [17]:
# Parse the challenger's logged output (the 'out_bikeforecast-arima202.json'
# column of the first log row, a JSON string) into a DataFrame.
# A dedicated name is used so the `challenger` model handle is not overwritten;
# rebinding it to a string would break re-running the shadow-deploy cell.
# The string is wrapped in StringIO because pandas deprecates passing literal
# JSON strings directly to read_json.
challenger_json = logs['out_bikeforecast-arima202.json'][0]
pd.read_json(io.StringIO(challenger_json))

Unnamed: 0,dteday,site_id,forecast
0,2011-03-02,site0001,1880
1,2011-03-03,site0001,1454
2,2011-03-04,site0001,1721
3,2011-03-05,site0001,1385
4,2011-03-06,site0001,1617
5,2011-03-07,site0001,1706
6,2011-03-08,site0001,1725


### Undeploy the Pipeline

Undeploy the pipeline and return the resources back to the Wallaroo instance.

In [18]:
# Undeploy the shadow pipeline; the returned value is displayed as the cell output.
pipeline.undeploy()

0,1
name,bikeforecast-shadow
created,2023-07-26 19:47:56.429162+00:00
last_updated,2023-07-26 19:47:57.696964+00:00
deployed,False
tags,
versions,"d83a90b0-e490-4fa1-a322-60b41c0b7b8d, 9217d555-6c3b-4b0a-b866-3d8b82c56b45"
steps,bikeforecast-arima
