### Import Libraries

The first step is to import the libraries that we will need.

In [2]:
import datetime
import json
import os

import wallaroo
from wallaroo.framework import Framework
from wallaroo.object import EntityNotFoundError

# used to display dataframe information without truncating
from IPython.display import display
import pandas as pd
pd.set_option('display.max_colwidth', None)

# # for Big Query connections
# from google.cloud import bigquery
# from google.oauth2 import service_account
# import db_dtypes


In [3]:
# Show the version of the installed Wallaroo SDK.
wallaroo.__version__

'2023.2.1'

### Initialize connection

Start a connection to the Wallaroo instance and save the connection into the variable `wl`.

In [4]:
# Login through local Wallaroo instance

wl = wallaroo.Client()

# Alternative login against explicit API and Keycloak endpoints using SSO.
# This assignment replaces the `wl` client created above, so the rest of the
# notebook uses this connection. Adjust the prefix and suffix to match your
# instance, or remove this section to keep using the local client.
wallarooPrefix = "doc-test."
wallarooSuffix = "wallaroocommunity.ninja"

wl = wallaroo.Client(api_endpoint=f"https://{wallarooPrefix}api.{wallarooSuffix}",
                    auth_endpoint=f"https://{wallarooPrefix}keycloak.{wallarooSuffix}",
                    auth_type="sso")

### Set Configurations

The following will set the workspace, model name, and pipeline that will be used for this example.  If the workspace or pipeline already exist, then they will be assigned for use in this example.  If they do not exist, they will be created based on the names listed below.

In [5]:
# Workspace that holds the pipeline and model for this example.
workspace_name = "bikerental-nbz"
# Pipeline that the forecast model is added to.
pipeline_name = "bikeforecast-pipe"
# Name under which the forecast model is uploaded.
model_name = "bikeforecast-arima"

## Set the Workspace and Pipeline


In [6]:
def get_workspace(name):
    """Return the workspace called ``name``, creating it when none exists.

    Args:
        name: Workspace name to look for among ``wl.list_workspaces()``.

    Returns:
        The first listed workspace whose ``name()`` equals ``name``. If no
        listed workspace matches, the result of ``wl.create_workspace(name)``.

    NOTE(review): assumes workspace names are unique within the instance;
    if duplicates were possible, the first match is the one returned.
    """
    for ws in wl.list_workspaces():
        if ws.name() == name:
            # Stop at the first match rather than scanning the remaining workspaces.
            return ws
    # Nothing matched, so create the workspace under the requested name.
    return wl.create_workspace(name)

# look up a pipeline by name in the current workspace
def get_pipeline(pname, create_if_absent=False):
    """Return the pipeline named ``pname`` from the current workspace.

    Args:
        pname: Name compared against each pipeline's ``name()``.
        create_if_absent: When true and no pipeline matches, build one with
            ``wl.build_pipeline(pname)`` instead of raising.

    Returns:
        The first matching pipeline, or the newly built pipeline.

    Raises:
        KeyError: No pipeline matches and ``create_if_absent`` is false.
    """
    candidates = wl.get_current_workspace().pipelines()
    matches = [candidate for candidate in candidates if candidate.name() == pname]

    # A match exists: hand back the first one.
    if len(matches) > 0:
        return matches[0]

    # No match: refuse unless the caller asked for the pipeline to be built.
    if not create_if_absent:
        raise KeyError(f"pipeline {pname} not found in this workspace")
    return wl.build_pipeline(pname)


# Fetch the workspace named in the configuration cell, creating it if it does not exist.
workspace = get_workspace(workspace_name)

# Make it the current workspace; get_pipeline() looks up pipelines in the current workspace.
wl.set_current_workspace(workspace)

# Reuse the pipeline with this name if one is present; otherwise build a new one.
pipeline = get_pipeline(pipeline_name, create_if_absent=True)
# pipeline

### Upload Model

This model is a simple ARIMA(1,0,1) with no exogenous variables, hard-coded to forecast seven days out.

Note that this package is being specified as a `python` configuration.

In [8]:
# Python file holding the forecast model that is uploaded to Wallaroo.
model_file_name = './models/forecast.py'

# `Framework` is imported with the other imports at the top of the notebook.
arima_model = wl.upload_model(model_name, model_file_name, framework=Framework.PYTHON)

### Deploy the Pipeline

We will now add the uploaded model as a step for the pipeline, then deploy it.

In [9]:
# Add the uploaded model as a pipeline step, then deploy the pipeline.
pipeline.add_model_step(arima_model).deploy()

0,1
name,bikeforecast-pipe
created,2023-07-26 19:42:26.035307+00:00
last_updated,2023-07-26 19:43:00.509267+00:00
deployed,True
tags,
versions,"1d9c162a-3116-4f58-8f52-64aeb559e4bc, e2159554-214a-424d-83d7-fd01c41b31ed"
steps,bikeforecast-arima


In [10]:
# Check the deployment status of the pipeline.
pipeline.status()

{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.3.234',
   'name': 'engine-c84674699-5hn54',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'bikeforecast-pipe',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'bikeforecast-arima',
      'version': 'd3ba8a8d-dcbf-477f-85e3-3670fdf81b85',
      'sha': '7ed0600d8b754ff9d901633cf78de7f825c9d2d0f79a2647d271d2b04f4befd8',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.4.8',
   'name': 'engine-lb-584f54c899-z6kp9',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': []}

### Run Inference

Test using the connector


In [12]:
# Simple test: run one inference from the sample JSON file.
results = pipeline.infer_from_file('./data/testdata_dict.json', data_format="custom-json")
# The first element of the returned results is loaded into a DataFrame for display.
resultframe = pd.DataFrame(results[0])
display(resultframe)

Unnamed: 0,dteday,site_id,forecast
0,2011-03-02,site0001,2269
1,2011-03-03,site0001,1712
2,2011-03-04,site0001,1795
3,2011-03-05,site0001,1371
4,2011-03-06,site0001,1819
5,2011-03-07,site0001,2045
6,2011-03-08,site0001,1974


In [13]:
# Retrieve the pipeline's inference logs.
pipeline.logs()

Unnamed: 0,time,in.json,out.json,check_failures
0,2023-07-26 19:43:37.422,"{""dteday"":[""2011-02-02"",""2011-02-03"",""2011-02-04"",""2011-02-05"",""2011-02-06"",""2011-02-07"",""2011-02-08"",""2011-02-09"",""2011-02-10"",""2011-02-11"",""2011-02-12"",""2011-02-13"",""2011-02-14"",""2011-02-15"",""2011-02-16"",""2011-02-17"",""2011-02-18"",""2011-02-19"",""2011-02-20"",""2011-02-21"",""2011-02-22"",""2011-02-23"",""2011-02-24"",""2011-02-25"",""2011-02-26"",""2011-02-27"",""2011-02-28"",""2011-03-01"",""2011-03-02"",""2011-03-03"",""2011-03-04"",""2011-03-05"",""2011-03-06"",""2011-03-07"",""2011-03-08""],""site_id"":[""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001""],""cnt"":[1240,1551,2324,805,1948,1650,913,931,1256,1614,1000,1883,1964,2036,2586,3219,3947,1826,1418,723,1281,2564,2181,1539,2059,2428,836,1235,-1,-1,-1,-1,-1,-1,-1],""season"":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],""holiday"":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],""weekday"":[3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2],""workingday"":[1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1]}","{""dteday"":{""0"":""2011-03-02"",""1"":""2011-03-03"",""2"":""2011-03-04"",""3"":""2011-03-05"",""4"":""2011-03-06"",""5"":""2011-03-07"",""6"":""2011-03-08""},""site_id"":{""0"":""site0001"",""1"":""site0001"",""2"":""site0001"",""3"":""site0001"",""4"":""site0001"",""5"":""site0001"",""6"":""site0001""},""forecast"":{""0"":2269,""1"":1712,""2"":1795,""3"":1371,""4"":1819,""5"":2045,""6"":1974}}",0
1,2023-07-26 19:43:18.055,"{""dteday"":[""2011-02-02"",""2011-02-03"",""2011-02-04"",""2011-02-05"",""2011-02-06"",""2011-02-07"",""2011-02-08"",""2011-02-09"",""2011-02-10"",""2011-02-11"",""2011-02-12"",""2011-02-13"",""2011-02-14"",""2011-02-15"",""2011-02-16"",""2011-02-17"",""2011-02-18"",""2011-02-19"",""2011-02-20"",""2011-02-21"",""2011-02-22"",""2011-02-23"",""2011-02-24"",""2011-02-25"",""2011-02-26"",""2011-02-27"",""2011-02-28"",""2011-03-01"",""2011-03-02"",""2011-03-03"",""2011-03-04"",""2011-03-05"",""2011-03-06"",""2011-03-07"",""2011-03-08""],""site_id"":[""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001"",""site0001""],""cnt"":[1240,1551,2324,805,1948,1650,913,931,1256,1614,1000,1883,1964,2036,2586,3219,3947,1826,1418,723,1281,2564,2181,1539,2059,2428,836,1235,-1,-1,-1,-1,-1,-1,-1],""season"":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],""holiday"":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],""weekday"":[3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2],""workingday"":[1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,0,0,1,1]}","{""dteday"":{""0"":""2011-03-02"",""1"":""2011-03-03"",""2"":""2011-03-04"",""3"":""2011-03-05"",""4"":""2011-03-06"",""5"":""2011-03-07"",""6"":""2011-03-08""},""site_id"":{""0"":""site0001"",""1"":""site0001"",""2"":""site0001"",""3"":""site0001"",""4"":""site0001"",""5"":""site0001"",""6"":""site0001""},""forecast"":{""0"":2269,""1"":1712,""2"":1795,""3"":1371,""4"":1819,""5"":2045,""6"":1974}}",0


In [None]:
# connection = wl.get_connection('bq-wl-dev')

# # set the credentials
# bigquery_credentials = service_account.Credentials.from_service_account_info(connection.details())

# # start the client
# bigqueryclient = bigquery.Client(
#     credentials=bigquery_credentials, 
#     project=connection.details()['project_id']
# )


# dataset = 'bikerental_forecast_demo'
# input_table = 'bikerentals'
# tablename = f'{dataset}.{input_table}'

# sites = bigqueryclient.query(f"select distinct site_id from {tablename}").to_dataframe()
# sites = sites['site_id'].to_numpy()
# print(f'{len(sites)} rental sites')

In [None]:
# from resources import util

# today = '2011-03-01'

# # create the query to get historical data
# query = util.mk_dt_range_query(tablename=tablename, day_of_forecast=today, site_id=sites[0])
# print(query)

# xquery = util.mk_exog_query(tablename=tablename, day_of_forecast=today, site_id=sites[0], nforecast=7)
# print(xquery)

In [None]:
# historical_data = bigqueryclient.query(query).to_dataframe()
# exog = bigqueryclient.query(xquery).to_dataframe()

# # fill the exog frame with a "nan" count: -1 so I can pass everything in as one frame
# # actual nan would be better but I'm not sure if the platform handles it
# exog['cnt'] = -1

# input_frame = pd.concat([historical_data, exog]).reset_index(drop=True)
# input_frame

In [None]:
# Disabled: `input_frame` is only built by the BigQuery cells, which are commented out
# in this notebook, so running this line as-is raises a NameError on a fresh kernel.
# Uncomment it together with those cells.
# input_frame['dteday'] = input_frame['dteday'].astype(str)

In [None]:
# Disabled: depends on `input_frame`, which is only built by the commented-out BigQuery
# cells, so running this line as-is raises a NameError on a fresh kernel.
# Uncomment it together with those cells.
# results = pipeline.infer(input_frame.to_dict())[0]

In [None]:
# Display the most recently assigned inference results.
results

In [None]:
# Load the inference results into a DataFrame and display it.
# NOTE(review): if the `pipeline.infer(...)` cell has not been run, `results` still holds
# the value returned by the earlier `infer_from_file` call — confirm that is intended.
resultframe = pd.DataFrame(results)
resultframe

### Undeploy the Pipeline

Undeploy the pipeline and return the resources back to the Wallaroo instance.

In [14]:
# Undeploy the pipeline now that the example is finished.
pipeline.undeploy()

0,1
name,bikeforecast-pipe
created,2023-07-26 19:42:26.035307+00:00
last_updated,2023-07-26 19:43:00.509267+00:00
deployed,False
tags,
versions,"1d9c162a-3116-4f58-8f52-64aeb559e4bc, e2159554-214a-424d-83d7-fd01c41b31ed"
steps,bikeforecast-arima


In [None]:
# Disabled: `bigqueryclient` is only created in the commented-out BigQuery connection
# cell, so running this line as-is raises a NameError on a fresh kernel.
# Uncomment it together with that cell.
# bigqueryclient.close()