In [43]:
# Set up a workspace and pipeline for testing

import wallaroo
from wallaroo.object import EntityNotFoundError, RequiredAttributeMissing

# to display dataframe tables
from IPython.display import display
# used to display dataframe information without truncating
import pandas as pd
pd.set_option('display.max_colwidth', None)
import pyarrow as pa

import os
# Used for the Wallaroo SDK version 2023.1
os.environ["ARROW_ENABLED"]="True"

import time
import json

# for Big Query connections
from google.cloud import bigquery
from google.oauth2 import service_account
import db_dtypes

# Login through local Wallaroo instance

# wl = wallaroo.Client()

# # SSO login through keycloak

# wallarooPrefix = "product-uat-ee"
# wallarooSuffix = "wallaroocommunity.ninja"

# Host name pieces used to build the API and Keycloak endpoints below
wallarooPrefix = "doc-test"
wallarooSuffix = "wallaroocommunity.ninja"

# SSO login against the instance identified by the prefix/suffix above
wl = wallaroo.Client(api_endpoint=f"https://{wallarooPrefix}.api.{wallarooSuffix}", 
                    auth_endpoint=f"https://{wallarooPrefix}.keycloak.{wallarooSuffix}", 
                    auth_type="sso")

# Setting variables for later steps

workspace_name = 'bigquerystatsmodelworkspace'
pipeline_name = 'bigquerystatsmodelpipeline04'
model_name = 'bigquerystatsmodelmodel2'
model_file_name = './models/bike_day_model.pkl'

# Connection used to read inference inputs.
# The key file is opened in a `with` block so the handle is closed as soon as
# the JSON has been parsed, instead of being left open until garbage collection.
bigquery_connection_input_name = "bigqueryforecastinputs2"
bigquery_connection_input_type = "BIGQUERY"
with open('./resources/bigquery_service_account_input_key.json.example') as key_file:
    bigquery_connection_input_argument = json.load(key_file)

# Connection used to write inference outputs; key file handled the same way.
bigquery_connection_output_name = "bigqueryforecastoutputs2"
bigquery_connection_output_type = "BIGQUERY"
with open('./resources/bigquery_service_account_output_key.json.example') as key_file:
    bigquery_connection_output_argument = json.load(key_file)

# helper methods to retrieve workspaces and pipelines

def get_workspace(name):
    """Return the workspace called `name`, creating it when none exists.

    Every workspace returned by `wl.list_workspaces()` is compared by its
    `name()`. If several workspaces share the name, the last one listed is
    returned. If there is no match, `wl.create_workspace(name)` is called
    and its result returned.

    The "was anything found" decision is made on the list of matches rather
    than with `workspace == None`, so a workspace object that overrides
    `__eq__` cannot be mistaken for "not found".
    """
    matches = [ws for ws in wl.list_workspaces() if ws.name() == name]
    if matches:
        return matches[-1]
    return wl.create_workspace(name)

def get_pipeline(name):
    """Return the first pipeline named `name`, building one if lookup fails.

    The first element of `wl.pipelines_by_name(name)` is returned. When that
    lookup raises `EntityNotFoundError`, the result of
    `wl.build_pipeline(name)` is returned instead.
    """
    try:
        return wl.pipelines_by_name(name)[0]
    except EntityNotFoundError:
        # Nothing registered under this name yet
        return wl.build_pipeline(name)

def get_connection(name, connection_type, connection_arguments):
    """Return the connection called `name`, creating it if lookup fails.

    `wl.get_connection(name)` is tried first. When it raises
    `RequiredAttributeMissing`, a connection is created with
    `wl.create_connection(name, connection_type, connection_arguments)`
    and that result is returned.
    """
    try:
        return wl.get_connection(name)
    except RequiredAttributeMissing:
        # Lookup failed: create the connection from the supplied type and arguments
        return wl.create_connection(name, connection_type, connection_arguments)


# Fetch (or create) the workspace and set it as the current workspace
workspace = get_workspace(workspace_name)
wl.set_current_workspace(workspace)

# Fetch (or build) the pipeline by name
pipeline = get_pipeline(pipeline_name)

# Upload the model file and configure it with the "python" runtime
bike_day_model = (
    wl.upload_model(model_name, model_file_name)
    .configure(runtime="python")
)

# Add the uploaded model as a step of the pipeline
pipeline.add_model_step(bike_day_model)

# Fetch (or create) the BigQuery input and output connections
connection_input = get_connection(
    bigquery_connection_input_name,
    bigquery_connection_input_type,
    bigquery_connection_input_argument,
)
connection_output = get_connection(
    bigquery_connection_output_name,
    bigquery_connection_output_type,
    bigquery_connection_output_argument,
)


In [44]:
# Show the pipeline object as the cell output
pipeline

0,1
name,bigquerystatsmodelpipeline04
created,2023-05-11 20:35:53.367690+00:00
last_updated,2023-05-11 20:35:53.367690+00:00
deployed,(none)
tags,
versions,95070db1-c5f9-4900-8146-d2a5516aef9c
steps,


Now we'll upload the orchestration and then create tasks from it, passing the pipeline name as a custom parameter.

In [45]:
# Upload the orchestration archive, keeping the returned handle for the cells below,
# then list the orchestrations as the cell output.
orchestration = wl.upload_orchestration(path="./bigquery_statsmodel_remote_inference/bigquery_statsmodel_remote_inference.zip")
wl.list_orchestrations()

id,status,name,sha,created at,updated at
78779772-c674-481b-8753-3d4378d86ea3,ready,bigquery_statsmodel_remote_inference.zip,1ba67b04bf70d6546d6a9bc39ca1ec74a1302f538297d639d609afeccea11b19,2023-11-May 20:08:24,2023-11-May 20:09:11
66d64672-c764-4c3d-95de-70e407aafb37,pending_packaging,bigquery_statsmodel_remote_inference.zip,35ccbc981ece6426e73520eaccdd7802ff42730b721154c7b1b99bf5ec031aaf,2023-11-May 20:36:24,2023-11-May 20:36:24
30d171eb-c225-4dd5-9902-6d1c34e2f457,ready,bigquery_statsmodel_remote_inference.zip,6f9c76cfce70afae13b7486a92210f49644149696f47b307b48edd1caee10f15,2023-11-May 19:04:55,2023-11-May 19:05:42


In [46]:
# Poll the orchestration until it reports 'ready', printing each intermediate status.
# The wait is bounded so that an orchestration which never becomes 'ready' raises
# instead of hanging the notebook, and the status is read once per iteration so the
# printed value is the same one the loop condition tested.
packaging_timeout_seconds = 600
packaging_poll_seconds = 5
packaging_deadline = time.monotonic() + packaging_timeout_seconds

orchestration_status = orchestration.status()
while orchestration_status != 'ready':
    print(orchestration_status)
    if time.monotonic() >= packaging_deadline:
        raise TimeoutError(
            f"orchestration status is still '{orchestration_status}' "
            f"after {packaging_timeout_seconds} seconds"
        )
    time.sleep(packaging_poll_seconds)
    orchestration_status = orchestration.status()

pending_packaging
packaging
packaging
packaging
packaging
packaging
packaging
packaging


In [39]:
# Without deploying and then undeploying the pipeline first (the two calls below), the task will fail

# pipeline.deploy()
# pipeline.undeploy()

0,1
name,bigquerystatsmodelpipeline03
created,2023-05-11 20:08:05.069839+00:00
last_updated,2023-05-11 20:09:46.328966+00:00
deployed,False
tags,
versions,"f429497b-ac51-4cc6-a0d8-3b35f5f804e7, 51c65a05-7ac8-4542-94dd-3def2f8b4c9a"
steps,bigquerystatsmodelmodel2


In [47]:
# Start a single run of the orchestration, handing it the pipeline name as its argument,
# and show the resulting task.
task = orchestration.run_once({"pipeline_name": pipeline_name})
display(task)

Field,Value
ID,0d8225f0-5d98-43a6-b530-f7f8083a9547
Status,pending
Type,Temporary Run
Created At,2023-11-May 20:37:14
Updated At,2023-11-May 20:37:14


In [48]:
# Poll the task until it reports "started", displaying each intermediate status.
# The wait is bounded so that a task which never reaches "started" raises instead of
# hanging the notebook, and the status is read once per iteration so the displayed
# value is the same one the loop condition tested.
task_timeout_seconds = 600
task_poll_seconds = 5
task_deadline = time.monotonic() + task_timeout_seconds

task_status = task.status()
while task_status != "started":
    display(task_status)
    if time.monotonic() >= task_deadline:
        raise TimeoutError(
            f"task status is still '{task_status}' "
            f"after {task_timeout_seconds} seconds"
        )
    time.sleep(task_poll_seconds)
    task_status = task.status()

'pending'

In [42]:
# Deploy the pipeline and then immediately undeploy it.
# A note earlier in this notebook states that the orchestration task fails
# unless this deploy/undeploy cycle has been run first.

pipeline.deploy()
pipeline.undeploy()

Unnamed: 0,time,in.json,out.json,check_failures
0,2023-05-11 20:11:03.953,"{""temp"":{""0"":0.291304,""1"":0.243333,""2"":0.254167,""3"":0.253333,""4"":0.253333,""5"":0.255833,""6"":0.215833},""holiday"":{""0"":1,""1"":0,""2"":0,""3"":0,""4"":0,""5"":0,""6"":0},""workingday"":{""0"":0,""1"":1,""2"":1,""3"":1,""4"":0,""5"":0,""6"":1},""windspeed"":{""0"":0.168726,""1"":0.316546,""2"":0.350133,""3"":0.155471,""4"":0.124383,""5"":0.350754,""6"":0.154846}}","{""forecast"":[1231.2556997246595,1627.3643469089343,1674.3769827243134,1621.9273295873882,1140.7465817903185,1211.5223974364667,1457.1896450382922]}",0


In [None]:
# Start another single run of the orchestration with the pipeline name as its argument,
# and show the resulting task.
task = orchestration.run_once({"pipeline_name": pipeline_name})
display(task)

In [None]:
# the task should now succeed
#
# Poll the task until it reports "started", displaying each intermediate status.
# The wait is bounded so that a task which never reaches "started" raises instead of
# hanging the notebook, and the status is read once per iteration so the displayed
# value is the same one the loop condition tested.
task_timeout_seconds = 600
task_poll_seconds = 5
task_deadline = time.monotonic() + task_timeout_seconds

task_status = task.status()
while task_status != "started":
    display(task_status)
    if time.monotonic() >= task_deadline:
        raise TimeoutError(
            f"task status is still '{task_status}' "
            f"after {task_timeout_seconds} seconds"
        )
    time.sleep(task_poll_seconds)
    task_status = task.status()