In [1]:
# Set up a workspace and pipeline for testing

import wallaroo
from wallaroo.object import EntityNotFoundError, RequiredAttributeMissing

# to display dataframe tables
from IPython.display import display
# used to display dataframe information without truncating
import pandas as pd
pd.set_option('display.max_colwidth', None)
import pyarrow as pa

import os
# Used for the Wallaroo SDK version 2023.1
os.environ["ARROW_ENABLED"]="True"

import time
import json

# for Big Query connections
from google.cloud import bigquery
from google.oauth2 import service_account
import db_dtypes

# Connect to Wallaroo.
#
# Option 1 - login through the local Wallaroo instance:
#   wl = wallaroo.Client()
#
# Option 2 (used below) - SSO login through Keycloak. The prefix and suffix
# together select the instance; another prefix that has been used with this
# suffix is "product-uat-ee".
wallarooPrefix = "doc-test"
wallarooSuffix = "wallaroocommunity.ninja"

wl = wallaroo.Client(
    api_endpoint=f"https://{wallarooPrefix}.api.{wallarooSuffix}",
    auth_endpoint=f"https://{wallarooPrefix}.keycloak.{wallarooSuffix}",
    auth_type="sso",
)

# Names used by the later steps of this notebook

workspace_name = 'bigquerystatsmodelworkspace'
pipeline_name = 'bigquerystatsmodelpipeline03'
model_name = 'bigquerystatsmodelmodel2'
model_file_name = './models/bike_day_model.pkl'

# BigQuery connection used for inputs. The connection arguments are the parsed
# contents of the service account key file; a context manager is used so the
# file handle is closed as soon as the JSON has been read.
bigquery_connection_input_name = "bigqueryforecastinputs2"
bigquery_connection_input_type = "BIGQUERY"
with open('./resources/bigquery_service_account_input_key.json.example') as input_key_file:
    bigquery_connection_input_argument = json.load(input_key_file)

# BigQuery connection used for outputs, loaded the same way.
bigquery_connection_output_name = "bigqueryforecastoutputs2"
bigquery_connection_output_type = "BIGQUERY"
with open('./resources/bigquery_service_account_output_key.json.example') as output_key_file:
    bigquery_connection_output_argument = json.load(output_key_file)

# helper methods to retrieve workspaces and pipelines

def get_workspace(name):
    """Return the workspace called ``name``, creating it if none exists.

    Scans ``wl.list_workspaces()`` for workspaces whose ``name()`` equals
    ``name``. When several workspaces match, the last one listed is returned.
    When none match, a new workspace is created with
    ``wl.create_workspace(name)`` and returned.
    """
    matches = [ws for ws in wl.list_workspaces() if ws.name() == name]
    if matches:
        # Keep the "last match wins" behaviour for duplicate names.
        return matches[-1]
    return wl.create_workspace(name)

def get_pipeline(name):
    """Return the first pipeline named ``name``, building one if not found.

    The lookup uses ``wl.pipelines_by_name(name)`` and takes its first entry.
    If that lookup raises ``EntityNotFoundError``, a new pipeline is built
    with ``wl.build_pipeline(name)`` and returned instead.
    """
    try:
        return wl.pipelines_by_name(name)[0]
    except EntityNotFoundError:
        return wl.build_pipeline(name)

def get_connection(name, connection_type, connection_arguments):
    """Return the connection called ``name``, creating it if the lookup fails.

    ``wl.get_connection(name)`` is tried first. If it raises
    ``RequiredAttributeMissing``, a new connection is created with
    ``wl.create_connection(name, connection_type, connection_arguments)``
    and returned instead.
    """
    try:
        return wl.get_connection(name)
    except RequiredAttributeMissing:
        return wl.create_connection(name, connection_type, connection_arguments)


# Resolve (or create) the workspace and make it the current one
workspace = get_workspace(workspace_name)
wl.set_current_workspace(workspace)

# Resolve (or build) the pipeline
pipeline = get_pipeline(pipeline_name)

# Upload the model file and configure it for the "python" runtime
bike_day_model = (
    wl.upload_model(model_name, model_file_name)
    .configure(runtime="python")
)

# Use the uploaded model as a step of the pipeline
pipeline.add_model_step(bike_day_model)

# Resolve (or create) the BigQuery input and output connections
connection_input = get_connection(
    bigquery_connection_input_name,
    bigquery_connection_input_type,
    bigquery_connection_input_argument,
)
connection_output = get_connection(
    bigquery_connection_output_name,
    bigquery_connection_output_type,
    bigquery_connection_output_argument,
)


Now we'll create tasks from our orchestrations, passing in custom parameters.

In [13]:
# List the existing orchestrations; as the last expression of the cell,
# the result is rendered as the cell's output table.
wl.list_orchestrations()

id,status,name,sha,created at,updated at
5bb2372b-e7af-40e9-8edd-be09b128f716,ready,bigquery_statsmodel_remote_inference.zip,6f9c76cfce70afae13b7486a92210f49644149696f47b307b48edd1caee10f15,2023-11-May 19:17:05,2023-11-May 19:17:51
ed3c00e6-90e6-484d-9425-4511addb798b,ready,bigquery_statsmodel_remote_inference.zip,b94bae4a94b443b5d8f5fac19849805b47dbf5d10d24a09cfa40900187bda161,2023-11-May 19:23:19,2023-11-May 19:24:05


In [20]:
# Upload the orchestration archive, then list the orchestrations again so the
# new entry and its status are visible in the cell output.
orchestration = wl.upload_orchestration(
    path="./bigquery_statsmodel_remote_inference/bigquery_statsmodel_remote_inference.zip"
)
wl.list_orchestrations()

id,status,name,sha,created at,updated at
5bb2372b-e7af-40e9-8edd-be09b128f716,ready,bigquery_statsmodel_remote_inference.zip,6f9c76cfce70afae13b7486a92210f49644149696f47b307b48edd1caee10f15,2023-11-May 19:17:05,2023-11-May 19:17:51
ed3c00e6-90e6-484d-9425-4511addb798b,ready,bigquery_statsmodel_remote_inference.zip,b94bae4a94b443b5d8f5fac19849805b47dbf5d10d24a09cfa40900187bda161,2023-11-May 19:23:19,2023-11-May 19:24:05
96915dce-ede9-4ea6-b7a7-850de26c6e84,ready,bigquery_statsmodel_remote_inference.zip,7e9cafd2a624fba4ea91c4e995eccbdf0ed03e94891fffaf01e7ea15f07cb60c,2023-11-May 19:26:40,2023-11-May 19:27:26
779ec014-90f3-46c7-a7bd-288f9ec34a69,error,bigquery_statsmodel_remote_inference.zip,1ba67b04bf70d6546d6a9bc39ca1ec74a1302f538297d639d609afeccea11b19,2023-11-May 19:32:24,2023-11-May 19:32:34
2b96c913-1209-4970-8b58-45bd912c5804,pending_packaging,bigquery_statsmodel_remote_inference.zip,1ba67b04bf70d6546d6a9bc39ca1ec74a1302f538297d639d609afeccea11b19,2023-11-May 19:33:43,2023-11-May 19:33:43


In [21]:
# Wait for the uploaded orchestration to reach the 'ready' status, printing
# each status seen while waiting.
# An upload can end in the 'error' status (one of the earlier uploads in this
# notebook did), in which case it never becomes 'ready'. Stop on 'error', and
# after a time limit, instead of polling forever.
packaging_deadline = time.monotonic() + 10 * 60  # allow up to 10 minutes
orchestration_status = orchestration.status()
while orchestration_status != 'ready':
    if orchestration_status == 'error':
        raise RuntimeError("Orchestration ended in the 'error' status and will not become 'ready'")
    if time.monotonic() >= packaging_deadline:
        raise TimeoutError(
            f"Orchestration still '{orchestration_status}' after 10 minutes"
        )
    print(orchestration_status)
    time.sleep(5)
    # Query the status once per iteration so the printed value is the one tested
    orchestration_status = orchestration.status()

packaging
packaging
packaging
packaging
packaging
packaging
packaging


In [29]:
# Start a one-off run of the orchestration, passing the workspace, pipeline
# and connection names as its arguments, then show the resulting task.
task = orchestration.run_once(
    dict(
        workspace_name=workspace_name,
        pipeline_name=pipeline_name,
        bigquery_connection_input_name=bigquery_connection_input_name,
        bigquery_connection_output_name=bigquery_connection_output_name,
    )
)
display(task)

Field,Value
ID,d26baaa7-29b5-4fdb-9d99-367d9b4f9b50
Status,pending
Type,Temporary Run
Created At,2023-11-May 19:41:01
Updated At,2023-11-May 19:41:01


In [30]:
# Wait until the task reports the "started" status, displaying each status
# seen while waiting. The wait is bounded by a time limit so the cell cannot
# hang forever if the task never reaches "started".
task_deadline = time.monotonic() + 5 * 60  # allow up to 5 minutes
task_status = task.status()
while task_status != "started":
    if time.monotonic() >= task_deadline:
        raise TimeoutError(f"Task still '{task_status}' after 5 minutes")
    display(task_status)
    time.sleep(5)
    # Query the status once per iteration so the displayed value is the one tested
    task_status = task.status()

'pending'

In [None]:
# Show the pipeline's logs as the cell output.
# NOTE(review): presumably used to confirm the task's inferences ran - confirm.
pipeline.logs()