# Workshop Notebook 3: Observability - Drift Detection

The CV model on its own runs inference without issues, but the same pipeline with the Python post-processing step added after the CV model now returns an error.

In [1]:
# preload needed libraries 

import wallaroo
from wallaroo.object import EntityNotFoundError
from wallaroo.framework import Framework

from IPython.display import display

# used to display DataFrame information without truncating
from IPython.display import display
import pandas as pd
pd.set_option('display.max_colwidth', None)

import json
import datetime
import time

# used for unique connection names

import string
import random

import pyarrow as pa

In [2]:
## convenience functions from the previous notebooks

# return the workspace called <name> through the Wallaroo client.
def get_workspace(name, client):
    """Return the workspace called `name`, creating it when it does not exist.

    Args:
        name: Workspace name to look up.
        client: Object exposing `list_workspaces()` and `create_workspace(name)`.

    Returns:
        The first listed workspace whose `name()` equals `name`; otherwise
        the result of `client.create_workspace(name)`.
    """
    # Lazily scan the listing and stop at the first match.
    existing = next(
        (candidate for candidate in client.list_workspaces()
         if candidate.name() == name),
        None,
    )
    if existing is not None:
        return existing
    # nothing matched: create the workspace
    return client.create_workspace(name)

# get a pipeline by name in the workspace
def get_pipeline(pipeline_name, workspace, client):
    """Return the pipeline called `pipeline_name`, building it if it is missing.

    Args:
        pipeline_name: Name of the pipeline to look up.
        workspace: Object exposing `pipelines()`, searched for a match.
        client: Object exposing `build_pipeline(name)`, used when no
            pipeline in `workspace` has the requested name.

    Returns:
        The first pipeline in `workspace.pipelines()` whose `name()` equals
        `pipeline_name`; otherwise the pipeline returned by
        `client.build_pipeline(pipeline_name)`.
    """
    for candidate in workspace.pipelines():
        if candidate.name() == pipeline_name:
            return candidate
    # No match: return the newly built pipeline itself. The previous version
    # indexed the build result with [0], which treats a single pipeline
    # object as if it were a list.
    # NOTE(review): the pipeline is built through `client`, not `workspace` —
    # presumably it lands in the client's current workspace; confirm.
    return client.build_pipeline(pipeline_name)


## Login to Wallaroo

Retrieve the previous workspace, model versions, and pipelines used in the previous notebook.

In [3]:
## Log in to Wallaroo, then retrieve the workspace and pipeline by name

# NOTE(review): this client is replaced by the SSO client created a few lines
# below, so this first call looks redundant — confirm before removing.
wl = wallaroo.Client()

# hostname pieces used to build the API and auth endpoint URLs below
wallarooPrefix = "product-uat-ee."
wallarooSuffix = "wallaroocommunity.ninja"

wl = wallaroo.Client(api_endpoint=f"https://{wallarooPrefix}api.{wallarooSuffix}", 
                    auth_endpoint=f"https://{wallarooPrefix}keycloak.{wallarooSuffix}", 
                    auth_type="sso")

# retrieve the workspace (get_workspace creates it if it does not exist)

workspace_name = "cv-test-01"

workspace = get_workspace(workspace_name, wl)

# make the retrieved workspace the client's current workspace
wl.set_current_workspace(workspace)

# optionally, examine your current workspace
# (not the last expression in the cell, so its value is not displayed)
wl.get_current_workspace()

pipeline_name = 'cv-retail'

# look the pipeline up by name in the workspace
pipeline = get_pipeline(pipeline_name, workspace, wl)

# display the workspace and the pipeline
display(workspace)
display(pipeline)

{'name': 'cv-test-01', 'id': 23, 'archived': False, 'created_by': 'b0a387d7-cb82-49e1-81f9-1eb47952a01c', 'created_at': '2023-11-17T16:26:31.360634+00:00', 'models': [], 'pipelines': [{'name': 'cv-retail', 'create_time': datetime.datetime(2023, 11, 17, 16, 26, 32, 379033, tzinfo=tzutc()), 'definition': '[]'}]}

0,1
name,cv-retail
created,2023-11-17 16:26:32.379033+00:00
last_updated,2023-11-17 16:26:32.379033+00:00
deployed,(none)
arch,
tags,
versions,def9a1a9-83bf-4b7c-88b3-d5b649f6be25
steps,
published,False


In [4]:
# Upload the ONNX file under the model name 'mobilenet' and configure it;
# `cv_model` is added as a pipeline step in the cells below.
cv_model = wl.upload_model('mobilenet',
                "./models/mobilenet.pt.onnx",
                framework=Framework.ONNX).configure('onnx',
                                                    batch_config="single",
                                                    tensor_fields=["tensor"]
                                                    )

Set up the pipeline with the single model step as was done in notebook 1, then deploy it.

In [5]:
# undeploy just in case
pipeline.undeploy()

# clear the steps
pipeline.clear()

# just the cv model as the step
pipeline.add_model_step(cv_model)

# deployment configuration: 1 replica, 0.5 CPU, 1Gi of memory
deploy_config = wallaroo.DeploymentConfigBuilder().replica_count(1).cpus(0.5).memory("1Gi").build()
# the return value of deploy() is the last expression, so it is displayed as the cell output
pipeline.deploy(deployment_config=deploy_config)

0,1
name,cv-retail
created,2023-11-17 16:26:32.379033+00:00
last_updated,2023-11-17 16:27:17.760646+00:00
deployed,True
arch,
tags,
versions,"2589236f-ad15-47ed-9a05-8f2b6a39de84, def9a1a9-83bf-4b7c-88b3-d5b649f6be25"
steps,mobilenet
published,False


In [6]:
# inference test with only the CV model in the pipeline

# NOTE(review): the dataset argument presumably selects all fields plus
# metadata.elapsed in the returned result — confirm against the SDK docs.
infResults = pipeline.infer_from_file('./data/dairy_bottles.df.json', 
                                      dataset=["*", "metadata.elapsed"])

# just show the confidences
display(infResults.loc[:, ['out.confidences']])

Unnamed: 0,out.confidences
0,"[0.98649, 0.90115356, 0.6077846, 0.5922323, 0.53729033, 0.4513168, 0.43728516, 0.43094054, 0.4084834, 0.39185277, 0.35759133, 0.3181266, 0.26451287, 0.23062895, 0.20482065, 0.174621, 0.17313862, 0.15999581, 0.14913696, 0.1366402, 0.13322707, 0.12218643, 0.121301256, 0.11956108, 0.11527827, 0.09616333, 0.08654833, 0.078406945, 0.07234089, 0.062820904, 0.052787986]"


## Add Pipeline Processing Step

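Upload the Python post-processing step with its input and output schemas (the output schema adds an `avg_confidence` field), then add it to the pipeline after the CV model and redeploy.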


In [7]:
# Fields declared in both the input and the output schema of the Python step.
_detection_fields = [
    pa.field('boxes', pa.list_(pa.list_(pa.float32(), list_size=4))),
    pa.field('classes', pa.list_(pa.int64())),
    pa.field('confidences', pa.list_(pa.float32())),
]

input_schema = pa.schema(_detection_fields)

# The output schema declares one additional field, avg_confidence.
output_schema = pa.schema(
    _detection_fields + [pa.field('avg_confidence', pa.list_(pa.float32()))]
)

# Upload the Python script as a model and attach both schemas to it.
python_step = wl.upload_model(
    "cv-post-process-drift-detection",
    "./models/post-process-drift-detection-arrow.py",
    framework=Framework.PYTHON,
).configure('python', input_schema=input_schema, output_schema=output_schema)

In [11]:
# undeploy just in case
pipeline.undeploy()

# clear the steps
pipeline.clear()

# cv model and python step, added in that order
pipeline.add_model_step(cv_model)
pipeline.add_model_step(python_step)

# same deployment configuration as before: 1 replica, 0.5 CPU, 1Gi of memory
deploy_config = wallaroo.DeploymentConfigBuilder().replica_count(1).cpus(0.5).memory("1Gi").build()
pipeline.deploy(deployment_config=deploy_config)

0,1
name,cv-retail
created,2023-11-17 16:26:32.379033+00:00
last_updated,2023-11-17 16:30:43.745691+00:00
deployed,True
arch,
tags,
versions,"16c40cff-6936-41d2-bf29-c3e6759a454c, b536c035-a4f8-48e7-8bef-dd9e82657b33, 2589236f-ad15-47ed-9a05-8f2b6a39de84, def9a1a9-83bf-4b7c-88b3-d5b649f6be25"
steps,mobilenet
published,False


In [9]:
# show the deployment status; the recorded output lists both models as 'Running'
pipeline.status()

{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.0.96',
   'name': 'engine-6f7f5688c7-qgfvn',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'cv-retail',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'cv-post-process-drift-detection',
      'version': '557167c4-054e-48ba-83b9-e61aff339552',
      'sha': 'f60c8ca55c6350d23a4e76d24cc3e5922616090686e88c875fadd6e79c403be5',
      'status': 'Running'},
     {'name': 'mobilenet',
      'version': '05f4e78d-9b45-42ae-8d13-83925b7a73d1',
      'sha': '9044c970ee061cc47e0c77e20b05e884be37f2a20aa9c0c3ce1993dbd486a830',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.1.22',
   'name': 'engine-lb-584f54c899-7rdtc',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': []}

In [10]:
# inference test with the CV model followed by the Python step
# NOTE: in the recorded run this cell ended with `KeyError: 0`; this is the
# error the notebook is demonstrating.

infResults = pipeline.infer_from_file('./data/dairy_bottles.df.json', 
                                      dataset=["*", "metadata.elapsed"])

# just show the confidences
display(infResults.loc[:, ['out.confidences']])

KeyError: 0

In [None]:
# undeploy the pipeline now that the notebook is finished with it

pipeline.undeploy()
