# BYOP Example

## Summary

On this notebook we will upload the model using the `Wallaroo SDK` and run inferences on unseen data.

## Imports

In [2]:
import numpy as np
import pandas as pd
import json
import os
import pickle
import pyarrow as pa
import tensorflow as tf
import wallaroo

from sklearn.cluster import KMeans
from tensorflow.keras.datasets import cifar10
from tensorflow.keras import Model
from tensorflow.keras.layers import Flatten
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework

In [None]:
wl = wallaroo.Client(auth_type="sso", interactive=True)

In [None]:
def get_workspace(name):
    workspace = None
    for ws in wl.list_workspaces():
        if ws.name() == name:
            workspace= ws
    if(workspace == None):
        workspace = wl.create_workspace(name)
    return workspace

prefix = "arbitrary-python-vgg16-clustering"

In [None]:
workspace = get_workspace(f"{prefix}-jch")
wl.set_current_workspace(workspace)

### Configure PyArrow Schema

> `input_schema` and `output_schema` should match exactly the data we're expecting to retrieve and return within `ImageClustering._predict()`.

In [11]:
input_schema = pa.schema([
    pa.field('images', pa.list_(
        pa.list_(
            pa.list_(
                pa.int64(),
                list_size=3
            ),
            list_size=32
        ),
        list_size=32
    )),
])

output_schema = pa.schema([
    pa.field('predictions', pa.int64()),
])

### Upload Model

In [12]:
model = wl.upload_model('vgg16-clustering', 'model-auto-conversion_BYOP_vgg16_clustering', framework=Framework.CUSTOM, input_schema=input_schema, output_schema=output_schema, convert_wait=True)
model

Waiting for model conversion... It may take up to 10.0min.
Model is Pending conversion...Converting.................Ready.


{'name': 'vgg16-clustering', 'version': '5f3f4a0e-8921-4e36-b3af-ee32dec77314', 'file_name': 'vgg16_clustering.zip', 'image_path': 'proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mlflow-deploy:v2023.3.0-main-3443', 'last_update_time': datetime.datetime(2023, 6, 28, 16, 36, 47, 931056, tzinfo=tzutc())}

## Deploy Pipeline

In [13]:
deployment_config = DeploymentConfigBuilder() \
    .cpus(0.25).memory('4Gi') \
    .build()

In [14]:
pipeline_name = f"{prefix}-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 90s ............................. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.29.63',
   'name': 'engine-dfd47ffbc-gs9b5',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'vgg16-clustering-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'vgg16-clustering',
      'version': '5f3f4a0e-8921-4e36-b3af-ee32dec77314',
      'sha': 'f5f5e1ab29057ac750b7b7afefd6fb16c789b22c3291a966597a5d9846eb1c53',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.29.62',
   'name': 'engine-lb-584f54c899-7m4dz',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.244.4.22',
   'name': 'engine-sidekick-vgg16-clustering-46-6cb499d45b-tmfkk',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

## Run inference

In [15]:
input_data = {
        "images": [np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)] * 2,
}
dataframe = pd.DataFrame(input_data)
dataframe

Unnamed: 0,images
0,"[[[0, 42, 244], [163, 88, 141], [195, 14, 131]..."
1,"[[[0, 42, 244], [163, 88, 141], [195, 14, 131]..."


In [16]:
%time
pipeline.infer(dataframe, timeout=10000)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.53 µs


Unnamed: 0,time,in.images,out.predictions,check_failures
0,2023-06-28 16:37:46.068,"[0, 42, 244, 163, 88, 141, 195, 14, 131, 89, 1...",1,0
1,2023-06-28 16:37:46.068,"[0, 42, 244, 163, 88, 141, 195, 14, 131, 89, 1...",1,0


## Undeploy Pipelines

In [17]:
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ...................................... ok
Waiting for undeployment - this will take up to 45s ..................................... ok
