In [50]:
import json
import os

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.engine_config import Architecture
from wallaroo.object import EntityNotFoundError

import pyarrow as pa
import numpy as np
import pandas as pd

In [51]:
# Helper functions

def get_workspace(name):
    """Return the workspace called ``name``, creating it when none exists.

    Uses the notebook-level Wallaroo client ``wl``, which must be defined
    before this function is called.

    Args:
        name: Workspace name to look up (compared against ``ws.name()``).

    Returns:
        The matching workspace from ``wl.list_workspaces()``; if several
        workspaces share the name, the last one listed is returned. When
        there is no match, the result of ``wl.create_workspace(name)``.
    """
    matches = [ws for ws in wl.list_workspaces() if ws.name() == name]
    if matches:
        # Decide on "was anything found", not on `workspace == None`:
        # equality against None can be overridden by the workspace object.
        return matches[-1]
    return wl.create_workspace(name)

def get_pipeline(pipeline_name):
    """Return an existing pipeline called ``pipeline_name``, or build a new one.

    Uses the notebook-level Wallaroo client ``wl``. ``EntityNotFoundError``
    must be in scope (it is imported from ``wallaroo.object`` in the
    notebook's import cell); otherwise the ``except`` clause itself fails
    with ``NameError`` when the lookup raises.

    Args:
        pipeline_name: Name passed to ``wl.pipelines_by_name`` and, on a
            miss, to ``wl.build_pipeline``.

    Returns:
        The first entry returned by ``wl.pipelines_by_name(pipeline_name)``,
        or the result of ``wl.build_pipeline(pipeline_name)`` when the lookup
        raises ``EntityNotFoundError`` or returns nothing.
    """
    try:
        matches = wl.pipelines_by_name(pipeline_name)
    except EntityNotFoundError:
        matches = []
    if matches:
        return matches[0]
    # An empty result is treated like "not found" instead of leaking an
    # IndexError from `[0]`.
    return wl.build_pipeline(pipeline_name)

def get_workspace_id_by_name(workspace_name: str, workspaces_list: list):
    """Look up a workspace's ID by name in a list of workspace records.

    Args:
        workspace_name: Value to match against each record's ``"name"`` key.
        workspaces_list: Iterable of mapping-like records that expose
            ``"name"`` and ``"id"`` keys.

    Returns:
        The ``"id"`` value of the first record whose ``"name"`` equals
        ``workspace_name``.

    Raises:
        IndexError: If no record has the requested name.
    """
    # Read from the `workspaces_list` parameter; the previous body referenced
    # an undefined name (`workspaceList`) and failed with NameError.
    for entry in workspaces_list:
        if entry["name"] == workspace_name:
            return entry["id"]
    raise IndexError(f"no workspace named {workspace_name!r} in workspaces_list")

In [52]:
# Create the Wallaroo client used by every later cell and by the helper
# functions above (they read the global name `wl`).
# NOTE(review): request_timeout=1200 is presumably in seconds -- the deploy and
# undeploy logs below report waiting "up to 1200s"; confirm against the SDK.
wl = wallaroo.Client(request_timeout=1200)

### Select or Create Workspace


In [53]:
# Fetch the "llm-models" workspace (get_workspace creates it when no workspace
# with that name is listed) and make it the client's current workspace.
workspace_name = "llm-models"
workspace = get_workspace(workspace_name)
wl.set_current_workspace(workspace)
# Keep the workspace ID in its own variable; no later cell shown here reads it.
workspace_id = workspace.id()

### Get uploaded model

In [54]:
# Take the first result of searching for "llama3-instruct".
# NOTE(review): indexing with [0] assumes at least one match, i.e. the model
# was uploaded beforehand; an empty result would presumably raise IndexError.
model = wl.search_models("llama3-instruct")[0]

In [55]:
# Display the model's summary (name, version, file name, SHA, status, ...).
model

0,1
Name,llama3-instruct
Version,700fe65c-53c9-45db-974a-d3bab7389658
File Name,model-auto-conversion_BYOP_llama_byop_llama3_instruct_8b_v2.zip
SHA,b92b26c9c53e32ef8d465922ff449288b8d305dd311d48f48aaef2ff3ebce2ec
Status,ready
Image Path,proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mlflow-deploy:v2023.4.2-4668
Architecture,
Updated At,2024-01-May 19:29:11


### Deployment

In [56]:
# Deployment configuration: cpus(1) and memory('1Gi') on the builder, one GPU
# requested for this model's sidekick container via sidekick_gpus(model, 1),
# and a deployment label naming the a100 accelerator.
# NOTE(review): presumably the label steers scheduling onto an A100 node pool;
# confirm it matches the cluster's node labels.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(1)
    .memory('1Gi')
    .sidekick_gpus(model, 1)
    .deployment_label('wallaroo.ai/accelerator: a100')
    .build()
)

In [57]:
# Build the "llamav3-instruct" pipeline, add the model as its step, and deploy
# it with the deployment configuration defined above. The deploy call is the
# cell's last expression, so its return value is rendered as the summary table.
pipeline = wl.build_pipeline("llamav3-instruct")
pipeline.add_model_step(model)
pipeline.deploy(deployment_config=deployment_config)

Waiting for deployment - this will take up to 1200s ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... ok


0,1
name,llamav3-instruct
created,2024-05-01 19:51:08.240637+00:00
last_updated,2024-05-01 19:51:08.307643+00:00
deployed,True
arch,
tags,
versions,"4172bf2c-bcdc-4025-9768-bdeaad5e8770, 230101db-e555-4ca8-8e79-cd57464138cc"
steps,llama3-instruct
published,False


In [58]:
# Show the deployment status; the output below lists the engine, the engine
# load balancer and the model's sidekick, each reporting 'Running'.
pipeline.status()

{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.60.3.8',
   'name': 'engine-6bbc665cc6-hf8sd',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'llamav3-instruct',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'llama3-instruct',
      'version': '700fe65c-53c9-45db-974a-d3bab7389658',
      'sha': 'b92b26c9c53e32ef8d465922ff449288b8d305dd311d48f48aaef2ff3ebce2ec',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.60.4.32',
   'name': 'engine-lb-5df9b487cf-l95cg',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.60.3.9',
   'name': 'engine-sidekick-llama3-instruct-8-774475c8d7-dqkmf',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

### Inference

In [59]:
# Single-row input frame: one prompt in the 'text' column.
data = pd.DataFrame(
    [
        {'text': 'What is Wallaroo.AI?'},
    ]
)

In [60]:
%time
result = pipeline.infer(data, timeout=10000)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.82 µs


In [64]:
# Show the generated text for the first (and only) input row.
# NOTE(review): assumes `result` is a DataFrame-like object with an
# 'out.generated_text' column and a row labelled 0 -- confirm against the SDK.
result['out.generated_text'][0]

'Wallaroo.AI is an AI platform that enables developers to build, deploy, and manage AI and machine learning models at scale. It provides a cloud-based infrastructure for building, training, and deploying AI models, as well as a set of tools and APIs for integrating AI into various applications.\n\nWallaroo.AI is designed to make it easy for developers to build and deploy AI models, regardless of their level of expertise in machine learning. It provides a range of features, including support for popular machine learning frameworks such as TensorFlow and PyTorch, as well as a set of pre-built AI models and APIs for common use cases such as image and speech recognition, natural language processing, and predictive analytics.\n\nWallaroo.AI is particularly well-suited for developers who are looking to build AI-powered applications, but may not have extensive expertise in machine learning or AI development. It provides a range of tools and resources to help developers get started with buildi

### Undeploy

In [65]:
# Undeploy the pipeline once inference is done; the summary below then shows
# deployed=False.
# NOTE(review): presumably this releases the CPU/GPU resources requested by
# the deployment configuration -- confirm in the Wallaroo docs.
pipeline.undeploy()

Waiting for undeployment - this will take up to 1200s ............................................ ok


0,1
name,llamav3-instruct
created,2024-05-01 19:51:08.240637+00:00
last_updated,2024-05-01 19:51:08.307643+00:00
deployed,False
arch,
tags,
versions,"4172bf2c-bcdc-4025-9768-bdeaad5e8770, 230101db-e555-4ca8-8e79-cd57464138cc"
steps,llama3-instruct
published,False
