This tutorial and the assets can be downloaded as part of the [Wallaroo Tutorials repository](https://github.com/WallarooLabs/Wallaroo_Tutorials/blob/wallaroo2024.4_tutorials/wallaroo-llms/llamacpp-with-safeguards).

In [1]:
import json
import os

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework
from wallaroo.engine_config import Architecture
from wallaroo.dynamic_batching_config import DynamicBatchingConfig

import pyarrow as pa
import numpy as np
import pandas as pd

In [17]:
# Create the Wallaroo SDK client; `wl` is used below to upload both models
# and to build the pipeline.
wl = wallaroo.Client()

### Upload model

In [19]:
# Schemas for the llama.cpp LLM step.
# In: a single string field, "text".
input_schema = pa.schema([pa.field("text", pa.string())])

# Out: two string fields, "text" and "generated_text".
output_schema = pa.schema(
    [pa.field(column, pa.string()) for column in ("text", "generated_text")]
)

In [20]:
# Upload the llama.cpp archive as a custom-framework model.
uploaded_llm = wl.upload_model(
    'llama-cpp-sdk-safeguards',
    'byop_llamacpp_safeguards.zip',
    framework=Framework.CUSTOM,
    input_schema=input_schema,
    output_schema=output_schema,
)

# Dynamic batching settings; going by the parameter names this targets batches
# of 8 with a delay of up to 1000 ms (confirm against the Wallaroo SDK docs).
batching = DynamicBatchingConfig(max_batch_delay_ms=1000, batch_size_target=8)

# Re-apply the schemas together with the batching configuration.
model = uploaded_llm.configure(
    input_schema=input_schema,
    output_schema=output_schema,
    dynamic_batching_config=batching,
)
model

Please log into the following URL in a web browser:

	https://autoscale-uat-gcp.wallaroo.dev/auth/realms/master/device?user_code=EJBN-TSRY

Login successful!
Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a container runtime..
Model is attempting loading to a container runtime............successful

Ready


0,1
Name,llama-cpp-sdk-safeguards
Version,c28e8fee-a1e0-48eb-a906-430fe1eba7ac
File Name,byop_llamacpp_safeguards.zip
SHA,45752b3566691a641787abd9b1b9d94809f8a74d545283d599e8a2cdc492d110
Status,ready
Image Path,proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mac-deploy:v2024.4.0-5825
Architecture,x86
Acceleration,none
Updated At,2024-12-Dec 15:34:44
Workspace id,60


In [5]:
# Schemas for the Safeguards harmful-language listener step.
# NOTE: this rebinds `input_schema` / `output_schema`, which earlier held the
# LLM schemas; re-running the LLM upload cell after this one would pick up
# these listener schemas instead.

# In: the same two string fields the LLM step emits.
input_schema = pa.schema(
    [pa.field(column, pa.string()) for column in ("text", "generated_text")]
)

# Out: the listener's verdict fields plus the generated text.
output_schema = pa.schema(
    [
        pa.field(column, dtype)
        for column, dtype in (
            ("harmful", pa.bool_()),
            ("reasoning", pa.string()),
            ("confidence", pa.float32()),
            ("generated_text", pa.string()),
        )
    ]
)

In [6]:
# Upload the harmful-language listener as a custom-framework model, using the
# listener schemas currently bound to `input_schema` / `output_schema`.
listener_name = 'byop-safeguards-harmful-5'
listener_archive = 'byop-safeguards-harmful.zip'

listener = wl.upload_model(
    listener_name,
    listener_archive,
    framework=Framework.CUSTOM,
    input_schema=input_schema,
    output_schema=output_schema,
)
listener

Waiting for model loading - this will take up to 10.0min.
Model is pending loading to a container runtime..
Model is attempting loading to a container runtime................................successful

Ready


0,1
Name,byop-safeguards-harmful-5
Version,98893de8-6c13-44cf-b098-b4f1f44ff483
File Name,byop-safeguards-harmful.zip
SHA,c41ff30b7032262e6ceffed2da658a44d16e698c1e826c3526b6a2379c8d2b1b
Status,ready
Image Path,proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mac-deploy:v2024.4.0-5825
Architecture,x86
Acceleration,none
Updated At,2024-12-Dec 14:50:39
Workspace id,60


### Deployment

In [21]:
# Read the listener's settings from credentials.json. The context manager
# closes the file handle as soon as parsing finishes (the previous inline
# `json.load(open(...))` never closed it explicitly).
with open("credentials.json", "r") as credentials_file:
    listener_env = json.load(credentials_file)

# Resource allocation for the deployment:
#   - cpus(1) / memory('2Gi'): base allocation (presumably the Wallaroo
#     engine; confirm against the SDK docs)
#   - LLM container (`model`): 6 CPUs, 10Gi
#   - listener container (`listener`): 2 CPUs, 10Gi, plus the values loaded
#     from credentials.json passed via sidekick_env
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(1).memory('2Gi')
    .sidekick_cpus(model, 6)
    .sidekick_memory(model, '10Gi')
    .sidekick_cpus(listener, 2)
    .sidekick_memory(listener, '10Gi')
    .sidekick_env(listener, listener_env)
    .build()
)

In [None]:
# Build the two-step pipeline: the LLM is added first and the harmful-language
# listener second, then the pipeline is deployed with the resource
# configuration defined above.
pipeline = wl.build_pipeline("safeguards-llamacpp-2")
for step_model in (model, listener):
    pipeline.add_model_step(step_model)

pipeline.deploy(deployment_config=deployment_config)

In [27]:
# Check the deployment state (pipeline, engine, and per-model statuses)
# before sending inference requests.
pipeline.status()

{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.4.4.27',
   'name': 'engine-6c578848c9-bhs29',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'safeguards-llamacpp-2',
      'status': 'Running',
      'version': '2b61e016-1e92-4f7a-8efb-f09b29cd126a'}]},
   'model_statuses': {'models': [{'model_version_id': 151,
      'name': 'byop-safeguards-harmful-5',
      'sha': 'c41ff30b7032262e6ceffed2da658a44d16e698c1e826c3526b6a2379c8d2b1b',
      'status': 'Running',
      'version': '98893de8-6c13-44cf-b098-b4f1f44ff483'},
     {'model_version_id': 152,
      'name': 'llama-cpp-sdk-safeguards',
      'sha': '45752b3566691a641787abd9b1b9d94809f8a74d545283d599e8a2cdc492d110',
      'status': 'Running',
      'version': 'c28e8fee-a1e0-48eb-a906-430fe1eba7ac'}]}}],
 'engine_lbs': [{'ip': '10.4.4.26',
   'name': 'engine-lb-6676794678-bbpfm',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip':

### Inference

In [28]:
# Single-row request frame; the "text" column matches the LLM step's input field.
prompt = 'Describe what Wallaroo.AI is'
data = pd.DataFrame([{'text': prompt}])

In [33]:
# Send the request through the deployed pipeline (LLM step, then listener).
# The timeout is deliberately large; units are defined by the Wallaroo SDK
# (presumably seconds; confirm).
INFER_TIMEOUT = 10000
result = pipeline.infer(data, timeout=INFER_TIMEOUT)
result

Unnamed: 0,time,in.text,out.confidence,out.generated_text,out.harmful,out.reasoning,anomaly.count
0,2024-12-12 15:54:38.440,Describe what Wallaroo.AI is,0.95,Describe what Wallaroo.AI is,False,This response provides a neutral and informati...,0


In [36]:
# Listener's confidence value for the first (only) row.
result.loc[0, "out.confidence"]

0.95

In [35]:
# Listener's harmful / not-harmful flag for the first (only) row.
result.loc[0, "out.harmful"]

False

In [34]:
# Full reasoning text from the listener (the table view above truncates it).
result.loc[0, "out.reasoning"]

'This response provides a neutral and informative description of Wallaroo.AI, highlighting its capabilities without perpetuating any biases or stereotypes.'

In [None]:
# Undeploy the pipeline when finished; presumably this releases the CPU and
# memory reserved by the deployment configuration (confirm in the Wallaroo docs).
pipeline.undeploy()