# Zero Shot Classification (text) Pipeline Example

## Imports

In [1]:
import json
import os

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework

import pyarrow as pa
import numpy as np
import pandas as pd

In [2]:
# Open a client session with the Wallaroo instance. As the output below shows,
# this prompts for an interactive browser login before continuing.
wl = wallaroo.Client()

Please log into the following URL in a web browser:

	https://keycloak.autoscale-uat-ee.wallaroo.dev/auth/realms/master/device?user_code=VTLN-JKLY

Login successful!


In [3]:
def get_workspace(name):
    """Return the workspace called ``name``, creating it when none exists.

    Args:
        name: Workspace name to look for among ``wl.list_workspaces()``.

    Returns:
        The listed workspace whose ``name()`` equals ``name``, or the result
        of ``wl.create_workspace(name)`` when no listed workspace matches.
    """
    matches = [ws for ws in wl.list_workspaces() if ws.name() == name]
    if matches:
        # Should several workspaces share the name, the last one listed wins.
        return matches[-1]
    return wl.create_workspace(name)

# Shared prefix for the workspace, model and pipeline names used in this notebook.
prefix = "hf-zero-shot-classification"

## Set up workspace



In [4]:
# Look up (or create) this example's workspace, then make it the current one.
workspace_name = f"{prefix}-jch"
workspace = get_workspace(workspace_name)
wl.set_current_workspace(workspace)

{'name': 'hf-zero-shot-classification-jch', 'id': 42, 'archived': False, 'created_by': '0892876b-8b50-4541-bf29-0e570e590c01', 'created_at': '2023-06-22T13:42:34.416552+00:00', 'models': [{'name': 'hf-zero-shot-classification-model', 'versions': 1, 'owner_id': '""', 'last_update_time': datetime.datetime(2023, 6, 22, 13, 44, 51, 233603, tzinfo=tzutc()), 'created_at': datetime.datetime(2023, 6, 22, 13, 44, 51, 233603, tzinfo=tzutc())}], 'pipelines': [{'name': 'hf-zero-shot-classification-pipeline', 'create_time': datetime.datetime(2023, 6, 22, 13, 46, 11, 623402, tzinfo=tzutc()), 'definition': '[]'}]}

### Configure PyArrow Schema

You can find more info on the available inputs under the [official source code](https://github.com/huggingface/transformers/blob/v4.28.1/src/transformers/pipelines/zero_shot_classification.py#L172) from `🤗 Hugging Face`.

In [5]:
# Number of candidate labels supplied with each input row. The fixed-size list
# fields below all use this one value so they cannot drift out of sync.
NUM_CANDIDATE_LABELS = 2

input_schema = pa.schema([
    pa.field('inputs', pa.string()),  # required
    pa.field('candidate_labels', pa.list_(pa.string(), list_size=NUM_CANDIDATE_LABELS)),  # required
    pa.field('hypothesis_template', pa.string()),  # optional
    pa.field('multi_label', pa.bool_()),  # optional
])

# `scores` and `labels` carry one entry per candidate label. `list_size` can be
# omitted, but that may result in slightly worse performance.
output_schema = pa.schema([
    pa.field('sequence', pa.string()),
    pa.field('scores', pa.list_(pa.float64(), list_size=NUM_CANDIDATE_LABELS)),
    pa.field('labels', pa.list_(pa.string(), list_size=NUM_CANDIDATE_LABELS)),
])

### Upload Model

In [6]:
# Upload the zipped model archive together with its input/output schemas.
# The trailing bare `model` expression displays the uploaded model's details.
model_name = f"{prefix}-model"
model_path = './models/model-auto-conversion_hugging-face_dummy-pipelines_zero-shot-classification-pipeline.zip'

model = wl.upload_model(
    model_name,
    model_path,
    framework=Framework.HUGGING_FACE_ZERO_SHOT_CLASSIFICATION,
    input_schema=input_schema,
    output_schema=output_schema,
)
model

Waiting for model conversion... It may take up to 10.0min.
Model is Pending conversion..Converting..............Ready.


{'name': 'hf-zero-shot-classification-model', 'version': '6b930398-5887-4727-a26a-7fb29035c899', 'file_name': 'model-auto-conversion_hugging-face_dummy-pipelines_zero-shot-classification-pipeline.zip', 'image_path': 'proxy.replicated.com/proxy/wallaroo/ghcr.io/wallaroolabs/mlflow-deploy:v2023.3.0-main-3397', 'last_update_time': datetime.datetime(2023, 6, 22, 16, 56, 6, 951569, tzinfo=tzutc())}

## Deploy Pipeline

In [7]:
# Resources requested for the deployment: 0.25 CPU and 1Gi of memory.
deployment_config = (
    DeploymentConfigBuilder()
    .cpus(0.25)
    .memory('1Gi')
    .build()
)

In [8]:
# Build a pipeline named after the shared prefix and add the uploaded model as a step.
pipeline_name = f"{prefix}-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

# Deploy with the resource configuration built earlier, then display the status.
pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

 ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.244.4.133',
   'name': 'engine-5c9889d5b4-dvd4d',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'hf-zero-shot-classification-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'hf-zero-shot-classification-model',
      'version': 'acb7fd8a-57aa-41cd-b49e-a3e39c111630',
      'sha': '3dcc14dd925489d4f0a3960e90a7ab5917ab685ce955beca8924aa7bb9a69398',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.244.4.132',
   'name': 'engine-lb-584f54c899-7sbdq',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.244.4.131',
   'name': 'engine-sidekick-hf-zero-shot-classification-model-75-9b5dc6b7vx',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

## Run inference

In [9]:
# One list entry per inference row; each key matches a field of the input schema.
input_data = {
    "inputs": ["this is a test", "this is another test"],  # required
    "candidate_labels": [["english", "german"], ["english", "german"]],  # required: two labels per row, matching the schema's fixed list size
    "hypothesis_template": ["This example is {}.", "This example is {}."],  # optional: using the defaults, similar to not passing this parameter
    "multi_label": [False, False],  # optional: using the defaults, similar to not passing this parameter
}
dataframe = pd.DataFrame(input_data)
dataframe

Unnamed: 0,inputs,candidate_labels,hypothesis_template,multi_label
0,this is a test,"[english, german]",This example is {}.,False
1,this is another test,"[english, german]",This example is {}.,False


In [10]:
%time
pipeline.infer(dataframe)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs


Unnamed: 0,time,in.candidate_labels,in.hypothesis_template,in.inputs,in.multi_label,out.labels,out.scores,out.sequence,check_failures
0,2023-06-22 16:56:11.396,"[english, german]",This example is {}.,this is a test,False,"[english, german]","[0.504054605960846, 0.49594545364379883]",this is a test,0
1,2023-06-22 16:56:11.396,"[english, german]",This example is {}.,this is another test,False,"[english, german]","[0.5037839412689209, 0.4962160289287567]",this is another test,0


## Undeploy Pipelines

In [11]:
# Undeploy the pipeline now that inference is done. Per the output below, the
# call waits for the undeployment to finish before returning.
pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ....................................... ok


0,1
name,hf-zero-shot-classification-pipeline
created,2023-06-22 13:46:11.623402+00:00
last_updated,2023-06-22 16:56:11.137324+00:00
deployed,False
tags,
versions,"55c7a8d9-cb6b-4018-b165-9728a84b6a62, 9951f37d-3620-4687-823a-3c3d0013d9c3, 2fee5e40-d356-432b-ba0a-ee79a5ed81f9, 6de8b8cc-a08d-4436-a169-2721d4137027"
steps,hf-zero-shot-classification-model
