# Zero Shot Classification (text) Pipeline Example

## Imports

In [1]:
import json
import os

import wallaroo
from wallaroo.pipeline   import Pipeline
from wallaroo.deployment_config import DeploymentConfigBuilder
from wallaroo.framework import Framework

import pyarrow as pa
import numpy as np
import pandas as pd

In [None]:
wl = wallaroo.Client(auth_type="sso", interactive=True)

### Configure PyArrow Schema

You can find more info on the available inputs under the [official source code](https://github.com/huggingface/transformers/blob/v4.28.1/src/transformers/pipelines/zero_shot_classification.py#L172) from `🤗 Hugging Face`.

In [4]:
input_schema = pa.schema([
    pa.field('inputs', pa.string()), # required
    pa.field('candidate_labels', pa.list_(pa.string(), list_size=2)), # required
    pa.field('hypothesis_template', pa.string()), # optional
    pa.field('multi_label', pa.bool_()), # optional
])

output_schema = pa.schema([
    pa.field('sequence', pa.string()),
    pa.field('scores', pa.list_(pa.float64(), list_size=2)), # same as number of candidate labels, list_size can be skipped by may result in slightly worse performance
    pa.field('labels', pa.list_(pa.string(), list_size=2)), # same as number of candidate labels, list_size can be skipped by may result in slightly worse performance
])

### Upload Model

In [None]:
model = wl.upload_model('hf-zero-shot-classification', 
                        'models/hugging-face/zero-shot-classification-pipeline.zip', 
                        framework=Framework.HUGGING_FACE_ZERO_SHOT_CLASSIFICATION, 
                        input_schema=input_schema, 
                        output_schema=output_schema)
model

Waiting for model conversion... It may take up to 10.0min.
Model is Pending conversion..Converting............Ready.


{'name': 'hf-zero-shot-classification-new-2', 'version': 'c40be249-4e25-47aa-8c80-3b932d6c5d23', 'file_name': 'zero-shot-classification-pipeline.zip', 'image_path': 'ghcr.io/wallaroolabs/mlflow-deploy:main', 'last_update_time': datetime.datetime(2023, 6, 9, 12, 14, 49, 928287, tzinfo=tzutc())}

## Deploy Pipeline

In [8]:
deployment_config = DeploymentConfigBuilder() \
    .cpus(0.25).memory('1Gi') \
    .build()

In [9]:
pipeline_name = "hf-zero-shot-classification-pipeline"
pipeline = wl.build_pipeline(pipeline_name)
pipeline.add_model_step(model)

pipeline.deploy(deployment_config=deployment_config)
pipeline.status()

Waiting for deployment - this will take up to 90s .............. ok


{'status': 'Running',
 'details': [],
 'engines': [{'ip': '10.24.32.219',
   'name': 'engine-58ccb99c7-vrmpw',
   'status': 'Running',
   'reason': None,
   'details': [],
   'pipeline_statuses': {'pipelines': [{'id': 'hf-zero-shot-classification-pipeline',
      'status': 'Running'}]},
   'model_statuses': {'models': [{'name': 'hf-zero-shot-classification-new-2',
      'version': 'c40be249-4e25-47aa-8c80-3b932d6c5d23',
      'sha': '3dcc14dd925489d4f0a3960e90a7ab5917ab685ce955beca8924aa7bb9a69398',
      'status': 'Running'}]}}],
 'engine_lbs': [{'ip': '10.24.32.218',
   'name': 'engine-lb-7866867b55-9bpz9',
   'status': 'Running',
   'reason': None,
   'details': []}],
 'sidekicks': [{'ip': '10.24.32.217',
   'name': 'engine-sidekick-hf-zero-shot-classification-new-2-42-56445vnd82',
   'status': 'Running',
   'reason': None,
   'details': [],
   'statuses': '\n'}]}

## Run inference

In [10]:
input_data = {
        "inputs": ["this is a test", "this is another test"], # required
        "candidate_labels": [["english", "german"], ["english", "german"]], # optional: using the defaults, similar to not passing this parameter
        "hypothesis_template": ["This example is {}.", "This example is {}."], # optional: using the defaults, similar to not passing this parameter
        "multi_label": [False, False], # optional: using the defaults, similar to not passing this parameter
}
dataframe = pd.DataFrame(input_data)
dataframe

Unnamed: 0,inputs,candidate_labels,hypothesis_template,multi_label
0,this is a test,"[english, german]",This example is {}.,False
1,this is another test,"[english, german]",This example is {}.,False


In [11]:
%time
pipeline.infer(dataframe)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs


Unnamed: 0,time,in.candidate_labels,in.hypothesis_template,in.inputs,in.multi_label,out.labels,out.scores,out.sequence,check_failures
0,2023-06-09 12:15:54.467,"[english, german]",This example is {}.,this is a test,False,"[english, german]","[0.504054605960846, 0.49594545364379883]",this is a test,0
1,2023-06-09 12:15:54.467,"[english, german]",This example is {}.,this is another test,False,"[english, german]","[0.5037839412689209, 0.4962160289287567]",this is another test,0


## Undeploy Pipelines

In [12]:
for pipeline in wl.list_pipelines():
    pipeline.undeploy()

Waiting for undeployment - this will take up to 45s ..................................... ok
Waiting for undeployment - this will take up to 45s .................................... ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
 ok
