Note: to run these commands, you will need to run `pip install transformers` and `pip install torch`.

Also, you must have `onnx` installed, but as of this writing, `pip install onnx` does not work on ARM64 devices. You must use conda to install a binary version instead: https://anaconda.org/conda-forge/onnx

# Using DeBERTa

In [2]:
import json
from transformers import pipeline

# Build a zero-shot classification pipeline around the DeBERTa checkpoint.
classifier = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/deberta-v3-large-zeroshot-v1",
)

# The sentence to classify and the labels it is scored against.
sequence_to_classify = "The prime minister met with international leaders"
candidate_labels = [
    "weather",
    "entertainment",
    "sports",
    "finance",
    "science",
    "politics",
]

# multi_label=False is passed straight through to the pipeline call.
output = classifier(
    sequence_to_classify,
    candidate_labels=candidate_labels,
    multi_label=False,
)

# Pretty-print the pipeline result as indented JSON.
report = json.dumps(output, indent=4)
print(report)

Device set to use cpu


{
    "sequence": "The prime minister met with international leaders",
    "labels": [
        "politics",
        "finance",
        "entertainment",
        "science",
        "sports",
        "weather"
    ],
    "scores": [
        0.998672604560852,
        0.00031738588586449623,
        0.0002589169598650187,
        0.00025301624555140734,
        0.0002507023746147752,
        0.00024740086519159377
    ]
}


In [3]:
# Second example: score a customer question against a set of departments.
#
# This cell reuses `classifier` (and the `json` import) created in the previous
# cell. Rebuilding the pipeline here would reload the whole checkpoint only to
# classify one more sentence, so run the previous cell first.
sequence_to_classify = "How many calories are in your hamburger?"
candidate_labels = [
    "sales",
    "customer support",
    "technical support",
    "accounting",
    "marketing",
    "shipping and orders",
]

# multi_label=False is passed straight through to the pipeline call.
output = classifier(sequence_to_classify, candidate_labels, multi_label=False)

# Pretty-print the pipeline result as indented JSON.
print(json.dumps(output, indent=4))

Device set to use cpu


{
    "sequence": "How many calories are in your hamburger?",
    "labels": [
        "marketing",
        "accounting",
        "sales",
        "customer support",
        "technical support",
        "shipping and orders"
    ],
    "scores": [
        0.634022057056427,
        0.12989985942840576,
        0.06265775114297867,
        0.0603039376437664,
        0.06020446866750717,
        0.052911896258592606
    ]
}


# Converting the model from PyTorch to ONNX

In [4]:
# Load the checkpoint that will be converted, together with its tokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "MoritzLaurer/deberta-v3-large-zeroshot-v1"

# Both objects are loaded from the same checkpoint name.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [5]:
# Export the loaded model to ONNX, using a sample input for the exporter to trace
import torch

# One throwaway sentence (batch of 1); return_tensors="pt" yields PyTorch tensors
# from which input_ids and attention_mask are taken below.
inputs = tokenizer("This is a dummy input for export.", return_tensors="pt")
example_args = (inputs["input_ids"], inputs["attention_mask"])

# Axes that are given symbolic names in the exported graph: batch and sequence
# length for both inputs, batch only for the logits output.
batch_and_sequence = {0: "batch_size", 1: "sequence_length"}
dynamic_axes = {
    "input_ids": dict(batch_and_sequence),
    "attention_mask": dict(batch_and_sequence),
    "logits": {0: "batch_size"},
}

# Write the ONNX file into the current working directory.
torch.onnx.export(
    model,
    example_args,
    "zero_shot_classifier.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes=dynamic_axes,
    opset_version=13,
)


# We'll use a preoptimized model
https://huggingface.co/protectai/deberta-v3-large-zeroshot-v1-onnx 

# Test ONNX locally with QNN EP

In [2]:
import onnxruntime
import numpy as np
import torch
import gc
from transformers import AutoTokenizer

# --- Parameters -------------------------------------------------------------
text = "How many calories are in your hamburger?"
candidate_labels = [
    "sales",
    "customer support",
    "technical support",
    "accounting",
    "marketing",
    "shipping and orders",
]
model_path = r"..\AImodels\zero_shot_classifier.onnx"
tokenizer_name = "MoritzLaurer/deberta-v3-large-zeroshot-v1"

# --- Tokenizer and inference session -----------------------------------------
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Run through the QNN (aka Qualcomm AI Engine Direct) execution provider,
# pointing it at the QnnHtp.dll backend.
options = onnxruntime.SessionOptions()
qnn_provider_options = [{"backend_path": "QnnHtp.dll"}]
session = onnxruntime.InferenceSession(
    model_path,
    sess_options=options,
    providers=["QNNExecutionProvider"],
    provider_options=qnn_provider_options,
)

# --- Build premise/hypothesis pairs -------------------------------------------
# One hypothesis sentence per candidate label, each paired with the same text.
template = "This example is {}."
hypotheses = list(map(template.format, candidate_labels))
premises = [text] * len(candidate_labels)

# Tokenize the pairs into NumPy arrays, padded to a common length.
encoded = tokenizer(
    premises,
    hypotheses,
    return_tensors="np",
    truncation=True,
    padding=True,
)

# --- Inference ---------------------------------------------------------------
# Feed only the two inputs the exported graph was given names for.
onnx_inputs = {name: encoded[name] for name in ("input_ids", "attention_mask")}
outputs = session.run(None, onnx_inputs)
logits = outputs[0]

# Drop the session reference once inference is done
# (presumably so the provider's resources can be released -- confirm).
del session

# --- Post-processing -----------------------------------------------------------
# Column 0 is used as the entailment score; softmax across the labels turns the
# per-label scores into a distribution that sums to 1.
entailment_scores = logits[:, 0]
probs = torch.softmax(torch.from_numpy(entailment_scores), dim=0).numpy()

# Pair each label with its probability, highest first.
results = list(zip(candidate_labels, probs))
results.sort(key=lambda pair: pair[1], reverse=True)

# Display
for label, score in results:
    print(f"{label}: {score:.4f}")


gc.collect()


marketing: 0.6341
accounting: 0.1299
sales: 0.0627
customer support: 0.0603
technical support: 0.0602
shipping and orders: 0.0529


3

# Test the Azure deployment of the model
You will need the endpoint URL and the API key for your deployment.

In [1]:
# Test deployment: POST one classification request to the Azure ML scoring endpoint.
import json
import os

import requests

# Scoring endpoint; set AZURE_ML_ENDPOINT_URL to point at a different deployment.
url = os.environ.get(
    "AZURE_ML_ENDPOINT_URL",
    "https://hybridaiworkspace-ukxcp.eastus.inference.ml.azure.com/score",
)

# The endpoint key is a secret and must never be stored in the notebook.
# Read it from the environment instead. Any key that was ever committed in
# this cell should be treated as compromised and rotated.
api_key = os.environ.get("AZURE_ML_ENDPOINT_KEY")
if not api_key:
    raise RuntimeError(
        "Set the AZURE_ML_ENDPOINT_KEY environment variable to the endpoint key "
        "before running this cell."
    )

# Request payload: the text to classify and the candidate labels.
data = {
    "text": "How many calories are in your hamburger?",
    "labels": [
        "sales",
        "customer support",
        "technical support",
        "accounting",
        "marketing",
        "shipping and orders",
    ],
}

headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# A timeout keeps the cell from hanging forever on an unreachable endpoint, and
# raise_for_status() surfaces HTTP errors (e.g. 401/403 for a bad key) instead
# of trying to parse an error page as a result.
response = requests.post(url, json=data, headers=headers, timeout=60)
response.raise_for_status()

# Parse the JSON response body.
# NOTE(review): the recorded output uses double quotes, which suggests the
# endpoint returns a JSON-encoded string rather than a list -- confirm before
# indexing into `parsed`.
parsed = json.loads(response.content)

print(parsed)


[["marketing", 0.6340243816375732], ["accounting", 0.12989859282970428], ["sales", 0.06265752762556076], ["customer support", 0.06030360609292984], ["technical support", 0.060204170644283295], ["shipping and orders", 0.05291173234581947]]
