In [2]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# load model and tokenizer
model_id = "sentiment"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
dummy_model_input = tokenizer("অপকর্মের সাথে পুলিশ জড়িত থাকবে এটা তো কমন বিষয়", return_tensors="pt")

# export
torch.onnx.export(
    model, 
    tuple(dummy_model_input.values()),
    f="torch-model.onnx",  
    input_names=['input_ids', 'attention_mask'], 
    output_names=['logits'], 
    dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}, 
                  'attention_mask': {0: 'batch_size', 1: 'sequence'}, 
                  'logits': {0: 'batch_size', 1: 'sequence'}}, 
    do_constant_folding=True, 
    opset_version=13, 
)

OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like sentiment is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [6]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForQuestionAnswering

model_id = "sentiment"
onnx_path = Path("onnx")
task = "text-classification"

# load vanilla transformers and convert to onnx
model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)


Some weights of the model checkpoint at sentiment were not used when initializing DistilBertForQuestionAnswering: ['pre_classifier.bias', 'classifier.weight', 'pre_classifier.weight', 'classifier.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at sentiment and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  mask, torch.te

('onnx/tokenizer_config.json',
 'onnx/special_tokens_map.json',
 'onnx/vocab.txt',
 'onnx/added_tokens.json',
 'onnx/tokenizer.json')

In [42]:
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer
import time 
start = time.time()
onnx_path = "onnx"
model = ORTModelForSequenceClassification.from_pretrained(onnx_path, file_name="torch-model.onnx")
tokenizer = AutoTokenizer.from_pretrained(onnx_path)

clx = pipeline("text-classification",model=model, tokenizer=tokenizer)
output = clx("ক্রোস ফায়ার দেওয়া হোক এইসব জানোয়ার অমানুষদের")
print(output[0]["score"])
end = time.time()
print (end - start)

0.5603016018867493
0.22680449485778809


In [30]:
clx

<transformers.pipelines.text_classification.TextClassificationPipeline at 0x7f14c8df2200>

In [25]:
import time 
start = time.time()
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import torch

text = "ক্রোস ফায়ার দেওয়া হোক এইসব জানোয়ার অমানুষদের"
tokenizer = AutoTokenizer.from_pretrained("sentiment")
inputs = tokenizer(text, return_tensors="pt")
model = AutoModelForSequenceClassification.from_pretrained("sentiment")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
print(predicted_class_id)
model.config.id2label[predicted_class_id]
end = time.time()
print (end - start)

0
0.4353194236755371


In [43]:
from fastapi import FastAPI
from pydantic import BaseModel
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer
import torch


app = FastAPI()

class TextClassificationRequest(BaseModel):
    text: str

class TextClassificationResponse(BaseModel):
    label: str
    score: float


# Load tokenizer and model
onnx_path = "onnx"
model = ORTModelForSequenceClassification.from_pretrained(onnx_path, file_name="torch-model.onnx")
tokenizer = AutoTokenizer.from_pretrained(onnx_path)


@app.post("/classify", response_model=TextClassificationResponse)
def classify(request: TextClassificationRequest):
    clx = pipeline("text-classification",model=model, tokenizer=tokenizer)
    outputs = clx(request.text)
    pred = output[0]["label"]
    score = output[0]["score"]
    return TextClassificationResponse(label=pred, score=score)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)