<a href="https://colab.research.google.com/github/davidandw190/faas-dl-inference/blob/main/notebooks/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's third-party dependencies into the kernel's environment.
# NOTE(review): no versions are pinned here, while later cells import
# `datasets.load_metric` and `transformers.convert_graph_to_onnx` and pass
# `evaluation_strategy` to `TrainingArguments` — presumably these need older
# releases of `datasets`/`transformers`; confirm and pin the known-good versions.
%pip install datasets transformers onnx onnxruntime

For the purpose of this prototype, a small distilled BERT model from Microsoft is used as the pre-trained model, which we fine-tune on the emotion classification task.

- See https://huggingface.co/microsoft/xtremedistil-l6-h256-uncased for details.


In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Checkpoint shared by the tokenizer loaded here and the classifier loaded in a
# later cell.
model_name = 'microsoft/xtremedistil-l6-h256-uncased'

# Fetch the tokenizer that belongs to the checkpoint, then the "emotion" dataset.
tokenizer = AutoTokenizer.from_pretrained(model_name)
dataset = load_dataset("emotion")

def tokenize_function(examples, max_length=128):
    """Tokenize the ``"text"`` entry of a batch of dataset examples.

    Args:
        examples: Mapping with a ``"text"`` entry, as handed over by
            ``dataset.map``.
        max_length: Length every sequence is padded/truncated to. Defaults to
            128, the value that used to be hard-coded here, so existing
            single-argument callers behave exactly as before.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Run the tokenizer over the dataset, one batch of examples per call.
tokenized_datasets = dataset.map(function=tokenize_function, batched=True)


In [3]:
# Pick the splits that are handed to the Trainer in a later cell.
# NOTE(review): the "test" split is used for the per-epoch evaluation during
# training as well as for the final ONNX comparison — if the dataset also ships
# a "validation" split, that is presumably the better choice for monitoring
# training; confirm.
full_train_dataset, full_eval_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["test"],
)

In [4]:
import torch

# Prefer the first CUDA device when one is available, otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

cuda:0


In [None]:
from transformers import AutoModelForSequenceClassification

# Load the checkpoint with a sequence-classification head of 6 labels and place
# it on the device selected above.
# NOTE(review): 6 is presumably the number of classes in the "emotion" dataset —
# confirm against the dataset's label feature.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=6,
).to(device)


In [None]:
import numpy as np
from datasets import load_metric

# Accuracy metric object; its `compute` method is used by `compute_metrics`
# during training and again for the ONNX Runtime comparison later on.
# NOTE(review): `load_metric` is, as far as I know, deprecated in newer
# `datasets` releases in favour of the separate `evaluate` package — confirm
# against the installed version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level accuracy ``metric``.

    Args:
        eval_pred: Pair ``(logits, labels)``. The index of the largest logit
            along the last axis is taken as the predicted class.

    Returns:
        The result of ``metric.compute`` for those predictions and labels.
    """
    scores, gold = eval_pred
    return metric.compute(
        predictions=np.argmax(scores, axis=-1),
        references=gold,
    )

In [None]:
from transformers import TrainingArguments
from transformers import Trainer

# Fine-tuning hyper-parameters; "test_trainer" is the Trainer's output directory
# and the model is evaluated once per epoch.
# NOTE(review): newer `transformers` releases appear to spell the last argument
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="test_trainer",
    learning_rate=3e-05,
    num_train_epochs=24,
    per_device_train_batch_size=128,
    evaluation_strategy="epoch",
)

# Bundle the model, the data splits and the accuracy callback into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=full_train_dataset,
    eval_dataset=full_eval_dataset,
)

In [9]:
# Run the fine-tuning loop; the returned TrainOutput is displayed as the cell's
# result, so keep this call as the last bare expression.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.192076,0.6135
2,No log,0.928103,0.7125
3,No log,0.774844,0.7435
4,1.061000,0.684443,0.764
5,1.061000,0.616006,0.8105
6,1.061000,0.545634,0.8495
7,1.061000,0.505468,0.8485
8,0.584700,0.455692,0.87
9,0.584700,0.419475,0.88
10,0.584700,0.395776,0.885


TrainOutput(global_step=3000, training_loss=0.47109615834554036, metrics={'train_runtime': 770.0358, 'train_samples_per_second': 498.678, 'train_steps_per_second': 3.896, 'total_flos': 1423136673497088.0, 'train_loss': 0.47109615834554036, 'epoch': 24.0})

In [10]:
# Evaluate the fine-tuned model on the Trainer's `eval_dataset`; the returned
# metrics dict is displayed as the cell's result.
trainer.evaluate()

{'eval_loss': 0.24925249814987183,
 'eval_accuracy': 0.924,
 'eval_runtime': 2.4446,
 'eval_samples_per_second': 818.114,
 'eval_steps_per_second': 102.264,
 'epoch': 24.0}

Exporting the PyTorch model to ONNX format for serving with ONNX Runtime Web

In [11]:
import transformers
import transformers.convert_graph_to_onnx as onnx_convert
from pathlib import Path

In [None]:
# Wrap the fine-tuned model and tokenizer in a text-classification pipeline,
# which is the object handed to the ONNX converter at the end of this cell.
pipeline = transformers.pipeline("text-classification",model=model,tokenizer=tokenizer)

# Move the model to the CPU before exporting.
# NOTE(review): presumably `.to` updates the module in place, so the pipeline
# built above sees the CPU weights as well — confirm. Keep this statement
# order (pipeline first, move second, export last) unless the pipeline's
# device handling in the installed `transformers` version has been checked.
model = model.to("cpu")

# Export the pipeline's model to classifier.onnx using ONNX opset 14, without
# the external-data format (use_external_format=False).
onnx_convert.convert_pytorch(pipeline, opset=14, output=Path("classifier.onnx"), use_external_format=False)

In [None]:
# NOTE(review): `onnx` and `onnxruntime` are already installed by the first
# cell of this notebook, so this cell looks redundant — confirm before removing.
%pip install onnx onnxruntime

In [None]:
from onnxruntime.quantization import quantize_dynamic, QuantType

# Dynamically quantize the exported graph with unsigned 8-bit weights and write
# the result to a second file next to the original.
quantize_dynamic(
    model_input="classifier.onnx",
    model_output="classifier_int8.onnx",
    weight_type=QuantType.QUInt8,
)

Evaluating the accuracy using ONNX-Runtime inference - validating PyTorch inference versus ONNX-Runtime

In [15]:
import onnxruntime as ort
import numpy as np

In [16]:
# Open one ONNX Runtime session per exported graph. The execution provider is
# named explicitly so the sessions behave the same regardless of which
# onnxruntime build is installed.
session = ort.InferenceSession("classifier.onnx", providers=["CPUExecutionProvider"])
session_int8 = ort.InferenceSession("classifier_int8.onnx", providers=["CPUExecutionProvider"])

# Build the model inputs from the tokenized evaluation split. The dtype is
# forced to int64 because NumPy's default integer type is platform dependent
# (it can be 32-bit), whereas the graph was exported from PyTorch integer
# tensors — presumably int64; confirm with `session.get_inputs()`.
input_feed = {
    name: np.asarray(full_eval_dataset[name], dtype=np.int64)
    for name in ("input_ids", "attention_mask", "token_type_ids")
}

In [17]:
# Run the whole evaluation set through each session in a single call; only
# 'output_0' is requested, so each result list holds exactly one array.
(out,) = session.run(['output_0'], input_feed)
(out_int8,) = session_int8.run(['output_0'], input_feed)

# Predicted class = index of the largest value along the last axis.
predictions = out.argmax(axis=-1)
predictions_int8 = out_int8.argmax(axis=-1)

In [18]:
# Accuracy of the un-quantized ONNX model on the evaluation split; the result
# is displayed as the cell's output for comparison with `trainer.evaluate()`.
metric.compute(predictions=predictions, references=full_eval_dataset['label'])

{'accuracy': 0.924}

In [19]:
# Accuracy of the int8-quantized ONNX model on the same evaluation split, shown
# as the cell's output so the cost of quantization can be read off directly.
metric.compute(predictions=predictions_int8, references=full_eval_dataset['label'])

{'accuracy': 0.849}

In [20]:
# Offer both exported models as browser downloads when running on Colab.
# Outside Colab the `google.colab` package cannot be imported; in that case the
# download is skipped instead of raising, since both files were written to the
# working directory by the export and quantization cells.
try:
    from google.colab import files
except ImportError:
    files = None

for onnx_path in ('classifier_int8.onnx', 'classifier.onnx'):
    if files is None:
        print(f"Not running on Colab; {onnx_path} is available in the working directory.")
    else:
        files.download(onnx_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>