In [1]:
import os

import numpy as np
from onnxruntime import (
    GraphOptimizationLevel,
    InferenceSession,
    SessionOptions,
    get_all_providers,
    get_available_providers,
)
from scipy.special import softmax
from transformers import BertTokenizer


def create_model_for_provider(model_path: str, provider: str = 'CPUExecutionProvider') -> InferenceSession:
    """Create an ONNX Runtime inference session bound to a single execution provider.

    Args:
        model_path: Path of the ONNX model file to load.
        provider: Name of the execution provider to run on.

    Returns:
        An ``InferenceSession`` for ``model_path`` with provider fallback disabled.

    Raises:
        AssertionError: If ``provider`` is not usable in the installed
            ONNX Runtime build.
    """
    # get_all_providers() lists every provider ONNX Runtime knows about, including
    # ones that are not compiled into this build; validate against the providers
    # that can actually be used here.
    available = get_available_providers()
    assert provider in available, f"provider {provider} not found, {available}"

    # Few properties that might have an impact on performance (provided by MS).
    options = SessionOptions()
    # Thread count is taken from the NUM_THREADS environment variable (default 4).
    options.intra_op_num_threads = int(os.environ.get('NUM_THREADS', 4))
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

    # Load the model as a graph and bind it to the requested provider only.
    session = InferenceSession(model_path, options, providers=[provider])
    session.disable_fallback()
    return session

In [41]:
# The tokenizer files are cached in a local directory. Fetch and save them on the
# first run so this cell also works on a fresh checkout (Restart Kernel -> Run All).
TOKENIZER_DIR = 'roberta_sts_tokenizer'
if not os.path.isdir(TOKENIZER_DIR):
    BertTokenizer.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-330M-Similarity').save_pretrained(TOKENIZER_DIR)
tokenizer = BertTokenizer.from_pretrained(TOKENIZER_DIR)
model = create_model_for_provider('./roberta_sts_300m.quant.onnx')

In [30]:
def sim(texta, textb):
    """Score how similar two texts are using the loaded ONNX model.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        texta: First text of the pair.
        textb: Second text of the pair.

    Returns:
        A dict with:
            'logits':  the model's raw scores for the pair,
            'softmax': those scores normalised into probabilities,
            'prob':    the probability at class index 1,
            'label':   '相似' when class index 1 scores highest, otherwise '不同'.
    """
    # Encode both texts as one sequence and wrap it in a list to add a batch axis.
    input_ids = np.array([tokenizer.encode(texta, textb)], dtype=np.int64)
    # NOTE(review): assumes the first model output is a (1, num_classes) logits array - confirm.
    logits = model.run(None, {"input": input_ids})[0]

    # Normalise over the class axis only. The default axis=None would normalise
    # across the whole array, which is wrong as soon as there is more than one row.
    probs = softmax(logits, axis=-1)
    probs_row = probs.astype(float).tolist()[0]

    label = '相似' if probs.argmax(-1)[0] == 1 else '不同'
    return {
        'logits': logits.astype(float).tolist()[0],
        'softmax': probs_row,
        'prob': probs_row[1],
        'label': label
    }

In [33]:
# Example: score one pair of sentences.
sim(texta='我要去吃饭', textb='去吃饭')

{'logits': [-0.8455033302307129, 0.8869611024856567],
 'softmax': [0.15027260780334473, 0.8497273921966553],
 'prob': 0.8497273921966553,
 'label': '相似'}

In [37]:
%%timeit
sim('我要去吃饭', '不吃饭')

22.8 ms ± 1.32 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
