In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint identifiers and cache location, named once so the tokenizer and
# the classifier below are always loaded from the same checkpoint.
MRPC_CHECKPOINT = "bert-base-cased-finetuned-mrpc"
STS_CHECKPOINT = "roberta-base-nli-stsb-mean-tokens"
MODEL_CACHE_DIR = "../model/"

# Tokenizer / sequence-classification pair used by inference_mrpc.
# MODEL_CACHE_DIR is passed as `cache_dir` to both loads.
paraphraseTokenizer = AutoTokenizer.from_pretrained(
    MRPC_CHECKPOINT, cache_dir=MODEL_CACHE_DIR)
paraphraseModel = AutoModelForSequenceClassification.from_pretrained(
    MRPC_CHECKPOINT, cache_dir=MODEL_CACHE_DIR)

from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used by inference_sts.
sentenceTransformer = SentenceTransformer(STS_CHECKPOINT)

# Display labels; inference_mrpc pairs classes[1] with output index 1.
classes = ["not paraphrase", "is paraphrase"]

def inference_mrpc(seq1s, seq2s):
    """Print the "is paraphrase" probability for each positional sentence pair.

    ``seq1s[i]`` is scored against ``seq2s[i]``. Each pair is encoded together
    with ``paraphraseTokenizer``, passed through ``paraphraseModel``, and the
    softmax probability at index 1 is printed as a rounded percentage prefixed
    with ``classes[1]``. Nothing is returned.

    Args:
        seq1s: Sequence of first sentences; its length sets how many pairs
            are scored.
        seq2s: Sequence of second sentences, read at the same positions as
            ``seq1s``.

    Raises:
        IndexError: If ``seq2s`` is a list or tuple shorter than ``seq1s``.
            The error surfaces when the missing position is reached, after
            earlier pairs have already been printed.
    """
    # Inference only: without no_grad() every forward pass records an autograd
    # graph that is never used, costing memory and time for no benefit.
    with torch.no_grad():
        for i, seq1 in enumerate(seq1s):
            encoded_pair = paraphraseTokenizer.encode_plus(
                seq1, seq2s[i], return_tensors="pt")
            # Element 0 of the model output is treated as the class logits.
            logits = paraphraseModel(**encoded_pair)[0]
            # One pair per call, so row 0 holds the only set of probabilities.
            paraphrase_results = torch.softmax(logits, dim=1).tolist()[0]
            print(f"{classes[1]}: {round(paraphrase_results[1] * 100)}%")

def inference_sts(sentences1, sentences2):
    """Print the cosine similarity of each positional sentence pair.

    Both inputs are embedded with ``sentenceTransformer``, the full cosine
    similarity matrix between the two embedding sets is computed, and the
    diagonal entry ``[i][i]`` (sentence ``i`` of each input) is printed with
    four decimal places. Nothing is returned.

    Args:
        sentences1: Sentences forming the left side of each pair; its length
            sets how many scores are printed.
        sentences2: Sentences forming the right side of each pair.
    """
    left_embeddings = sentenceTransformer.encode(
        sentences1, convert_to_tensor=True)
    right_embeddings = sentenceTransformer.encode(
        sentences2, convert_to_tensor=True)

    # Entry [r][c] compares left sentence r with right sentence c; only the
    # matching positions (r == c) are reported below.
    similarity_matrix = util.pytorch_cos_sim(left_embeddings, right_embeddings)

    line_template = "Similarity Score: {:.4f}"
    pair_count = len(sentences1)
    for pair_index in range(pair_count):
        score = similarity_matrix[pair_index][pair_index]
        print(line_template.format(score))

In [2]:
# Sentence pairs to compare: each even-numbered seq is scored against the
# odd-numbered seq that follows it.
seq0, seq1 = (
    "I caused him to submit his resignation",
    "I caused the submission of his resignation",
)
seq2, seq3 = (
    "I caused him to submit the resignation",
    "I caused the submission of his resignation",
)
seq4, seq5 = (
    "I caused the submission of the resignation",
    "I caused the submission of his resignation",
)
seq6, seq7 = (
    "cut up an apple",
    "cut an apple into piece",
)

# Left-hand and right-hand members of the pairs, aligned by position.
sentences1, sentences2 = (
    [seq0, seq2, seq4, seq6],
    [seq1, seq3, seq5, seq7],
)

# Paraphrase classifier first, then embedding cosine similarity.
inference_mrpc(sentences1, sentences2)
inference_sts(sentences1, sentences2)

is paraphrase: 94%
is paraphrase: 94%
is paraphrase: 94%
is paraphrase: 93%
Similarity Score: 0.8999
Similarity Score: 0.9313
Similarity Score: 0.9685
Similarity Score: 0.9409
