In [2]:
import sys
import math
from pqdict import pqdict

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and sequence-classification model, both loaded from the
# "textattack/roberta-base-MRPC" checkpoint. inference_mrpc uses them to
# score sentence pairs.
paraphraseTokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-MRPC")  
paraphraseModel = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-MRPC")

from sentence_transformers import SentenceTransformer, util

# Display labels for the classifier's two output positions. inference_mrpc
# reports the probability at position 1 under classes[1]; the label order is
# assumed to match the checkpoint's output order — TODO confirm.
classes = ["not paraphrase", "is paraphrase"]

def inference_mrpc(seq1s, seq2s):
    """Print the paraphrase probability for each aligned sentence pair.

    Pair ``i`` is ``(seq1s[i], seq2s[i])``. Each pair is tokenized together,
    run through the MRPC classifier, and the softmax probability at output
    position 1 is printed as a whole-number percentage under ``classes[1]``.

    Args:
        seq1s: Sequence of first sentences.
        seq2s: Sequence of second sentences, indexed in step with ``seq1s``.

    Returns:
        None. Results are written to stdout only.

    Raises:
        IndexError: If ``seq2s`` is shorter than ``seq1s``.
    """
    # Inference only: disabling autograd avoids building a graph for every
    # forward pass. The printed values are unaffected.
    with torch.no_grad():
        for i, first in enumerate(seq1s):
            encoded = paraphraseTokenizer.encode_plus(
                first, seq2s[i], return_tensors="pt")
            logits = paraphraseModel(**encoded)[0]
            # One pair per call, so take row 0 of the (1, num_labels) output.
            probabilities = torch.softmax(logits, dim=1).tolist()[0]
            print(f"{classes[1]}: {round(probabilities[1] * 100)}%")

In [3]:
class AStarSearch:
    """Bidirectional best-first search over sentences toward a hypothesis.

    Two priority queues (index 0 = forward, index 1 = backward) hold candidate
    sentences keyed by a similarity score against the current hypothesis.
    Sentences that have been expanded are recorded in ``closed_forward`` and
    ``closed_backward``; the search stops when one side pops a sentence that
    the other side has already closed, or when both queues are empty.
    """

    def __init__(self, model_name="roberta-large-nli-stsb-mean-tokens"):
        """Create empty search state and load the sentence encoder.

        Args:
            model_name: Name of the SentenceTransformer checkpoint to load.
                Defaults to the checkpoint this class was written against.
        """
        self.closed_forward = set()
        self.closed_backward = set()
        self.entailments = set()
        self.contradictions = set()
        self.hypothesis = ""

        self.sbert = SentenceTransformer(model_name)

    def word_similarity(self, s1, s2):
        """Return the fraction of words in ``s1`` that also occur in ``s2``.

        Both strings are split on whitespace and compared as whole,
        case-sensitive tokens. (Testing ``word in s2`` against the raw string
        would be a substring match, so "a" would match "cat".)

        Args:
            s1: Sentence whose words are counted.
            s2: Sentence that supplies the vocabulary to match against.

        Returns:
            A float in ``[0, 1]``; ``0.0`` when ``s1`` contains no words.
        """
        words = s1.split()
        if not words:
            return 0.0
        vocabulary = set(s2.split())
        matches = sum(1 for word in words if word in vocabulary)
        return matches / len(words)

    def inference_sts(self, seqs1, seqs2):
        """Score the first pair ``(seqs1[0], seqs2[0])``.

        The score is the cosine similarity of the two sentence embeddings
        multiplied by ``word_similarity`` of the same pair.

        Args:
            seqs1: List of sentences; only element 0 contributes to the score.
            seqs2: List of sentences; only element 0 contributes to the score.

        Returns:
            The combined score for the first pair (the cosine term comes from
            a tensor, so this is presumably a scalar tensor — confirm), or
            ``None`` when ``seqs1`` is empty.
        """
        embeddings1 = self.sbert.encode(seqs1, convert_to_tensor=True)
        embeddings2 = self.sbert.encode(seqs2, convert_to_tensor=True)
        cosine_scores = util.pytorch_cos_sim(embeddings1, embeddings2)
        if len(seqs1) == 0:
            return None
        # NOTE(review): the previous loop returned on its first iteration, so
        # only pair 0 was ever scored. Every caller in this class passes
        # single-element lists and uses the result as one scalar, so that
        # contract is kept here and made explicit.
        semantic = cosine_scores[0][0]
        lexical = self.word_similarity(seqs1[0], seqs2[0])
        return semantic * lexical

    def clear(self):
        """Empty both closed sets in place so a new query starts fresh."""
        # These are the attributes created in __init__; the names closedF and
        # closedR never exist on the instance.
        self.closed_forward.clear()
        self.closed_backward.clear()

    def generate_motion(self, open_set, side):
        """Add or re-score every non-closed entailment in one side's queue.

        Args:
            open_set: Two-element sequence of priority queues
                (0 = forward, 1 = backward).
            side: Which queue and closed set to use (0 or 1).
        """
        closed = self.closed_forward if side == 0 else self.closed_backward
        opened = open_set[side]
        # NOTE(review): generate_premises is not defined in the visible part
        # of this class; presumably it refreshes self.entailments — confirm.
        self.generate_premises()
        for premise in self.entailments:
            if premise in closed:
                continue
            cost = self.inference_sts([premise], [self.hypothesis])
            # Keep the larger of the stored and the newly computed score.
            if premise not in opened or cost > opened[premise]:
                opened[premise] = cost

    def query(self, premises, hypothesis):
        """Run the bidirectional search from ``premises`` toward ``hypothesis``.

        Args:
            premises: The starting sentence. Despite the plural name it is
                used as a single queue key and scored as a single sequence,
                so it must be one hashable sentence.
            hypothesis: The target sentence; it seeds the backward queue.

        On completion ``closed_forward`` holds the union of the sentences
        closed by both directions.
        """
        self.clear()
        self.hypothesis = hypothesis
        open_lists = [pqdict({}), pqdict({})]
        open_lists[0][premises] = self.inference_sts([premises], [hypothesis])
        # The backward frontier starts at the goal. Seeding the hypothesis
        # into queue 0 would leave the backward side with nothing to pop.
        open_lists[1][hypothesis] = self.inference_sts([hypothesis], [hypothesis])

        frontiers_met = False
        while not frontiers_met and (open_lists[0] or open_lists[1]):
            for side in (0, 1):
                frontier = open_lists[side]
                # NOTE(review): pqdict.pop() with no key removes the top
                # item, which for a default pqdict is the smallest score.
                # Scores here are similarities — confirm the intended order.
                expanded = bool(frontier)
                if expanded:
                    optimal = frontier.pop()
                self.generate_motion(open_lists, side)
                if not expanded:
                    # Nothing was popped on this side, so there is no node to
                    # test against the opposite closed set.
                    continue
                if side == 0:
                    closed_own = self.closed_forward
                    closed_other = self.closed_backward
                else:
                    closed_own = self.closed_backward
                    closed_other = self.closed_forward
                if optimal in closed_other:
                    frontiers_met = True
                    break
                closed_own.add(optimal)

        self.closed_forward = self.closed_forward | self.closed_backward