In [49]:
from wordnet import *

def assign_all_relations_wordnet(k):
    """Run ``assign_relations`` once per word table held on ``k``.

    The tables ``k.nouns``, ``k.subsecAdj``, ``k.verbs`` and ``k.advs`` are
    processed in that order, each wrapped in a one-element list and paired
    with the POS tag "NN", "JJ", "VB" and "RB" respectively.  The search
    depth is fixed at 10 for every call.
    """
    table_to_pos = (
        ("nouns", "NN"),
        ("subsecAdj", "JJ"),
        ("verbs", "VB"),
        ("advs", "RB"),
    )
    for table_name, pos_tag in table_to_pos:
        # Resolve the attribute right before its call so access order is unchanged.
        assign_relations([getattr(k, table_name)], k, pos=pos_tag, depth=10)


def assign_relations(word_lists, k, pos, depth):
    """Record a relation for every pair of words that ``find_relation`` links.

    Each table in ``word_lists`` has the layout of ``k.nouns``:
    ``{wholeStr: {type1: node, type2: node}}``.  The tables are merged into
    one dict (a later table overrides an earlier one on a shared key), the
    keys are sorted, and every unordered pair is queried with
    ``find_relation`` using the lower-cased strings.  When the result is
    truthy, both per-type node dicts are handed to ``add_relation_wordnet``
    along with ``k`` and the relation.

    Note: the two compared nodes have to be of the same type; no type
    matching happens in this function, so it is presumably done by
    ``add_relation_wordnet`` -- TODO confirm.
    """
    merged = {}
    for table in word_lists:
        merged.update(table)

    for first_str, second_str in itertools.combinations(sorted(merged), 2):
        # Each value is a dict of the form {type1: node1, type2: node2}.
        first_nodes = merged[first_str]
        second_nodes = merged[second_str]

        # The relation is looked up on the whole strings, case-folded.
        relation = find_relation(first_str.lower(), second_str.lower(), pos, depth)
        if relation:
            add_relation_wordnet(first_nodes, second_nodes, k, relation)

In [50]:
class Knowledge:
    """Plain state holder: fragments, a pair counter, per-category token
    tables and a log of added pairs.  Every field starts out empty."""

    def __init__(self):
        self.frags = {}
        self.numPairs = 0
        # One independent, empty table per category, in this insertion order.
        self.tokens = {
            category: {}
            for category in (
                'nouns',
                'subjectAdj',
                'RCs',
                'VPs',
                'NPs',
                'PPN',
                'PPV',
                'advs',
                'verbs',
                'CDs',
            )
        }
        self.pairs_added = []

    def update_modifier(self):
        """Run the modifier updates; currently only ``modifier_NP``."""
        self.modifier_NP()

    def modifier_NP(self):
        """Placeholder hook; it has no implementation yet and does nothing."""
        return None

In [3]:
"""
refused + 'VERB' => 'Did not VERB'
refuses + 'VERB' => 'Does not VERB'
refuse + 'VERB' => 'Not VERB'
"""

"\nrefused + 'VERB' => 'Did not VERB'\nrefuses + 'VERB' => 'Does not VERB'\nrefuse + 'VERB' => 'Not VERB'\n"

In [51]:
from Udep2Mono.binarization import BinaryDependencyTree
from Udep2Mono import polarization
from Udep2Mono.util import btreeToList
from copy import deepcopy

class TreeFactory:
    """Builds small replacement subtrees, looked up by dependency relation."""

    def __init__(self):
        # Relation label -> bound builder method.
        self.tree_builder = dict(amod=self.build_amod)

    def buildTree(self, rel, word, wid):
        """Build the subtree registered for ``rel``.

        Raises ``KeyError`` when no builder is registered for ``rel``.
        """
        builder = self.tree_builder[rel]
        return builder(word, wid)

    def build_amod(self, noun, wid):
        """Return an ``amod`` node joining a placeholder "ADJ" leaf to ``noun``.

        The adjective leaf gets the word id ``wid-(wid-1)/2`` and the noun
        leaf keeps ``wid``.
        """
        # NOTE(review): wid-(wid-1)/2 equals (wid+1)/2, which is a float and is
        # not always adjacent to ``wid``; confirm this is the intended position.
        adj_leaf = BinaryDependencyTree("ADJ", "N", "N", 1024, wid=wid-(wid-1)/2, npos="JJ")
        noun_leaf = BinaryDependencyTree(noun, "N", "N", 1024, wid=wid, npos="NN")
        return BinaryDependencyTree("amod", adj_leaf, noun_leaf, 1025)


INFO:stanza:Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | gum       |
| pos       | gum       |
| lemma     | gum       |
| depparse  | gum       |
| ner       | ontonotes |

INFO:stanza:Use device: gpu
INFO:stanza:Loading: tokenize
INFO:stanza:Loading: pos
INFO:stanza:Loading: lemma
INFO:stanza:Loading: depparse
INFO:stanza:Loading: ner
INFO:stanza:Done loading processors!


In [52]:
import torch
import torchtext.vocab as vocab

from database import *
from pattern.en import pluralize, singularize

class LexicalGenerator:
    """Produces variants of a polarized dependency tree by lexical substitution.

    A handler is registered per dependency relation label.  Handlers edit the
    tree in place, snapshot the result into ``treeLog`` via ``save_tree`` and
    then roll the edited node back to its original state.

    Attributes:
        deptree: root of the tree currently being processed.
        length: value forwarded to ``btreeToList`` when serialising a tree.
        tree_factory: ``TreeFactory`` used to build replacement subtrees.
        treeLog: deep-copied snapshots of every generated tree.
        polarLog: initialised empty; not written to by this class.
    """

    def __init__(self):
        self.deptree = None
        self.length = 0
        self.tree_factory = TreeFactory()
        self.treeLog = []
        self.polarLog = []
        # Relation label -> handler.  The attribute keeps its original
        # spelling ("lexial") because it is visible to outside code.
        self.lexial_generation = {
             "acl": self.generate_acl_relcl,
             "acl:relcl": self.generate_acl_relcl,
             "advcl": self.generate_acl_relcl,
             "cc:preconj": self.generate_det,
             "det": self.generate_det,
             "det:predet": self.generate_det,
        }
        # Relations that have no handler yet, with the handler planned for each:
        #   "advmod": self.generate_advmod,
        #   "advmod:count": self.generate_advmod,
        #   "amod": self.generate_amod,
        #   "appos": self.generate_inherite,
        #   "aux": self.generate_aux,
        #   "aux:pass": self.generate_aux,
        #   "case": self.generate_case,
        #   "cc": self.generate_cc,
        #
        #   "ccomp": self.generate_ccomp,
        #   "compound": self.generate_inherite,
        #   "compound:prt": self.generate_inherite,
        #   "conj": self.generate_inherite,
        #   "cop": self.generate_inherite,
        #   "csubj": self.generate_nsubj,
        #   "csubj:pass": self.generate_nsubj,
        #   "dep": self.generate_dep,
        #
        #   "discourse": self.generate_discourse,
        #   "expl": self.generate_expl,
        #   "fixed": self.generate_inherite,
        #   "flat": self.generate_inherite,
        #   "goeswith": self.generate_inherite,
        #   "iobj": self.generate_inherite,
        #   "mark": self.generate_inherite,
        #   "nmod": self.generate_nmod,
        #   "nmod:npmod": self.generate_nmod,
        #   "nmod:tmod": self.generate_nmod,
        #   "nmod:poss": self.generate_nmod_poss,
        #   "nsubj": self.generate_nsubj,
        #   "nsubj:pass": self.generate_nsubj,
        #   "nummod": self.generate_nummod,
        #   "obj": self.generate_obj,
        #   "obl": self.generate_obj,
        #   "obl:npmod": self.generate_oblnpmod,
        #   "obl:tmod": self.generate_inherite,
        #   "parataxis": self.generate_inherite,
        #   "xcomp": self.generate_obj,

    def deptree_generate(self, length, tree):
        """Set ``tree`` as the working tree and run generation from its root."""
        self.deptree = tree
        self.length = length
        self.generate(self.deptree)

    def generate(self, tree):
        """Invoke the handler registered for ``tree.val``, if there is one.

        Leaf placeholders (the plain string "N") and ``None`` are ignored so
        that handlers can recurse into a child without checking it first.
        """
        if tree is None or isinstance(tree, str):
            return
        handler = self.lexial_generation.get(tree.val)
        if handler is not None:
            handler(tree)

    def save_tree(self, tree=None):
        """Print a bracketed rendering of a tree and return a deep copy of it.

        Args:
            tree: tree to serialise; defaults to ``self.deptree``.

        Returns:
            A deep copy of the tree that was serialised.  (Previously the
            whole ``self.deptree`` was copied even when ``tree`` was given,
            so the snapshot did not match what had just been printed.)
        """
        target = self.deptree if tree is None else tree
        generated, _, _, _ = btreeToList(target, self.length, {}, 0)
        generated = '[%s]' % ', '.join(map(str, generated)).replace("'", "")
        generated = generated.replace(",", "")
        print("New tree: ", generated)
        return deepcopy(target)

    def rollback(self, tree, backup):
        """Restore ``tree`` in place from ``backup``.

        Children are deep-copied so the backup can be reused for several
        rollbacks.
        """
        tree.val = backup.val
        tree.left = deepcopy(backup.left)
        tree.right = deepcopy(backup.right)
        tree.mark = backup.mark
        tree.npos = backup.npos
        tree.id = backup.id

    def generate_acl_relcl(self, tree):
        """Generate variants for a clausal-modifier node (acl, acl:relcl, advcl).

        When the right child carries a POS tag, word sets are fetched for its
        singularised form via ``get_word_sets``:

        * mark "+": log the tree with this node collapsed to the right child
          alone, then one tree per word of the first returned set (``hyper``)
          substituted for the right child.
        * mark "-": log the tree with the right child replaced by an ``amod``
          subtree from the factory, then one tree per word of the second
          returned set (``hypo``).

        When the right child has no POS tag, generation continues in the left
        child instead.
        """
        left = tree.left
        right = tree.right

        if right.npos is None:
            self.generate(left)
            return

        backup = deepcopy(tree)
        # The third and fourth returned sets are not used by this handler.
        hyper, hypo, _syn, _ant = get_word_sets(
            singularize(right.val), right.npos.lower())

        if right.mark == "+":
            # Collapse the node to its right child: drop the modifying clause.
            tree.val = right.val
            tree.mark = right.mark
            tree.npos = right.npos
            tree.id = right.id
            tree.left = "N"
            tree.right = "N"
            self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

            print(hyper)
            for word in hyper:
                tree.right.val = word
                self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

        elif right.mark == "-":
            amod_tree = self.tree_factory.buildTree("amod", right.val, right.id)
            tree.right = amod_tree
            self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

            print(hypo)
            for word in hypo:
                tree.right.val = word
                self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

    def generate_det(self, tree):
        """Generate variants for a determiner node by swapping the determiner.

        The determiner (left child) is looked up in ``quantifier``.  Generation
        always continues in the right child.  Then, for mark "+", each word in
        the entry's "<" list is substituted and logged; for mark "-", each
        word in its ">" list.  A determiner without an entry yields no
        substitutions instead of raising.
        """
        left = tree.left
        right = tree.right
        backup = deepcopy(tree)
        try:
            kb = quantifier.find({"word": left.val})[0]
        except IndexError:
            # No entry for this determiner: nothing to substitute.
            kb = None
        self.generate(right)

        if kb is None:
            return

        if left.mark == "+":
            for word in kb["<"]:
                tree.left.val = word
                self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

        if left.mark == "-":
            for word in kb[">"]:
                tree.left.val = word
                self.treeLog.append(self.save_tree())
            self.rollback(tree, backup)

In [8]:
# Demo: polarize a probe sentence, then run the lexical generator on each
# polarized tree returned by the pipeline.
# NOTE(review): the input string looks truncated, and the recorded output of
# this cell shows a different sentence ("all dogs who follow orders"), so the
# output is stale relative to this input.
sentences = ["All flowers which "]
annotations, _ = polarization.run_polarize_pipeline(
    sentences, verbose=2, parser="stanza")

lexicalGenerator = LexicalGenerator()
for annotation in annotations:
    # Each annotation is a 5-tuple; only the annotated string, the original
    # (for its length) and the polarized tree are used here.
    annotated, original, polarized, postags, polarized_tree = annotation
    print(annotated)
    lexicalGenerator.deptree_generate(len(original), polarized_tree)

100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
all↑ dogs↓ who↓ follow↓ orders↓
New tree:  [det↓ [DT all↑] [acl:relcl↓ [nsubj↓ [WP who↓] [obj↓ [NNS orders↓] [VBP follow↓]]] [amod [JJ ADJ] [NN dogs]]]]
{'corgi', 'a Cocker Spaniel', 'Mexican hairless', 'hunt', 'Great Pyrenees', 'pooch', 'Leonberg', 'hound', 'basenji', 'Vienna sausage', 'quest', 'carriage dog', 'A poodle', 'pug-dog', 'tree', 'poodle', 'barker', 'Welsh corgi', 'toy', 'coach dog', 'toy dog', 'a Great Dane', 'a spaniel', 'spitz', 'an Irish Setter', 'a rottweiler', 'a golden retriever', 'dalmatian', 'bow-wow', 'mongrel', 'Labrador Retriever', 'hunting dog', 'lapdog', 'trace', 'Belgian griffon', 'pug', 'cur', 'griffon', 'Newfoundland', 'doggy', 'working dog', 'run down', 'puppy', 'Brussels griffon', 'poodle dog', 'Catahoula leopard dog', 'Newfoundland dog', 'doggie', 'perisher', 'an Afgan Hound', 'Joy', 'beagle', 'mutt'}
New tree:  [det↓ [DT all↑] [acl:relcl↓ [nsubj↓ [WP who↓] [obj↓ [NNS orders↓] [VBP follow↓]]] [NNS corgi↓]]]
N

In [53]:
from pattern.en import conjugate, lemma, lexeme, PAST, SG, PRESENT

class ImplicativeGenerator:
    """Derives entailed trees from clausal complements of implicative verbs.

    The tree is searched for ``ccomp``/``xcomp`` nodes.  The governing verb
    (right child) is looked up in ``kb``; the first two parts of the entry's
    "Signature" (split on "/") decide what is generated.

    Attributes:
        kb: object exposing ``find(query)``; entries provide a "Signature".
        treeLog: deep-copied snapshots of every generated tree.
        polarLog: initialised empty; not written to by this class.
        deptree: root of the tree being processed.
        length: value forwarded to ``btreeToList`` when serialising a tree.
    """

    def __init__(self, length, kb, tree):
        self.kb = kb
        self.treeLog = []
        self.polarLog = []
        self.deptree = tree
        self.length = length

    def find_verbs(self, postags):
        """Return ``(word, postags[word][0])`` for every word whose second
        field contains "VB"."""
        return [
            (word, postags[word][0])
            for word in postags
            if 'VB' in postags[word][1]
        ]

    def fix_tense(self, verb, pos):
        """Conjugate ``verb`` to agree with the POS tag ``pos``.

        "VBD" gives past tense (person 1), "VBZ" gives present tense
        (person 3); any other tag returns ``verb`` unchanged.
        """
        if pos == "VBD":
            return conjugate(verb=verb, tense=PAST, person=1)
        elif pos == "VBZ":
            return conjugate(verb=verb, tense=PRESENT, person=3)
        else:
            return verb

    def search(self):
        """Run generation from the root of ``self.deptree``."""
        self.generate(self.deptree)

    def save_tree(self, tree=None):
        """Print a bracketed rendering of a tree and return a deep copy of it.

        Args:
            tree: tree to serialise; defaults to ``self.deptree``.

        Returns:
            A deep copy of the tree that was serialised.  (Previously the
            whole ``self.deptree`` was copied even when a subtree was given,
            so the "+/+" case logged the unchanged input tree rather than the
            entailed complement it had just printed.)
        """
        target = self.deptree if tree is None else tree
        generated, _, _, _ = btreeToList(target, self.length, {}, 0)
        generated = '[%s]' % ', '.join(map(str, generated)).replace("'", "")
        generated = generated.replace(",", "")
        print("New tree: ", generated)
        return deepcopy(target)

    def _lookup_signature(self, verb):
        """Return the "Signature" of the first KB entry for ``verb``, or None."""
        matches = self.kb.find({"Verb": verb})
        if not matches:
            return None
        try:
            entry = matches[0]
        except IndexError:
            # Cursor-like results can be truthy while empty and only fail
            # once indexed; treat that as "verb not in the KB".
            return None
        return entry['Signature']

    def _restore(self, tree, backup):
        """Put back the fields of ``tree`` that the "+/-" rewrite overwrites."""
        tree.val = backup.val
        tree.mark = backup.mark
        tree.id = backup.id
        tree.left = deepcopy(backup.left)
        tree.right = deepcopy(backup.right)

    def generate(self, tree):
        """Generate entailed trees for the first complement node on each path.

        Nodes other than ``ccomp``/``xcomp`` are recursed into (skipping "N"
        leaf placeholders).  At a complement node:

        * signature "+/+": the complement ``tree.left.right`` is logged as is.
        * signature "+/-": the node is temporarily replaced by the complement,
          whose verb is re-inflected with the governing verb's POS tag; the
          whole tree is logged and the node is then restored.

        Verbs missing from the KB and other signatures generate nothing.
        """
        if tree.val not in ("ccomp", "xcomp"):
            if tree.left != "N":
                self.generate(tree.left)

            if tree.right != "N":
                self.generate(tree.right)
            return

        # The KB is queried with the present-tense form of the governing verb.
        verb = conjugate(verb=tree.right.val, tense=PRESENT, person=1)
        pos = tree.right.npos

        signature = self._lookup_signature(verb)
        if not signature:
            return

        # Only the first two parts matter; slicing tolerates a missing "/".
        sign = signature.split('/')[:2]
        if sign == ["+", "+"]:
            self.treeLog.append(self.save_tree(tree.left.right))
        elif sign == ["+", "-"]:
            backup = deepcopy(tree)
            complement = tree.left.right
            try:
                tree.val = complement.val
                tree.mark = complement.mark
                tree.id = complement.id
                tree.right = complement.right
                tree.right.val = self.fix_tense(tree.right.val, pos)
                tree.left = complement.left

                self.treeLog.append(self.save_tree())
            finally:
                # Always undo the in-place rewrite, even if serialising failed.
                self._restore(tree, backup)

In [54]:
#from Udep2Mono.dependency_parse import dependencyParse
#tree, postags, words = dependencyParse("All dogs eat food", parser="stanza")[0]

# Demo: polarize three sentences whose main verb takes a clausal complement,
# then let the implicative generator derive trees from each polarized tree.
# NOTE(review): the recorded output shows a 2-item progress bar and two trees
# for three input sentences, so it appears to come from an earlier run.
sentences = ["I managed to finish my homework", 
             "I recognized that this apple is good",
             "I realized that my homework is hard"]
annotations, _ = polarization.run_polarize_pipeline(
    sentences, verbose=2, parser="stanza")
print()
for annotation in annotations:
    # Each annotation is a 5-tuple; only the original (for its length) and
    # the polarized tree are used here.
    annotated, original, polarized, postags, polarized_tree = annotation
    # A fresh generator per sentence; db.implicative is the implicative-verb KB.
    impgenerator = ImplicativeGenerator(len(original), db.implicative, polarized_tree)
    impgenerator.search()  

100%|██████████| 2/2 [00:00<00:00,  4.45it/s]
New tree:  [nsubj↑ [PRP i↑] [obj↑ [nmod:poss↑ [PRP$ my↑] [NN homework↑]] [VB finished↑]]]
New tree:  [nsubj↑ [det= [DT this=] [NN apple=]] [cop↑ [VBZ is↑] [JJ good↑]]]



In [47]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and sequence classifier for the "bert-base-cased-finetuned-mrpc"
# checkpoint; downloads are cached under ../model/.  Used by inference_mrpc.
paraphraseTokenizer = AutoTokenizer.from_pretrained(
    "bert-base-cased-finetuned-mrpc",cache_dir ='../model/')
paraphraseModel = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased-finetuned-mrpc", cache_dir ='../model/',)

from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used by inference_sts.
sentenceTransformer = SentenceTransformer("roberta-base-nli-stsb-mean-tokens")

# Label names by classifier output index; inference_mrpc reports index 1.
classes = ["not paraphrase", "is paraphrase"]

def inference_mrpc(seq1s, seq2s):
    """Print the paraphrase probability for each aligned sentence pair.

    Pair ``i`` is ``(seq1s[i], seq2s[i])``.  It is encoded with
    ``paraphraseTokenizer`` and scored by ``paraphraseModel``.  The softmax
    probability at index 1 is printed as a whole-number percentage, labelled
    with ``classes[1]``.

    Args:
        seq1s: first sentences of the pairs.
        seq2s: second sentences; must have at least ``len(seq1s)`` items.

    Returns:
        None; results are printed only.
    """
    for i in range(len(seq1s)):
        paraphrase = paraphraseTokenizer.encode_plus(
            seq1s[i], seq2s[i], return_tensors="pt")
        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            logits = paraphraseModel(**paraphrase)[0]
        paraphrase_results = torch.softmax(logits, dim=1).tolist()[0]
        print(f"{classes[1]}: {round(paraphrase_results[1] * 100)}%")

def inference_sts(sentences1, sentences2):
    """Print the cosine similarity of each aligned pair of sentences.

    Both lists are embedded with ``sentenceTransformer``, the full pairwise
    cosine matrix is computed, and entry ``[i][i]`` is printed to four
    decimals for every index of ``sentences1``.
    """
    first_vecs = sentenceTransformer.encode(sentences1, convert_to_tensor=True)
    second_vecs = sentenceTransformer.encode(sentences2, convert_to_tensor=True)
    # All-against-all scores; only the diagonal (pair i with pair i) is shown.
    similarity = util.pytorch_cos_sim(first_vecs, second_vecs)
    for pair_idx in range(len(sentences1)):
        print(f"Similarity Score: {similarity[pair_idx][pair_idx]:.4f}")

In [46]:
# Sentence pairs to compare: (seq0, seq1), (seq2, seq3), (seq4, seq5), (seq6, seq7).
# Note that seq1, seq3 and seq5 are the same string, so the first three pairs
# differ only in their first sentence.
seq0 = "I caused him to submit his resignation"
seq1 = "I caused the submission of his resignation"
seq2 = "I caused him to submit the resignation"
seq3 = "I caused the submission of his resignation"
seq4 = "I caused the submission of the resignation"
seq5 = "I caused the submission of his resignation"
seq6 = "cut up an apple"
seq7 = "cut an apple into piece"

# Index i of both lists forms one pair.
sentences1 = [seq0, seq2, seq4, seq6]
sentences2 = [seq1, seq3, seq5, seq7]

# NOTE(review): the recorded output prints "Score:" while inference_sts now
# prints "Similarity Score:", so the output predates the current function.
inference_mrpc(sentences1, sentences2)
inference_sts(sentences1, sentences2)

is paraphrase: 94%
is paraphrase: 94%
is paraphrase: 94%
is paraphrase: 93%
Score: 0.8999
Score: 0.9313
Score: 0.9685
Score: 0.9409
