In [1]:
from pymongo import MongoClient
from wordnet import *
from Udep2Mono.binarization import BinaryDependencyTree
from Udep2Mono.polarization import PolarizationPipeline
from Udep2Mono.util import btreeToList
from Udep2Mono.util import det_mark, det_type
from copy import deepcopy
from pattern.en import pluralize, singularize

# Connect to the MongoDB server on localhost:27017 and keep handles to the
# "UdepLog" database and its "quantifier" collection for the cells below.
client = MongoClient(host='localhost', port=27017)
db = client["UdepLog"]
quantifier = db["quantifier"]

2020-12-23 01:41:56 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | gum       |
| pos       | gum       |
| lemma     | gum       |
| depparse  | gum       |
| ner       | ontonotes |

2020-12-23 01:41:57 INFO: Use device: gpu
2020-12-23 01:41:57 INFO: Loading: tokenize
2020-12-23 01:42:00 INFO: Loading: pos
2020-12-23 01:42:01 INFO: Loading: lemma
2020-12-23 01:42:01 INFO: Loading: depparse
2020-12-23 01:42:03 INFO: Loading: ner
2020-12-23 01:42:04 INFO: Done loading processors!


In [2]:
# Show the first document in the quantifier collection whose "word" field is
# "several" (raises IndexError if no such document exists).
quantifier.find({"word": "several"})[0]

{'_id': ObjectId('5fd18789c624d91cdd508dfe'),
 'word': 'several',
 '>': ['every', 'all', 'each', 'most', 'many', 'num'],
 '<': ['some', 'a', 'an', 'one', 'at-least-several'],
 '=': ['a few', 'several-of-the'],
 '!': []}

In [3]:
## Lexical-Knowledge-Based Lexical Replacement

In [3]:
class LexicalGenerator:
    """Generate lexical-replacement variants of a polarized dependency tree.

    The tree is walked recursively. Each node is expected to expose ``val``,
    ``npos``, ``mark``, ``left`` and ``right``; a missing child is the string
    ``"N"``. Whenever a word is swapped, a deep copy of the whole tree is
    appended to ``treeLog`` and an ``"old => new"`` string is appended to
    ``replacementLog``; the live tree is restored afterwards.

    Two kinds of nodes are rewritten:

    * nodes whose ``npos`` is listed in ``key_tokens`` -- candidates come from
      ``get_word_sets`` and are kept only if they occur in ``hypothesis``;
    * nodes whose ``val`` is ``"det"`` -- the left child's word is looked up
      in the ``quantifier`` collection and replaced by related quantifiers.

    Note: "occurs in ``hypothesis``" is a plain substring test (``in`` on a
    string), so short words also match inside longer ones.
    """

    def __init__(self):
        self.deptree = None       # root of the tree currently being rewritten
        self.hypothesis = ""      # text that candidate words are checked against
        self.treeLog = []         # deep-copied tree snapshots, one per replacement
        self.polarLog = []        # not written by any method of this class
        self.replacementLog = []  # "old => new" strings, parallel to treeLog
        # POS tags of the nodes whose word may be replaced.
        self.key_tokens = [
            'NN','NNS','NNP','NNPS','VBD',
            'VBG','VBN','VBZ','VB']

    def deptree_generate(self, tree):
        """Remember ``tree`` as the snapshot root and generate replacements.

        Logs are not cleared, so repeated calls keep appending to
        ``treeLog`` and ``replacementLog``.
        """
        self.deptree = tree
        self.generate(self.deptree)

    def generate(self, tree):
        """Recursively visit ``tree`` and record every applicable replacement.

        Args:
            tree: node of the tree stored in ``self.deptree`` (snapshots are
                always taken of ``self.deptree``, not of ``tree``).
        """
        if tree.npos is not None:
            if tree.npos in self.key_tokens:
                self._replace_content_word(tree)
        elif tree.val == "det":
            self._replace_determiner(tree)

        # "N" marks an absent child.
        if tree.left != "N":
            self.generate(tree.left)
        if tree.right != "N":
            self.generate(tree.right)

    def _replace_content_word(self, node):
        """Try lexical relatives of ``node.val`` that occur in the hypothesis."""
        original = node.val
        # The unpacking order (hyper, hypo, syn, ant) is the one the helper is
        # called with throughout this notebook.
        hyper, hypo, syn, ant = get_word_sets(
            singularize(node.val), node.npos.lower())

        # Synonyms and antonyms are always tried; hypernyms only when the node
        # is marked "+", hyponyms only when it is marked "-".
        candidate_groups = [syn, ant]
        if node.mark == "+":
            candidate_groups.append(hyper)
        if node.mark == "-":
            candidate_groups.append(hypo)

        try:
            for group in candidate_groups:
                for word in group:
                    if word in self.hypothesis:
                        node.val = word
                        self._record(original, word)
        finally:
            # Leave the live tree untouched for the rest of the traversal.
            node.val = original

    def _replace_determiner(self, node):
        """Try related quantifiers for the determiner under a ``det`` node."""
        det = node.left
        original_val = det.val
        # Save the mark of the node that is actually modified below, so the
        # restore puts back the determiner's own mark.
        # NOTE(review): if det_mark[...][1] is meant for the noun (right
        # child), the writes and this backup should all target node.right
        # instead -- confirm against the polarizer.
        original_mark = det.mark

        kb = quantifier.find_one({"word": original_val.lower()})
        if kb is None:
            # Determiner not present in the knowledge base: nothing to try.
            return

        # (words, must_occur_in_hypothesis). Entries under "=" are always
        # applied; "<" entries are tried for a "+" determiner and ">" entries
        # for a "-" determiner, both filtered by the hypothesis.
        candidate_groups = [(kb["="], False)]
        if original_mark == "+":
            candidate_groups.append((kb["<"], True))
        if original_mark == "-":
            candidate_groups.append((kb[">"], True))

        try:
            for words, must_occur in candidate_groups:
                for word in words:
                    if must_occur and word not in self.hypothesis:
                        continue
                    det.val = word
                    det.mark = self._determiner_mark(word)
                    self._record(original_val, word)
        finally:
            det.val = original_val
            det.mark = original_mark

    @staticmethod
    def _determiner_mark(word):
        """Return the second entry of ``det_mark`` for ``word``'s determiner type."""
        detType = det_type(word)
        if detType is None:
            # Words with no known determiner type fall back to "det:exist".
            detType = "det:exist"
        return det_mark[detType][1]

    def _record(self, original, replacement):
        """Snapshot the current tree and log the ``original => replacement`` swap."""
        self.treeLog.append(self.save_tree())
        self.replacementLog.append(
            "{} => {}".format(original, replacement))

    def save_tree(self):
        """Return a deep copy of the tree stored in ``self.deptree``."""
        return deepcopy(self.deptree)

In [4]:
# Polarize the premise, then list every lexical replacement that moves it
# towards the hypothesis.
sentences = ["Some flowers are beautiful"]
hypothesis = "Some flowering plant are beautiful"
pipeline = PolarizationPipeline(sentences, verbose=2, parser="stanza")
pipeline.run_polarize_pipeline()
print("\nPolarization Complete")

lexicalGenerator = LexicalGenerator()
# NOTE(review): `annotations` is not assigned anywhere in this notebook;
# presumably it should come from the pipeline run above -- confirm and bind it
# explicitly so the cell survives Restart & Run All.
for annotation in annotations:
    print("================")
    print("Init Premise: ", annotation['annotated'])
    lexicalGenerator.hypothesis = hypothesis
    # The generator's logs grow across calls; remember where this
    # annotation's records start so earlier ones are not printed again.
    first_new_record = len(lexicalGenerator.replacementLog)
    # deptree_generate takes the tree only (see LexicalGenerator above).
    lexicalGenerator.deptree_generate(annotation['polarized_tree'])
    for record in lexicalGenerator.replacementLog[first_new_record:]:
        print(record)

100%|██████████| 1/1 [00:00<00:00,  2.08it/s]

some↑ flowers↑ are↑ beautiful↑
http://api.conceptnet.io/query?start=/c/en/flower&rel=/r/IsA&limit=500
http://api.conceptnet.io/query?end=/c/en/flower&rel=/r/IsA&limit=500
some => a
some => an
some => one
flowers => flower
flowers => flowering plant


## Word-Embedding-Based Lexical Replacement

In [1]:
import fasttext
from sapphire import Sapphire

# Relative path of the fastText binary model file.
MODEL_NAME = "sapphire/model/wiki-news-300d.bin"
# Load the model once and wrap it in the Sapphire aligner used by the cells below.
model = fasttext.FastText.load_model(MODEL_NAME)
aligner = Sapphire(model)



In [4]:
# Configure the aligner before running the examples.
aligner.set_params(lambda_=0.5, delta=0.5, alpha=0.05, hungarian=True)

# Tokenised sentence pair to align. (An earlier pair assigned to the same
# names was overwritten before use and has been dropped.)
s1 = "A man is slicing a orange".split(" ")
s2 = "A man is cutting a fruit".split(" ")
wa, pa = aligner(s1, s2)

# NOTE(review): the "-1" offsets suggest the aligner reports 1-based token
# positions -- confirm against the Sapphire documentation.
for pair in wa:
    print(s1[pair[0]-1], "=>", s2[pair[1]-1])

A => A
man => man
is => is
slicing => cutting
a => a
orange => fruit


In [31]:
# Align a looser paraphrase pair and print each aligned source/target token.
s1 = "Dreams will come true very soon".split(" ")
s2 = "Wishes will be fulfilled in the near future".split(" ")
wa, pa = aligner(s1, s2)

# NOTE(review): positions appear to be 1-based, hence the "- 1" -- confirm.
for pair in wa:
    source_token = s1[pair[0] - 1]
    target_token = s2[pair[1] - 1]
    print(source_token, "=>", target_token)

Dreams => Wishes
will => will
come => in
true => future
very => the
soon => be
