In [1]:
from pymongo import MongoClient
from wordnet import *
from Udep2Mono.binarization import BinaryDependencyTree
from Udep2Mono import polarization
from Udep2Mono.util import btreeToList
from Udep2Mono.util import det_mark, det_type
from copy import deepcopy
from pattern.en import pluralize, singularize

# Connect to the MongoDB server on localhost and grab the `quantifier`
# collection of the `UdepLog` database; the cells below query it by "word".
client = MongoClient(host='localhost', port=27017)
db = client["UdepLog"]
quantifier = db["quantifier"]

2020-12-22 02:30:24 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | gum       |
| pos       | gum       |
| lemma     | gum       |
| depparse  | gum       |
| ner       | ontonotes |

2020-12-22 02:30:24 INFO: Use device: gpu
2020-12-22 02:30:24 INFO: Loading: tokenize
2020-12-22 02:30:27 INFO: Loading: pos
2020-12-22 02:30:29 INFO: Loading: lemma
2020-12-22 02:30:29 INFO: Loading: depparse
2020-12-22 02:30:30 INFO: Loading: ner
2020-12-22 02:30:32 INFO: Done loading processors!


In [52]:
# Inspect the knowledge-base entry for "several": the document lists other
# quantifiers under the relation keys '>', '<', '=' and '!'.
quantifier.find({"word": "several"})[0]

{'_id': ObjectId('5fd18789c624d91cdd508dfe'),
 'word': 'several',
 '>': ['every', 'all', 'each', 'most', 'many', 'num'],
 '<': ['some', 'a', 'an', 'one', 'at-least-several'],
 '=': ['a few', 'several-of-the'],
 '!': []}

In [74]:
class LexicalGenerator:
    """Generate single-word variants of a polarized binary dependency tree.

    The generator walks the tree and, for each eligible node, temporarily
    swaps the node's word for a related word, stores a deep copy of the whole
    tree in ``treeLog`` and a ``"old => new"`` string in ``replacementLog``,
    then restores the node.

    Two kinds of node are rewritten:

    * leaves whose POS tag is in ``key_tokens`` -- replaced with the word sets
      returned by ``get_word_sets``;
    * ``"det"`` relation nodes -- the determiner in ``tree.left`` is replaced
      with quantifiers looked up in the ``quantifier`` collection.

    ``treeLog`` and ``replacementLog`` are never cleared, so they accumulate
    across successive ``deptree_generate`` calls on the same instance.
    """

    def __init__(self):
        self.deptree = None         # root of the tree currently being processed
        self.length = 0             # sentence length handed to deptree_generate
        self.treeLog = []           # deep-copied tree snapshots, one per replacement
        self.polarLog = []          # not written anywhere in this class
        self.replacementLog = []    # "old => new" strings, parallel to treeLog
        # Noun and verb POS tags whose tokens are eligible for replacement.
        self.key_tokens = [
            'NN','NNS','NNP','NNPS','VBD',
            'VBG','VBN','VBZ','VB']

    def deptree_generate(self, length, tree):
        """Set ``tree`` as the current tree and generate all its variants.

        Args:
            length: sentence length; stored on the instance.
            tree: root node of the polarized binary dependency tree.
        """
        self.deptree = tree
        self.length = length
        self.generate(self.deptree)

    def generate(self, tree):
        """Recursively log replacements for ``tree`` and its sub-trees.

        Snapshots are always taken of ``self.deptree``, so ``tree`` is expected
        to be that root or one of its descendants. A child equal to the string
        ``"N"`` marks a missing child and stops the recursion.
        """
        if tree.npos is not None:
            if tree.npos in self.key_tokens:
                self._generate_lexical(tree)
        elif tree.val == "det":
            self._generate_determiner(tree)

        if tree.left != "N":
            self.generate(tree.left)
        if tree.right != "N":
            self.generate(tree.right)

    def _generate_lexical(self, node):
        """Log noun/verb replacements for one leaf, then restore its word."""
        original = node.val
        hyper, hypo, syn, ant = get_word_sets(
            singularize(original), node.npos.lower())

        # Synonyms and antonyms are always tried; hypernyms only on "+" nodes
        # and hyponyms only on "-" nodes.
        groups = [syn, ant]
        if node.mark == "+":
            groups.append(hyper)
        if node.mark == "-":
            groups.append(hypo)

        for group in groups:
            for word in group:
                node.val = word
                self._record(original, word)
        node.val = original

    def _generate_determiner(self, tree):
        """Log quantifier replacements for the determiner under a "det" node.

        Determiners with no entry in the ``quantifier`` collection are skipped
        instead of raising.
        """
        det = tree.left
        original_val = det.val
        # Back up the same attribute that is mutated below so the restore is
        # exact. NOTE(review): the earlier code saved tree.right.mark here
        # while writing tree.left.mark -- confirm whether the noun's mark was
        # meant to be updated instead.
        original_mark = det.mark

        kb = quantifier.find_one({"word": original_val.lower()})
        if kb is None:
            return

        # "=" entries are always tried; "<" entries only when the determiner
        # is marked "+", ">" entries only when it is marked "-".
        groups = [kb.get("=", [])]
        if original_mark == "+":
            groups.append(kb.get("<", []))
        if original_mark == "-":
            groups.append(kb.get(">", []))

        for group in groups:
            for word in group:
                det.val = word
                detType = det_type(word)
                if detType is None:
                    detType = "det:exist"
                det.mark = det_mark[detType][1]
                self._record(original_val, word)
        det.val = original_val
        det.mark = original_mark

    def _record(self, original, replacement):
        """Snapshot the current tree and log ``original => replacement``."""
        self.treeLog.append(self.save_tree())
        self.replacementLog.append(
            "{} => {}".format(original, replacement))

    def save_tree(self, tree=None):
        """Return a deep copy of ``tree``, or of ``self.deptree`` if omitted.

        The copy is independent of later in-place edits made while generating
        further replacements.
        """
        target = self.deptree if tree is None else tree
        return deepcopy(target)


In [75]:
# Polarize the demo sentence(s) and print, under each annotated sentence,
# the word replacements the generator produced for it.
sentences = ["Some flowers are beautiful"]
annotations, _ = polarization.run_polarize_pipeline(
    sentences, verbose=2, parser="stanza")

lexicalGenerator = LexicalGenerator()
for annotation in annotations:
    annotated, original, polarized, postags, polarized_tree = annotation
    print('\n' + annotated)
    # replacementLog keeps growing across deptree_generate calls, so remember
    # where this sentence's records start and print only those.
    already_logged = len(lexicalGenerator.replacementLog)
    lexicalGenerator.deptree_generate(len(original), polarized_tree)
    for record in lexicalGenerator.replacementLog[already_logged:]:
        print(record)

100%|██████████| 1/1 [00:00<00:00,  3.47it/s]

some↑ flowers↑ are↑ beautiful↑
some => a
some => an
some => one
flowers => bloom
flowers => flower
flowers => flush
flowers => heyday
flowers => prime
flowers => efflorescence
flowers => blossom
flowers => peak
flowers => period
flowers => period of time
flowers => develop
flowers => reproductive structure
flowers => time period
flowers => angiosperm
flowers => flowering plant
