In [2]:
import stanza

# Settings for the full pipeline: tokenize -> POS -> lemma -> dependency parse,
# each processor loaded from a local GUM model file under ./model/en/.
depparse_gum_config = {
    'lang': "en",
    'processors': "tokenize,pos,lemma,depparse",
    'tokenize_model_path': './model/en/tokenize/gum.pt',
    'pos_model_path': './model/en/pos/gum.pt',
    'depparse_model_path': './model/en/depparse/gum.pt',
    'lemma_model_path': './model/en/lemma/gum.pt',
    # Per Stanza's tokenizer options this disables sentence segmentation
    # (verify against the installed Stanza version).
    'tokenize_no_ssplit': True,
    # GPU is requested, but the recorded run below reports "Use device: cpu".
    'use_gpu': True,
    'pos_batch_size': 2000
}

# Settings for a tokenizer-only pipeline that uses the same tokenizer model.
token_config = {
    'lang': "en",
    'processors': "tokenize",
    'tokenize_model_path': './model/en/tokenize/gum.pt',
    'tokenize_no_ssplit': True,
    'use_gpu': False,
    # NOTE(review): only the tokenize processor is requested here, so this
    # POS setting looks unused -- confirm before relying on it.
    'pos_batch_size': 3000
}

# Building a Pipeline loads the model files from disk (see the log output below).
# `gum_depparse` is the parser used by stanza_parse().
gum_depparse = stanza.Pipeline(**depparse_gum_config)
tokenizer = stanza.Pipeline(**token_config)

2021-06-10 03:38:27 INFO: Loading these models for language: en (English):
| Processor | Package                 |
---------------------------------------
| tokenize  | ./model/en...ize/gum.pt |
| pos       | ./model/en/pos/gum.pt   |
| lemma     | ./model/en/lemma/gum.pt |
| depparse  | ./model/en...rse/gum.pt |

2021-06-10 03:38:27 INFO: Use device: cpu
2021-06-10 03:38:27 INFO: Loading: tokenize
2021-06-10 03:38:27 INFO: Loading: pos
2021-06-10 03:38:29 INFO: Loading: lemma
2021-06-10 03:38:29 INFO: Loading: depparse
2021-06-10 03:38:30 INFO: Done loading processors!
2021-06-10 03:38:30 INFO: Loading these models for language: en (English):
| Processor | Package                 |
---------------------------------------
| tokenize  | ./model/en...ize/gum.pt |

2021-06-10 03:38:30 INFO: Use device: cpu
2021-06-10 03:38:30 INFO: Loading: tokenize
2021-06-10 03:38:30 INFO: Done loading processors!


In [3]:
def dependency_parse(sentence, parser="gum"):
    """Parse ``sentence`` by forwarding to :func:`stanza_parse`.

    Args:
        sentence: Text to parse.
        parser: Parser name, passed through unchanged.

    Returns:
        Whatever ``stanza_parse`` returns: the ``(parse_tree, postags, words)``
        triple.
    """
    return stanza_parse(sentence, parser=parser)


def stanza_parse(sentence, parser="gum"):
    """Run the GUM pipeline on ``sentence`` and flatten it into edge lists.

    Args:
        sentence: Text to parse; a newline is appended before parsing.
        parser: Accepted for interface compatibility. Only the GUM pipeline
            is wired up, so the value is currently ignored.

    Returns:
        A triple ``(parse_tree, postags, words)``:

        * ``parse_tree`` -- list of ``[deprel, word_id, head]`` edges
          (punctuation edges are skipped), relabelled by ``enhance_parse``.
        * ``postags`` -- maps word text to ``(word_id, xpos)``.
        * ``words`` -- maps word id to ``(text, xpos)``.
    """
    # An "ewt" alternative (ewt_depparse) was sketched here but never enabled.
    doc = gum_depparse(sentence + "\n")

    postags, words = {}, {}
    parse_tree = []
    # head id -> relations of its dependents; dependent id -> its own relations
    head_log, depdent_log = {}, {}

    for sent in doc.sentences:
        for word in sent.words:
            edge = post_process(sent, word, postags, words)
            if not edge:
                # post_process returns [] for punctuation
                continue

            relation, dependent_id, head_id = edge
            head_log.setdefault(head_id, []).append(relation)
            depdent_log.setdefault(dependent_id, []).append(relation)
            parse_tree.append(edge)

        # Relabel the edges collected so far once a sentence is complete.
        enhance_parse(parse_tree, head_log, depdent_log, words)
    return parse_tree, postags, words


def enhance_parse(tree, heads, deps, words):
    """Refine the relation labels of ``tree`` in place.

    Args:
        tree: List of ``[deprel, word_id, head_id]`` edges; ``deprel`` is
            rewritten in place.
        heads: Maps a head id to the list of relations of its dependents.
            Words that have no (non-punctuation) dependents are absent.
        deps: Maps a dependent id to its relations. Currently unused; kept
            so existing callers keep working.
        words: Maps a word id to ``(text, xpos)``.

    Relabelling rules:
        * ``conj`` becomes ``conj-sent`` (both sides have a subject),
          ``conj-adj``, ``conj-n`` / ``conj-np`` or ``conj-vb`` / ``conj-vp``
          depending on the POS tags and dependents of both conjuncts.
        * ``advcl`` becomes ``advcl-sent`` when word 1 is ``"if"``.
        * ``advmod`` becomes ``advmod-sent`` when it is a ``"not"`` at id 1.
        * ``case`` becomes ``case-after`` when it follows its head.
        * Any edge whose dependent is ``at-most`` / ``at-least`` /
          ``more-than`` / ``less-than`` becomes ``det``.
    """
    # Dependents that make a nominal conjunct count as a full noun phrase.
    np_rel = {"amod", "compound", "compound:prt", "det",
              "nummod", "appos", "advmod", "nmod", "nmod:poss"}
    # Dependents that make a verbal conjunct count as a full verb phrase.
    vp_rel = {"obj", "xcomp", "obl"}

    for node in tree:
        if node[0] == "conj":
            # A conjunct without dependents has no entry in `heads`
            # (e.g. "red" in "red, blue and green"); treat that as
            # "no dependents" instead of raising KeyError.
            left_rels = heads.get(node[1], [])
            right_rels = heads.get(node[2], [])

            if "nsubj" in left_rels and "nsubj" in right_rels:
                node[0] = "conj-sent"
            elif words[node[1]][1] == "JJ" and words[node[2]][1] == "JJ":
                node[0] = "conj-adj"
            elif "NN" in words[node[1]][1] and "NN" in words[node[2]][1]:
                node[0] = "conj-n"
                if np_rel.intersection(left_rels) and np_rel.intersection(right_rels):
                    node[0] = "conj-np"
            elif "VB" in words[node[1]][1] and "VB" in words[node[2]][1]:
                node[0] = "conj-vb"
                if vp_rel.intersection(left_rels) and vp_rel.intersection(right_rels):
                    node[0] = "conj-vp"

        if node[0] == "advcl":
            # NOTE(review): the comparison is case-sensitive, so a
            # capitalised sentence-initial "If" does not match -- confirm
            # whether the input is lower-cased upstream.
            if words[1][0] == "if":
                node[0] = "advcl-sent"
        if node[0] == "advmod":
            if words[node[1]][0] == "not" and node[1] == 1:
                node[0] = "advmod-sent"
        if node[0] == "case" and node[1] - node[2] > 0:
            node[0] = "case-after"
        if words[node[1]][0] in ["at-most", "at-least", "more-than", "less-than"]:
            node[0] = "det"


def post_process(sent, word, postag, words):
    """Record one parsed word and turn it into a dependency edge.

    Args:
        sent: Sentence the word belongs to (not used here).
        word: Parsed word exposing ``id``, ``text``, ``xpos``, ``deprel``
            and ``head``.
        postag: Updated in place: word text -> ``(word_id, xpos)``.
        words: Updated in place: word id -> ``(text, xpos)``.

    Returns:
        ``[deprel, word_id, head]`` where ``head`` is the string ``"root"``
        when the word's head is not positive, or ``[]`` for punctuation.
    """
    idx = int(word.id)

    # Only the first word seen with a given id is registered.
    if idx not in words:
        postag[word.text] = (idx, word.xpos)
        words[idx] = (word.text, word.xpos)

    if word.deprel == "punct":
        return []

    if word.head > 0:
        governor = word.head
    else:
        governor = "root"
    return [word.deprel, idx, governor]


def printTree(tree, tag, word):
    """Print one dependency edge as ``word / head / deprel``.

    Args:
        tree: A ``[deprel, word_id, head_id]`` edge.
        tag: Not used.
        word: Maps a word id to a tuple whose first item is the word text.

    The ``root`` edge is skipped because it has no head word to show.
    """
    relation = tree[0]
    if relation == "root":
        return

    dependent_text = word[tree[1]][0]
    head_text = word[tree[2]][0]
    print(f"word: {dependent_text}\thead: {head_text}\tdeprel: {relation}")

In [4]:
from pqdict import pqdict

# Maps a polarity mark to its opposite: "+" and "-" swap, "=" maps to itself.
# BinaryDependencyTree.traverse uses it for a "not" inside a two-part
# hyphenated leaf.
negate_mark = {
    "+": "-",
    "-": "+",
    "=": "="
}

class BinaryDependencyTree:
    """Node of a binarized dependency tree.

    Inner nodes (``is_tree`` is True) carry a relation name in ``val`` and two
    children. Leaves (after ``set_not_tree``) carry a word in ``val`` together
    with its word id (``id``) and POS tag (``pos``).

    Args:
        val: Relation name (inner node) or word text (leaf).
        left: Left child, or None.
        right: Right child, or None.
        key: Identifier assigned by the builder.
        counter: Counter object shared by the nodes of one tree.
        id: Word id for leaves.
        pos: POS tag for leaves.
    """

    def __init__(self, val, left, right, key, counter, id=None, pos=None):
        self.val = val
        self.parent = None
        self.left = left
        self.right = right
        self.mark = "0"
        self.id = id
        self.pos = pos
        self.key = key
        self.is_root = False
        self.is_tree = True
        self.length = 0
        # leaf key (text, pos, mark, word id) -> word id, used as priority
        self.leaves = pqdict({})
        self.counter = counter
        # str((val, id)) -> replacement text for that leaf
        self.replaced = {}

    def sorted_leaves(self):
        """Collect the leaves below this node into ``self.leaves`` and return it."""
        self.traverse(self)
        return self.leaves

    def traverse(self, tree, multi_word=False):
        """Add every leaf below ``tree`` to ``self.leaves``.

        A leaf whose ``str((val, id))`` is a key of ``self.replaced`` has its
        ``val`` overwritten with the replacement. When ``multi_word`` is True
        and the replacement contains ``-``, one entry per hyphen-separated
        part is added, with ids stepping down by 0.1 from the leaf id (last
        part first). In a two-part replacement a part equal to ``not`` gets
        the negated mark.

        Args:
            tree: Subtree to walk.
            multi_word: Split replaced hyphenated leaves into their parts.
        """
        if tree.is_tree:
            # Pass the flag down; without it the option was silently lost
            # below the node traverse() was called on.
            self.traverse(tree.left, multi_word)
            self.traverse(tree.right, multi_word)
            return

        lookup = str((tree.val, tree.id))
        was_replaced = lookup in self.replaced
        if was_replaced:
            tree.val = self.replaced[lookup]

        if "-" in tree.val and was_replaced and multi_word:
            parts = tree.val.split('-')
            parts.reverse()
            for offset, part in enumerate(parts):
                word_id = tree.id - offset * 0.1
                mark = tree.mark
                if part.lower() == "not" and len(parts) == 2:
                    mark = negate_mark[tree.mark]
                self.leaves[(part, tree.pos, mark, word_id)] = word_id
        else:
            self.leaves[(tree.val, tree.pos, tree.mark, tree.id)] = tree.id

    def copy(self):
        """Return a deep copy of the subtree rooted at this node.

        The copy keeps ``mark``, ``is_tree``, ``is_root``, ``length`` and the
        (shared) ``replaced`` mapping, and starts with empty ``leaves``.
        Copied children point at their copied parent; the copy of the node
        ``copy()`` was called on keeps the original ``parent`` reference.
        """
        left = self.left.copy() if self.left is not None else None
        right = self.right.copy() if self.right is not None else None

        new_tree = BinaryDependencyTree(
            self.val, left, right, self.key, self.counter, self.id, self.pos)
        new_tree.mark = self.mark
        new_tree.parent = self.parent
        new_tree.is_tree = self.is_tree
        new_tree.is_root = self.is_root
        new_tree.length = self.length
        new_tree.replaced = self.replaced

        # Re-parent the copied children so the copy is a self-contained tree
        # instead of pointing back into the original.
        for child in (left, right):
            if child is not None:
                child.parent = new_tree
        return new_tree

    def set_length(self, lth):
        """Store the sentence length on this node."""
        self.length = lth

    def set_root(self):
        """Flag this node as the root of the tree."""
        self.is_root = True

    def set_not_tree(self):
        """Flag this node as a leaf."""
        self.is_tree = False


# Sort priority of each dependency relation. Binarizer.compose sorts the
# edges of a head by this value and attaches the edge with the smallest
# value first. A relation that is missing here raises KeyError in that sort.
hierarchy = {
    "conj-sent": 0,
    "advcl-sent": 1,
    "advmod-sent": 2,
    "case": 10,
    "case-after": 75,
    "mark": 10,
    "expl": 10,
    "discourse": 10,
    "nsubj": 20,
    "csubj": 20,
    "nsubj:pass": 20,
    "conj-vp": 25,
    "ccomp": 30,
    "advcl": 30,
    "advmod": 30,
    "nmod": 30,
    "nmod:tmod": 30,
    "nmod:npmod": 30,
    "nmod:poss": 30,
    "xcomp": 40,
    "aux": 40,
    "aux:pass": 40,
    "obj": 60,
    "iobj": 60,
    "obl": 50,
    "obl:tmod": 50,
    "obl:npmod": 50,
    "cop": 50,
    "acl": 60,
    "acl:relcl": 60,
    "appos": 60,
    "conj": 60,
    "conj-np": 60,
    "conj-adj": 60,
    "det": 55,
    "det:predet": 55,
    "cc": 70,
    "cc:preconj": 70,
    "nummod": 75,
    # "fixed" used to be listed twice with the same value; one entry is enough.
    "fixed": 80,
    "compound": 80,
    "compound:prt": 80,
    "amod": 75,
    "conj-n": 90,
    "conj-vb": 90,
    "dep": 100,
    "flat": 100,
    "goeswith": 100,
    "parataxis": 100
}


class UnifiedCounter:
    """Small bag of counters and flags shared by the nodes of one tree.

    Args:
        initial_val: Starting value for both ``addi_negates`` and ``unifies``.
    """

    def __init__(self, initial_val=0):
        # counters
        self.addi_negates = self.unifies = initial_val
        # flags, all off to begin with
        self.nsubjLeft = self.expl = self.willing_verb = False

    def add_negates(self):
        """Increase ``addi_negates`` by one."""
        self.addi_negates += 1

    def add_unifies(self):
        """Increase ``unifies`` by one."""
        self.unifies += 1

    def is_unified_clause_subj(self):
        """Return ``nsubjLeft`` when ``unifies`` is odd, otherwise ``False``."""
        odd_unifies = self.unifies % 2 == 1
        return odd_unifies and self.nsubjLeft


class Binarizer:
    """Builds a binary tree from a flat list of dependency edges.

    Args:
        parse_table: List of ``[deprel, word_id, head_id]`` edges. The list
            is consumed (edges are removed) while the tree is built.
        postag: Word text -> ``(word_id, xpos)``.
        words: Word id -> ``(text, xpos)``.
    """

    def __init__(self, parse_table=None, postag=None, words=None):
        self.postag = postag
        self.parse_table = parse_table
        self.words = words
        self.id = 0
        self.counter = UnifiedCounter(0)
        self.replaced = {}

    def process_not(self, children):
        """Return only the second edge when the first edge is ``advmod`` and
        the second edge's word is ``"not"``; otherwise return ``children``."""
        if (len(children) > 1
                and children[0][0] == "advmod"
                and self.words[children[1][1]][0] == "not"):
            return [children[1]]
        return children

    def compose(self, head):
        """Recursively build the subtree for the word ``head``.

        Returns:
            ``(tree, keys)`` where ``keys`` lists node keys: those of the
            left subtree, then this node's key, then those of the right
            subtree.
        """
        candidates = [edge for edge in self.parse_table if edge[2] == head]
        candidates.sort(key=lambda edge: hierarchy[edge[0]])
        candidates = self.process_not(candidates)

        if not candidates:
            # No edges left for this word: it becomes a leaf.
            text = self.words[head][0]
            xpos = self.words[head][1]
            leaf = BinaryDependencyTree(
                text, None, None, self.id, self.counter, head, xpos)
            leaf.replaced = self.replaced
            self.id += 1
            leaf.set_not_tree()
            return leaf, [leaf.key]

        # Attach the highest-priority edge and take it out of the table so
        # the recursive calls below no longer see it.
        top_dep = candidates[0]
        self.parse_table.remove(top_dep)

        left, left_rel = self.compose(top_dep[1])
        right, right_rel = self.compose(top_dep[2])

        # Collapse the refined labels back to their base relation.
        dep_rel = top_dep[0]
        for base in ("conj", "case", "advcl", "advmod"):
            if base in top_dep[0]:
                dep_rel = base
                break

        node = BinaryDependencyTree(
            dep_rel, left, right, self.id, self.counter)
        node.left.parent = node
        node.right.parent = node
        node.replaced = self.replaced

        left_rel.append(node.key)
        self.id += 1
        return node, left_rel + right_rel

    def binarization(self):
        """Build the tree for the current ``parse_table``.

        Returns:
            ``(tree, keys)`` as produced by ``compose`` for the root word; the
            tree is flagged as root and its ``length`` is ``len(self.words)``.
        """
        self.id = 0
        self.relation = []
        root_edges = [edge for edge in self.parse_table if edge[0] == "root"]
        root = root_edges[0][1]
        self.counter = UnifiedCounter(0)
        tree, relation = self.compose(root)
        tree.set_root()
        tree.length = len(self.words)
        return tree, relation

In [5]:
from pattern.en import conjugate
from nltk.tree import Tree
from nltk.draw import TreeWidget
from nltk.draw.util import CanvasFrame
from IPython.display import Image, display

# Display suffix for each polarity mark: "+" -> up arrow, "-" -> down arrow,
# "=" stays "=", and "0" adds nothing. btree2list appends these to node labels.
arrows = {
    "+": "\u2191",
    "-": "\u2193",
    "=": "=",
    "0": ""
}

# Integer value for each display symbol: up arrow 1, down arrow -1, "=" 0.
arrow2int = {
    "\u2191": 1,
    "\u2193": -1,
    "=": 0
}

def btree2list(binaryDepdency, verbose=0):
    """Convert a binary dependency tree into nested lists.

    An inner node becomes ``[label, *children]`` and a leaf becomes
    ``[pos, label]``. A label is the node's ``val`` followed by the arrow for
    its ``mark``; for leaves, hyphens in ``val`` are shown as spaces.

    Args:
        binaryDepdency: Root node of the tree.
        verbose: When equal to 2, each label also gets the node's ``key``
            appended.
    """
    with_keys = verbose == 2

    def label_for(node, text):
        # text + polarity arrow (+ key in verbose mode)
        text = text + arrows[node.mark]
        if with_keys:
            text += str(node.key)
        return text

    def to_list(node):
        if node.is_tree:
            items = [label_for(node, node.val)]
        else:
            items = [node.pos, label_for(node, node.val.replace('-', ' '))]

        for child in (node.left, node.right):
            if child is not None:
                items.append(to_list(child))
        return items

    return to_list(binaryDepdency)

def jupyter_draw_nltk_tree(tree):
    """Render an NLTK tree and show it inline in the notebook.

    The tree is drawn on an NLTK canvas, written to ``tree.ps``, converted to
    ``tree.png`` with the external ``convert`` command and then displayed.

    Args:
        tree: Tree accepted by ``nltk.draw.TreeWidget``.

    Raises:
        FileNotFoundError: If the ``convert`` executable is not on PATH.
        subprocess.CalledProcessError: If the conversion fails.
    """
    # Local import: no notebook cell imports `os`, which the previous
    # os.system() call needed.
    import subprocess

    cf = CanvasFrame()
    try:
        tc = TreeWidget(cf.canvas(), tree)
        tc['node_font'] = 'arial 14 bold'
        tc['leaf_font'] = 'arial 14'
        tc['node_color'] = '#005990'
        tc['leaf_color'] = '#3F8F57'
        tc['line_color'] = '#175252'
        cf.add_widget(tc, 20, 20)
        cf.print_to_file('tree.ps')
    finally:
        # Always release the canvas window, even if drawing failed.
        cf.destroy()

    # check=True: fail loudly rather than display a stale tree.png left over
    # from an earlier run.
    subprocess.run(['convert', 'tree.ps', 'tree.png'], check=True)
    display(Image(filename='tree.png'))
    
def jupyter_draw_rsyntax_tree(tree):
    """Render a bracketed tree with the external ``rsyntaxtree`` command and
    show ``./syntree.png`` inline.

    Args:
        tree: Bracketed tree text (converted with ``str``).

    Raises:
        FileNotFoundError: If ``rsyntaxtree`` is not on PATH.
        subprocess.CalledProcessError: If the command fails.
    """
    # Local import: no notebook cell imports `os`, which the previous
    # os.system() call needed.
    import subprocess

    font_size = '8'
    # Argument list instead of a shell string: quotes, `$` or backticks in
    # the tree text are passed through literally, not interpreted by a shell.
    subprocess.run(['rsyntaxtree', '-s', font_size, str(tree)], check=True)
    # Assumes rsyntaxtree writes its image to ./syntree.png -- TODO confirm.
    display(Image(filename='./syntree.png'))

In [9]:
#G = Ugraph()
# Relation names treated as modifiers ("Prime" is a label the graph builder
# passes around itself rather than a parser relation).
nounModifiers = {"det", "nummod", "amod","obl:tmod", "acl:relcl", "nmod", "case","nmod:pass",  "acl", "Prime","cc"}
# Not referenced by the code visible in this notebook.
verbModifiers = {"advmod","xcomp","advcl","mark","aux"}
nounCategories = {"compound"}
# Verb POS tags; folded into `modified` below.
verbs = {"VBZ", "VBP", "VBD", "VBG"}
# POS tag fragments; the graph builder checks whether any of them occurs in a leaf's POS tag.
modified = {"NN", "PRP", "JJ", "VB","RB"}.union(verbs)
# Alias: `modifiers` is the same set object as `nounModifiers`, not a copy.
modifiers = nounModifiers
# Relations the graph builder steps over; for "expl" it also sets G.expl.
offFocus = {"expl"}
# Relation name fragments matched by substring in the graph builder.
contents = {"nsubj","obj","cop","compound","conj","nsubj:pass","obl"}
# The tables below are not referenced by the code visible in this notebook.
cont_npos = {"nsubj":'nn', "obj": 'nn', "cop": 'vbz', "verb": 'vbz'}
mark_toProp = {"+": {"hyponym","synonym"}, "-": {"hypernym","synonym"}, "=": {"synonym"}}
clause_prop = {"which", "that", "who"}
be_verbs = {"is", "am", "are", "be","was","were"}
# NOTE(review): "syntatic_variation" looks like a misspelling of "syntactic";
# it is a runtime value, so check every consumer before renaming it.
directions = {0: "lexical", 1: "phrasal", 2: "syntatic_variation", 3: "implicative"}

In [12]:
class Cnode:
    """Node of a concept graph.

    Args:
        prop: Role label of the node (for example a relation name).
        word: Text of the node.
        npos: POS tag of the node.
        mark: Polarity mark of the node.
    """

    def __init__(self,prop,word,npos, mark):
        # Outgoing links. A non-root node keeps everything under "all"; a
        # root node groups its links by the child's `prop`.
        self.nexts = dict()
        self.prop = prop
        self.isRoot = False
        self.nexts["all"] = set()
        self.modifiers = set()
        self.word = word
        self.npos = npos
        self.mark = mark
        self.phrases = set()
        # Pair number; -1 means "no pair assigned".
        self.pair = -1
        self.pairParts = dict()
        self.start = -1
        self.end = -1
        self.nodes = set()
        self.cc = None
        self.aligned = []
        self.isComp = False
        self.parent = None
        self.explMain = False

    def add_Unode(self, node):
        """Link ``node`` below this node.

        On a root node the child is filed under its ``prop`` (an ``"obl"``
        child is relabelled ``"obj"`` first); otherwise it goes under
        ``"all"``.
        """
        if self.isRoot:
            if node.prop == "obl":
                node.prop = "obj"
            # `nexts` starts with only the "all" bucket, so create the
            # bucket for this role on first use instead of raising KeyError.
            self.nexts.setdefault(node.prop, set()).add(node)
        else:
            self.nexts["all"].add(node)

    def addNode(self, node):
        """Register ``node`` in this node's ``nodes`` set.

        ``Cgraph.add_node`` calls this on the root node.
        """
        self.nodes.add(node)

    def add_modifier(self, modifierNode):
        """Attach ``modifierNode`` as a modifier of this node."""
        self.modifiers.add(modifierNode)

    def getText(self):
        """Not implemented; returns ``None``."""
        return

    def get_magicText(self):
        """Serialise this node and everything linked below it as bracketed text.

        A root node yields ``((role  (child)...)...)`` with one group per
        role. Any other node yields its word (spaces replaced by ``_``),
        followed by its pair number when one is set, followed by
        ``(child)`` for each linked node.
        """
        if self.isRoot:
            connected_info = ""
            for key, members in self.nexts.items():
                if key == "all":
                    continue
                component = "".join(
                    " (" + member.get_magicText() + ")" for member in members)
                connected_info += "(" + key + " " + component + ")"
            return "(" + connected_info + ")"

        label = self.word.replace(' ', '_')
        if self.pair != -1:
            label += str(self.pair)
        connected_info = "".join(
            "(" + node.get_magicText() + ")"
            for node in self.nexts["all"] if node is not None)
        return label + connected_info

    def addNum(self,num):
        """Set the pair number of this node."""
        self.pair = num

    def addPart(self, newNode, type1):
        """Add ``newNode`` to the parts stored under ``type1``."""
        self.pairParts.setdefault(type1, set()).add(newNode)

    def getParts(self):
        """Return the parts stored under ``"obj"`` (empty set when none)."""
        return self.pairParts.get("obj", set())

    def addCC(self,node):
        """Remember ``node`` as this node's coordinating conjunction."""
        self.cc = node

In [13]:
class Cgraph:
    """Concept graph anchored at one root node.

    Args:
        rootNode: Node used as the root; its ``isRoot`` flag is switched on.
    """

    def __init__(self, rootNode):
        self.root = rootNode
        self.root.isRoot = True

        self.nodes = set()
        # Words of the nodes linked directly below the root.
        self.contentSet = set()
        self.chunks = set()
        self.Pairs = dict()
        self.Pairs["nsubj"] = dict()
        self.Pairs["obj"] = dict()
        self.align_log = []
        self.expl = False
        self.passive = False

    def add_node(self,node):
        """Register ``node`` with the graph and with the root node."""
        self.nodes.add(node)
        # The root was previously asked to `addNode(node)`, a method Cnode
        # does not define (AttributeError). Recording the node in the root's
        # `nodes` set is the assumed intent -- NOTE(review): confirm.
        self.root.nodes.add(node)

    def add_edge(self, node1, node2):
        """Link ``node2`` below ``node1``.

        When ``node1`` is the root, ``node2`` is also flagged as a component
        and its word is added to ``contentSet``.
        """
        if node1.isRoot:
            self.contentSet.add(node2.word)
            node2.isComp = True
        node1.add_Unode(node2)
        node2.parent = node1

    def contains(self, word_assigned):
        """Return True when ``word_assigned`` belongs to a node directly below the root."""
        return word_assigned in self.contentSet

    def get_magicText(self):
        """Return the bracketed text of the whole graph."""
        return self.root.get_magicText()

    def addPair(self, newNode, num,type1):
        """Not implemented; does nothing and returns ``None``."""
        return

In [None]:
class Relation:
    """Placeholder class; no attributes or behaviour are defined yet."""

In [6]:
class GraphFactoryPipeline:
    """Parse a sentence, binarize its dependency tree and extract concepts,
    relations and modifier pairs.

    Args:
        verbose: ``2`` prints the parse tables and runs ``postprocess``.
        parser: Parser name handed to ``dependency_parse``.
    """

    def __init__(self, verbose=0, parser="gum"):
        self.parser = parser
        self.binarizer = Binarizer()
        self.exceptioned = []
        self.verbose = verbose

        # POS tags whose words count as concepts / relations.
        self.concept_pos = ["NN", "NNS", "NNP", "NNPS", "PRP", "PRP$"]
        self.relation_pos = ["VB", "VBD", "VBG", "VBN",
                             "VBP", "VBZ", "TO", "IN"]

        # Dependency relations collected as modifiers, per head category.
        self.modifier_relation = {
            "NN": ["amod", "nmod", "acl:relcl", "fixed", "compound", "det", "nmod:poss", "conj", "nummod"],
            "VB": ["advmod", "acl", "obl", "xcomp", "advcl", "obl:tmod", "parataxis", "obj","ccomp"]
        }

    def extract_concepts_relation(self, postag):
        """Split words into concepts and relations by POS tag.

        Args:
            postag: Word text -> ``(word_id, xpos)``.

        Returns:
            ``(concepts, relation)``: two lists of word texts in the
            iteration order of ``postag``.
        """
        concepts = []
        relation = []
        for word in postag:
            xpos = postag[word][1]
            if xpos in self.concept_pos:
                concepts.append(word)
            elif xpos in self.relation_pos:
                relation.append(word)
        return concepts, relation

    def down_right(self, tree):
        """Follow ``right`` links from ``tree`` and return the last node."""
        if tree.right is None:
            return tree
        return self.down_right(tree.right)

    def down_left(self, tree):
        """Follow ``left`` links from ``tree`` and return the last node."""
        if tree.left is None:
            return tree
        return self.down_left(tree.left)

    # NOTE(review): unfinished code, kept as it was apart from indentation.
    # Known problems to resolve before this can be used:
    #   * it has no `self` parameter although it lives inside the class;
    #   * it calls mono2Graph, mono2Graph_recur, combine_comp and reads
    #     quantifier_replacement, none of which are defined in this notebook;
    #   * the recursive calls pass five positional arguments, which does not
    #     line up with this signature (the `concept` parameter was added);
    #   * it uses counter.obj / counter.incrementO(), which UnifiedCounter
    #     does not provide;
    #   * it contains a leftover debug print(222).
    def tree2Graph_recur(sent_tree, G, mods, concept, pos = None, counter = -1):
        if(sent_tree is None):
            return
        else:
            if("pass" in sent_tree.val and (sent_tree.left != None or sent_tree.right != None)):
                G.passive = True
            if(any(list(map(lambda x: sent_tree.val is not None and x in sent_tree.val, list(modifiers))))):
                if("acl" in sent_tree.val):
                    G_prime = mono2Graph(sent_tree.left)
                    mods.add(G_prime.root)
                else:
                    left_result = mono2Graph_recur(sent_tree.left, G, set(), sent_tree.val,counter)
                    if(left_result is not None):
                        if(type(left_result) is set):
                            for item_result in left_result:
                                if(item_result is not None):
                                    mods.add(item_result)
                        else:
                            mods.add(left_result)
                return mono2Graph_recur(sent_tree.right, G, mods, pos,counter)
            else:
                if ((sent_tree.left is None and sent_tree.right is None) or "compound" in sent_tree.val):
                    if("compound" in sent_tree.val):
                        right = sent_tree.right
                        comPos = right.pos
                        while(right.right != None):
                            comPos = right.right.pos
                            right = right.right
                        newNode = Cnode(pos, sent_tree.left.val.lower(),
                                        comPos, sent_tree.mark)
                        newNode.start = sent_tree.left.id
                        G.add_node(newNode)
                        if(sent_tree.val == "compound:prt"):
                            newNode.word = right.val.lower() + " " + sent_tree.left.val.lower()
                            newNode.start = sent_tree.right.id
                            newNode.end = sent_tree.left.id
                        else:
                            combine_comp(sent_tree.right, newNode)
                        newNode.word = quantifier_replacement.get(newNode.word, newNode.word)
                        if(pos in contents or pos == "verb"):
                            G.add_edge(concept,newNode)
                            if(pos != "nsubj"):
                                G.addPair(newNode, counter.obj,"obj")
                        for node in mods:
                            if(node.npos == "CC"):
                                newNode.addCC(node)
                            else:
                                newNode.add_modifier(node)
                        return newNode
                    newNode = Cnode(pos, quantifier_replacement.get(sent_tree.val, sent_tree.val).lower(), sent_tree.pos, sent_tree.mark)
                    newNode.start = sent_tree.id
                    newNode.end = sent_tree.id
                    G.add_node(newNode)
                    if (any(list(map(lambda x : sent_tree.pos is not None and x in sent_tree.pos, list(modified))))
                            or any(list(map(lambda x: pos is not None and x in pos, list(contents))))
                            or pos == "verb"):
                        if(pos in contents or pos == "verb"):
                            G.add_edge(concept,newNode)
                            if(pos != "nsubj"):
                                G.addPair(newNode, counter.obj,"obj")
                                if(pos == "verb" and sent_tree.parent.val != "cop"):
                                    counter.incrementO()
                        for node in mods:
                            if(node.npos == "CC"):
                                newNode.addCC(node)
                            else:
                                newNode.add_modifier(node)
                        return newNode
                    else:
                        mods.add(newNode)
                        return newNode
                else:
                    if(any(list(map(lambda x: sent_tree.val is not None and x in sent_tree.val, list(contents))))):
                        pos_left = sent_tree.val
                        pos_right = pos
                        if("nsubj" in sent_tree.val):
                            pos_right = "verb"
                            pos_left = sent_tree.val[0:5]
                        if("cop" in sent_tree.val):
                            target = sent_tree.right
                            ifObj = False
                            while(target != None):
                                if(target.val == "nsubj"):
                                    break
                                if(target.val in ["obj", "obl"]):
                                    ifObj = True
                                    break
                                target = target.right
                            if(ifObj):
                                sent_tree.val = "aux"
                                return mono2Graph_recur(sent_tree, G, mods, pos,counter )
                            else:
                                pos_left = "verb"
                                pos_right = "obj"
                                mono2Graph_recur(sent_tree.left, G,set(),pos_left,counter)
                                output = mono2Graph_recur(sent_tree.right, G, mods, pos_right,counter)
                                counter.incrementO()
                                return output
                        if('conj' in sent_tree.val):
                            if (any(list(map(lambda x: pos is not None and x in pos, list(modifiers))))):
                                results = set()
                                print(222)
                                results.add(mono2Graph_recur(sent_tree.left, G, set(), pos,counter))
                                results.add(mono2Graph_recur(sent_tree.right, G, set(), pos,counter))
                                return results
                            else:
                                mono2Graph_recur(sent_tree.left, G, set(), pos,counter)

                                mono2Graph_recur(sent_tree.right, G, mods, pos,counter)

                        elif("aux" in sent_tree.val):
                            mono2Graph_recur(sent_tree.right, G, mods, "verb",counter)
                        elif("ob" in sent_tree.val):
                            if(not pos in ["verb", "obj"]):
                                right_result = mono2Graph_recur(sent_tree.right, G, set(), "Prime",counter)
                                if(right_result is not None):
                                    mods.add(right_result)
                                mono2Graph_recur(sent_tree.left, G, mods, pos_left, counter)
                            else:
                                mono2Graph_recur(sent_tree.left, G, set(),"obj",counter)
                                mono2Graph_recur(sent_tree.right, G, mods, "verb", counter)
                        else:
                            mono2Graph_recur(sent_tree.left, G,set(),pos_left,counter)
                            return mono2Graph_recur(sent_tree.right, G, mods, pos_right,counter)
                    elif(any(list(map(lambda x: sent_tree.val is not None and x in sent_tree.val, list(offFocus))))):
                        if(sent_tree.val == "expl"):
                            G.expl = True
                        mono2Graph_recur(sent_tree.right, G, mods, pos,counter)

    def tree_toGraph(self, tree):
        """Return a new, empty ``Cgraph``.

        The previous body, ``return new Cgraph()``, was not valid Python and
        stopped the whole class from being defined.

        NOTE(review): ``tree`` is not converted yet. The graph is anchored at
        a placeholder root node (prop "root", empty word, mark "0") because
        ``Cgraph`` needs a root; replace it once the conversion is written.
        """
        placeholder_root = Cnode("root", "", None, "0")
        return Cgraph(placeholder_root)

    def collect_modifiers(self, tree, sent_set, mod_type="NN"):
        """Walk ``tree`` and record ``{'head': ..., 'mod': ...}`` pairs.

        Args:
            tree: Binary dependency tree node.
            sent_set: Output dict, updated in place: relation name -> list
                of ``{'head': word, 'mod': text}`` entries.
            mod_type: Key into ``self.modifier_relation`` (``"NN"`` or
                ``"VB"``).

        For ``mark``/``case``/``compound``/``flat``/``nmod`` nodes the right
        subtree is the modifier text and the right-most leaf of the left
        subtree is the head. For relations listed in
        ``self.modifier_relation[mod_type]`` it is the other way round. A
        relation in both lists yields both entries.
        """
        if not tree.is_tree:
            return

        found = []
        if tree.val in ["mark", "case", "compound", "flat", "nmod"]:
            found.append(
                (list(tree.right.sorted_leaves().popkeys()),
                 self.down_right(tree.left).val)
            )
        if tree.val in self.modifier_relation[mod_type]:
            found.append(
                (list(tree.left.sorted_leaves().popkeys()),
                 self.down_right(tree.right).val)
            )

        for leave in found:
            # NOTE(review): `leave` is always a 2-tuple, so this test is
            # always true. A limit on the number of modifier tokens
            # (len(leave[0])) may have been intended -- confirm before
            # changing, since it would alter the results.
            if len(leave) > 0 and len(leave) < 10:
                head = leave[1]
                modifier = ' '.join([x[0] for x in leave[0]])
                sent_set.setdefault(tree.val, []).append(
                    {'head': head,'mod': modifier})

        self.collect_modifiers(tree.left, sent_set, mod_type)
        self.collect_modifiers(tree.right, sent_set, mod_type)

    def run_binarization(self, parsed, sentence):
        """Binarize one parsed sentence.

        Args:
            parsed: ``(parse_tree, postags, words)`` as returned by
                ``dependency_parse``. ``parse_tree`` is consumed while the
                tree is built.
            sentence: Original sentence text (not used here).

        Returns:
            ``(binary_dep, relation)`` from ``Binarizer.binarization``.
        """
        self.binarizer.parse_table = parsed[0]
        self.binarizer.postag = parsed[1]
        self.binarizer.words = parsed[2]
        # No replacements are applied in this pipeline. The tree nodes use
        # `replaced` as a mapping, so reset it to an empty dict (it used to
        # be reset to a list).
        self.binarizer.replaced = {}

        if self.verbose == 2:
            print()
            print(parsed[0])
            print()
            print(parsed[1])
            print()
            # Used to print an undefined name `replaced` (NameError).
            print(self.binarizer.replaced)

        binary_dep, relation = self.binarizer.binarization()
        if self.verbose == 2:
            self.postprocess(binary_dep)
        return binary_dep, relation

    def single_polarization(self, sentence):
        """Run the whole pipeline on one sentence.

        Prints the raw parse, then returns a dict with the keys
        ``'sentence'``, ``'concepts'``, ``'relations'`` and ``'modifiers'``.
        """
        parsed = dependency_parse(sentence, self.parser)
        print(parsed)
        concepts, relations = self.extract_concepts_relation(parsed[1])
        binary_dep, _ = self.run_binarization(parsed, sentence)

        # Named differently from the module-level `modifiers` set to avoid
        # shadowing it.
        sent_modifiers = {}
        self.collect_modifiers(binary_dep, sent_modifiers)

        return {
            'sentence': sentence,
            'concepts': concepts,
            'relations': relations,
            'modifiers': sent_modifiers
        }

    def postprocess(self, tree, svg=False):
        """Return ``tree`` as nested lists, or as bracketed text when ``svg``
        is False.

        This used to be nested inside ``single_polarization`` after its
        ``return`` statement, so ``self.postprocess`` did not exist.
        """
        sexpression = btree2list(tree, 0)
        if not svg:
            sexpression = '[%s]' % ', '.join(
                map(str, sexpression)).replace(",", " ").replace("'", "")
        # The result can be drawn with jupyter_draw_rsyntax_tree, or with
        # jupyter_draw_nltk_tree after converting it with nltk's
        # Tree.fromstring.
        return sexpression


In [7]:
# Pipeline instance with the defaults: verbose=0, parser="gum".
graph_factory = GraphFactoryPipeline()

In [8]:
import pprint
pp = pprint.PrettyPrinter(indent=2)

# Run the pipeline on one example sentence and show each part of the result.
# single_polarization also prints the raw parse (first line of the output).
graph = graph_factory.single_polarization("That store sales some beautiful flowers to attract customers.")
print(graph['concepts'])
print(graph['relations'])
pp.pprint(graph['modifiers'])

([['det', 1, 2], ['nsubj', 2, 3], ['root', 3, 'root'], ['det', 4, 6], ['amod', 5, 6], ['obj', 6, 3], ['mark', 7, 8], ['advcl', 8, 3], ['obj', 9, 8]], {'That': (1, 'DT'), 'store': (2, 'NN'), 'sales': (3, 'VBZ'), 'some': (4, 'DT'), 'beautiful': (5, 'JJ'), 'flowers': (6, 'NNS'), 'to': (7, 'TO'), 'attract': (8, 'VB'), 'customers': (9, 'NNS'), '.': (10, '.')}, {1: ('That', 'DT'), 2: ('store', 'NN'), 3: ('sales', 'VBZ'), 4: ('some', 'DT'), 5: ('beautiful', 'JJ'), 6: ('flowers', 'NNS'), 7: ('to', 'TO'), 8: ('attract', 'VB'), 9: ('customers', 'NNS'), 10: ('.', '.')})
['store', 'flowers', 'customers']
['sales', 'to', 'attract']
{ 'amod': [{'head': 'flowers', 'mod': 'beautiful'}],
  'det': [{'head': 'store', 'mod': 'That'}, {'head': 'flowers', 'mod': 'some'}],
  'mark': [{'head': 'to', 'mod': 'attract customers'}]}
