In [1]:
import sys
sys.path.append('..')
import doctable as dt
import spacy
from pprint import pprint
# Load the small English pipeline by its full package name. The bare 'en'
# shortcut link only exists on spaCy 2.x installs and was removed in spaCy 3,
# whereas the package name resolves on both as long as the model is installed.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Example text with two short sentences; the second contains a multi-word
# person name, which the entity-related cells further down rely on.
exstr = 'Hat is red. Barak Obama is tall for a dude.'
# Run the spaCy pipeline loaded above over the example text.
doc = nlp(exstr)
# Bare last expression so the notebook displays the parsed document.
doc

Hat is red. Barak Obama is tall for a dude.

## Making Parsetrees

By default, `parse_tok_func=None`, which means the vanilla `dt.DocParser.parse_tok()` method is used. Oftentimes you will want to pass a lambda function that sets some of the parameters of that method, since it has a number of useful features.

You can also add additional token properties to the parsetree nodes by passing a dictionary that maps attribute name -> function as the `info_func_map` parameter.

In [7]:
# Build the parse trees for the parsed document. The call returns a list of
# ParseTree objects (two for this two-sentence example).
# NOTE(review): merge_ents=True presumably collapses a multi-token named entity
# into a single node ('Barak Obama' is printed as one node) - confirm against
# the DocParser documentation.
parsetrees = dt.DocParser.get_parsetrees(doc, merge_ents=True)
# Bare last expression so the notebook displays the list of trees.
parsetrees

[<doctable.parsetree.ParseTree at 0x7f8424523748>,
 <doctable.parsetree.ParseTree at 0x7f83a596f7f0>]

In [8]:
# Print an ASCII rendering of every parse tree, with a blank line after each
# one so consecutive trees are visually separated.
for pt in parsetrees:
    pt.print_ascii_tree()
    print()

├─ (ROOT) is
|  ├─ (nsubj) hat
|  ├─ (acomp) red

├─ (ROOT) is
|  ├─ (nsubj) Barak Obama
|  ├─ (acomp) tall
|  ├─ (prep) for



In [14]:
# Iterating a ParseTree yields its ParseNode objects; inspect the first tree.
first_tree = parsetrees[0]
print([node for node in first_tree])

# i, tok, dep, tag and pos are the five inherent node properties. info comes
# last and is empty here because no info_func_map was passed when these trees
# were built.
for attr in ('i', 'tok', 'dep', 'tag', 'pos', 'info'):
    print([getattr(node, attr) for node in first_tree])

[ParseNode(hat), ParseNode(is), ParseNode(red), ParseNode(.)]
[0, 1, 2, 3]
['hat', 'is', 'red', '.']
['nsubj', 'ROOT', 'acomp', 'punct']
['NNP', 'VBZ', 'JJ', '.']
['PROPN', 'AUX', 'ADJ', 'PUNCT']
[{}, {}, {}, {}]


In [22]:
# Extra data can be attached to each node's .info dict: every key of the map
# becomes a key in node.info, and the mapped function supplies the value
# (here the token's entity type, which is '' for non-entity tokens).
fm = dict(ent=lambda tok: tok.ent_type_)
parsetrees = dt.DocParser.get_parsetrees(
    doc,
    merge_ents=True,
    info_func_map=fm,
)

# Show (token text, info) pairs for the second sentence's tree.
second_tree = parsetrees[1]
print([(node.tok, node.info) for node in second_tree])

[('Barak Obama', {'ent': 'PERSON'}), ('is', {'ent': ''}), ('tall', {'ent': ''}), ('for', {'ent': ''}), ('a', {'ent': ''}), ('dude', {'ent': ''}), ('.', {'ent': ''})]


## Working With ParseTree Objects
ParseTree objects can be manipulated to extract grammatical information.