In [1]:
import sys
# Put the parent directory on the import path before importing doctable
# (presumably so the in-repo copy of the package is picked up — confirm).
sys.path.append('..')
import doctable as dt
from spacy import displacy
import spacy
from spacy.matcher import Matcher
from pprint import pprint
# Load the spaCy pipeline registered under the 'en' shortcut name; `nlp` is shared by every cell below.
# NOTE(review): shortcut names such as 'en' look like a spaCy v2 convention; newer releases
# expect a full package name — confirm against the installed spaCy version.
nlp = spacy.load('en')

In [2]:
# Example text: two short sentences, only the second of which names a person.
exstr = 'Hat is red. Barak Obama is tall.'
# Run the spaCy pipeline on the example; `doc` is reused by the cells below.
doc = nlp(exstr)
# Bare last expression so the notebook displays the parsed document.
doc

Hat is red. Barak Obama is tall.

In [3]:
# Ask DocParser for parse-tree objects with entity merging requested, then print the
# entity nodes of each tree (the output below suggests one tree per sentence — confirm).
parsetrees = dt.DocParser.get_parsetree_obj(doc, merge_ents=True)
for tree in parsetrees:
    entity_nodes = tree.get_ents()
    print(entity_nodes)

[]
[ParseNode(Barak), ParseNode(Obama)]


## Making Parsetrees

By default, `parse_tok_func=None` means it will use the internal `.parse_tok()` method, passing additional arguments through the `parse_tok_args` parameter. You may add additional token info by mapping attribute names to functions (attrname -> func) in the `tok_info_map` parameter.

`def get_parsetree(cls, doc, tok_info_map=None, children_attrname='childs', merge_ents=False, spacy_ngram_matcher=None, merge_noun_chunks=False)`

In [None]:
# Choose the name under which each node's children are stored:
# 'children' here instead of the default 'childs' shown in the signature above.
dt.DocParser.get_parsetree(doc, children_attrname='children')

In [None]:
# Customize the attributes attached to each node: every entry maps an attribute
# name to a function that receives the spaCy token and returns the value to store.
tok_info_map = dict(
    # token text as produced by DocParser.parse_tok with lemmatization enabled
    tok=lambda token: dt.DocParser.parse_tok(token, lemmatize=True),
    # part-of-speech label taken from the token's pos_ attribute
    pos=lambda token: token.pos_,
)
dt.DocParser.get_parsetree(doc, tok_info_map=tok_info_map)

### Generate Parsetrees While Applying Token Merges

In [None]:
# Default token info with entity merging switched on (merge_ents=True).
# The full version includes tag, pos, dep, and ent_type in addition to 'tok' and the
# children entry (keyed 'childs' by default per the signature above; the original note
# said 'children' — TODO confirm against the displayed output).
dt.DocParser.get_parsetree(doc, merge_ents=True)

In [None]:
# Build the spaCy Matcher that will be handed to .get_parsetree(): a single pattern
# matching the token "tall" (case-insensitive) immediately followed by punctuation.
tall_then_punct = [{'LOWER':'tall'}, {'IS_PUNCT':True}]
matcher = Matcher(nlp.vocab)
matcher.add('currency', None, tall_then_punct)

# In the printed tree, "tall." at the end of the sentence appears as one merged token,
# and the parsetree is still built correctly around it.
merged_tree = dt.DocParser.get_parsetree(doc, spacy_ngram_matcher=matcher)
pprint(merged_tree)

# Re-parse the example text: applying the match (called in .tokenize_doc()) modified
# `doc`, so this restores an unmerged document for later cells.
doc = nlp(exstr)

## GrammarTree Objects
These objects allow you to work with ParseTree objects. They are produced by DocParser objects to extract aspects of parsetrees.