# 7.4 Recursion in Linguistic Structure

## Building Nested Structure with Cascaded Chunkers

In [25]:
import nltk
import os
from nltk.corpus import conll2000
# Tell NLTK where to find the MEGAM binary; the path is relative to the
# notebook's working directory.
# NOTE(review): nothing in this section appears to use MEGAM -- confirm whether
# another part of the notebook still needs this call.
nltk.config_megam('../megam')

In [26]:
# Cascaded chunk grammar with four stages: NP, PP, VP and CLAUSE.
# RegexpParser applies the stages in the order they are listed. PP and CLAUSE
# build on chunks produced by earlier stages, whereas the VP pattern also
# mentions CLAUSE, which is only produced by the stage that follows it.
grammar = r"""
NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
PP: {<IN><NP>}               # Chunk prepositions followed by NP
VP: {<VB.*><NP|PP|CLAUSE>+$} # Chunk verbs and their arguments
CLAUSE: {<NP><VP>}           # Chunk NP, VP
"""

In [27]:
# Build the parser without a `loop` argument, i.e. a single run through the
# stages of `grammar`.
cp = nltk.RegexpParser(grammar)
sentence = [("Mary", "NN"), ("saw", "VBD"), ("the", "DT"), ("cat", "NN"),
            ("sit", "VB"), ("on", "IN"), ("the", "DT"), ("mat", "NN")]
# The VP headed by "saw" is missed: its pattern needs <VB.*> followed by a
# CLAUSE, but the CLAUSE stage only runs after the VP stage has finished.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(cp.parse(sentence))

(S
  (NP Mary/NN)
  saw/VBD
  (CLAUSE
    (NP the/DT cat/NN)
    (VP sit/VB (PP on/IN (NP the/DT mat/NN)))))


In [28]:
# A sentence with one more level of embedding ("John thinks ...").
sentence = [("John", "NNP"), ("thinks", "VBZ"), ("Mary", "NN"),
            ("saw", "VBD"), ("the", "DT"), ("cat", "NN"), ("sit", "VB"),
            ("on", "IN"), ("the", "DT"), ("mat", "NN")]
# Relies on the parser `cp` built in an earlier cell. In the recorded output
# both "thinks" and "saw" are left outside any VP chunk.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(cp.parse(sentence))

(S
  (NP John/NNP)
  thinks/VBZ
  (NP Mary/NN)
  saw/VBD
  (CLAUSE
    (NP the/DT cat/NN)
    (VP sit/VB (PP on/IN (NP the/DT mat/NN)))))


In [42]:
# Rebuild the parser so that it runs through the list of stages twice.
cp = nltk.RegexpParser(grammar, loop=2)
sentence = [
    ("Mary", "NN"), ("saw", "VBD"), ("the", "DT"), ("cat", "NN"),
    ("sit", "VB"), ("on", "IN"), ("the", "DT"), ("mat", "NN"),
]
# On the second pass the VP stage can match a verb followed by the CLAUSE
# built during the first pass, so the VP headed by "saw" is no longer missed.
result = cp.parse(sentence)

In [44]:
# Show the looped parse as a diagram. Tree.draw() opens its own GUI window,
# so no inline output is recorded for this cell.
result.draw()

## Trees

In [45]:
# A tree is built from a node label and a list of children.
tree1 = nltk.Tree('NP', ['Alice'])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tree1)

(NP Alice)


In [46]:
# A node may have several leaf children.
tree2 = nltk.Tree('NP', ['the', 'rabbit'])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tree2)

(NP the rabbit)


In [47]:
# Children may themselves be trees; this is how nested structure is built up
# from the NP trees created in the earlier cells.
tree3 = nltk.Tree('VP', ['chased', tree2])
tree4 = nltk.Tree('S', [tree1, tree3])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tree4)

(S (NP Alice) (VP chased (NP the rabbit)))


In [48]:
# Indexing a tree returns the child at that position (here the VP subtree).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tree4[1])

(VP chased (NP the rabbit))


In [49]:
# label() returns the node label of a subtree.
tree4[1].label()

'VP'

In [50]:
# leaves() returns the tree's tokens as a flat list, in left-to-right order.
tree4.leaves()

['Alice', 'chased', 'the', 'rabbit']

In [51]:
# Chained indexing walks down the tree: child 1 of S is the VP, child 1 of
# the VP is the NP, and child 1 of that NP is the leaf itself.
tree4[1][1][1]

'rabbit'

In [52]:
# Show the VP subtree as a diagram. Tree.draw() opens its own GUI window,
# so no inline output is recorded for this cell.
tree3.draw()

## Tree Traversal

In [53]:
def traverse(t):
    """Write a bracketed, space-separated rendering of ``t`` to stdout.

    If calling ``t.label()`` succeeds, ``t`` is treated as a subtree: an
    opening parenthesis and the label are written, every child is traversed
    recursively, and a closing parenthesis is written.  If the call raises
    ``AttributeError``, ``t`` is treated as a leaf and written as-is.

    Every token is followed by a single space and no newline is emitted, so
    the output matches what ``print(..., end=" ")`` would produce.  Writing
    through ``sys.stdout`` keeps the function valid on both Python 2 and
    Python 3, unlike the trailing-comma ``print`` statement.

    Returns None.
    """
    import sys  # local import so the cell does not depend on other cells

    write = sys.stdout.write
    try:
        label = t.label()
    except AttributeError:
        # No usable label() method: t is a leaf token, not a subtree.
        write('%s ' % (t,))
        return
    # t.label() succeeded, so t is a subtree whose children can be iterated.
    write('( %s ' % (label,))
    for child in t:
        traverse(child)
    write(') ')

In [54]:
# Walk the whole sentence tree, printing its bracketed form token by token.
traverse(tree4)

( S ( NP Alice ) ( VP chased ( NP the rabbit ) ) )
