In [1]:
import nltk

In [2]:
# Toy context-free grammar for simple English sentences.
# A PP can be attached at the VP level (V NP PP) or inside an NP (Det N PP),
# so a sentence containing prepositional phrases may get more than one parse.
grammar = nltk.CFG.fromstring(
    """
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> Prop | Det N | Det N PP
  Prop -> "John" | "Mary" | "Bob" 
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """
)

In [3]:
# Recursive descent parser over the toy grammar defined above.
# trace=0 is the constructor's default, written out here for clarity.
rd_parser = nltk.RecursiveDescentParser(grammar, trace=0)

In [4]:
# Tokenize a short transitive sentence and show the resulting token list.
# NOTE(review): word_tokenize presumably needs NLTK's tokenizer data to be
# installed already; this notebook does not download it — confirm.
sentText = "Mary saw Bob"
sentTokens = nltk.word_tokenize(sentText, language="english")
print(sentTokens)

['Mary', 'saw', 'Bob']


In [7]:
# Parse the tokenized sentence and collect every tree the parser yields
# into a list so the parses can be walked again in later cells.
trees = rd_parser.parse(sentTokens)
treelist = [parse for parse in trees]

In [9]:
# Print each collected parse in its bracketed text form.
for tree in treelist:
    print(str(tree))

(S (NP (Prop Mary)) (VP (V saw) (NP (Prop Bob))))


In [10]:
# A sentence with two prepositional phrases: tokenize it, parse it with the
# current rd_parser, and print every tree the parser produces.
sent = "John saw a man in the park with a telescope"
sentTokens = nltk.word_tokenize(sent, language="english")

trees = rd_parser.parse(sentTokens)
for tree in trees:
    print(tree)

(S
  (NP (Prop John))
  (VP
    (V saw)
    (NP
      (Det a)
      (N man)
      (PP
        (P in)
        (NP
          (Det the)
          (N park)
          (PP (P with) (NP (Det a) (N telescope))))))))
(S
  (NP (Prop John))
  (VP
    (V saw)
    (NP (Det a) (N man))
    (PP
      (P in)
      (NP
        (Det the)
        (N park)
        (PP (P with) (NP (Det a) (N telescope)))))))
(S
  (NP (Prop John))
  (VP
    (V saw)
    (NP (Det a) (N man) (PP (P in) (NP (Det the) (N park))))
    (PP (P with) (NP (Det a) (N telescope)))))


In [11]:
# Same rule set as the toy grammar, with extra vocabulary: the verb "shot",
# the proper-noun entry "I", and the nouns "elephant" and "pajamas".
groucho_grammar = nltk.CFG.fromstring(
    """
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked" | "shot"
  NP -> Prop | Det N | Det N PP
  Prop -> "John" | "Mary" | "Bob" | "I"
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park" | "elephant" | "pajamas"
  P -> "in" | "on" | "by" | "with"
  """
)

In [12]:
# Rebind rd_parser to a parser over groucho_grammar; any cell run after this
# one that uses rd_parser parses with the extended vocabulary.
rd_parser = nltk.RecursiveDescentParser(groucho_grammar, trace=0)

In [13]:
# Parse the "elephant in my pajamas" sentence with the current rd_parser.
# The sentence is split on whitespace into sent4list, and every tree the
# parser yields is printed.
sent4list = "I shot an elephant in my pajamas".split()
for tree in rd_parser.parse(sent4list):
    print(tree)

(S
  (NP (Prop I))
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))
(S
  (NP (Prop I))
  (VP
    (V shot)
    (NP (Det an) (N elephant))
    (PP (P in) (NP (Det my) (N pajamas)))))


In [49]:
# Larger grammar: on top of the earlier rules it allows subject-less
# sentences (S -> VP), verbs followed by just a PP, double-object verbs
# (V NP NP), and a verb followed by a "to"-infinitive plus an object
# (V INF NP). The vocabulary is extended to match.
flight_grammar = nltk.CFG.fromstring(
    """
  S -> NP VP | VP 
  VP -> V NP | V NP PP | V PP | V NP NP | V INF NP
  PP -> P NP
  INF -> TO V
  TO -> "to"
  V -> "saw" | "ate" | "walked" | "shot" | "book" | "prefer" | "gave" | "want"
  NP -> Prop | Det N | Det N PP 
  Prop -> "John" | "Mary" | "Bob" | "I" | "Houston" | "Jack"
  Det -> "a" | "an" | "the" | "my" | "that"
  N -> "man" | "dog" | "cat" | "telescope" | "park" | "elephant" | "pajamas" | "flight" | "bone"
  P -> "in" | "on" | "by" | "with" | "through"
  """
)

# Rebind rd_parser so that later cells parse with flight_grammar.
rd_parser = nltk.RecursiveDescentParser(flight_grammar, trace=0)

In [54]:
# Parse a sentence containing a "to"-infinitive with the current rd_parser
# and print each resulting tree. Tokens come from a plain whitespace split.
sent1 = "I want to book that flight"
sent1Toks = sent1.split()
trees = rd_parser.parse(sent1Toks)
for tree in trees:
    print(str(tree))

(S
  (NP (Prop I))
  (VP (V want) (INF (TO to) (V book)) (NP (Det that) (N flight))))
