In [1]:
import nltk 

#### Top-Down Parsing

In [10]:
# Toy context-free grammar: phrase-structure rules first, then the lexicon.
# An NP may end in a PP and a VP may take an NP followed by a PP, so a
# prepositional phrase can attach in more than one place.
toy_cfg_rules = """
S -> NP VP
NP -> Det N | Det N PP
VP -> V | V NP | V NP PP
PP -> P NP

Det -> 'a' | 'an' | 'the'
N -> 'man' | 'park' | 'dog' | 'telescope'
V -> 'saw' | 'walked'
P -> 'in' | 'with'
"""
grammar = nltk.CFG.fromstring(toy_cfg_rules)

# Sentence to be parsed; it is split on whitespace before being handed to a parser.
test_str = "the man saw a dog in the park with a telescope"

In [15]:
from nltk.parse import RecursiveDescentParser

# Build a top-down (recursive descent) parser over the current grammar.
rdstr = RecursiveDescentParser(grammar)

# Tokenise the sentence on whitespace, then print every tree the parser yields.
tokens = test_str.split()
for parse_tree in rdstr.parse(tokens):
    print(parse_tree)

(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP
      (Det a)
      (N dog)
      (PP
        (P in)
        (NP
          (Det the)
          (N park)
          (PP (P with) (NP (Det a) (N telescope))))))))
(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP (Det a) (N dog))
    (PP
      (P in)
      (NP
        (Det the)
        (N park)
        (PP (P with) (NP (Det a) (N telescope)))))))
(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP (Det a) (N dog) (PP (P in) (NP (Det the) (N park))))
    (PP (P with) (NP (Det a) (N telescope)))))


#### Bottom-Up Parsing

In [18]:
# Small CFG for the bottom-up demo: proper-noun NPs plus Det N (PP) phrases.
# The determiner terminal is spelled "the" with no leading space. Sentences are
# tokenised with str.split(), which never yields a token containing whitespace,
# so a terminal written as " the" could never match a token and any sentence
# containing "the" would not be covered by the grammar.
grammar = nltk.CFG.fromstring("""
S -> NP VP
VP -> V NP
V -> "saw" | "ate"
NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
Det -> "a" | "an" | "the" | "my"
N -> "dog"| "cat" | "cookie" | "park" 
PP -> P NP 
P -> "in" | "on" 
""")

# Sentence to be parsed; it is split on whitespace before being handed to a parser.
test_str = "Mary saw a cat"

In [20]:
# ShiftReduceParser is a bottom-up parser: it starts from the words and tries
# to reduce the token sequence to the grammar's start symbol.
from nltk.parse import ShiftReduceParser
srp = ShiftReduceParser(grammar)

# Tokenise the sentence on whitespace and print whatever the parser yields.
tokens = test_str.split()
for parse_tree in srp.parse(tokens):
    print(parse_tree)

# Caveat: the shift-reduce parser may fail to find a parse tree even when one
# exists for the sentence. For this sentence a parse is found (see the output).

(S (NP Mary) (VP (V saw) (NP (Det a) (N cat))))


##### For both parsing techniques, you can open NLTK's interactive application and edit the 'Text' and 'Grammar' according to your requirements

In [None]:
# Opens NLTK's recursive-descent (top-down) parser application, in which the
# 'Text' and 'Grammar' can be edited.
# NOTE(review): presumably a GUI window that needs a local display - confirm
# before running this cell on a headless or remote kernel.
nltk.app.rdparser()

In [None]:
# Opens NLTK's shift-reduce (bottom-up) parser application, in which the
# 'Text' and 'Grammar' can be edited.
# NOTE(review): presumably a GUI window that needs a local display - confirm
# before running this cell on a headless or remote kernel.
nltk.app.srparser()

#### Parsing using a PCFG (probabilistic context-free grammar)

In [21]:
# Probabilistic CFG: every production carries its probability in brackets, and
# the probabilities of all productions sharing a left-hand side sum to 1
# (for NP: 0.4 + 0.1 + 0.18 + 0.04 + 0.18 + 0.1).
pcfg_rules = """
    S -> NP VP [1.0] 
    PP -> P NP [1.0]
    VP -> V NP [0.7] | VP PP [0.3] 
    NP -> NP PP [0.4] 
    P -> 'with' [1.0]
    V -> 'saw' [1.0]
    NP -> 'astronomers' [0.1] | 'ears' [0.18] | 'saw' [0.04] | 'stars' [0.18] | 'telescopes' [0.1]
    """
pcfg_grammar = nltk.PCFG.fromstring(pcfg_rules)

# The PP 'with ears' can attach to an NP (NP -> NP PP) or to a VP (VP -> VP PP),
# so this sentence has more than one parse under the grammar above.
test_str = "astronomers saw stars with ears"

In [23]:
from nltk.parse import pchart

# Chart parser driven by the probabilistic grammar.
parser = pchart.InsideChartParser(pcfg_grammar)

# Tokenise on whitespace and print every parse the parser yields; each printed
# tree is followed by its probability, shown as (p=...).
tokens = test_str.split()
for scored_tree in parser.parse(tokens):
    print(scored_tree)

(S
  (NP astronomers)
  (VP (V saw) (NP (NP stars) (PP (P with) (NP ears))))) (p=0.0009072)
(S
  (NP astronomers)
  (VP (VP (V saw) (NP stars)) (PP (P with) (NP ears)))) (p=0.0006804)
