# 8.3 Context-Free Grammar

In [1]:
import nltk
# grammar1 production rules, read left to right:
#   S  expands to an NP followed by a VP
#   VP expands to V NP, or to V NP PP
#   PP expands to P followed by an NP
# Quoted symbols are terminals (literal words); the rest are categories.
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP              
  VP -> V NP | V NP PP    
  PP -> P NP              
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

# Build the recursive-descent parser for grammar1, then tokenise the
# sentence on whitespace.
rd_parser = nltk.RecursiveDescentParser(grammar1)
sent = "Mary saw Bob".split()

# Print every parse tree the parser yields for the token list.
for tree in rd_parser.parse(sent):
    print(tree)

(S (NP Mary) (VP (V saw) (NP Bob)))


### List of Syntactic Categories:

- S - Sentence
- NP - Noun Phrase
- VP - Verb Phrase
- PP - Prepositional Phrase
- Det - Determiner
- N - Noun
- V - Verb
- P - Preposition

TODO
If a grammar licenses more than one parse tree for a sentence, we say the sentence is **structurally ambiguous**. The ambiguity of the sentence *The dog saw a man in the park* is called **prepositional phrase attachment ambiguity**: the $PP$ *in the park* can be a child of either the $VP$ or the $NP$.

In [2]:
# The grammar's terminals are lowercase ("the", not "The"), so lowercase the
# sentence before splitting it into tokens.
sent_2 = "The dog saw a man in the park".lower().split()

# Construct the grammar1 parser inside this cell, under its own name, so the
# result does not depend on whatever `rd_parser` happens to be bound to when
# the cell is (re-)run.
grammar1_parser = nltk.RecursiveDescentParser(grammar1)

# Print every parse tree that grammar1 licenses for the sentence.
for tree in grammar1_parser.parse(sent_2):
    print(tree)

(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man) (PP (P in) (NP (Det the) (N park))))))
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man))
    (PP (P in) (NP (Det the) (N park)))))


Because the grammar licenses the two trees printed above, the sentence is said to be **structurally ambiguous**. The ambiguity here is **prepositional phrase attachment ambiguity**. The $PP$ `in the park` can either be a child of the $VP$ or the $NP$.

- When the $PP$ is a child of the $VP$, the event of seeing happened in the park. Both the dog and the man were in the park when it happened.
- When the $PP$ is a child of the $NP$, it describes the man: the man was in the park. The dog could have been inside or outside of the park when it saw him.


In [3]:
# A recursive context-free grammar: a category can reappear inside its own
# expansion.
#   - Nom -> Adj Nom is directly recursive (Nom appears on both sides).
#   - S -> NP VP combined with VP -> V S is indirectly recursive (an S can
#     contain another S).
# Quoted symbols are terminals (literal words); the rest are categories.
grammar2 = nltk.CFG.fromstring("""
  S  -> NP VP
  NP -> Det Nom | PropN
  Nom -> Adj Nom | N
  VP -> V Adj | V NP | V S | V NP PP
  PP -> P NP
  PropN -> 'Buster' | 'Chatterer' | 'Joe'
  Det -> 'the' | 'a'
  N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
  Adj  -> 'angry' | 'frightened' |  'little' | 'tall'
  V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'
  P -> 'on'
  """)

In [4]:
# Parser over grammar2. Note that this rebinds the shared `rd_parser` name,
# which was previously bound to a parser for grammar1.
rd_parser = nltk.RecursiveDescentParser(grammar2)

# The literal is already all lowercase, so lower() leaves it unchanged;
# split() then tokenises it on whitespace.
sent_3 = "the angry bear chased the frightened little squirrel".lower().split()

# Print each parse tree; the stacked adjectives are handled by the
# recursive rule Nom -> Adj Nom.
for t in rd_parser.parse(sent_3):
    print(t)

(S
  (NP (Det the) (Nom (Adj angry) (Nom (N bear))))
  (VP
    (V chased)
    (NP
      (Det the)
      (Nom (Adj frightened) (Nom (Adj little) (Nom (N squirrel)))))))


In [5]:
# Parser over grammar2, constructed in this cell so the cell does not rely
# on an earlier cell having bound `rd_parser` to grammar2.
rd_parser = nltk.RecursiveDescentParser(grammar2)

# Proper nouns keep their capitals because grammar2 lists them capitalised
# ('Chatterer', 'Buster'), so the sentence is split without lowercasing.
sent_4 = "Chatterer said Buster thought the tree was tall".split()

# Print each parse tree; embedded clauses come from the rule VP -> V S.
for t in rd_parser.parse(sent_4):
    print(t)

(S
  (NP (PropN Chatterer))
  (VP
    (V said)
    (S
      (NP (PropN Buster))
      (VP
        (V thought)
        (S (NP (Det the) (Nom (N tree))) (VP (V was) (Adj tall)))))))
