## 1 Grammatical dilemmas:

#### Ambiguity:

In [2]:
import nltk, re, pprint
from nltk.tokenize import word_tokenize, sent_tokenize

In [3]:
# Context-free grammar (CFG) for the "elephant in my pajamas" sentence.
# A PP may attach to a noun phrase (NP -> Det N PP) or to a verb phrase
# (VP -> VP PP); having both rules is what makes the sentence ambiguous.

groucho_grammar = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""") 

In [25]:
# Example sentence with a prepositional-phrase attachment ambiguity.
text = "I shot an elephant in my pajamas"

# Split it into word tokens; the bare name on the last line displays them.
sent = nltk.word_tokenize(text)
sent

['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']

In [5]:
# Chart-parse the tokenised sentence with the Groucho grammar and print every
# tree the parser yields -- one tree per reading of the sentence.
parser = nltk.ChartParser(groucho_grammar)
for tree in parser.parse(sent):
    print(tree)
    # tree.draw() can be called here instead to view the tree graphically.

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [6]:
from pickle import load

# Load the previously pickled tagger. The context manager closes the file even
# if unpickling raises, and the handle name no longer rebinds the built-in
# `input` for the rest of the notebook.
# NOTE: unpickling can execute arbitrary code -- only load 't2.pkl' if it
# comes from a trusted source.
with open('t2.pkl', 'rb') as tagger_file:  # binary format for reading
    tagger = load(tagger_file)

# Two sentences whose leading -ing word is part-of-speech ambiguous.
text2 = 'Fighting animals could be dangerous. Visiting relatives can be tiresome.' # ambiguous pos - JJ vs. NN 

# Tag one sentence at a time: split into sentences, then into word tokens.
for sentence in sent_tokenize(text2):
    print(tagger.tag(word_tokenize(sentence)))

[('Fighting', 'NN'), ('animals', 'NNS'), ('could', 'MD'), ('be', 'BE'), ('dangerous', 'JJ'), ('.', '.')]
[('Visiting', 'NN'), ('relatives', 'NNS'), ('can', 'MD'), ('be', 'BE'), ('tiresome', 'NN'), ('.', '.')]


## 2 Context free grammar:

In [7]:
# Small CFG for simple transitive sentences: a VP is a verb plus an object NP
# with an optional PP, and an NP is either a proper name or Det N with an
# optional PP. Determiners, nouns, verbs and prepositions are listed in
# lowercase only; the three names are capitalised.
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

In [8]:
# A plain whitespace split stands in for a real tokenizer on this short input.
sent2 = 'Mary saw Bob'.split() # simplified word tokenisation

# Parse top-down with a recursive-descent parser over grammar1 and print each
# tree found.
rd_parser = nltk.RecursiveDescentParser(grammar1)
for tree in rd_parser.parse(sent2):
    print(tree)
    # tree.draw() can be called here instead to view the tree graphically.

(S (NP Mary) (VP (V saw) (NP Bob)))


In [9]:
# Tokenise a longer sentence by whitespace and display the tokens.
# NOTE(review): 'The' keeps its capital letter here, while grammar1 only lists
# the lowercase determiner "the" -- lowercase the tokens before parsing this
# sentence with grammar1.
sent3 = 'The dog saw a man in the park'.split()
sent3

['The', 'dog', 'saw', 'a', 'man', 'in', 'the', 'park']

In [3]:
# Launch NLTK's recursive-descent parser demo application.
# NOTE(review): the recorded output shows Tkinter callbacks, so this opens a
# GUI window; it presumably needs a display and manual interaction -- confirm
# before leaving it in a notebook that is run unattended.
nltk.app.rdparser()

[('under',)]
[('with',)]
[('in',)]
[('under',), ('with',)]
[('ate',)]
[('saw',)]
[('dog',)]
[('telescope',)]
[('park',)]
[('dog',), ('telescope',)]
[('man',)]
[('park',), ('dog',), ('telescope',)]
[('the',)]
[('a',)]
[(V, NP)]
[(V,)]
[(V, NP, PP)]
[(V, NP), (V,)]
[(Det, N, PP)]
[(Det, N)]
S [(NP, VP)]
NP [(Det, N, PP), (Det, N)]
VP [(V, NP, PP), (V, NP), (V,)]
PP [(P, NP)]
NP [('I',)]
Det [('the',), ('a',)]
N [('man',), ('park',), ('dog',), ('telescope',)]
V [('ate',), ('saw',)]
P [('in',), ('under',), ('with',)]


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\AD\anaconda3\lib\tkinter\__init__.py", line 1883, in __call__
    return self.func(*args)
  File "C:\Users\AD\anaconda3\lib\site-packages\nltk\draw\cfg.py", line 522, in _apply
    productions = self._parse_productions()
  File "C:\Users\AD\anaconda3\lib\site-packages\nltk\draw\cfg.py", line 492, in _parse_productions
    productions += _read_cfg_production(line)
  File "C:\Users\AD\anaconda3\lib\site-packages\nltk\grammar.py", line 1315, in _read_cfg_production
    return _read_production(input, standard_nonterm_parser)
  File "C:\Users\AD\anaconda3\lib\site-packages\nltk\grammar.py", line 1353, in _read_production
    raise ValueError("Expected an arrow")
ValueError: Expected an arrow
Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\AD\anaconda3\lib\tkinter\__init__.py", line 1883, in __call__
    return self.func(*args)
  File "C:\Users\AD\anaconda3\lib\site-packages\nl

#### Writing your own grammar:

In [15]:
# Read the hand-written grammar from the file mygrammar.cfg.
grammar_user = nltk.data.load('file:mygrammar.cfg')

# Lowercased, whitespace-split test sentences.
# Note: these assignments rebind `sent` and `sent2` from earlier cells.
sent = 'The dog saw a man in the park'.lower().split()
sent2 = 'The dog saw a man'.lower().split()

# With trace=1 the parser itself reports each parse it finds, so together with
# the print() below every tree shows up twice in the output.
rd_parser = nltk.RecursiveDescentParser(grammar_user, trace=1) # use trace for more step info
for tree in rd_parser.parse(sent):
    print(tree)

Parsing 'the dog saw a man in the park'
Found a parse:
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man) (PP (P in) (NP (Det the) (N park))))))
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man) (PP (P in) (NP (Det the) (N park))))))
Found a parse:
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man))
    (PP (P in) (NP (Det the) (N park)))))
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man))
    (PP (P in) (NP (Det the) (N park)))))


## 3 Parsing with context free grammar:

#### Shift-reduce parsing:

In [12]:
# Shift-reduce parse of the longer sentence. trace=2 logs every step, marked
# S (shift) or R (reduce), together with the current stack and the remaining
# input; any complete tree that results is printed afterwards.
sr_parser = nltk.ShiftReduceParser(grammar_user, trace=2)
for tree in sr_parser.parse(sent):
    print(tree)

Parsing 'the dog saw a man in the park'
    [ * the dog saw a man in the park]
  S [ 'the' * dog saw a man in the park]
  R [ Det * dog saw a man in the park]
  S [ Det 'dog' * saw a man in the park]
  R [ Det N * saw a man in the park]
  R [ NP * saw a man in the park]
  S [ NP 'saw' * a man in the park]
  R [ NP V * a man in the park]
  S [ NP V 'a' * man in the park]
  R [ NP V Det * man in the park]
  S [ NP V Det 'man' * in the park]
  R [ NP V Det N * in the park]
  R [ NP V NP * in the park]
  R [ NP VP * in the park]
  R [ S * in the park]
  S [ S 'in' * the park]
  R [ S P * the park]
  S [ S P 'the' * park]
  R [ S P Det * park]
  S [ S P Det 'park' * ]
  R [ S P Det N * ]
  R [ S P NP * ]
  R [ S PP * ]


#### The left-corner parser:

In [22]:
# Left-corner chart parse of the shorter sentence. trace=2 prints the chart
# edges as they are added, labelled with the rule that produced them; any
# complete tree is printed afterwards.
lc_parser = nltk.LeftCornerChartParser(grammar_user, trace=2)
for tree in lc_parser.parse(sent2):
    print(tree)

|.  the  .  dog  .  saw  .   a   .  man  .|
Leaf Init Rule:
|[-------]       .       .       .       .| [0:1] 'the'
|.       [-------]       .       .       .| [1:2] 'dog'
|.       .       [-------]       .       .| [2:3] 'saw'
|.       .       .       [-------]       .| [3:4] 'a'
|.       .       .       .       [-------]| [4:5] 'man'
Filtered Bottom Up Predict Combine Rule:
|[-------]       .       .       .       .| [0:1] Det -> 'the' *
Filtered Bottom Up Predict Combine Rule:
|[------->       .       .       .       .| [0:1] NP -> Det * N
|[------->       .       .       .       .| [0:1] NP -> Det * N PP
Filtered Bottom Up Predict Combine Rule:
|.       [-------]       .       .       .| [1:2] N  -> 'dog' *
Filtered Single Edge Fundamental Rule:
|[---------------]       .       .       .| [0:2] NP -> Det N *
Filtered Bottom Up Predict Combine Rule:
|[--------------->       .       .       .| [0:2] S  -> NP * VP
Filtered Bottom Up Predict Combine Rule:
|.       .       [-------]    

## 4 Dependencies and dependency grammar:

In [23]:
# Dependency grammar for the Groucho sentence: each rule reads
# head -> possible dependents. 'in' is listed as a dependent of both 'shot'
# and 'elephant', mirroring the two PP attachments of the CFG version.
groucho_dep_grammar = nltk.DependencyGrammar.fromstring("""
'shot' -> 'I' | 'elephant' | 'in'
'elephant' -> 'an' | 'in'
'in' -> 'pajamas'
'pajamas' -> 'my'
""")
# Printing the grammar lists its productions one per line.
print(groucho_dep_grammar)

Dependency grammar with 7 productions
  'shot' -> 'I'
  'shot' -> 'elephant'
  'shot' -> 'in'
  'elephant' -> 'an'
  'elephant' -> 'in'
  'in' -> 'pajamas'
  'pajamas' -> 'my'


##### Projective dependency parser:

In [33]:
# Tokenise the Groucho sentence here instead of relying on the notebook-global
# `sent`: an earlier cell rebinds `sent` to 'the dog saw a man in the park',
# so on a fresh top-to-bottom run this cell would otherwise parse a sentence
# that groucho_dep_grammar does not describe.
groucho_sent = 'I shot an elephant in my pajamas'.split()

# Projective dependency parse; each tree printed is one attachment of 'in'.
pdp = nltk.ProjectiveDependencyParser(groucho_dep_grammar)
trees = pdp.parse(groucho_sent)
for tree in trees:
    print(tree)
    # tree.draw() can be called here instead to view the tree graphically.

(shot I (elephant an (in (pajamas my))))
(shot I (elephant an) (in (pajamas my)))
