# Natural Language Processing

## Exercise Sheet 9

In [1]:
#imports for all exercises
import nltk
from nltk import grammar, parse

### Exercise 1

Take the following grammar:

In [106]:
g = """
%start S
S                    -> NP[AGR=?n] VP[AGR=?n]
NP[AGR=?n]           -> PropN[AGR=?n]
VP[TENSE=?t, AGR=?n] -> Cop[TENSE=?t, AGR=?n] Adj


Cop[TENSE=pres,  AGR=[NUM=sg, PER=1]] -> 'am'
Cop[TENSE=pres,  AGR=[NUM=sg, PER=2]] -> 'are'
Cop[TENSE=pres,  AGR=[NUM=sg, PER=3]] -> 'is'
PropN[AGR=[NUM=sg, PER=1]]            -> 'I'
PropN[AGR=[NUM=sg, PER=2]]            -> 'you'
PropN[AGR=[NUM=sg, PER=3]]            -> 'she'
PropN[AGR=[NUM=pl, PER=3]]            -> 'they'
Adj                                   -> 'happy'
"""
gr = grammar.FeatureGrammar.fromstring(g)

def parse_sent(sent, gr):
    tokens = sent.split()
    parser = parse.FeatureEarleyChartParser(gr)
    trees = parser.parse(tokens)
    for tree in trees: print(tree)

as starting point to correctly parse word sequences like "I am happy" and "she is happy" but not " * you is happy" or " * they am happy". 

In [107]:
sent1 = "I am happy"
sent2 = "she is happy"
sent3 = "you is happy"
sent4 = "they am happy"

In [108]:
parse_sent(sent1,gr)

(S[]
  (NP[AGR=[NUM='sg', PER=1]] (PropN[AGR=[NUM='sg', PER=1]] I))
  (VP[AGR=[NUM='sg', PER=1], TENSE='pres']
    (Cop[AGR=[NUM='sg', PER=1], TENSE='pres'] am)
    (Adj[] happy)))


In [109]:
parse_sent(sent2,gr)

(S[]
  (NP[AGR=[NUM='sg', PER=3]] (PropN[AGR=[NUM='sg', PER=3]] she))
  (VP[AGR=[NUM='sg', PER=3], TENSE='pres']
    (Cop[AGR=[NUM='sg', PER=3], TENSE='pres'] is)
    (Adj[] happy)))


In [110]:
parse_sent(sent3,gr)

In [111]:
parse_sent(sent4,gr)

### Exercise 2

Develop a variant of the following grammar

In [90]:
g = """
% start S

# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n, -count]
NP[NUM=?n] -> PropN[NUM=?n]
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl]
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
VP[TENSE=?t, AGR=?n] -> Cop[TENSE=?t, AGR=?n] Adj
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'Kim' | 'Jody'
N[NUM=sg, +count] -> 'dog' | 'girl' | 'car' | 'child' | 'boy' | 'water'
N[NUM=pl, +count] -> 'dogs' | 'girls' | 'cars' | 'children' |'boys'
N[NUM=sg, -count] -> 'water'
IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks' | 'sings'
TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes' 
IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk' | 'sing'
TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
IV[TENSE=past] -> 'disappeared' | 'walked'
TV[TENSE=past] -> 'saw' | 'liked'
Cop[TENSE=pres,  AGR=[NUM=sg, PER=3]] -> 'is'
Adj                                   -> 'precious'
"""

gr = grammar.FeatureGrammar.fromstring(g)

def parse_sent(sent, gr):
    tokens = sent.split()
    parser = parse.FeatureEarleyChartParser(gr)
    trees = parser.parse(tokens)
    for tree in trees: print(tree)

that uses a feature `COUNT` to make the distinctions shown below:

(1a) the boy sings

(1b) * boy sings

(2a) the boys sing

(2b) boys sing

(3a) the water is precious

(3b) water is precious

In [83]:
sent1 = "the boy sings"
sent2 = "boy sings"
sent3 = "the boys sing"
sent4 = "boys sing"
sent5 = "the water is precious"
sent6 = "water is precious"

In [91]:
parse_sent(sent6,gr)

(S[]
  (NP[NUM='sg'] (N[NUM='sg', -count] water))
  (VP[AGR=[NUM='sg', PER=3], TENSE='pres']
    (Cop[AGR=[NUM='sg', PER=3], TENSE='pres'] is)
    (Adj[] precious)))


### Exercise 3

Extend the German grammar

In [112]:
g = """
% start S
# Grammar Productions
 S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
 S/TV -> NP 
 NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
 NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
 VP[AGR=?a] -> IV[AGR=?a]
 VP[AGR=?a] -> TV[OBJCASE=?c, AGR=?a] NP[CASE=?c]
 # Lexical Productions
 # Singular determiners
 # masc
 Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der'
 Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
 Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
 # fem
 Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
 Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
 Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
 # Plural determiners
 Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die'
 Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den'
 Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die'
 # Nouns
 N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
 N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
 N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
 N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
 N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
 N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
 # Pronouns
 PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
 PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
 PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
 PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
 PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
 PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
 PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
 PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
 PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
 PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
 # Verbs
 IV[AGR=[NUM=sg,PER=1]] -> 'komme'
 IV[AGR=[NUM=sg,PER=2]] -> 'kommst'
 IV[AGR=[NUM=sg,PER=3]] -> 'kommt'
 IV[AGR=[NUM=pl, PER=1]] -> 'kommen'
 IV[AGR=[NUM=pl, PER=2]] -> 'kommt'
 IV[AGR=[NUM=pl, PER=3]] -> 'kommen'
 TV[OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
 TV[OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
 TV[OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
 TV[OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
 TV[OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
 TV[OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
 TV[OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
 TV[OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
 TV[OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
 TV[OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
 TV[OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
 TV[OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
"""
gr = grammar.FeatureGrammar.fromstring(g)

def parse_sent(sent, gr):
    tokens = sent.split()
    parser = parse.FeatureEarleyChartParser(gr)
    trees = parser.parse(tokens)
    for tree in trees: print(tree)

so that it can handle so-called verb-second structures like "heute sieht der Hund die Katze" by using a slash category `S/TV` for the missing transitive verb in "der Hund die Katze". Use the following test sentences:

In [115]:
sent1 = "heute sieht der Hund die Katze"
sent2 = "heute sehe der Hund die Katze"
sent3 = "heute sieht der Hund die Katzen"

In [118]:
parse_sent(sent1,gr)

ValueError: Grammar does not cover some of the input words: "'heute'".

### Exercise 4

Consider the patterns of grammaticality for the verbs "loaded", "filled", and "dumped" below. Write grammar productions to handle such data:

(1a) the farmer loaded the cart with sand

(1b) the farmer loaded sand into the cart

(2a) the farmer filled the cart with sand

(2b) * the farmer filled sand into the cart

(3a) * the farmer dumped the cart with sand

(3b) the farmer dumped sand into the cart

In [119]:

g = """
% start S
# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n]
NP[NUM=?n] -> PropN[NUM=?n]
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl]
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
VP[TENSE=?t, AGR=?n] -> Cop[TENSE=?t, AGR=?n] Adj
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'cart' | 'sand'
N[NUM=sg] -> 'farmer'
IV[TENSE=past] -> 'loaded' | 'filled' | 'dumped'
"""
gr = grammar.FeatureGrammar.fromstring(g)

def parse_sent(sent, gr):
    tokens = sent.split()
    parser = parse.FeatureEarleyChartParser(gr)
    trees = parser.parse(tokens)
    for tree in trees: print(tree)
sent1 = "the farmer loaded the cart with sand"
sent2 = "the farmer loaded sand into the cart"
sent3 = "the farmer filled the cart with sand"
sent4 = "the farmer filled sand into the cart"
sent5 = "the farmer dumped the cart with sand"
sent6 = "the farmer dumped sand into the cart"

In [120]:
parse_sent(sent1,gr)

ValueError: Grammar does not cover some of the input words: "'with'".

### Exercise 5

Consider the following feature structures:

In [122]:
fs1 = nltk.FeatStruct("[A = ?x, B= [C = ?x]]")
fs2 = nltk.FeatStruct("[B = [D = d]]")
fs3 = nltk.FeatStruct("[B = [C = d]]")
fs4 = nltk.FeatStruct("[A = (1)[B = b], C->(1)]")
fs5 = nltk.FeatStruct("[A = (1)[D = ?x], C = [E -> (1), F = ?x] ]")
fs6 = nltk.FeatStruct("[A = [D = d]]")
fs7 = nltk.FeatStruct("[A = [D = d], C = [F = [D = d]]]")
fs8 = nltk.FeatStruct("[A = (1)[D = ?x, G = ?x], C = [B = ?x, E -> (1)] ]")
fs9 = nltk.FeatStruct("[A = [B = b], C = [E = [G = e]]]")
fs10 = nltk.FeatStruct("[A = (1)[B = b], C -> (1)]")

What is the result of the following unifications? 

1. `fs1` and `fs2`,  
2. `fs1` and `fs3`,  
3. `fs4` and `fs5`,  
4. `fs5` and `fs6`,  
5. `fs5` and `fs7`,  
6. `fs8` and `fs9`,  
7. `fs8` and `fs10`.  

In [123]:
print(fs1.unify(fs2))

[ A = ?x          ]
[                 ]
[ B = [ C = ?x  ] ]
[     [ D = 'd' ] ]


In [124]:
print(fs1.unify(fs3))

[ A = 'd'         ]
[                 ]
[ B = [ C = 'd' ] ]


In [125]:
print(fs4.unify(fs5))

[         [ B = 'b'  ] ]
[ A = (1) [ D = ?x   ] ]
[         [ E -> (1) ] ]
[         [ F = ?x   ] ]
[                      ]
[ C -> (1)             ]


In [126]:
print(fs5.unify(fs6))

[ A = (1) [ D = 'd' ] ]
[                     ]
[ C = [ E -> (1) ]    ]
[     [ F = 'd'  ]    ]


In [127]:
print(fs5.unify(fs7))

None


In [128]:
print(fs8.unify(fs9))

[         [ B = 'b' ] ]
[ A = (1) [ D = 'e' ] ]
[         [ G = 'e' ] ]
[                     ]
[ C = [ B = 'e'  ]    ]
[     [ E -> (1) ]    ]


In [129]:
print(fs8.unify(fs10))

[         [ B = 'b'  ] ]
[ A = (1) [ D = 'b'  ] ]
[         [ E -> (1) ] ]
[         [ G = 'b'  ] ]
[                      ]
[ C -> (1)             ]
