## 1 Grammatical features:

In [61]:
import nltk, re, pprint
from nltk import load_parser

#### Feature structures:

In [7]:
# Feature keys: CAT = grammatical category, ORTH = orthography,
# REF = referent, REL = relation.
kim = dict(CAT='NP', ORTH='Kim', REF='k')
chase = dict(CAT='V', ORTH='chased', REL='chase')

In [8]:
# Add the verb's role features: AGT = agent, PAT = patient.
chase.update(AGT='sbj', PAT='obj')

In [9]:
# Display the verb's feature dictionary, which now includes the AGT and PAT entries.
chase

{'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase', 'AGT': 'sbj', 'PAT': 'obj'}

In [22]:
# Lexical entry for the proper noun 'Lee'.
lee = dict(CAT='NP', ORTH='Lee', REF='l')

# Example sentence, split on whitespace into its three word tokens.
sent = 'Kim chased Lee'
tokens = sent.split()

def lex2fs(word, lexicon=None):
    """Return the feature dictionary whose 'ORTH' value equals ``word``.

    Args:
        word: Surface form (orthography) to look up.
        lexicon: Optional iterable of feature dictionaries to search. When
            omitted, the notebook-level entries ``kim``, ``lee`` and
            ``chase`` are searched, in that order.

    Returns:
        The first matching feature dictionary itself (not a copy, so a
        caller that modifies it modifies the lexical entry), or ``None``
        when no entry matches.
    """
    if lexicon is None:
        # Looked up at call time so the current notebook globals are used.
        lexicon = (kim, lee, chase)
    for entry in lexicon:
        if entry['ORTH'] == word:
            return entry
    return None
        
# Look up the feature dictionary for each of the first three tokens, in order,
# then display the resulting triple.
subj, verb, obj = [lex2fs(tokens[position]) for position in range(3)]
(subj, verb, obj)

({'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'},
 {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase', 'AGT': 'k', 'PAT': 'l'},
 {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'})

In [28]:
# Fill the verb's agent (AGT) and patient (PAT) roles with the referents
# of the subject and the object respectively.
verb.update(AGT=subj['REF'], PAT=obj['REF'])

In [29]:
# Display the three feature dictionaries; the verb's AGT and PAT now hold referents.
subj, verb, obj

({'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'},
 {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase', 'AGT': 'k', 'PAT': 'l'},
 {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'})

In [25]:
# Print selected features of the verb, one "NAME => value" line each,
# with the feature name left-aligned in a five-character column.
for feature in ('ORTH', 'REL', 'AGT', 'PAT'):
    value = verb[feature]
    print("%-5s => %s" % (feature, value))

ORTH  => chased
REL   => chase
AGT   => k
PAT   => l


#### Attributes and constraints:

In [30]:
# Print the contents of the feat0 feature grammar (productions and lexicon).
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')

% start S
# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n] 
NP[NUM=?n] -> PropN[NUM=?n] 
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl] 
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'Kim' | 'Jody'
N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children' 
IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
IV[TENSE=past] -> 'disappeared' | 'walked'
TV[TENSE=past] -> 'saw' | 'liked'


In [33]:
# Parse a sentence with the feat0 feature grammar; trace=2 makes the parser
# print its step-by-step chart trace.
tokens = 'Kim likes children'.split()

cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
for tree in cp.parse(tokens):
    # Tree.draw() opens a GUI window and returns None, so printing its result
    # shows only "None"; print the tree's bracketed text form instead.
    # Call tree.draw() on its own if the graphical view is wanted.
    print(tree)

|.Kim .like.chil.|
Leaf Init Rule:
|[----]    .    .| [0:1] 'Kim'
|.    [----]    .| [1:2] 'likes'
|.    .    [----]| [2:3] 'children'
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
Feature Bottom Up Predict Combine Rule:
|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
Feature Bottom Up Predict Combine Rule:
|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
Feature Bottom Up Predict Combine Rule:
|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
Feature Bottom Up Predict Combine Rule:
|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
Feature Single Edge Fundame

## 2 Processing feature structures:

In [35]:
# Build a flat feature structure from keyword arguments; atomic feature
# values can be strings or integers.
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)

[ NUM   = 'sg'   ]
[ TENSE = 'past' ]


In [38]:
# A feature value can itself be a feature structure: AGR holds fs1 here,
# giving a complex (nested) feature structure.
fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)

[ AGR = [ NUM   = 'sg'   ] ]
[       [ TENSE = 'past' ] ]
[                          ]
[ POS = 'N'                ]


In [43]:
# Alternative way to specify the same complex feature structure: pass its
# bracketed string form to the constructor.
print(nltk.FeatStruct("[POS='N', AGR=[TENSE='past', NUM='sg']]"))

[ AGR = [ NUM   = 'sg'   ] ]
[       [ TENSE = 'past' ] ]
[                          ]
[ POS = 'N'                ]


In [46]:
# Feature structures are not limited to linguistic features; they can hold
# any information, e.g. a contact record mixing string and integer values.
nltk.FeatStruct(NAME='Lee', TELNO='01 27 86 42 96', AGE=33)

[AGE=33, NAME='Lee', TELNO='01 27 86 42 96']

#### Reentrancy:

In [57]:
# Build a structure in which Lee's address is shared with the spouse's entry.
print(nltk.FeatStruct("""[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
SPOUSE=[NAME='Kim', ADDRESS->(1)]]
"""))

# (1) is a tag (coindex): ADDRESS=(1)[...] labels the address structure and
# ADDRESS->(1) inside SPOUSE refers to that same labelled structure.

[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]


#### Unification:

In [53]:
# Unify a structure holding a concrete ADDRESS1 value with one that uses the
# same variable ?x for both ADDRESS1 and ADDRESS2. In the printed result
# ADDRESS2 points at the ADDRESS1 value ("-> (1)").
# NOTE(review): fs1 and fs2 are rebound here; re-running the earlier cell that
# builds fs2 with AGR=fs1 after this one would pick up this fs1 instead.
fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
print(fs2.unify(fs1))

[ ADDRESS1 = (1) [ NUMBER = 74           ] ]
[                [ STREET = 'rue Pascal' ] ]
[                                          ]
[ ADDRESS2 -> (1)                          ]


In [55]:
# A feature value may be left as a variable (?a). The unquoted value nom is
# read as the string 'nom', as the displayed repr shows.
fs3 = nltk.FeatStruct("[CASE=nom, AGR=?a]")
fs3

[AGR=?a, CASE='nom']

## 3 Extending a feature-based grammar:

In [58]:
# Print the contents of the feat1 feature grammar (productions and lexicon).
nltk.data.show_cfg('grammars/book_grammars/feat1.fcfg')

% start S
# ###################
# Grammar Productions
# ###################
S[-INV] -> NP VP
S[-INV]/?x -> NP VP/?x
S[-INV] -> NP S/NP
S[-INV] -> Adv[+NEG] S[+INV]
S[+INV] -> V[+AUX] NP VP
S[+INV]/?x -> V[+AUX] NP VP/?x
SBar -> Comp S[-INV]
SBar/?x -> Comp S[-INV]/?x
VP -> V[SUBCAT=intrans, -AUX]
VP -> V[SUBCAT=trans, -AUX] NP
VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
VP -> V[SUBCAT=clause, -AUX] SBar
VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
VP -> V[+AUX] VP
VP/?x -> V[+AUX] VP/?x
# ###################
# Lexical Productions
# ###################
V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
V[SUBCAT=trans, -AUX] -> 'see' | 'like'
V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
V[+AUX] -> 'do' | 'can'
NP[-WH] -> 'you' | 'cats'
NP[+WH] -> 'who'
Adv[+NEG] -> 'rarely' | 'never'
NP/NP ->
Comp -> 'that'


In [69]:
# Parse a German sentence with the German feature grammar; trace=2 makes the
# parser print its step-by-step chart trace.
# To try the feat1 grammar printed above instead, parse
# 'who do you claim that you like' with 'grammars/book_grammars/feat1.fcfg'.
# NOTE(review): 'Katze' is singular while the NLTK book example uses
# 'den Katzen' -- confirm the non-agreeing form is intentional.
tokens = 'ich folge den Katze'.split()
cp = load_parser('grammars/book_grammars/german.fcfg', trace=2)
for tree in cp.parse(tokens):
    # Tree.draw() opens a GUI window and returns None, so printing its result
    # shows only "None"; print the tree's bracketed text form instead.
    print(tree)

|.ich.fol.den.Kat.|
Leaf Init Rule:
|[---]   .   .   .| [0:1] 'ich'
|.   [---]   .   .| [1:2] 'folge'
|.   .   [---]   .| [2:3] 'den'
|.   .   .   [---]| [3:4] 'Katze'
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] PRO[AGR=[NUM='sg', PER=1], CASE='nom'] -> 'ich' *
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] NP[AGR=[NUM='sg', PER=1], CASE='nom'] -> PRO[AGR=[NUM='sg', PER=1], CASE='nom'] *
Feature Bottom Up Predict Combine Rule:
|[--->   .   .   .| [0:1] S[] -> NP[AGR=?a, CASE='nom'] * VP[AGR=?a] {?a: [NUM='sg', PER=1]}
Feature Bottom Up Predict Combine Rule:
|.   [---]   .   .| [1:2] TV[AGR=[NUM='sg', PER=1], OBJCASE='dat'] -> 'folge' *
Feature Bottom Up Predict Combine Rule:
|.   [--->   .   .| [1:2] VP[AGR=?a] -> TV[AGR=?a, OBJCASE=?c] * NP[CASE=?c] {?a: [NUM='sg', PER=1], ?c: 'dat'}
Feature Bottom Up Predict Combine Rule:
|.   .   [---]   .| [2:3] Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] -> 'den' *
|.   .   [---]   .| [2:3] Det[AGR=[