# 文法特征

In [1]:
# Toy lexicon: each entry is a plain dict used as a flat feature structure.
# 'ORTH' holds the surface form; 'REF' / 'REL' hold the referent / relation.
kim = {'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'}
chase = {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase'}
lee = {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'}
# Extra verb entry with SRC/EXP role features; it is not in the default
# lexicon searched by lex2fs, but can be passed in explicitly.
surprise = {'CAT': 'V', 'ORTH': 'surprised', 'REL': 'surprise','SRC': 'sbj', 'EXP': 'obj'}

sent = "Kim chased Lee"
tokens = sent.split()


def lex2fs(word, lexicon=None):
    """Return the first lexical entry whose 'ORTH' value equals ``word``.

    Args:
        word: surface form to look up.
        lexicon: optional iterable of entry dicts to search, in order.
            Defaults to ``[kim, lee, chase]`` (resolved at call time).

    Returns:
        The matching dict itself (not a copy), or ``None`` when no entry
        matches.
    """
    if lexicon is None:
        lexicon = [kim, lee, chase]
    for fs in lexicon:
        if fs['ORTH'] == word:
            return fs
    return None


# The sentence has exactly three tokens: subject, verb, object.
subj, verb, obj = (lex2fs(token) for token in tokens)
# lex2fs returns the lexicon dict itself, so `verb` is `chase`: these
# assignments fill the agent/patient roles on that entry in place.
verb['AGT'] = subj['REF']
verb['PAT'] = obj['REF']
for k in ['ORTH','REL','AGT','PAT']:
    print("%-5s => %s" % (k, verb[k]))

ORTH  => chased
REL   => chase
AGT   => k
PAT   => l


# 使用属性和约束

In [2]:
import nltk
# Print the text of the feat0.fcfg feature grammar so its productions
# (NUM / TENSE agreement features) can be read before parsing with it.
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')

% start S
# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n] 
NP[NUM=?n] -> PropN[NUM=?n] 
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl] 
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'Kim' | 'Jody'
N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children' 
IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
IV[TENSE=past] -> 'disappeared' | 'walked'
TV[TENSE=past] -> 'saw' | 'liked'


In [3]:
from nltk import load_parser

# Sentence to parse, split on whitespace into word tokens.
tokens = 'Kim likes children'.split()
# trace=2 makes the parser print the chart edges as it works.
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
# parse() hands back a one-shot iterator; materialise it so that `trees`
# can be iterated more than once (e.g. when a later cell is re-run).
trees = list(cp.parse(tokens))

|.Kim .like.chil.|
Leaf Init Rule:
|[----]    .    .| [0:1] 'Kim'
|.    [----]    .| [1:2] 'likes'
|.    .    [----]| [2:3] 'children'
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
Feature Bottom Up Predict Combine Rule:
|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
Feature Bottom Up Predict Combine Rule:
|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
Feature Bottom Up Predict Combine Rule:
|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
Feature Bottom Up Predict Combine Rule:
|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
Feature Single Edge Fundame

In [4]:
# Print every parse tree found for the sentence parsed in the previous cell.
# NOTE(review): if `trees` is a one-shot iterator, re-running this cell
# without re-running the parse prints nothing -- confirm.
for tree in trees:
    print (tree)

(S[]
  (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
  (VP[NUM='sg', TENSE='pres']
    (TV[NUM='sg', TENSE='pres'] likes)
    (NP[NUM='pl'] (N[NUM='pl'] children))))


# 处理特征结构

In [11]:
from nltk import FeatStruct
# Build a flat feature structure from keyword arguments and print it.
fs1 = FeatStruct(TENSE='past', NUM='sg')
print (fs1)
# Rebind fs1 to a new structure; values may be ints as well as strings.
fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
# Feature values are read with dict-style indexing.
print (fs1['GND'])
# A feature can also be added by assignment after construction.
fs1['CASE']='acc'
# A feature value may itself be a feature structure: fs1 is nested under AGR.
fs2=FeatStruct(POS='N',AGR=fs1)
print (fs2)
print (fs2['AGR'])
# Chained indexing follows a path into the nested structure.
print (fs2['AGR']['PER'])
# Feature names and values need not be linguistic.
print (FeatStruct(name='Lee',telno='01 27 86 42 96',age=33))

[ NUM   = 'sg'   ]
[ TENSE = 'past' ]
fem
[       [ CASE = 'acc' ] ]
[ AGR = [ GND  = 'fem' ] ]
[       [ NUM  = 'pl'  ] ]
[       [ PER  = 3     ] ]
[                        ]
[ POS = 'N'              ]
[ CASE = 'acc' ]
[ GND  = 'fem' ]
[ NUM  = 'pl'  ]
[ PER  = 3     ]
3
[ age   = 33               ]
[ name  = 'Lee'            ]
[ telno = '01 27 86 42 96' ]


下面演示结构共享（也称重入）。当两条路径具有相同的值时，它们被称为是等价的。

括号内的整数有时也被称为标记或同指标志（coindex）。整数的选择并不重要。
一个特征结构中可以有任意数目的标记。

In [12]:
# Structure sharing: the tag (1) labels Lee's ADDRESS value, and
# `ADDRESS->(1)` under SPOUSE points back at that same tagged value.
shared_address_spec = """[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""
print(FeatStruct(shared_address_spec))

# A single tag may be referenced from several paths: D and E both point at
# the value tagged (1) under B.
multi_reference_spec = "[A='a', B=(1)[C='c'], D->(1), E->(1)]"
print(FeatStruct(multi_reference_spec))

[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]
[ A = 'a'             ]
[                     ]
[ B = (1) [ C = 'c' ] ]
[                     ]
[ D -> (1)            ]
[ E -> (1)            ]


# 包含和统一

In [19]:
# --- Unifying two compatible structures merges their features. ---
fs1=FeatStruct(NUMBER=74,STREET='rue Pascal')
# NOTE(review): 'Pairs' looks like a typo for 'Paris' (a later example in this
# cell uses CITY=Paris); left as-is because the recorded output shows 'Pairs'.
fs2=FeatStruct(CITY='Pairs')
print (fs1.unify(fs2))
print ()
# Same operands in the opposite order; the recorded output is identical.
print (fs2.unify(fs1))
print ()
# --- Conflicting values for the same feature: unify() yields None. ---
fs0=FeatStruct(A='a')
fs1=FeatStruct(A='b')
print (fs0.unify(fs1))
print ()
# --- Two equal ADDRESS values written out separately (no shared tag). ---
fs0 = FeatStruct("""[NAME=Lee,ADDRESS=[NUMBER=74,STREET='rue Pascal'],
                        SPOUSE=[NAME=Kim,ADDRESS=[NUMBER=74,STREET='rue Pascal']]]""")
print (fs0)
print ()
# Adding CITY along the SPOUSE.ADDRESS path: in the recorded output CITY shows
# up under SPOUSE's ADDRESS while the top-level ADDRESS is printed without it.
fs1 = FeatStruct("[SPOUSE=[ADDRESS =[CITY=Paris]]]")
print (fs1.unify(fs0))
print ()
# --- Same data, but SPOUSE's ADDRESS points at the value tagged (1). ---
# NOTE(review): presumably CITY now appears via both paths; the recorded
# output is truncated before this result -- confirm on re-run.
fs2 = FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
print (fs1.unify(fs2))
print ()
# --- The same variable ?x is used as the value of ADDRESS1 and ADDRESS2. ---
fs1=FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
fs2=FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
print (fs2)
print ()
# NOTE(review): presumably unification binds ?x so both ADDRESS1 and ADDRESS2
# carry fs1's address; result not visible in the recorded output -- confirm.
print(fs2.unify(fs1))

[ CITY   = 'Pairs'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]

[ CITY   = 'Pairs'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]

None

[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [ ADDRESS = [ NUMBER = 74           ] ] ]
[ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
[           [                                     ] ]
[           [ NAME    = 'Kim'                     ] ]

[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [           [ CITY   = 'Paris'      ] ] ]
[           [ ADDRESS = [

# 无限制依赖成分

In [20]:
# Print the text of the feat1.fcfg grammar, whose productions use slash
# categories (e.g. S/NP, VP/?x) and the +/-INV, +/-AUX features.
nltk.data.show_cfg('grammars/book_grammars/feat1.fcfg')

% start S
# ###################
# Grammar Productions
# ###################
S[-INV] -> NP VP
S[-INV]/?x -> NP VP/?x
S[-INV] -> NP S/NP
S[-INV] -> Adv[+NEG] S[+INV]
S[+INV] -> V[+AUX] NP VP
S[+INV]/?x -> V[+AUX] NP VP/?x
SBar -> Comp S[-INV]
SBar/?x -> Comp S[-INV]/?x
VP -> V[SUBCAT=intrans, -AUX]
VP -> V[SUBCAT=trans, -AUX] NP
VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
VP -> V[SUBCAT=clause, -AUX] SBar
VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
VP -> V[+AUX] VP
VP/?x -> V[+AUX] VP/?x
# ###################
# Lexical Productions
# ###################
V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
V[SUBCAT=trans, -AUX] -> 'see' | 'like'
V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
V[+AUX] -> 'do' | 'can'
NP[-WH] -> 'you' | 'cats'
NP[+WH] -> 'who'
Adv[+NEG] -> 'rarely' | 'never'
NP/NP ->
Comp -> 'that'


In [26]:
# Parse each example sentence with the feat1 grammar and call draw() on
# every parse tree that comes back.
cp = load_parser('grammars/book_grammars/feat1.fcfg')
sentences = (
    'who do you claim that you like',
    'you claim that you like cats',
    'rarely do you sing',
)
for sentence in sentences:
    tokens = sentence.split()
    for tree in cp.parse(tokens):
        tree.draw()

# 德语中的格和性别