# 语法困境

In [1]:
import nltk
from nltk.grammar import CFG
# Build a context-free grammar from its textual production rules.
groucho_grammar = CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")

# Token list for the sentence to analyse.
sent = 'I shot an elephant in my pajamas'.split()

# General-purpose chart parser over the grammar above.
parser = nltk.ChartParser(groucho_grammar)

# parse() gives an iterator over the parse trees of the sentence;
# print every tree it produces.
trees = parser.parse(sent)
for tree in trees:
    print(tree)

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


# 文法的作用

# 上下文无关文法-简单文法
context-free grammar，CFG

In [2]:
# A small context-free grammar used by the recursive-descent examples.
grammar1 = CFG.fromstring("""
S -> NP VP
VP -> V NP | V NP PP
PP -> P NP
V -> 'saw' | 'ate' | 'walked'
NP -> 'John' | 'Mary' | 'Bob' | Det N | Det N PP
Det -> 'a' | 'an' | 'the' | 'my'
N -> 'man' | 'dog' | 'cat' | 'telescope' | 'park'
P -> 'in' | 'on' | 'by' | 'with'
""")

# Recursive-descent parser for grammar1 (a trace=2 argument can be appended
# here, as the original inline hint suggested).
rd_parser = nltk.RecursiveDescentParser(grammar1)

# Two token lists to parse; the second is lower-cased because the grammar
# only contains lower-case determiners and nouns.
sent = "Mary saw Bob".split()
sent2 = "The dog saw a man in the park".lower().split()

# Print and draw every parse tree of each sentence, in order.
for tokens in (sent, sent2):
    for tree in rd_parser.parse(tokens):
        print(tree)
        tree.draw()

(S (NP Mary) (VP (V saw) (NP Bob)))
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man) (PP (P in) (NP (Det the) (N park))))))
(S
  (NP (Det the) (N dog))
  (VP
    (V saw)
    (NP (Det a) (N man))
    (PP (P in) (NP (Det the) (N park)))))


# 构建文法

In [3]:
# Load the grammar from the file mygrammar.cfg.
# NOTE(review): the 'file:' URL has no directory part, so the file is
# presumably expected in the current working directory - confirm.
grammar1 = nltk.data.load('file:mygrammar.cfg')
rd_parser = nltk.RecursiveDescentParser(grammar1)

# Print and draw every parse tree found for the sentence.
sent = ['Mary', 'saw', 'Bob']
for tree in rd_parser.parse(sent):
    print(tree)
    tree.draw()

(S (NP Mary) (VP (V saw) (NP Bob)))


# 递归下降分析
递归下降分析器在上述过程中建立分析树。带着最初的目标（找到一个 S），创建 S 根节点。随着上述过程使用文法的产生式递归扩展，分析树不断向下延伸（故名为递归下降）。

In [6]:
# Recursive-descent parse of a short sentence; grammar1 is the grammar
# bound by an earlier cell.
sent = ['Mary', 'saw', 'a', 'dog']
rd_parser = nltk.RecursiveDescentParser(grammar1)
for t in rd_parser.parse(sent):
    print(t)

(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))


# 移进-归约分析
一种简单的自下而上分析器是移进-归约分析器。与所有自下而上的分析器一样，移进-归约分析器尝试找到对应文法产生式右侧的词和短语的序列，用左侧的替换它们，直到整个句子归约为一个 S。

移进-归约分析器相比递归下降分析器的好处是，它们只建立与输入中的词对应的结构。此外，每个结构它们只建立一次。

In [9]:
# Launch NLTK's shift-reduce parser demo application.
nltk.app.srparser_app.app()  # graphical tool for exploring the shift-reduce parser



In [12]:
# Shift-reduce parse of the same sentence; grammar1 is the grammar bound
# by an earlier cell.
sent = ['Mary', 'saw', 'a', 'dog']
sr_parse = nltk.ShiftReduceParser(grammar1)
for t in sr_parse.parse(sent):
    print(t)

(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))
