# Rule Based 实践
给定语法规则，生成句子。

In [1]:
import random

In [2]:
# Grammar rules used to generate a sentence.
# Each line reads `target => alternative | alternative ...`, where an
# alternative is a space-separated sequence of symbols. Symbols that never
# appear on the left of `=>` are literal words. `Adj*` is defined recursively
# (zero or more adjectives), with `null` standing for the empty expansion.
grammer = """
sentence => noun_phrase verb_phrase
noun_phrase => Article Adj* noun
Adj* => null | Adj Adj*
verb_phrase => verb noun_phrase
Article => 一个 | 这个
noun => 女人 | 篮球 | 桌子 | 小猫
verb => 看着 | 坐在 | 听着 | 看见
Adj => 蓝色的 | 好看的 | 小小的
"""

## 1. 笨方法
- 直接定义很多方法，随机生成；
- 代码复用性差，grammar修改后代码无法继续使用。

In [3]:
def adj():
    """Return one adjective picked at random from the hard-coded list."""
    candidates = ["蓝色的", "好看的", "小小的"]
    return random.choice(candidates)

In [4]:
adj()

'好看的'

In [5]:
def noun():
    """Return one noun picked at random from the hard-coded list."""
    candidates = ["女人", "篮球", "桌子", "小猫"]
    return random.choice(candidates)

In [6]:
noun()

'女人'

def article(): ...
def verb(): ...

如果改变语法规则，就需要重新写程序，例如改成下面的 decimal_grammer：

In [8]:
# Grammar for simple arithmetic. Unlike the sentence grammar it separates a
# target from its alternatives with `=` rather than `=>`. The `num num`
# alternative lets a number consist of several digits.
decimal_grammer = """
operator = num op num
num = 0 | 1 | 2 | 3 | 4 | num num
op = + | - | * | /
"""

## 2. 优化

generate("verb_phrase") = generate("verb") + generate("noun_phrase")

In [9]:
# Build the grammar dictionary from its textual description.
def parse_grammer(grammer_str, sep='=>'):
    """Parse a textual grammar into a dict of expansion rules.

    Every non-blank line has the form ``target <sep> alt | alt | ...`` where
    each alternative is a whitespace-separated sequence of symbols.

    Args:
        grammer_str: Multi-line grammar description.
        sep: Token separating a rule's target from its alternatives.

    Returns:
        Dict mapping each target to a list of alternatives, each alternative
        being a list of symbol strings. If a target appears on several
        lines, the last line wins.

    Raises:
        ValueError: If a non-blank line does not contain ``sep``.
    """
    grammer = {}
    for line in grammer_str.split('\n'):
        line = line.strip()
        if not line:
            continue
        # Split on the first separator only, so the alternatives may
        # themselves contain it (e.g. a literal '==' when sep='=').
        target, found, rules = line.partition(sep)
        if not found:
            raise ValueError(
                'grammar line has no {!r} separator: {!r}'.format(sep, line)
            )
        grammer[target.strip()] = [r.split() for r in rules.split('|')]
    return grammer

In [10]:
parse_grammer(grammer)

{'sentence': [['noun_phrase', 'verb_phrase']],
 'noun_phrase': [['Article', 'Adj*', 'noun']],
 'Adj*': [['null'], ['Adj', 'Adj*']],
 'verb_phrase': [['verb', 'noun_phrase']],
 'Article': [['一个'], ['这个']],
 'noun': [['女人'], ['篮球'], ['桌子'], ['小猫']],
 'verb': [['看着'], ['坐在'], ['听着'], ['看见']],
 'Adj': [['蓝色的'], ['好看的'], ['小小的']]}

In [11]:
# Parse the sentence grammar once and keep the dict for the generation calls below.
g = parse_grammer(grammer)

In [12]:
def gene(grammer_parsed, target='sentence'):
    # 递归终止条件
    if target not in grammer_parsed:
        return target
    
    rules = random.choice(grammer_parsed[target])
    return ''.join(gene(grammer_parsed, target=r) for r in rules if r != 'null')

In [24]:
gene(g)

'这个女人坐在这个篮球'

In [27]:
gene(g)

'一个小小的女人坐在一个蓝色的小小的好看的小小的好看的蓝色的小猫'

### 生成加减乘除运算

In [55]:
# Arithmetic grammar, re-declared here with the same content as the earlier
# definition in this notebook. It uses `=` (not `=>`) as the rule separator.
decimal_grammer = """
operator = num op num
num = 0 | 1 | 2 | 3 | 4 | num num
op = + | - | * | /
"""

In [56]:
# This grammar uses `=` as separator and has no `sentence` rule, so both
# `sep` and `target` are passed explicitly.
gene(parse_grammer(decimal_grammer, sep='='), target='operator')

'3+42'

In [57]:
# Extended arithmetic grammar: an `expression` joins two `operator` terms
# with an `op`. Rules are separated with `=` (not `=>`).
decimal_grammer2 = """
expression = operator op operator
operator = num op num
num = 0 | 1 | 2 | 3 | 4 | num num
op = + | - | * | /
"""

In [58]:
# Start from the `expression` rule; `sep='='` matches this grammar's separator.
gene(parse_grammer(decimal_grammer2, sep='='), target='expression')

'4+2/3*2'

In [62]:
# Parse once outside the loop: the grammar text does not change between
# iterations, so re-parsing it for every sample is wasted work.
decimal_rules = parse_grammer(decimal_grammer2, sep='=')
for i in range(20):
    print(gene(decimal_rules, target='expression'))

4*0*4/3
0/2*0+3
3*4+1/0
1+30-4-3
12*1-2/4
2+0*1-34
2-2-1220*3
2/1/4-3
0*4/1-2
0+1*2-2
01+2*4+1
4*223-32+3
2-2*0+0
33*1+3+0142
4/2+4+2
0-1-12-30
4*4/1*1
1*2*3+2
3/2-4+4
3*0*4-4


## 3. Conclusion
+ Data Driven的思维过程：注重对问题的建模和抽象，模型能够处理未知的输入
+ 遇到新问题后可以直接复用代码(这里是说复用模型)，不需要重写代码