## A Grammar for a Subset of English

In [None]:
'''
Sentence => Noun-Phrase + Verb-Phrase
Noun-Phrase => Article + Noun
Verb-Phrase => Verb + Noun-Phrase
Article => the,a,...
Noun => man,ball,woman,table...
Verb => hit,took,saw,liked...
'''

# A Straightforward Solution

In [1]:
def sentence():return noun_phrase() + verb_phrase()
def noun_phrase():return article() + noun()
def verb_phrase():return verb() + noun_phrase()
def article():return one_of(['the','a'])
def noun():return one_of(['man','ball','woman','table'])
def verb():return one_of(['hit','took','saw','liked'])

In [2]:
import random

In [3]:
def one_of(lot):return [random.choice(lot)]

In [4]:
sentence()

['the', 'ball', 'liked', 'the', 'woman']

# A Rule-Based Solution

In [28]:
simple_grammar = """
Sentence => Noun_Phrase Verb_Phrase
Noun_Phrase => Article Noun
Verb_Phrase => Verb Noun_Phrase
Article => the | a
Noun => man | ball | woman | table
Verb => hit | took | saw | liked
"""

In [29]:
def parse_grammar(raw_grammar):
    grammar = {}
    for line in raw_grammar.split('\n'):
        if not line:continue
        lhs,rhs = line.split('=>')
        left = lhs.strip()
        right = list(map(lambda x:x.split(),rhs.split('|')))
        grammar[left] = right
        
    return grammar

In [30]:
grammar = parse_grammar(simple_grammar)

In [31]:
grammar

{'Article': [['the'], ['a']],
 'Noun': [['man'], ['ball'], ['woman'], ['table']],
 'Noun_Phrase': [['Article', 'Noun']],
 'Sentence': [['Noun_Phrase', 'Verb_Phrase']],
 'Verb': [['hit'], ['took'], ['saw'], ['liked']],
 'Verb_Phrase': [['Verb', 'Noun_Phrase']]}

In [56]:
def rewrites(category):return grammar.get(category,None)

In [57]:
from functools import reduce

In [58]:
def mappend(func,iterable):
    return reduce(lambda a, b:a + b ,map(func,iterable))

In [59]:
def generate(phrase):
    if isinstance(phrase,list):
        return mappend(generate,phrase)
    elif rewrites(phrase):
        return generate(random.choice(rewrites(phrase)))
    else:
        return [phrase]

In [60]:
generate('Sentence')

['the', 'table', 'saw', 'the', 'man']

In [61]:
generate('Sentence')

['the', 'man', 'hit', 'the', 'man']

In [62]:
generate('Noun_Phrase')

['a', 'man']

In [63]:
generate('Verb_Phrase')

['hit', 'a', 'woman']

# Changing the  the Grammar without Chaning the Program

In [72]:
bigger_grammar = """
Sentence => Noun_Phrase Verb_Phrase
Noun_Phrase => Article Adj* Noun PP* | Name | Pron
Verb_Phrase => Verb Noun_Phrase PP*
PP* => null | PP PP*
Adj* => null | Adj Adj*
PP => Prep Noun_Phrase
Prep => to | in | by | with | on
Adj => big | little | blue | green | adiabatic
Article => the | a 
Name => Pat | Kim | Lee | Terry | Robin
Noun => man | ball | woman | table
Verb => hit | took | saw | liked
Pron => he | she | it | these | those | that
"""

In [73]:
grammar = parse_grammar(bigger_grammar)

In [74]:
grammar

{'Adj': [['big'], ['little'], ['blue'], ['green'], ['adiabatic']],
 'Adj*': [['null'], ['Adj', 'Adj*']],
 'Article': [['the'], ['a']],
 'Name': [['Pat'], ['Kim'], ['Lee'], ['Terry'], ['Robin']],
 'Noun': [['man'], ['ball'], ['woman'], ['table']],
 'Noun_Phrase': [['Article', 'Adj*', 'Noun', 'PP*'], ['Name'], ['Pron']],
 'PP': [['Prep', 'Noun_Phrase']],
 'PP*': [['null'], ['PP', 'PP*']],
 'Prep': [['to'], ['in'], ['by'], ['with'], ['on']],
 'Pron': [['he'], ['she'], ['it'], ['these'], ['those'], ['that']],
 'Sentence': [['Noun_Phrase', 'Verb_Phrase']],
 'Verb': [['hit'], ['took'], ['saw'], ['liked']],
 'Verb_Phrase': [['Verb', 'Noun_Phrase', 'PP*']]}

In [80]:
def format_result(listed_result):
    return list(filter(lambda x: x != 'null', listed_result))

In [83]:
format_result(generate('Sentence'))

['Lee', 'hit', 'a', 'green', 'ball', 'to', 'Terry']

In [84]:
format_result(generate('Sentence'))

['she', 'liked', 'Kim']

In [85]:
format_result(generate('Sentence'))

['Pat', 'saw', 'a', 'blue', 'big', 'table', 'by', 'she', 'by', 'Lee']

# Using the Same Data for Several Programs 

In [107]:
def generate_tree(phrase):
    if isinstance(phrase, list):
        return list(map(generate_tree, phrase))
    elif rewrites(phrase):
        return [phrase] + generate_tree(random.choices(rewrites(phrase)))
    else:
        return [phrase]

In [109]:
generate_tree('Sentence')

['Sentence',
 [['Noun_Phrase', [['Pron', [['these']]]]],
  ['Verb_Phrase',
   [['Verb', [['liked']]],
    ['Noun_Phrase', [['Name', [['Terry']]]]],
    ['PP*',
     [['PP', [['Prep', [['to']]], ['Noun_Phrase', [['Name', [['Pat']]]]]]],
      ['PP*', [['null']]]]]]]]]