# Generating random sentences from a context-free grammar
# https://eli.thegreenplace.net/2010/01/28/generating-random-sentences-from-a-context-free-grammar

In [9]:
from collections import defaultdict
import random

class CFG(object):
    """ A context-free grammar that can generate random sentences.

        Productions live in 'self.prod', which maps each left-hand-side
        symbol to a list of alternatives; every alternative is a tuple
        of symbols. A symbol counts as a non-terminal exactly when it
        is a key of 'self.prod'; anything else is a terminal.
    """

    def __init__(self):
        self.prod = defaultdict(list)

    def add_prod(self, lhs, rhs):
        """ Add production to the grammar. 'rhs' can
            be several productions separated by '|'.
            Each production is a sequence of symbols
            separated by whitespace.

            Usage:
                grammar.add_prod('NT', 'VP PP')
                grammar.add_prod('Digit', '1|2|3|4')
        """
        for alternative in rhs.split('|'):
            self.prod[lhs].append(tuple(alternative.split()))

    def gen_random(self, symbol):
        """ Generate a random sentence from the
            grammar, starting with the given
            symbol.

            Every symbol visited during the expansion is printed to
            stdout as a trace. Each terminal in the returned string is
            followed by a single space, so a non-empty sentence ends
            with a trailing space.

            Raises IndexError if 'symbol' has no productions. The
            grammar is left unmodified in that case.
        """
        # Use .get() rather than indexing: indexing a defaultdict would
        # insert an empty production list for an unknown symbol, which
        # would then be mistaken for a non-terminal on later calls.
        alternatives = self.prod.get(symbol)
        if not alternatives:
            raise IndexError(
                "no productions defined for symbol %r" % (symbol,))

        # select one production of this symbol randomly
        rand_prod = random.choice(alternatives)

        pieces = []
        for sym in rand_prod:
            print(sym)
            if sym in self.prod:
                # for non-terminals, recurse
                pieces.append(self.gen_random(sym))
            else:
                pieces.append(sym + ' ')

        return ''.join(pieces)

In [10]:
# Build a small toy grammar: each (lhs, rhs) pair below is passed to
# add_prod, where '|' separates the alternatives of one symbol.
cfg1 = CFG()
for lhs, rhs in (
    ('S', 'NP VP'),
    ('NP', 'Det N | Det N'),
    ('NP', 'I | he | she | Joe'),
    ('VP', 'V NP | VP'),
    ('Det', 'a | the | my | his'),
    ('N', 'elephant | cat | jeans | suit'),
    ('V', 'kicked | followed | shot'),
):
    cfg1.add_prod(lhs, rhs)

# Print ten random sentences derived from the start symbol 'S'.
for i in range(10):
    print(cfg1.gen_random('S'))

NP
Det
a
N
cat
VP
V
followed
NP
she
a cat followed she 
NP
I
VP
VP
V
followed
NP
Det
my
N
jeans
I followed my jeans 
NP
Joe
VP
V
shot
NP
he
Joe shot he 
NP
Det
his
N
elephant
VP
VP
V
kicked
NP
he
his elephant kicked he 
NP
Joe
VP
VP
V
kicked
NP
Det
a
N
elephant
Joe kicked a elephant 
NP
she
VP
VP
VP
VP
V
followed
NP
I
she followed I 
NP
she
VP
V
followed
NP
Det
the
N
elephant
she followed the elephant 
NP
Det
his
N
suit
VP
VP
V
followed
NP
I
his suit followed I 
NP
he
VP
VP
VP
VP
VP
V
kicked
NP
Det
his
N
cat
he kicked his cat 
NP
he
VP
VP
V
shot
NP
Det
his
N
cat
he shot his cat 


In [19]:
from collections import defaultdict
import random

class GrammarSampler(object):
    """ Randomly expands connectors of a grammar into a nested list.

        'grammar' is stored as 'self.prod' and is indexed by node class;
        each entry is iterated as a collection of conjuncts, and each
        conjunct is a collection of connectors.
    """

    def __init__(self, grammar):
        self.prod = grammar

    def ChooseConjunct(self, connector, disjunct):
        """
        Chooses a random conjunct from the ones in disjunct that contain connector

        Returns the chosen conjunct as a new list, so the caller may
        modify it without touching the grammar. Raises ValueError if no
        conjunct in 'disjunct' contains 'connector'.
        """
        # keep only the conjuncts that can use this connector
        valid_conjs = [conj for conj in disjunct if connector in conj]
        if not valid_conjs:
            raise ValueError(
                "no conjunct contains connector %r" % (connector,))
        return list(random.choice(valid_conjs))

    def gen_random(self, node_class, connector, counter):
        """ Generate a random nested structure from the grammar.

            A conjunct of 'node_class' containing 'connector' is chosen
            at random. The result is a list whose first element is
            'counter', followed by one sub-list per remaining connector
            of that conjunct, each built recursively with 'counter + 1'.
        """
        sentence = [counter]

        # select one valid production of this class randomly
        conjunct = self.ChooseConjunct(connector, self.prod[node_class])
        conjunct.remove(connector)  # eliminate connector already used

        # recurse on every connector that is still open
        # NOTE(review): the recursion reuses the same node_class for the
        # child nodes -- confirm this is the intended behaviour.
        for conn in conjunct:
            sentence.append(self.gen_random(node_class, conn, counter + 1))

        return sentence

In [20]:
# NOTE: GrammarSampler.__init__ requires a 'grammar' argument, so this
# zero-argument call raises the TypeError shown below.
test = GrammarSampler()

TypeError: __init__() missing 1 required positional argument: 'grammar'