In [None]:
from nltk.grammar import CFG
from nltk.parse import generate

demo_grammar = """
  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'
"""


N = 23

print("Generating the first %d sentences for demo grammar:" % (N,))
print(demo_grammar)
grammar = CFG.fromstring(demo_grammar)
for n, sent in enumerate(generate.generate(grammar, n=N), 1):
    print("%3d. %s" % (n, " ".join(sent)))

Generating the first 23 sentences for demo grammar:

  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'

  1. the man slept
  2. the man saw the man
  3. the man saw the park
  4. the man saw the dog
  5. the man saw a man
  6. the man saw a park
  7. the man saw a dog
  8. the man walked in the man
  9. the man walked in the park
 10. the man walked in the dog
 11. the man walked in a man
 12. the man walked in a park
 13. the man walked in a dog
 14. the man walked with the man
 15. the man walked with the park
 16. the man walked with the dog
 17. the man walked with a man
 18. the man walked with a park
 19. the man walked with a dog
 20. the park slept
 21. the park saw the man
 22. the park saw the park
 23. the park saw the dog


In [2]:

def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import CFG, parse

    grammar = CFG.fromstring(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = "I saw a man in the park".split()

    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.parse(sent):
        print(p)

In [3]:
demo()

Parsing 'I saw a man in the park'
    [ * I saw a man in the park]
  S [ 'I' * saw a man in the park]
  R [ NP * saw a man in the park]
  S [ NP 'saw' * a man in the park]
  R [ NP V * a man in the park]
  S [ NP V 'a' * man in the park]
  R [ NP V Det * man in the park]
  S [ NP V Det 'man' * in the park]
  R [ NP V Det N * in the park]
  R [ NP V NP * in the park]
  R [ NP VP * in the park]
  R [ S * in the park]
  S [ S 'in' * the park]
  R [ S P * the park]
  S [ S P 'the' * park]
  R [ S P Det * park]
  S [ S P Det 'park' * ]
  R [ S P Det N * ]
  R [ S P NP * ]
  R [ S PP * ]


In [4]:
from time import perf_counter
from nltk.parse import EarleyChartParser


def demo(
    print_times=True,
    print_grammar=False,
    print_trees=True,
    trace=2,
    sent="I saw John with a dog with my cookie",
    numparses=5,
):
    """
    A demonstration of the Earley parsers.
    """
    import sys
    import time

    from nltk.parse.chart import demo_grammar

    # The grammar for ChartParser and SteppingChartParser:
    grammar = demo_grammar()
    if print_grammar:
        print("* Grammar")
        print(grammar)

    # Tokenize the sample sentence.
    print("* Sentence:")
    print(sent)
    tokens = sent.split()
    print(tokens)
    print()

    # Do the parsing.
    earley = EarleyChartParser(grammar, trace=trace)
    t = perf_counter()
    chart = earley.chart_parse(tokens)
    parses = list(chart.parses(grammar.start()))
    t = perf_counter() - t

    # Print results.
    if numparses:
        assert len(parses) == numparses, "Not all parses found"
    if print_trees:
        for tree in parses:
            print(tree)
    else:
        print("Nr trees:", len(parses))
    if print_times:
        print("Time:", t)

demo()

* Sentence:
I saw John with a dog with my cookie
['I', 'saw', 'John', 'with', 'a', 'dog', 'with', 'my', 'cookie']

|. I  .saw .John.with. a  .dog .with. my .cook.|
Leaf Init Rule:
|[----]    .    .    .    .    .    .    .    .| [0:1] 'I'
|.    [----]    .    .    .    .    .    .    .| [1:2] 'saw'
|.    .    [----]    .    .    .    .    .    .| [2:3] 'John'
|.    .    .    [----]    .    .    .    .    .| [3:4] 'with'
|.    .    .    .    [----]    .    .    .    .| [4:5] 'a'
|.    .    .    .    .    [----]    .    .    .| [5:6] 'dog'
|.    .    .    .    .    .    [----]    .    .| [6:7] 'with'
|.    .    .    .    .    .    .    [----]    .| [7:8] 'my'
|.    .    .    .    .    .    .    .    [----]| [8:9] 'cookie'
Top Down Init Rule:
|>    .    .    .    .    .    .    .    .    .| [0:0] S  -> * NP VP

* Processing queue: 0 

Predictor Rule:
|>    .    .    .    .    .    .    .    .    .| [0:0] NP -> * NP PP
|>    .    .    .    .    .    .    .    .    .| [0:0] NP -> * Det Noun

In [5]:
from nltk.grammar import Nonterminal

def viable_next_tokens(chart, position, grammar):
    """
    Given a chart and the current position in the input,
    return the set of terminals that could appear next.
    """
    candidates = set()
    for edge in chart.select(end=position):
        # edge is something like A -> α • β, [i:j]
        next_sym = edge.nextsym()
        if next_sym is None:
            continue
        if isinstance(next_sym, Nonterminal):
            # Expand FIRST set of this nonterminal
            for prod in grammar.productions(lhs=next_sym):
                first = prod.rhs()[0]
                if isinstance(first, str):  # terminal
                    candidates.add(first)
        else:
            # Directly a terminal
            candidates.add(next_sym)
    return candidates


In [6]:
from nltk.parse.chart import demo_grammar

# The grammar for ChartParser and SteppingChartParser:
grammar = demo_grammar()

sent = "I saw John with a cookie with my dog with John"

# Tokenize the sample sentence.
print("* Sentence:")
print(sent)
tokens = sent.split()
print(tokens)
print()

# Do the parsing.
earley = EarleyChartParser(grammar)

chart = earley.chart_parse(tokens)
for pos in range(len(tokens) + 1):
    print(f"At position {pos}, expected tokens:", viable_next_tokens(chart, pos, grammar))

* Sentence:
I saw John with a cookie with my dog with John
['I', 'saw', 'John', 'with', 'a', 'cookie', 'with', 'my', 'dog', 'with', 'John']

At position 0, expected tokens: {'John', 'the', 'I', 'my', 'a'}
At position 1, expected tokens: {'saw', 'ate', 'with'}
At position 2, expected tokens: {'John', 'the', 'with', 'I', 'my', 'a'}
At position 3, expected tokens: {'with'}
At position 4, expected tokens: {'John', 'the', 'I', 'my', 'a'}
At position 5, expected tokens: {'dog', 'cookie'}
At position 6, expected tokens: {'with'}
At position 7, expected tokens: {'John', 'the', 'I', 'my', 'a'}
At position 8, expected tokens: {'dog', 'cookie'}
At position 9, expected tokens: {'with'}
At position 10, expected tokens: {'John', 'the', 'I', 'my', 'a'}
At position 11, expected tokens: {'with'}


In [7]:
chart.e

AttributeError: 'IncrementalChart' object has no attribute 'e'