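This notebook shows how to work with a simple English-like grammar: a context-free grammar for a toy language of input, assignment, and output statements, tokenized and parsed with NLTK.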

In [11]:
import nltk
# Make sure the 'punkt' tokenizer data is installed locally; presumably this is
# what nltk.tokenize.word_tokenize (used in the next cell) relies on.
# When the package is already present, the call only reports that it is up to date.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Evand\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [12]:
# A short program in the toy language. word_tokenize splits it into the
# terminal symbols used by the grammar (note "x;" becomes "x", ";").
sentence = "In x ; y is x + 1 ; y is y + 9 ; Out 6 ; Out x; DONE"

tokens = nltk.tokenize.word_tokenize(sentence)
print(tokens)

# In the productions below, "->" reads as "is defined to be".
# ALL (left-hand side of the first production) is the start symbol: a program
# is a sequence of statements S terminated by the literal token "DONE".
grammar = nltk.CFG.fromstring("""
    ALL -> "DONE" | S ALL
    S -> Input | Output | Assess
    Input -> "In" Var SemiColn
    Output -> "Out" Operand SemiColn
    Assess -> Var Is Exp SemiColn
    Exp -> Operand Operator Operand
    Operator -> "+" | "-"
    Operand -> Var | Num
    Var -> "x" | "y" | "z"
    Num -> "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
    SemiColn -> ";"
    Is -> "is"
""")

print()
print(grammar)

parser = nltk.RecursiveDescentParser(grammar)

# Collect every parse tree the parser yields, printing each one both in
# bracketed form and as an ASCII diagram as soon as it is found.
trees = []
try:
    for tree in parser.parse(tokens):
        trees.append(tree)
        print()
        print(tree)
        print()
        tree.pretty_print()
except ValueError as err:
    # Raised when the input contains a token that no grammar rule covers;
    # show the reason instead of discarding it.
    print("No parse tree possible.")
    print(err)
else:
    # All tokens are known to the grammar, but the sequence may still not be
    # derivable from the start symbol; report that instead of staying silent.
    if not trees:
        print("No parse tree possible.")

print()
print(trees)

['In', 'x', ';', 'y', 'is', 'x', '+', '1', ';', 'y', 'is', 'y', '+', '9', ';', 'Out', '6', ';', 'Out', 'x', ';', 'DONE']

Grammar with 28 productions (start state = ALL)
    ALL -> 'DONE'
    ALL -> S ALL
    S -> Input
    S -> Output
    S -> Assess
    Input -> 'In' Var SemiColn
    Output -> 'Out' Operand SemiColn
    Assess -> Var Is Exp SemiColn
    Exp -> Operand Operator Operand
    Operator -> '+'
    Operator -> '-'
    Operand -> Var
    Operand -> Num
    Var -> 'x'
    Var -> 'y'
    Var -> 'z'
    Num -> '0'
    Num -> '1'
    Num -> '2'
    Num -> '3'
    Num -> '4'
    Num -> '5'
    Num -> '6'
    Num -> '7'
    Num -> '8'
    Num -> '9'
    SemiColn -> ';'
    Is -> 'is'

(ALL
  (S (Input In (Var x) (SemiColn ;)))
  (ALL
    (S
      (Assess
        (Var y)
        (Is is)
        (Exp (Operand (Var x)) (Operator +) (Operand (Num 1)))
        (SemiColn ;)))
    (ALL
      (S
        (Assess
          (Var y)
          (Is is)
          (Exp (Operand (Var y)) (Operator