Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

fix bugs; better error reporting; doctest more features

  • Loading branch information...
commit ad6ba5063dd40ffb3532e031c7a9a884a93bb3c6 1 parent 5cdec9c
@jflatow authored
Showing with 70 additions and 55 deletions.
  1. +1 −0  .gitignore
  2. +69 −55 grammar.py
View
1  .gitignore
@@ -0,0 +1 @@
+*.pyc
View
124 grammar.py
@@ -1,22 +1,25 @@
"""
A miniature parsing library.
->>> grammar = Grammar({'constant': Terminal(r'[\d]+\.?[\d]*'), \
- 'variable': Terminal(r'[A-Za-z]+'), \
- 'operator': Terminal.padded(r'[\+\-\*\/]'), \
- 'operand': Pipe('constant', 'variable'), \
- 'expression': Seq('operand', 'operator', 'operand')})
-
->>> valid = ['a + b', 'c - d', '5 * 4', 'z / 1']
->>> invalid = ['$any', 'any', 'a! + n', '&']
-
->>> [grammar.parse('expression', v) for v in valid]
-[(expression, (operand, variable, 'a'), (operator, ' + '), (operand, variable, 'b')),\
- (expression, (operand, variable, 'c'), (operator, ' - '), (operand, variable, 'd')),\
- (expression, (operand, constant, '5'), (operator, ' * '), (operand, constant, '4')),\
- (expression, (operand, variable, 'z'), (operator, ' / '), (operand, constant, '1'))]
-
->>> [grammar.is_valid('expression', i) for i in invalid]
+>>> grammar = Grammar({'eof': Terminal(r'$'), \
+ 'semicolon': Terminal.padded(r';'), \
+ 'constant': Terminal(r'[\d]+\.?[\d]*'), \
+ 'variable': Terminal(r'[A-Za-z]+'), \
+ 'operator': Terminal.padded(r'[\+\-\*\/]'), \
+ 'operand': Pipe('constant', 'variable'), \
+ 'expression': Seq('operand', Star('operator', 'operand')), \
+ 'block': Seq(Plus('expression', 'semicolon'), 'eof')})
+
+>>> valid = ['a+b; c+d;', 'c - d;', '5 * 4;', 'z / 1;']
+>>> invalid = ['$any', 'any +', 'a! + n', '&']
+
+>>> [grammar.parse('block', v) for v in valid] #doctest: +NORMALIZE_WHITESPACE
+[('block', [[('expression', [('operand', (variable, 'a')), [(operator, '+'), ('operand', (variable, 'b'))]]), (semicolon, '; '),
+ ('expression', [('operand', (variable, 'c')), [(operator, '+'), ('operand', (variable, 'd'))]]), (semicolon, ';')], (eof, '')]), \
+ ('block', [[('expression', [('operand', (variable, 'c')), [(operator, ' - '), ('operand', (variable, 'd'))]]), (semicolon, ';')], (eof, '')]),\
+ ('block', [[('expression', [('operand', (constant, '5')), [(operator, ' * '), ('operand', (constant, '4'))]]), (semicolon, ';')], (eof, '')]),\
+ ('block', [[('expression', [('operand', (variable, 'z')), [(operator, ' / '), ('operand', (constant, '1'))]]), (semicolon, ';')], (eof, '')])]
+>>> [grammar.is_valid('block', i) for i in invalid]
[False, False, False, False]
"""
import re
@@ -24,9 +27,7 @@
class BadGrammar(Exception): pass
class BadState(Exception):
def __str__((expected, string, start)):
- return "Expected %s but got '%s' >>> '%s'" % (expected,
- string[:start].replace("\'", "\\'"),
- string[start:].replace("\'", "\\'"))
+ return "Expected %r but got:\n\t%s" % (expected, string[start:])
class Grammar(dict):
def __init__(self, *args, **kwargs):
@@ -39,6 +40,9 @@ def __getitem__(self, state_or_name):
return state_or_name
return super(type(self), self).__getitem__(state_or_name)
+ def __repr__(self):
+ return '{%s}' % ', '.join('%r: %s' % item for item in self.items())
+
def is_valid(self, name, string):
try:
return self.parse(name, string) and True
@@ -50,7 +54,7 @@ def parse(self, name, string):
if len(matches) > 1:
raise BadGrammar(matches)
if len(matches) < 1:
- raise BadState(self, string, 0)
+ raise BadState(self[name], string, 0)
which, _end = matches[0]
return which
@@ -60,20 +64,22 @@ class State(tuple):
def __new__(cls, *pattern):
return tuple.__new__(cls, pattern or (Empty(),))
- def __iter__(self):
- # tupleiterator has no send, so wrap the default with a generator
+ def __iter__(self): # tupleiterator has no send, so wrap it with a generator
for item in tuple.__iter__(self):
yield item
def __repr__(self):
- return self.name or "%s%s" % (type(self).__name__, tuple.__repr__(self))
+ return self.name or str(self)
+
+ def __str__(self):
+ return '%s(%s)' % (type(self).__name__, ', '.join(repr(p) for p in self))
class Terminal(State):
def __new__(cls, *pattern):
return tuple.__new__(cls, (re.compile(''.join(pattern)),))
- def __repr__(self):
- return self.name or "%s('%s')" % (type(self).__name__, self[0].pattern)
+ def __str__(self):
+ return "r'%s'" % self[0].pattern.replace("\\'", "\\\\'").replace("'", "\\'")
@classmethod
def padded(cls, pattern):
@@ -86,59 +92,67 @@ def matches(self, grammar, string, start=0):
yield (self, match.group()), match.end()
class Empty(Terminal):
- def matches(self, grammar, string, start=0):
- yield (self, ''), start
+ def __str__(self):
+ return 'Empty'
-class Symbolic(State):
- pass
+ def matches(self, grammar, string, start=0):
+ yield None, start
+class Symbolic(State): pass
class Seq(Symbolic):
+ def fold(self, match):
+ if self.name:
+ return self.name, [m for m in match if m]
+ return [m for m in match if m]
+
def matches(self, grammar, string, start=0):
- # initialize the sequence with an empty match at the beginning of the string
+ errlist = [BadState(self, string, start)]
matches = [((), start)]
for pattern in self:
- # compute every possible match continuation
- matches = [(which + (which_,), end_)
- for which, end in matches
- for which_, end_ in grammar[pattern].matches(grammar, string, start=end)]
+ def continuations(which, end):
+ try:
+ for which_, end_ in grammar[pattern].matches(grammar, string, start=end):
+ yield (which + (which_,)), end_
+ except BadState, e:
+ errlist.append(e)
+ matches = [c for match in matches for c in continuations(*match)]
+ if not matches:
+ raise errlist[-1]
for which, end in matches:
- yield ((self,) + which), end
+ yield self.fold(which), end
class Pipe(Symbolic):
+ def fold(self, match):
+ if self.name:
+ return self.name, match
+ return match
+
def matches(self, grammar, string, start=0):
- # get a (possibly infinite) generator of choices
- choices = iter(self)
+ choices = iter(self) # could be infinite, like Star or Plus
pattern = choices.send(None)
- # this is a BadState unless at least one pattern matches
- matched = False
+ matched = []
while True:
try:
- # get the matches for the pattern
- matches = list(grammar[pattern].matches(grammar, string, start=start))
- for which, end in matches:
- yield ((self,) + which), end
- # we got one
- matched = True
+ for which, end in grammar[pattern].matches(grammar, string, start=start):
+ yield self.fold(which), end
+ matched.append(True)
except BadState, e:
- matches = None
+ matched.append(None)
try:
- # get the next pattern, given the last pattern matches
- # patterns decide when they are exhausted
- pattern = choices.send(matches)
+ pattern = choices.send(matched[-1])
except StopIteration:
- # no more patterns
- if not matched:
+ if not any(matched):
raise BadState(self, string, start)
raise StopIteration
class Star(Pipe):
def __iter__(self):
- pattern = (Empty(),)
+ pattern = Seq()
while (yield pattern):
- pattern += self
+ pattern = Seq(*(pattern + self))
class Plus(Pipe):
def __iter__(self):
- pattern = self
+ pattern = Seq(*self)
while (yield pattern):
- pattern += self
+ pattern = Seq(*(pattern + self))
Please sign in to comment.
Something went wrong with that request. Please try again.