Skip to content

Commit

Permalink
Optionally return a parse tree of tokens.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jul 27, 2018
1 parent fe6308e commit 0275181
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 10 deletions.
31 changes: 28 additions & 3 deletions tests/test_textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,17 +638,42 @@ def grammar(self):
datas = [
(
'IF "foo" bar .',
['IF', [], '"foo"', 'bar', [[]], '.']
['IF', [], '"foo"', 'bar', [[]], '.'],
[
Token(kind='IF', value='IF', offset=0),
[],
Token(kind='ESCAPED_STRING', value='"foo"', offset=3),
Token(kind='WORD', value='bar', offset=9),
[[]],
Token(kind='.', value='.', offset=13)
]
),
(
'IF B "" b 1 2 .',
['IF', ['B'], '""', 'b', [['1', '2']], '.']
['IF', ['B'], '""', 'b', [['1', '2']], '.'],
[
Token(kind='IF', value='IF', offset=0),
[
Token(kind='B', value='B', offset=3)
],
Token(kind='ESCAPED_STRING', value='""', offset=5),
Token(kind='WORD', value='b', offset=8),
[
[
Token(kind='NUMBER', value='1', offset=10),
Token(kind='NUMBER', value='2', offset=12)
]
],
Token(kind='.', value='.', offset=14)
]
)
]

for string, expected_tree in datas:
for string, expected_tree, expected_token_tree in datas:
tree = Parser().parse(string)
self.assertEqual(tree, expected_tree)
tree = Parser().parse(string, token_tree=True)
self.assertEqual(tree, expected_token_tree)

def test_parser_tokenize_mismatch(self):
class Parser(textparser.Parser):
Expand Down
29 changes: 22 additions & 7 deletions textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self, kind):

def match(self, tokens):
if self.kind == tokens.peek().kind:
return tokens.get().value
return tokens.get_value()
else:
return None

Expand Down Expand Up @@ -85,7 +85,7 @@ def __init__(self, tokens):
self._max_pos = -1
self._stack = []

def get(self):
def get_value(self):
pos = self._pos
self._pos += 1

Expand Down Expand Up @@ -119,6 +119,15 @@ def __repr__(self):
return str(self._tokens[self._pos:self._pos + 2])


class StringTokens(Tokens):
    """Token stream whose ``get_value()`` yields plain token values.

    Only ``get_value()`` is overridden here; all other behaviour comes
    from ``Tokens``.

    NOTE(review): the base class implementation is not shown in this
    hunk. Presumably it returns the token object itself, which is what
    a token tree would need. Confirm against ``Tokens.get_value()``.

    """

    def get_value(self):
        """Consume the current token and return its ``value`` attribute."""

        current = self._pos

        # Advance before indexing, so the position has moved on even if
        # the lookup below raises.
        self._pos = current + 1

        return self._tokens[current].value


class Pattern(object):

def match(self, tokens):
Expand Down Expand Up @@ -354,7 +363,7 @@ class Any(Pattern):
"""

def match(self, tokens):
return tokens.get().value
return tokens.get_value()


class DelimitedList(Pattern):
Expand Down Expand Up @@ -464,8 +473,12 @@ class Grammar(object):
def __init__(self, grammar):
self._root = grammar

def parse(self, tokens):
tokens = Tokens(tokens)
def parse(self, tokens, token_tree=False):
if token_tree:
tokens = Tokens(tokens)
else:
tokens = StringTokens(tokens)

parsed = self._root.match(tokens)

if parsed is not None and tokens.peek_max().kind == '__EOF__':
Expand Down Expand Up @@ -609,9 +622,11 @@ def grammar(self):

raise NotImplementedError('To be implemented by subclasses.')

def parse(self, string):
def parse(self, string, token_tree=False):
"""Parse given string `string` and return the parse tree.
Returns a parse tree of tokens if `token_tree` is ``True``.
.. code-block:: python
>>> Parser().parse('Hello, World!')
Expand All @@ -625,6 +640,6 @@ def parse(self, string):
if len(tokens) == 0 or tokens[-1].kind != '__EOF__':
tokens.append(Token('__EOF__', None, len(string)))

return Grammar(self.grammar()).parse(tokens)
return Grammar(self.grammar()).parse(tokens, token_tree)
except (TokenizeError, GrammarError) as e:
raise ParseError(string, e.offset)

0 comments on commit 0275181

Please sign in to comment.