# Definitions added to textparser.py by the patch "Various new
# functionality." (6825c18), written out with normal Python formatting.
# The `Error` base class is repeated from the patch context because
# `TokenizerError` derives from it. Hunks that touch definitions only
# partly visible in the patch (a `match` method, the test case) are not
# reproduced here.


class Error(Exception):
    pass


def markup_line(string, offset):
    """Return the line of `string` containing `offset`, with the marker
    ``>>!<<`` inserted at the offset position.

    The line is delimited by the closest newline before `offset` and
    the closest newline at or after `offset`; the start and end of
    `string` are used when there is no such newline.

    """

    # rfind() returns -1 when there is no earlier newline, so adding
    # one yields index 0, the start of the string.
    begin = string.rfind('\n', 0, offset) + 1
    end = string.find('\n', offset)

    if end == -1:
        end = len(string)

    return string[begin:offset] + '>>!<<' + string[offset:end]


class TokenizerError(Error):
    """Error whose message reports invalid syntax at given line and
    column, quoting the offending line of `string` with a marker at
    `offset`.

    The constructor arguments are also available as the attributes
    `line`, `column`, `offset` and `string`.

    """

    def __init__(self, line, column, offset, string):
        message = 'Invalid syntax at line {}, column {}: "{}"'.format(
            line,
            column,
            markup_line(string, offset))
        super().__init__(message)
        # Keep the raw position so callers do not have to parse the
        # message to find out where the error is.
        self.line = line
        self.column = column
        self.offset = offset
        self.string = string


def create_token_re(spec):
    """Return a regular expression pattern string built from `spec`,
    an iterable of ``(name, regex)`` pairs.

    Each pair becomes the named group ``(?P<name>regex)``, and the
    groups are joined with ``|`` in the order given.

    """

    return '|'.join(
        '(?P<{}>{})'.format(name, regex) for name, regex in spec
    )


class Forward(object):
    """Placeholder pattern whose inner pattern is assigned after
    construction with the ``<<=`` operator. `match()` delegates to the
    assigned pattern.

    """

    def __init__(self):
        self._inner = None

    def __ilshift__(self, other):
        self._inner = other

        # An in-place operator must return the object to rebind the
        # name to; returning self keeps the same Forward instance.
        return self

    def match(self, tokens):
        """Return the result of the inner pattern's `match(tokens)`.

        Raises `Error` if no inner pattern has been assigned yet.

        """

        if self._inner is None:
            raise Error(
                'Forward pattern used before an inner pattern was '
                'assigned with <<=.')

        return self._inner.match(tokens)