Skip to content

Commit

Permalink
Added NoMatch and Not patterns. Ignore trailing delimiter in delimited list.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jul 29, 2018
1 parent 6ccaf53 commit 6541f8e
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 41 deletions.
11 changes: 10 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Functions and classes
.. autoclass:: textparser.Parser
:members:

.. autoclass:: textparser.Sequence
:members:

.. autoclass:: textparser.Choice
:members:

Expand All @@ -40,10 +43,16 @@ Functions and classes
.. autoclass:: textparser.DelimitedList
:members:

.. autoclass:: textparser.Optional
:members:

.. autoclass:: textparser.Any
:members:

.. autoclass:: textparser.Optional
.. autoclass:: textparser.Not
:members:

.. autoclass:: textparser.NoMatch
:members:

.. autoclass:: textparser.Tag
Expand Down
144 changes: 132 additions & 12 deletions tests/test_textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from textparser import Optional
from textparser import Tag
from textparser import Forward
from textparser import NoMatch
from textparser import Not


def tokenize(items, add_eof_token=True):
Expand Down Expand Up @@ -157,16 +159,20 @@ def test_grammar_choice_dict_init(self):
self.assertEqual(str(cm.exception), message)

def test_grammar_delimited_list(self):
grammar = Grammar(DelimitedList('WORD'))
grammar = Grammar(Sequence(DelimitedList('WORD'), Optional('.')))

datas = [
(
[('WORD', 'foo')],
['foo']
[['foo'], []]
),
(
[('WORD', 'foo'), (',', ','), ('WORD', 'bar')],
['foo', 'bar']
[['foo', 'bar'], []]
),
(
[('WORD', 'foo'), (',', ','), ('WORD', 'bar'), ('.', '.')],
[['foo', 'bar'], ['.']]
)
]

Expand All @@ -176,19 +182,35 @@ def test_grammar_delimited_list(self):
self.assertEqual(tree, expected_tree)

def test_grammar_delimited_list_mismatch(self):
grammar = Grammar(DelimitedList('WORD'))
grammar = Grammar(Sequence(DelimitedList('WORD'), Optional('.')))

datas = [
[('WORD', 'foo'), (',', ',')]
(
[
('WORD', 'foo', 1),
(',', ',', 2)
],
2
),
(
[
('WORD', 'foo', 1),
(',', ',', 2),
('WORD', 'foo', 3),
(',', ',', 4),
('.', '.', 5)
],
4
)
]

for tokens in datas:
for tokens, offset in datas:
tokens = tokenize(tokens)

with self.assertRaises(textparser.GrammarError) as cm:
grammar.parse(tokens)

self.assertEqual(cm.exception.offset, -1)
self.assertEqual(cm.exception.offset, offset)

def test_grammar_zero_or_more(self):
grammar = Grammar(ZeroOrMore('WORD'))
Expand All @@ -213,6 +235,27 @@ def test_grammar_zero_or_more(self):
tree = grammar.parse(tokens)
self.assertEqual(tree, expected_tree)

def test_grammar_zero_or_more_partial_element_match(self):
    # Grammar under test: zero or more (WORD, NUMBER) pairs followed by
    # a single WORD. The input ends with a WORD that has no NUMBER
    # after it; the expected tree lists it after the repeated pairs
    # rather than inside them.
    word_number_pair = Sequence('WORD', 'NUMBER')
    grammar = Grammar(Sequence(ZeroOrMore(word_number_pair), 'WORD'))

    cases = [
        (
            [
                ('WORD', 'foo'),
                ('NUMBER', '1'),
                ('WORD', 'bar'),
                ('NUMBER', '2'),
                ('WORD', 'fie'),
            ],
            [[['foo', '1'], ['bar', '2']], 'fie'],
        ),
    ]

    for raw_tokens, expected in cases:
        token_list = tokenize(raw_tokens)
        parsed = grammar.parse(token_list)
        self.assertEqual(parsed, expected)

def test_grammar_zero_or_more_end(self):
grammar = Grammar(
Sequence(ZeroOrMore('WORD', Sequence('WORD', 'NUMBER')),
Expand Down Expand Up @@ -254,7 +297,7 @@ def test_grammar_zero_or_more_dict(self):
]

for tokens, expected_tree in datas:
tokens = tokenize(tokens + [('__EOF__', '')])
tokens = tokenize(tokens)
tree = grammar.parse(tokens)
self.assertEqual(tree, expected_tree)

Expand Down Expand Up @@ -353,24 +396,43 @@ def test_grammar_one_or_more_dict(self):
]

for tokens, expected_tree in datas:
tokens = tokenize(tokens + [('__EOF__', '')])
tokens = tokenize(tokens)
tree = grammar.parse(tokens)
self.assertEqual(tree, expected_tree)

def test_grammar_one_or_more_dict_mismatch(self):
grammar = Grammar(OneOrMoreDict(Sequence('WORD', 'NUMBER')))

datas = [
[('WORD', 'foo')]
(
[('WORD', 'foo', 5)],
-1
),
(
[
('WORD', 'foo', 5),
('WORD', 'bar', 6)
],
6
),
(
[
('WORD', 'foo', 5),
('NUMBER', '4', 6),
('WORD', 'bar', 7),
('WORD', 'fie', 8)
],
8
)
]

for tokens in datas:
for tokens, line in datas:
tokens = tokenize(tokens)

with self.assertRaises(textparser.GrammarError) as cm:
grammar.parse(tokens)

self.assertEqual(cm.exception.offset, -1)
self.assertEqual(cm.exception.offset, line)

def test_grammar_any(self):
grammar = Grammar(Any())
Expand Down Expand Up @@ -501,6 +563,22 @@ def test_grammar_forward(self):
tree = grammar.parse(tokenize(tokens))
self.assertEqual(tree, expected_tree)

def test_grammar_forward_text(self):
    # A Forward placeholder that is given a plain token-kind string
    # via '<<=' is used directly as the grammar; the expected parse
    # result is the bare token value.
    placeholder = Forward()
    placeholder <<= 'FOO'
    grammar = Grammar(placeholder)

    cases = [
        ([('FOO', 'foo')], 'foo'),
    ]

    for raw_tokens, expected in cases:
        parsed = grammar.parse(tokenize(raw_tokens))
        self.assertEqual(parsed, expected)

def test_grammar_optional(self):
grammar = Grammar(Sequence(Optional('WORD'),
Optional('WORD'),
Expand Down Expand Up @@ -573,6 +651,48 @@ def test_grammar_tag_mismatch(self):

self.assertEqual(cm.exception.offset, 1)

def test_grammar_not(self):
    # Not('WORD') followed by 'NUMBER': for input that starts with a
    # NUMBER the expected tree holds an empty list in the Not position
    # and the number's value after it.
    grammar = Grammar(Sequence(Not('WORD'), 'NUMBER'))
    expected = [[], '1']

    inputs = [
        [('NUMBER', '1')],
    ]

    for raw_tokens in inputs:
        parsed = grammar.parse(tokenize(raw_tokens))
        self.assertEqual(parsed, expected)

def test_grammar_not_mismatch(self):
    # Same grammar as the positive Not test, but the input starts with
    # a WORD, so parsing must raise GrammarError.
    # NOTE(review): the third item of each token tuple is presumably
    # its offset (the error is expected at 3, matching the WORD token);
    # confirm against tokenize().
    grammar = Grammar(Sequence(Not('WORD'), 'NUMBER'))

    inputs = [
        [('WORD', 'foo', 3), ('NUMBER', '1', 4)],
    ]

    for raw_tokens in inputs:
        # Tokenize outside the assertRaises block so that only the
        # parse call is covered by it.
        token_list = tokenize(raw_tokens)

        with self.assertRaises(textparser.GrammarError) as caught:
            grammar.parse(token_list)

        self.assertEqual(caught.exception.offset, 3)

def test_grammar_no_match(self):
    # A grammar consisting only of NoMatch() must raise GrammarError
    # for both a NUMBER and a WORD token.
    # NOTE(review): the third item of each token tuple is presumably
    # its offset (the error is expected at 3 in both cases); confirm
    # against tokenize().
    grammar = Grammar(NoMatch())

    inputs = [
        [('NUMBER', '1', 3)],
        [('WORD', 'foo', 3)],
    ]

    for raw_tokens in inputs:
        # Tokenize outside the assertRaises block so that only the
        # parse call is covered by it.
        token_list = tokenize(raw_tokens)

        with self.assertRaises(textparser.GrammarError) as caught:
            grammar.parse(token_list)

        self.assertEqual(caught.exception.offset, 3)

def test_tokenizer_error(self):
datas = [
(2, 'hej', 'Invalid syntax at line 1, column 3: "he>>!<<j"'),
Expand Down

0 comments on commit 6541f8e

Please sign in to comment.