Skip to content

Commit

Permalink
Tag and zero/one or more dict pattern classes.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jul 23, 2018
1 parent 83fdde9 commit aa6d166
Show file tree
Hide file tree
Showing 2 changed files with 206 additions and 31 deletions.
112 changes: 110 additions & 2 deletions tests/test_textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@
from textparser import Choice
from textparser import choice
from textparser import ChoiceDict
from textparser import OneOrMore
from textparser import ZeroOrMore
from textparser import ZeroOrMoreDict
from textparser import OneOrMore
from textparser import OneOrMoreDict
from textparser import DelimitedList
from textparser import Token
from textparser import TokenizeError
from textparser import tokenize_init
from textparser import Any
from textparser import Optional
from textparser import Inline
from textparser import Tag
from textparser import Forward
from textparser import Optional


def tokenize(items):
Expand Down Expand Up @@ -177,6 +180,30 @@ def test_zero_or_more_end(self):
tree = grammar.parse(tokens)
self.assertEqual(tree, expected_tree)

def test_zero_or_more_dict(self):
    # ZeroOrMoreDict collects the matched sequences in a dict keyed
    # by the first item of each match. No matches gives an empty
    # dict.
    grammar = Grammar(ZeroOrMoreDict(Sequence('WORD', 'NUMBER')))
    eof = ('__EOF__', '')

    cases = [
        # No tokens before end of file.
        ([], {}),
        # 'foo' occurs twice, so both of its matches share one key.
        (
            [
                ('WORD', 'foo'), ('NUMBER', '1'),
                ('WORD', 'bar'), ('NUMBER', '2'),
                ('WORD', 'foo'), ('NUMBER', '3')
            ],
            {
                'foo': [['foo', '1'], ['foo', '3']],
                'bar': [['bar', '2']]
            }
        )
    ]

    for items, expected_tree in cases:
        tree = grammar.parse(tokenize(items + [eof]))
        self.assertEqual(tree, expected_tree)

def test_one_or_more(self):
grammar = Grammar(OneOrMore('WORD'))

Expand Down Expand Up @@ -244,6 +271,47 @@ def test_one_or_more_end_mismatch(self):

self.assertEqual(str(cm.exception), '')

def test_one_or_more_dict(self):
    # OneOrMoreDict collects the matched sequences in a dict keyed
    # by the first item of each match.
    grammar = Grammar(OneOrMoreDict(Sequence('WORD', 'NUMBER')))
    eof = ('__EOF__', '')

    cases = [
        # A single match.
        (
            [('WORD', 'foo'), ('NUMBER', '1')],
            {'foo': [['foo', '1']]}
        ),
        # 'foo' occurs twice, so both of its matches share one key.
        (
            [
                ('WORD', 'foo'), ('NUMBER', '1'),
                ('WORD', 'bar'), ('NUMBER', '2'),
                ('WORD', 'foo'), ('NUMBER', '3')
            ],
            {
                'foo': [['foo', '1'], ['foo', '3']],
                'bar': [['bar', '2']]
            }
        )
    ]

    for items, expected_tree in cases:
        tree = grammar.parse(tokenize(items + [eof]))
        self.assertEqual(tree, expected_tree)

def test_one_or_more_dict_mismatch(self):
    # A lone WORD is not a complete ('WORD', 'NUMBER') sequence, so
    # parsing is expected to raise textparser.Error with an empty
    # message.
    grammar = Grammar(OneOrMoreDict(Sequence('WORD', 'NUMBER')))

    mismatching_inputs = [
        [('WORD', 'foo')]
    ]

    for items in mismatching_inputs:
        token_list = tokenize(items)

        with self.assertRaises(textparser.Error) as cm:
            grammar.parse(token_list)

        self.assertEqual(str(cm.exception), '')

def test_tokenizer_error(self):
datas = [
(2, 'hej', 'he>>!<<j'),
Expand Down Expand Up @@ -384,6 +452,46 @@ def test_optional(self):
tree = grammar.parse(tokenize(tokens))
self.assertEqual(tree, expected_tree)

def test_tag(self):
    # Every Tag wraps the match of its inner pattern in a
    # (name, match) tuple, so nested tags give nested tuples.
    word = Tag('c', 'WORD')
    optional_number = Tag('d', Optional('NUMBER'))
    grammar = Grammar(Tag('a', Tag('b', choice(word, optional_number))))

    cases = [
        # First choice alternative.
        ([('WORD', 'bar')], ('a', ('b', ('c', 'bar')))),
        # Second alternative, with the optional number present.
        ([('NUMBER', '1')], ('a', ('b', ('d', ['1'])))),
        # Second alternative, with the optional number absent.
        ([], ('a', ('b', ('d', []))))
    ]

    for items, expected_tree in cases:
        tree = grammar.parse(tokenize(items))
        self.assertEqual(tree, expected_tree)

def test_tag_mismatch(self):
    # A NUMBER token given to a grammar of Tag('a', 'WORD') is
    # expected to raise textparser.Error with an empty message.
    grammar = Grammar(Tag('a', 'WORD'))

    mismatching_inputs = [
        [('NUMBER', 'bar')]
    ]

    for items in mismatching_inputs:
        token_list = tokenize(items)

        with self.assertRaises(textparser.Error) as cm:
            grammar.parse(token_list)

        self.assertEqual(str(cm.exception), '')


if __name__ == '__main__':
unittest.main()
125 changes: 96 additions & 29 deletions textparser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# A text parser.

from collections import namedtuple
from operator import itemgetter


__author__ = 'Erik Moqvist'
Expand Down Expand Up @@ -86,7 +87,13 @@ def __init__(self, line, column, offset, string):
Token = namedtuple('Token', ['kind', 'value', 'line', 'column'])


class Sequence(object):
class Pattern(object):
    """Base class of all patterns.

    Subclasses are expected to override match().

    """

    def match(self, tokens):
        """Match this pattern against given tokens.

        This base implementation always raises NotImplementedError.

        """

        message = 'To be implemented by subclasses.'

        raise NotImplementedError(message)


class Sequence(Pattern):
"""Matches a sequence of patterns.
"""
Expand All @@ -111,7 +118,7 @@ def match(self, tokens):
return matched


class Choice(object):
class Choice(Pattern):
"""Matches any of given patterns.
"""
Expand All @@ -134,7 +141,7 @@ def match(self, tokens):
return None


class ChoiceDict(object):
class ChoiceDict(Pattern):
"""Matches any of given patterns.
"""
Expand Down Expand Up @@ -170,18 +177,19 @@ def match(self, tokens):
return None


class ZeroOrMore(object):
class Repeated(Pattern):
"""Matches a pattern zero or more times.
"""

def __init__(self, element, end=None):
def __init__(self, element, end=None, minimum_length=0):
self._element = _wrap_string(element)

if end is not None:
end = _wrap_string(end)

self._end = end
self._minimum_length = minimum_length

def match(self, tokens):
matched = []
Expand All @@ -202,24 +210,27 @@ def match(self, tokens):

matched.append(mo)

return matched
if len(matched) >= self._minimum_length:
return matched
else:
return None


class OneOrMore(object):
"""Matches a pattern one or more times.
class RepeatedDict(Repeated):
"""Matches a pattern zero or more times.
"""

def __init__(self, element, end=None):
self._element = _wrap_string(element)
def __init__(self, element, end=None, minimum_length=0, key=None):
super(RepeatedDict, self).__init__(element, end, minimum_length)

if end is not None:
end = _wrap_string(end)
if key is None:
key = itemgetter(0)

self._end = end
self._key = key

def match(self, tokens):
matched = []
matched = {}

while True:
if self._end is not None:
Expand All @@ -235,15 +246,56 @@ def match(self, tokens):
if mo is None:
break

matched.append(mo)
key = self._key(mo)

try:
matched[key].append(mo)
except KeyError:
matched[key] = [mo]

if len(matched) > 0:
if len(matched) >= self._minimum_length:
return matched
else:
return None


class Any(object):
class ZeroOrMore(Repeated):
    """Matches a pattern zero or more times.

    This is Repeated with a minimum length of zero. `element` and
    `end` are forwarded to Repeated unchanged.

    """

    def __init__(self, element, end=None):
        super(ZeroOrMore, self).__init__(element,
                                         end=end,
                                         minimum_length=0)


class ZeroOrMoreDict(RepeatedDict):
    """Matches a pattern zero or more times, collecting the matches
    in a dictionary.

    This is RepeatedDict with a minimum length of zero. `element`,
    `end` and `key` are forwarded to RepeatedDict unchanged. `key` is
    the function that gives the dictionary key of each match.

    """

    def __init__(self, element, end=None, key=None):
        super(ZeroOrMoreDict, self).__init__(element,
                                             end=end,
                                             minimum_length=0,
                                             key=key)


class OneOrMore(Repeated):
    """Matches a pattern one or more times.

    This is Repeated with a minimum length of one. `element` and
    `end` are forwarded to Repeated unchanged.

    """

    def __init__(self, element, end=None):
        super(OneOrMore, self).__init__(element,
                                        end=end,
                                        minimum_length=1)


class OneOrMoreDict(RepeatedDict):
    """Matches a pattern one or more times, collecting the matches in
    a dictionary.

    This is RepeatedDict with a minimum length of one. `element`,
    `end` and `key` are forwarded to RepeatedDict unchanged. `key` is
    the function that gives the dictionary key of each match.

    """

    def __init__(self, element, end=None, key=None):
        super(OneOrMoreDict, self).__init__(element,
                                            end=end,
                                            minimum_length=1,
                                            key=key)


class Any(Pattern):
"""Matches any token.
"""
Expand All @@ -252,7 +304,7 @@ def match(self, tokens):
return tokens.get().value


class DelimitedList(object):
class DelimitedList(Pattern):
"""Matches a delimented list of given pattern.
"""
Expand Down Expand Up @@ -280,16 +332,7 @@ def match(self, tokens):
return matched


class Inline(object):
    """Wraps a pattern and returns whatever the wrapped pattern's
    match() returns.

    """

    def __init__(self, element):
        self._element = element

    def match(self, tokens):
        # Pure delegation; the result is passed through untouched.
        result = self._element.match(tokens)

        return result


class Optional(object):
class Optional(Pattern):
"""Matches a pattern zero or one times.
"""
Expand All @@ -306,7 +349,31 @@ def match(self, tokens):
return [mo]


class Forward(object):
class Inline(Pattern):
    """Wraps a pattern and returns whatever the wrapped pattern's
    match() returns.

    """

    def __init__(self, inner):
        self._inner = inner

    def match(self, tokens):
        # Pure delegation; the result is passed through untouched.
        result = self._inner.match(tokens)

        return result


class Tag(Pattern):
    """Tags the match of given inner pattern with given name.

    match() returns the tuple (name, match) when the inner pattern
    matches, and None when it does not.

    """

    def __init__(self, name, inner):
        self._name = name
        self._inner = _wrap_string(inner)

    def match(self, tokens):
        inner_match = self._inner.match(tokens)

        # The inner pattern signals a mismatch by returning None.
        if inner_match is None:
            return None

        return (self._name, inner_match)


class Forward(Pattern):

def __init__(self):
self._inner = None
Expand Down

0 comments on commit aa6d166

Please sign in to comment.