Skip to content

Commit

Permalink
Tokenize init function.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jul 23, 2018
1 parent d8f5570 commit f909d97
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 15 deletions.
7 changes: 2 additions & 5 deletions examples/benchmarks/json/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,7 @@ def tokenize(string):
('MISMATCH', r'.')
]

line = 1
line_start = -1
tokens = []
re_token = tp.create_token_re(spec)
line, line_start, tokens, re_token = tp.tokenize_init(spec)

for mo in re.finditer(re_token, string, re.DOTALL):
kind = mo.lastgroup
Expand All @@ -105,7 +102,7 @@ def tokenize(string):
else:
column = mo.start() - line_start

raise tp.TokenizerError(line, column, mo.start(), string)
raise tp.TokenizeError(line, column, mo.start(), string)

return tokens

Expand Down
16 changes: 10 additions & 6 deletions tests/test_textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from textparser import ZeroOrMore
from textparser import DelimitedList
from textparser import Token
from textparser import TokenizerError
from textparser import create_token_re
from textparser import TokenizeError
from textparser import tokenize_init
from textparser import Any
from textparser import Inline
from textparser import Forward
Expand Down Expand Up @@ -253,8 +253,8 @@ def test_tokenizer_error(self):
]

for offset, string, message in datas:
with self.assertRaises(TokenizerError) as cm:
raise TokenizerError(0, 1, offset, string)
with self.assertRaises(TokenizeError) as cm:
raise TokenizeError(0, 1, offset, string)

self.assertEqual(
str(cm.exception),
Expand All @@ -272,8 +272,12 @@ def test_create_token_re(self):
)
]

for spec, re_token in datas:
self.assertEqual(create_token_re(spec), re_token)
for spec, expected_re_token in datas:
line, line_start, tokens, re_token = tokenize_init(spec)
self.assertEqual(line, 1)
self.assertEqual(line_start, -1)
self.assertEqual(tokens, [])
self.assertEqual(re_token, expected_re_token)

def test_any(self):
grammar = Grammar(Any())
Expand Down
13 changes: 9 additions & 4 deletions textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ class Error(Exception):
pass


class TokenizerError(Error):
class TokenizeError(Error):

def __init__(self, line, column, offset, string):
message = 'Invalid syntax at line {}, column {}: "{}"'.format(
line,
column,
markup_line(string, offset))
super(TokenizerError, self).__init__(message)
super(TokenizeError, self).__init__(message)
self.line = line
self.column = column
self.offset = offset
Expand Down Expand Up @@ -357,7 +357,12 @@ def markup_line(string, offset):
return string[begin:offset] + '>>!<<' + string[offset:end]


def create_token_re(spec):
return '|'.join([
def tokenize_init(spec):
    """Return fresh tokenizer state for *spec*.

    *spec* is a sequence of ``(name, regex)`` pairs. The returned tuple is
    ``(line, line_start, tokens, re_token)`` where ``line`` starts at 1,
    ``line_start`` at -1, ``tokens`` is a new empty list, and ``re_token``
    is one alternation pattern of named groups, one per spec entry, in
    spec order (earlier entries win ties when matched).
    """
    re_token = '|'.join(
        '(?P<{}>{})'.format(name, regex) for name, regex in spec
    )

    return 1, -1, [], re_token

0 comments on commit f909d97

Please sign in to comment.