Skip to content

Commit

Permalink
Tokenize init function.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jul 23, 2018
1 parent d8f5570 commit f909d97
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 15 deletions.
7 changes: 2 additions & 5 deletions examples/benchmarks/json/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,7 @@ def tokenize(string):
('MISMATCH', r'.')
]

line = 1
line_start = -1
tokens = []
re_token = tp.create_token_re(spec)
line, line_start, tokens, re_token = tp.tokenize_init(spec)

for mo in re.finditer(re_token, string, re.DOTALL):
kind = mo.lastgroup
Expand All @@ -105,7 +102,7 @@ def tokenize(string):
else:
column = mo.start() - line_start

raise tp.TokenizerError(line, column, mo.start(), string)
raise tp.TokenizeError(line, column, mo.start(), string)

return tokens

Expand Down
16 changes: 10 additions & 6 deletions tests/test_textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from textparser import ZeroOrMore
from textparser import DelimitedList
from textparser import Token
from textparser import TokenizerError
from textparser import create_token_re
from textparser import TokenizeError
from textparser import tokenize_init
from textparser import Any
from textparser import Inline
from textparser import Forward
Expand Down Expand Up @@ -253,8 +253,8 @@ def test_tokenizer_error(self):
]

for offset, string, message in datas:
with self.assertRaises(TokenizerError) as cm:
raise TokenizerError(0, 1, offset, string)
with self.assertRaises(TokenizeError) as cm:
raise TokenizeError(0, 1, offset, string)

self.assertEqual(
str(cm.exception),
Expand All @@ -272,8 +272,12 @@ def test_create_token_re(self):
)
]

for spec, re_token in datas:
self.assertEqual(create_token_re(spec), re_token)
for spec, expected_re_token in datas:
line, line_start, tokens, re_token = tokenize_init(spec)
self.assertEqual(line, 1)
self.assertEqual(line_start, -1)
self.assertEqual(tokens, [])
self.assertEqual(re_token, expected_re_token)

def test_any(self):
grammar = Grammar(Any())
Expand Down
13 changes: 9 additions & 4 deletions textparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ class Error(Exception):
pass


class TokenizerError(Error):
class TokenizeError(Error):

def __init__(self, line, column, offset, string):
message = 'Invalid syntax at line {}, column {}: "{}"'.format(
line,
column,
markup_line(string, offset))
super(TokenizerError, self).__init__(message)
super(TokenizeError, self).__init__(message)
self.line = line
self.column = column
self.offset = offset
Expand Down Expand Up @@ -357,7 +357,12 @@ def markup_line(string, offset):
return string[begin:offset] + '>>!<<' + string[offset:end]


def create_token_re(spec):
return '|'.join([
def tokenize_init(spec):
    """Return fresh tokenizer state for *spec*.

    *spec* is a sequence of ``(name, regex)`` pairs. The returned tuple is
    ``(line, line_start, tokens, re_token)`` where ``line`` starts at 1,
    ``line_start`` at -1, ``tokens`` is a new empty list, and ``re_token``
    is one alternation pattern of named groups, one per spec entry, in
    spec order (earlier entries win ties when matched).
    """
    re_token = '|'.join(
        '(?P<{}>{})'.format(name, regex) for name, regex in spec
    )

    return 1, -1, [], re_token

0 comments on commit f909d97

Please sign in to comment.