Get rid of global lexer state (#159)
* Get rid of Python 3.6 compat bits

* Don't mix floating-point infinity with integers

* Get rid of global lexer state
akx committed Jan 4, 2023
1 parent 2fdb9a8 commit eb91089
Showing 8 changed files with 78 additions and 69 deletions.
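In essence, the commit replaces the mutable class-level configuration on Chunk (set_multiline / set_single_line) with an immutable LexerContext that callers pass down explicitly. A minimal sketch of the new call pattern, using only names introduced in the diff below (the sample input string is made up):

from flynt.lexer.context import multi_line_context
from flynt.lexer.split import get_fstringify_chunks

code = "greeting = '%s, %s!' % (salutation, name)\n"
for chunk in get_fstringify_chunks(code, lexer_context=multi_line_context):
    print(chunk)  # each Chunk is a candidate span that could become an f-string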
49 changes: 12 additions & 37 deletions src/flynt/lexer/Chunk.py
@@ -1,46 +1,23 @@
import ast
import sys
import token
from collections import deque
from typing import Deque, Iterable, Iterator, Optional, Tuple
from typing import Deque, Iterable, Iterator, Optional

from flynt.lexer.context import LexerContext
from flynt.lexer.PyToken import PyToken

REUSE = "Token was not used"

is_36 = sys.version_info.major == 3 and sys.version_info.minor == 6
if is_36:
multiline_skip = (token.NEWLINE, 58)
multiline_break = (57,)

single_break = (token.NEWLINE, 57, 58)
else:
multiline_skip = (token.NEWLINE, token.NL)
multiline_break = (token.COMMENT,)

single_break = (token.COMMENT, token.NEWLINE, token.NL)

single_skip = ()


class Chunk:
skip_tokens: Tuple[int, ...] = ()
break_tokens: Tuple[int, ...] = ()
multiline = None

@staticmethod
def set_multiline() -> None:
Chunk.skip_tokens = multiline_skip
Chunk.break_tokens = multiline_break
Chunk.multiline = True

@staticmethod
def set_single_line() -> None:
Chunk.skip_tokens = single_skip
Chunk.break_tokens = single_break
Chunk.multiline = False

def __init__(self, tokens: Iterable[PyToken] = ()) -> None:
def __init__(
self,
tokens: Iterable[PyToken] = (),
*,
lexer_context: LexerContext,
) -> None:
self.lexer_context = lexer_context

self.tokens: Deque[PyToken] = deque(tokens)
self.complete = False

@@ -73,7 +50,6 @@ def second_append(self, t: PyToken) -> None:
self.complete = True

def percent_append(self, t: PyToken) -> Optional[str]:

# todo handle all cases?
if not self[0].is_string():
self.complete = True
@@ -104,7 +80,6 @@ def percent_append(self, t: PyToken) -> Optional[str]:
return None

def call_append(self, t: PyToken) -> None:

if t.is_string():
self.string_in_string = True

@@ -120,7 +95,7 @@ def call_append(self, t: PyToken) -> None:

def append(self, t: PyToken) -> Optional[str]:
# stop on a comment or too long chunk
if t.toknum in self.break_tokens:
if t.toknum in self.lexer_context.break_tokens:
self.complete = True
self.successful = self.is_parseable and (
self.is_percent_chunk or self.is_call_chunk
@@ -132,7 +107,7 @@ def append(self, t: PyToken) -> Optional[str]:
self.successful = False
return None

if t.toknum in self.skip_tokens:
if t.toknum in self.lexer_context.skip_tokens:
return None

if len(self) == 0:
5 changes: 0 additions & 5 deletions src/flynt/lexer/__init__.py
@@ -1,5 +0,0 @@
from flynt.lexer.Chunk import Chunk as _Chunk

set_multiline = _Chunk.set_multiline
set_single_line = _Chunk.set_single_line
set_multiline()
23 changes: 23 additions & 0 deletions src/flynt/lexer/context.py
@@ -0,0 +1,23 @@
import dataclasses
import token
from typing import FrozenSet


@dataclasses.dataclass(frozen=True)
class LexerContext:
skip_tokens: FrozenSet[int]
break_tokens: FrozenSet[int]
multiline: bool


single_line_context = LexerContext(
skip_tokens=frozenset(),
break_tokens=frozenset((token.COMMENT, token.NEWLINE, token.NL)),
multiline=False,
)

multi_line_context = LexerContext(
skip_tokens=frozenset((token.NEWLINE, token.NL)),
break_tokens=frozenset((token.COMMENT,)),
multiline=True,
)
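Because the dataclass is frozen, these two module-level contexts are safe to share; unlike the old Chunk class attributes, they cannot be flipped in place. A tiny sketch (not part of the diff) of what that buys:

import dataclasses

from flynt.lexer.context import single_line_context

try:
    single_line_context.multiline = True
except dataclasses.FrozenInstanceError:
    print("frozen: lexer settings can no longer be reconfigured globally")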
22 changes: 15 additions & 7 deletions src/flynt/lexer/split.py
@@ -4,14 +4,19 @@
from typing import Generator

from flynt.lexer.Chunk import Chunk
from flynt.lexer.context import LexerContext, multi_line_context
from flynt.lexer.PyToken import PyToken

log = logging.getLogger(__name__)


def get_chunks(code: str) -> Generator[Chunk, None, None]:
def get_chunks(
code: str,
*,
lexer_context: LexerContext,
) -> Generator[Chunk, None, None]:
g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)

try:
for item in g:
@@ -21,13 +26,13 @@ def get_chunks(code: str) -> Generator[Chunk, None, None]:
if chunk.complete:

yield chunk
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)
if reuse:
reuse = chunk.append(t)
# assert not reuse
if chunk.complete:
yield chunk
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)

yield chunk
except tokenize.TokenError as e:
@@ -37,18 +42,21 @@
)


def get_fstringify_chunks(code: str) -> Generator[Chunk, None, None]:
def get_fstringify_chunks(
code: str,
lexer_context: LexerContext = multi_line_context,
) -> Generator[Chunk, None, None]:
"""
A generator yielding Chunks of the code where fstring can be formed.
"""
last_concat = False

for chunk in get_chunks(code):
for chunk in get_chunks(code, lexer_context=lexer_context):
if chunk.successful and not last_concat:
yield chunk

if len(chunk) and chunk[-1].is_string():
last_concat = True
else:
if Chunk.multiline or len(chunk) > 0:
if lexer_context.multiline or len(chunk) > 0:
last_concat = False
29 changes: 12 additions & 17 deletions src/flynt/process.py
@@ -1,11 +1,10 @@
import logging
import math
import re
import string
import sys
from functools import partial
from typing import Callable, List, Optional, Tuple, Union

from flynt import lexer
from flynt.ast_chunk import AstChunk
from flynt.exceptions import FlyntException
from flynt.format import QuoteTypes as qt
@@ -36,7 +35,10 @@ def __init__(
candidates_iter_factory: Callable,
transform_func: Callable,
) -> None:
self.len_limit = len_limit if len_limit is not None else math.inf
if len_limit is None:
len_limit = sys.maxsize

self.len_limit = len_limit
self.candidates_iter = candidates_iter_factory(code)
self.transform_func = transform_func
self.src_lines = code.split("\n")
@@ -176,7 +178,7 @@ def fstringify_code_by_line(code: str, state: State) -> Tuple[str, int]:
"""returns fstringified version of the code and amount of lines edited."""
return _transform_code(
code,
split.get_fstringify_chunks,
partial(split.get_fstringify_chunks, lexer_context=state.lexer_context),
partial(transform_chunk, state=state),
state,
)
@@ -209,16 +211,9 @@ def _transform_code(
state: State,
) -> Tuple[str, int]:
"""returns fstringified version of the code and amount of lines edited."""
len_limit = _multiline_settings(state)
jt = JoinTransformer(code, len_limit, candidates_iter_factory, transform_func)
return jt.fstringify_code_by_line()


def _multiline_settings(state: State) -> Optional[int]:
# TODO: eradicate this function and system
if not state.multiline:
state.len_limit = 0
lexer.set_single_line()
else:
lexer.set_multiline()
return state.len_limit
return JoinTransformer(
code,
state.len_limit,
candidates_iter_factory,
transform_func,
).fstringify_code_by_line()
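The math.inf to sys.maxsize swap above is the "Don't mix floating-point infinity with integers" bullet from the commit message: len_limit is compared against integer line lengths, and sys.maxsize keeps that comparison all-integer. A throwaway illustration (the variable name is made up):

import math
import sys

candidate_length = 88  # e.g. len() of a joined source line
print(candidate_length <= math.inf)     # True, but silently mixes int and float
print(candidate_length <= sys.maxsize)  # True, and stays within plain integers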
12 changes: 12 additions & 0 deletions src/flynt/state.py
@@ -3,6 +3,8 @@
import dataclasses
from typing import Optional

from flynt.lexer.context import LexerContext, multi_line_context, single_line_context


@dataclasses.dataclass
class State:
@@ -31,3 +33,13 @@ class State:

join_candidates: int = 0
join_changes: int = 0

def __post_init__(self):
if not self.multiline:
self.len_limit = 0

@property
def lexer_context(self) -> LexerContext:
if self.multiline:
return multi_line_context
return single_line_context
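A quick sketch of how the new State hooks behave, assuming the remaining State fields (not shown in this hunk) all have defaults; the test change below constructs State(multiline=False) the same way:

from flynt.lexer.context import multi_line_context, single_line_context
from flynt.state import State

assert State(multiline=True).lexer_context is multi_line_context

single = State(multiline=False)
assert single.lexer_context is single_line_context
assert single.len_limit == 0  # __post_init__ zeroes the limit outside multiline mode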
4 changes: 2 additions & 2 deletions test/integration/test_files.py
@@ -18,8 +18,8 @@ def test_fstringify(filename, state):


@pytest.mark.parametrize("filename", samples)
def test_fstringify_single_line(filename, state: State):
state.multiline = False
def test_fstringify_single_line(filename):
state = State(multiline=False)
out, expected = try_on_file(
filename,
partial(fstringify_code_by_line, state=state),
3 changes: 2 additions & 1 deletion test/test_styles.py
@@ -3,6 +3,7 @@
import pytest

from flynt.format import QuoteTypes, get_quote_type, set_quote_type
from flynt.lexer.context import multi_line_context
from flynt.lexer.split import get_chunks


@@ -17,7 +18,7 @@
)
def test_get_quote_type_token(code, quote_type):

g = get_chunks(code)
g = get_chunks(code, lexer_context=multi_line_context)
next(g)
chunk = next(g)
token = chunk.tokens[0]
