Get rid of global lexer state (#159)
* Get rid of Python 3.6 compat bits

* Don't mix floating-point infinity with integers

* Get rid of global lexer state
akx committed Jan 4, 2023
1 parent 2fdb9a8 commit eb91089
Showing 8 changed files with 78 additions and 69 deletions.
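In essence, the commit replaces the mutable class-level configuration on Chunk (set_multiline / set_single_line) with an immutable LexerContext that callers pass down explicitly. A minimal sketch of the new call pattern, using only names introduced in the diff below (the sample input string is made up):

from flynt.lexer.context import multi_line_context
from flynt.lexer.split import get_fstringify_chunks

code = "greeting = '%s, %s!' % (salutation, name)\n"
for chunk in get_fstringify_chunks(code, lexer_context=multi_line_context):
    print(chunk)  # each Chunk is a candidate span that could become an f-string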
49 changes: 12 additions & 37 deletions src/flynt/lexer/Chunk.py
@@ -1,46 +1,23 @@
import ast
import sys
import token
from collections import deque
from typing import Deque, Iterable, Iterator, Optional, Tuple
from typing import Deque, Iterable, Iterator, Optional

from flynt.lexer.context import LexerContext
from flynt.lexer.PyToken import PyToken

REUSE = "Token was not used"

is_36 = sys.version_info.major == 3 and sys.version_info.minor == 6
if is_36:
multiline_skip = (token.NEWLINE, 58)
multiline_break = (57,)

single_break = (token.NEWLINE, 57, 58)
else:
multiline_skip = (token.NEWLINE, token.NL)
multiline_break = (token.COMMENT,)

single_break = (token.COMMENT, token.NEWLINE, token.NL)

single_skip = ()


class Chunk:
skip_tokens: Tuple[int, ...] = ()
break_tokens: Tuple[int, ...] = ()
multiline = None

@staticmethod
def set_multiline() -> None:
Chunk.skip_tokens = multiline_skip
Chunk.break_tokens = multiline_break
Chunk.multiline = True

@staticmethod
def set_single_line() -> None:
Chunk.skip_tokens = single_skip
Chunk.break_tokens = single_break
Chunk.multiline = False

def __init__(self, tokens: Iterable[PyToken] = ()) -> None:
def __init__(
self,
tokens: Iterable[PyToken] = (),
*,
lexer_context: LexerContext,
) -> None:
self.lexer_context = lexer_context

self.tokens: Deque[PyToken] = deque(tokens)
self.complete = False

@@ -73,7 +50,6 @@ def second_append(self, t: PyToken) -> None:
self.complete = True

def percent_append(self, t: PyToken) -> Optional[str]:

# todo handle all cases?
if not self[0].is_string():
self.complete = True
@@ -104,7 +80,6 @@ def percent_append(self, t: PyToken) -> Optional[str]:
return None

def call_append(self, t: PyToken) -> None:

if t.is_string():
self.string_in_string = True

@@ -120,7 +95,7 @@ def call_append(self, t: PyToken) -> None:

def append(self, t: PyToken) -> Optional[str]:
# stop on a comment or too long chunk
if t.toknum in self.break_tokens:
if t.toknum in self.lexer_context.break_tokens:
self.complete = True
self.successful = self.is_parseable and (
self.is_percent_chunk or self.is_call_chunk
@@ -132,7 +107,7 @@ def append(self, t: PyToken) -> Optional[str]:
self.successful = False
return None

if t.toknum in self.skip_tokens:
if t.toknum in self.lexer_context.skip_tokens:
return None

if len(self) == 0:
5 changes: 0 additions & 5 deletions src/flynt/lexer/__init__.py
@@ -1,5 +0,0 @@
from flynt.lexer.Chunk import Chunk as _Chunk

set_multiline = _Chunk.set_multiline
set_single_line = _Chunk.set_single_line
set_multiline()
23 changes: 23 additions & 0 deletions src/flynt/lexer/context.py
@@ -0,0 +1,23 @@
import dataclasses
import token
from typing import FrozenSet


@dataclasses.dataclass(frozen=True)
class LexerContext:
skip_tokens: FrozenSet[int]
break_tokens: FrozenSet[int]
multiline: bool


single_line_context = LexerContext(
skip_tokens=frozenset(),
break_tokens=frozenset((token.COMMENT, token.NEWLINE, token.NL)),
multiline=False,
)

multi_line_context = LexerContext(
skip_tokens=frozenset((token.NEWLINE, token.NL)),
break_tokens=frozenset((token.COMMENT,)),
multiline=True,
)
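Because the dataclass is frozen, these two module-level contexts are safe to share; unlike the old Chunk class attributes, they cannot be flipped in place. A tiny sketch (not part of the diff) of what that buys:

import dataclasses

from flynt.lexer.context import single_line_context

try:
    single_line_context.multiline = True
except dataclasses.FrozenInstanceError:
    print("frozen: lexer settings can no longer be reconfigured globally")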
22 changes: 15 additions & 7 deletions src/flynt/lexer/split.py
@@ -4,14 +4,19 @@
from typing import Generator

from flynt.lexer.Chunk import Chunk
from flynt.lexer.context import LexerContext, multi_line_context
from flynt.lexer.PyToken import PyToken

log = logging.getLogger(__name__)


def get_chunks(code: str) -> Generator[Chunk, None, None]:
def get_chunks(
code: str,
*,
lexer_context: LexerContext,
) -> Generator[Chunk, None, None]:
g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)

try:
for item in g:
@@ -21,13 +26,13 @@ def get_chunks(code: str) -> Generator[Chunk, None, None]:
if chunk.complete:

yield chunk
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)
if reuse:
reuse = chunk.append(t)
# assert not reuse
if chunk.complete:
yield chunk
chunk = Chunk()
chunk = Chunk(lexer_context=lexer_context)

yield chunk
except tokenize.TokenError as e:
@@ -37,18 +42,21 @@
)


def get_fstringify_chunks(code: str) -> Generator[Chunk, None, None]:
def get_fstringify_chunks(
code: str,
lexer_context: LexerContext = multi_line_context,
) -> Generator[Chunk, None, None]:
"""
A generator yielding Chunks of the code where fstring can be formed.
"""
last_concat = False

for chunk in get_chunks(code):
for chunk in get_chunks(code, lexer_context=lexer_context):
if chunk.successful and not last_concat:
yield chunk

if len(chunk) and chunk[-1].is_string():
last_concat = True
else:
if Chunk.multiline or len(chunk) > 0:
if lexer_context.multiline or len(chunk) > 0:
last_concat = False
29 changes: 12 additions & 17 deletions src/flynt/process.py
@@ -1,11 +1,10 @@
import logging
import math
import re
import string
import sys
from functools import partial
from typing import Callable, List, Optional, Tuple, Union

from flynt import lexer
from flynt.ast_chunk import AstChunk
from flynt.exceptions import FlyntException
from flynt.format import QuoteTypes as qt
@@ -36,7 +35,10 @@ def __init__(
candidates_iter_factory: Callable,
transform_func: Callable,
) -> None:
self.len_limit = len_limit if len_limit is not None else math.inf
if len_limit is None:
len_limit = sys.maxsize

self.len_limit = len_limit
self.candidates_iter = candidates_iter_factory(code)
self.transform_func = transform_func
self.src_lines = code.split("\n")
@@ -176,7 +178,7 @@ def fstringify_code_by_line(code: str, state: State) -> Tuple[str, int]:
"""returns fstringified version of the code and amount of lines edited."""
return _transform_code(
code,
split.get_fstringify_chunks,
partial(split.get_fstringify_chunks, lexer_context=state.lexer_context),
partial(transform_chunk, state=state),
state,
)
@@ -209,16 +211,9 @@ def _transform_code(
state: State,
) -> Tuple[str, int]:
"""returns fstringified version of the code and amount of lines edited."""
len_limit = _multiline_settings(state)
jt = JoinTransformer(code, len_limit, candidates_iter_factory, transform_func)
return jt.fstringify_code_by_line()


def _multiline_settings(state: State) -> Optional[int]:
# TODO: eradicate this function and system
if not state.multiline:
state.len_limit = 0
lexer.set_single_line()
else:
lexer.set_multiline()
return state.len_limit
return JoinTransformer(
code,
state.len_limit,
candidates_iter_factory,
transform_func,
).fstringify_code_by_line()
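The math.inf to sys.maxsize swap above is the "Don't mix floating-point infinity with integers" bullet from the commit message: len_limit is compared against integer line lengths, and sys.maxsize keeps that comparison all-integer. A throwaway illustration (the variable name is made up):

import math
import sys

candidate_length = 88  # e.g. len() of a joined source line
print(candidate_length <= math.inf)     # True, but silently mixes int and float
print(candidate_length <= sys.maxsize)  # True, and stays within plain integers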
12 changes: 12 additions & 0 deletions src/flynt/state.py
@@ -3,6 +3,8 @@
import dataclasses
from typing import Optional

from flynt.lexer.context import LexerContext, multi_line_context, single_line_context


@dataclasses.dataclass
class State:
@@ -31,3 +33,13 @@ class State:

join_candidates: int = 0
join_changes: int = 0

def __post_init__(self):
if not self.multiline:
self.len_limit = 0

@property
def lexer_context(self) -> LexerContext:
if self.multiline:
return multi_line_context
return single_line_context
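A quick sketch of how the new State hooks behave, assuming the remaining State fields (not shown in this hunk) all have defaults; the test change below constructs State(multiline=False) the same way:

from flynt.lexer.context import multi_line_context, single_line_context
from flynt.state import State

assert State(multiline=True).lexer_context is multi_line_context

single = State(multiline=False)
assert single.lexer_context is single_line_context
assert single.len_limit == 0  # __post_init__ zeroes the limit outside multiline mode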
4 changes: 2 additions & 2 deletions test/integration/test_files.py
@@ -18,8 +18,8 @@ def test_fstringify(filename, state):


@pytest.mark.parametrize("filename", samples)
def test_fstringify_single_line(filename, state: State):
state.multiline = False
def test_fstringify_single_line(filename):
state = State(multiline=False)
out, expected = try_on_file(
filename,
partial(fstringify_code_by_line, state=state),
3 changes: 2 additions & 1 deletion test/test_styles.py
@@ -3,6 +3,7 @@
import pytest

from flynt.format import QuoteTypes, get_quote_type, set_quote_type
from flynt.lexer.context import multi_line_context
from flynt.lexer.split import get_chunks


@@ -17,7 +18,7 @@
)
def test_get_quote_type_token(code, quote_type):

g = get_chunks(code)
g = get_chunks(code, lexer_context=multi_line_context)
next(g)
chunk = next(g)
token = chunk.tokens[0]
