Skip to content

Commit

Permalink
Replace quadratic algo in LineDecoder
Browse files Browse the repository at this point in the history
This leads to enormous speedups when doing things such as
Response(...).iter_lines(), as described in issue #2422
  • Loading branch information
giannitedesco committed Nov 6, 2022
1 parent 9e97d7d commit 9cbd511
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 63 deletions.
65 changes: 18 additions & 47 deletions httpx/_decoders.py
Expand Up @@ -266,57 +266,28 @@ def __init__(self) -> None:
self.buffer = ""

def decode(self, text: str) -> typing.List[str]:
lines = []

if text and self.buffer and self.buffer[-1] == "\r":
if text.startswith("\n"):
# Handle the case where we have an "\r\n" split across
# our previous input, and our new chunk.
lines.append(self.buffer[:-1] + "\n")
self.buffer = ""
text = text[1:]
else:
# Handle the case where we have "\r" at the end of our
# previous input.
lines.append(self.buffer[:-1] + "\n")
self.buffer = ""

while text:
num_chars = len(text)
for idx in range(num_chars):
char = text[idx]
next_char = None if idx + 1 == num_chars else text[idx + 1]
if char == "\n":
lines.append(self.buffer + text[: idx + 1])
self.buffer = ""
text = text[idx + 1 :]
break
elif char == "\r" and next_char == "\n":
lines.append(self.buffer + text[:idx] + "\n")
self.buffer = ""
text = text[idx + 2 :]
break
elif char == "\r" and next_char is not None:
lines.append(self.buffer + text[:idx] + "\n")
self.buffer = ""
text = text[idx + 1 :]
break
elif next_char is None:
self.buffer += text
text = ""
break
if self.buffer:
text = self.buffer + text

if not text:
return []

lines = text.splitlines()
if text.endswith("\n"):
self.buffer = ""
else:
remainder = lines.pop()
# Handle the case where there is a CR, but we may need to wait for
# an LF in the next chunk to avoid emitting spurious lines.
if text.endswith("\r"):
remainder += "\r"
self.buffer = remainder

return lines

def flush(self) -> typing.List[str]:
if self.buffer.endswith("\r"):
# Handle the case where we had a trailing '\r', which could have
# been a '\r\n' pair.
lines = [self.buffer[:-1] + "\n"]
elif self.buffer:
lines = [self.buffer]
else:
lines = []
# this handles stripping any trailing \r
lines = self.buffer.splitlines()
self.buffer = ""
return lines

Expand Down
4 changes: 2 additions & 2 deletions tests/models/test_responses.py
Expand Up @@ -639,7 +639,7 @@ def test_iter_lines():
content=b"Hello,\nworld!",
)
content = [line for line in response.iter_lines()]
assert content == ["Hello,\n", "world!"]
assert content == ["Hello,", "world!"]


@pytest.mark.asyncio
Expand All @@ -652,7 +652,7 @@ async def test_aiter_lines():
content = []
async for line in response.aiter_lines():
content.append(line)
assert content == ["Hello,\n", "world!"]
assert content == ["Hello,", "world!"]


def test_sync_streaming_response():
Expand Down
28 changes: 14 additions & 14 deletions tests/test_decoders.py
Expand Up @@ -236,69 +236,69 @@ def test_text_decoder_empty_cases():
def test_line_decoder_nl():
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\n\nb\nc") == ["a\n", "\n", "b\n"]
assert decoder.decode("a\n\nb\nc") == ["a", "", "b"]
assert decoder.flush() == ["c"]

decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\n\nb\nc\n") == ["a\n", "\n", "b\n", "c\n"]
assert decoder.decode("a\n\nb\nc\n") == ["a", "", "b", "c"]
assert decoder.flush() == []

# Issue #1033
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("12345\n") == ["12345\n"]
assert decoder.decode("12345\n") == ["12345"]
assert decoder.decode("foo ") == []
assert decoder.decode("bar ") == []
assert decoder.decode("baz\n") == ["foo bar baz\n"]
assert decoder.decode("baz\n") == ["foo bar baz"]
assert decoder.flush() == []


def test_line_decoder_cr():
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\r\rb\rc") == ["a\n", "\n", "b\n"]
assert decoder.decode("a\r\rb\rc") == ["a", "", "b"]
assert decoder.flush() == ["c"]

decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\r\rb\rc\r") == ["a\n", "\n", "b\n"]
assert decoder.flush() == ["c\n"]
assert decoder.decode("a\r\rb\rc\r") == ["a", "", "b"]
assert decoder.flush() == ["c"]

# Issue #1033
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("12345\r") == []
assert decoder.decode("foo ") == ["12345\n"]
assert decoder.decode("foo ") == ["12345"]
assert decoder.decode("bar ") == []
assert decoder.decode("baz\r") == []
assert decoder.flush() == ["foo bar baz\n"]
assert decoder.flush() == ["foo bar baz"]


def test_line_decoder_crnl():
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\r\n\r\nb\r\nc") == ["a\n", "\n", "b\n"]
assert decoder.decode("a\r\n\r\nb\r\nc") == ["a", "", "b"]
assert decoder.flush() == ["c"]

decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\r\n\r\nb\r\nc\r\n") == ["a\n", "\n", "b\n", "c\n"]
assert decoder.decode("a\r\n\r\nb\r\nc\r\n") == ["a", "", "b", "c"]
assert decoder.flush() == []

decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("a\r") == []
assert decoder.decode("\n\r\nb\r\nc") == ["a\n", "\n", "b\n"]
assert decoder.decode("\n\r\nb\r\nc") == ["a", "", "b"]
assert decoder.flush() == ["c"]

# Issue #1033
decoder = LineDecoder()
assert decoder.decode("") == []
assert decoder.decode("12345\r\n") == ["12345\n"]
assert decoder.decode("12345\r\n") == ["12345"]
assert decoder.decode("foo ") == []
assert decoder.decode("bar ") == []
assert decoder.decode("baz\r\n") == ["foo bar baz\n"]
assert decoder.decode("baz\r\n") == ["foo bar baz"]
assert decoder.flush() == []


Expand Down

0 comments on commit 9cbd511

Please sign in to comment.