From 95600798119221138a4ce5f06cafdd8223d74f37 Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Fri, 10 Oct 2025 12:18:55 +0100 Subject: [PATCH 1/2] Fix gzip streaming Fix a bug found in Django where a streaming response would never be flushed, so the client would hang until the full response was generated. --- src/django_http_compression/middleware.py | 29 +++++- tests/test_middleware.py | 117 +++++++++++++++++++--- 2 files changed, 131 insertions(+), 15 deletions(-) diff --git a/src/django_http_compression/middleware.py b/src/django_http_compression/middleware.py index 507124c..2e074bb 100644 --- a/src/django_http_compression/middleware.py +++ b/src/django_http_compression/middleware.py @@ -2,6 +2,7 @@ from collections.abc import AsyncGenerator, Awaitable, Generator, Iterator from functools import lru_cache, partial +from gzip import GzipFile from types import MappingProxyType from typing import Callable, Literal, cast @@ -9,7 +10,10 @@ from django.http import HttpRequest, HttpResponse, StreamingHttpResponse from django.http.response import HttpResponseBase from django.utils.cache import patch_vary_headers -from django.utils.text import compress_sequence as gzip_compress_sequence +from django.utils.text import ( # type: ignore [attr-defined] + StreamingBuffer, + _get_random_filename, +) from django.utils.text import compress_string as gzip_compress from typing_extensions import assert_never @@ -250,6 +254,29 @@ def _parse_part( return None +def gzip_compress_sequence( + sequence: Iterator[bytes], *, max_random_bytes: int +) -> Generator[bytes]: + """ + Copy of Django’s compress_sequence() but with streaming response flushing + bug fixed. + """ + buf = StreamingBuffer() + filename = _get_random_filename(max_random_bytes) if max_random_bytes else None + with GzipFile( + filename=filename, mode="wb", compresslevel=6, fileobj=buf, mtime=0 + ) as zfile: + # Output headers... 
+ yield b"" # Optimization + for item in sequence: + zfile.write(item) + zfile.flush() # Bug fix + data = buf.read() + if data: + yield data + yield buf.read() + + def brotli_compress_sequence(sequence: Iterator[bytes]) -> Generator[bytes]: # Output headers yield b"" diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 75ff3c8..4782c74 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -3,12 +3,16 @@ import gzip import inspect import sys +import zlib +from collections.abc import Iterator from gzip import decompress as gzip_decompress from http import HTTPStatus from textwrap import dedent +from typing import cast import django import pytest +from brotli import Decompressor as BrotliDecompressor from brotli import decompress as brotli_decompress from django.http import StreamingHttpResponse from django.middleware import gzip as django_middleware_gzip @@ -22,6 +26,7 @@ class HttpCompressionMiddlewareTests(SimpleTestCase): def test_short(self): response = self.client.get("/short/", headers={"accept-encoding": "gzip"}) + assert response.status_code == HTTPStatus.OK assert "content-encoding" not in response.headers assert "vary" not in response.headers @@ -29,6 +34,7 @@ def test_short(self): def test_encoded(self): response = self.client.get("/encoded/", headers={"accept-encoding": "gzip"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "supercompression" assert "vary" not in response.headers @@ -36,6 +42,7 @@ def test_encoded(self): def test_identity(self): response = self.client.get("/") + assert response.status_code == HTTPStatus.OK assert "content-encoding" not in response.headers assert "vary" not in response.headers @@ -43,6 +50,7 @@ def test_identity(self): def test_gzip(self): response = self.client.get("/", headers={"accept-encoding": "gzip"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "gzip" assert response.headers["vary"] == 
"accept-encoding" @@ -52,6 +60,7 @@ def test_gzip(self): def test_brotli(self): response = self.client.get("/", headers={"accept-encoding": "br"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "br" assert response.headers["vary"] == "accept-encoding" @@ -64,6 +73,7 @@ def test_zstd(self): from compression.zstd import decompress response = self.client.get("/", headers={"accept-encoding": "zstd"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "zstd" assert response.headers["vary"] == "accept-encoding" @@ -73,47 +83,121 @@ def test_zstd(self): def test_streaming_identity(self): response = self.client.get("/streaming/") + + assert isinstance(response, StreamingHttpResponse) + assert not response.is_async assert response.status_code == HTTPStatus.OK assert "content-encoding" not in response.headers assert "vary" not in response.headers - content = response.getvalue() + streaming_content = cast(Iterator[bytes], response.streaming_content) + content = next(streaming_content) + assert content == b"\n" + content += next(streaming_content) + assert content == b"\n\n" + for chunk in streaming_content: + content += chunk assert content.decode() == basic_html def test_streaming_gzip(self): response = self.client.get("/streaming/", headers={"accept-encoding": "gzip"}) + + assert isinstance(response, StreamingHttpResponse) + assert not response.is_async assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "gzip" assert response.headers["vary"] == "accept-encoding" - content = response.getvalue() - assert content.startswith(b"\x1f\x8b\x08") - decompressed = gzip.decompress(content) - assert decompressed.decode() == basic_html + + decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16) # gzip decoding + content = b"" + streaming_content = cast(Iterator[bytes], response.streaming_content) + + decompressed = 
decompressor.decompress(next(streaming_content)) + assert decompressed == b"" + content += decompressed + + decompressed = decompressor.decompress(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + decompressed = decompressor.decompress(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + for chunk in streaming_content: + content += decompressor.decompress(chunk) + content += decompressor.flush() + + assert content.decode() == basic_html def test_streaming_brotli(self): response = self.client.get("/streaming/", headers={"accept-encoding": "br"}) + + assert isinstance(response, StreamingHttpResponse) + assert not response.is_async assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "br" assert response.headers["vary"] == "accept-encoding" - content = response.getvalue() - assert content.startswith(b"\x8b\x07\x00\xf8") - decompressed = brotli_decompress(content) - assert decompressed.decode() == basic_html + + streaming_content = cast(Iterator[bytes], response.streaming_content) + decompressor = BrotliDecompressor() + content = b"" + + decompressed = decompressor.process(next(streaming_content)) + assert decompressed == b"" + content += decompressed + + decompressed = decompressor.process(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + decompressed = decompressor.process(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + for chunk in streaming_content: + content += decompressor.process(chunk) + + assert content.decode() == basic_html + assert decompressor.is_finished() @pytest.mark.skipif(sys.version_info < (3, 14), reason="Python 3.14+") def test_streaming_zstd(self): - from compression.zstd import decompress + from compression.zstd import ZstdDecompressor response = self.client.get("/streaming/", headers={"accept-encoding": "zstd"}) + + assert isinstance(response, StreamingHttpResponse) 
+ assert not response.is_async assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "zstd" assert response.headers["vary"] == "accept-encoding" - content = response.getvalue() - assert content.startswith(b"(\xb5/\xfd") - decompressed = decompress(content) - assert decompressed.decode() == basic_html + + streaming_content = cast(Iterator[bytes], response.streaming_content) + decompressor = ZstdDecompressor() + content = b"" + + decompressed = decompressor.decompress(next(streaming_content)) + assert decompressed == b"" + content += decompressed + + decompressed = decompressor.decompress(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + decompressed = decompressor.decompress(next(streaming_content)) + assert decompressed == b"\n" + content += decompressed + + for chunk in streaming_content: + content += decompressor.decompress(chunk) + + assert decompressor.eof + assert decompressor.unused_data == b"" + assert content.decode() == basic_html def test_streaming_empty_identity(self): response = self.client.get("/streaming/empty/") + assert response.status_code == HTTPStatus.OK assert "content-encoding" not in response.headers assert "vary" not in response.headers @@ -124,6 +208,7 @@ def test_streaming_empty_gzip(self): response = self.client.get( "/streaming/empty/", headers={"accept-encoding": "gzip"} ) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "gzip" assert response.headers["vary"] == "accept-encoding" @@ -136,6 +221,7 @@ def test_streaming_empty_brotli(self): response = self.client.get( "/streaming/empty/", headers={"accept-encoding": "br"} ) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "br" assert response.headers["vary"] == "accept-encoding" @@ -151,6 +237,7 @@ def test_streaming_empty_zstd(self): response = self.client.get( "/streaming/empty/", headers={"accept-encoding": "zstd"} ) + assert 
response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "zstd" assert response.headers["vary"] == "accept-encoding" @@ -263,6 +350,7 @@ async def test_async_streaming_zstd(self): def test_binary(self): response = self.client.get("/binary/", headers={"accept-encoding": "gzip"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "gzip" assert response.headers["vary"] == "accept-encoding" @@ -273,6 +361,7 @@ def test_binary(self): def test_etag(self): response = self.client.get("/etag/", headers={"accept-encoding": "gzip"}) + assert response.status_code == HTTPStatus.OK assert response.headers["content-encoding"] == "gzip" assert response.headers["vary"] == "accept-encoding" From e4e74be2898331334364e2f8472acb0e3a265643 Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Fri, 10 Oct 2025 12:32:50 +0100 Subject: [PATCH 2/2] Test streaming responses that yield only blank chunks --- tests/test_middleware.py | 51 ++++++++++++++++++++++++++++++++++++++++ tests/urls.py | 1 + tests/views.py | 8 +++++++ 3 files changed, 60 insertions(+) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 4782c74..0edf2e6 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -246,6 +246,57 @@ def test_streaming_empty_zstd(self): decompressed = decompress(content) assert decompressed == b"" + def test_streaming_blanks_identity(self): + response = self.client.get("/streaming/blanks/") + + assert response.status_code == HTTPStatus.OK + assert "content-encoding" not in response.headers + assert "vary" not in response.headers + content = response.getvalue() + assert content == b"" + + def test_streaming_blanks_gzip(self): + response = self.client.get( + "/streaming/blanks/", headers={"accept-encoding": "gzip"} + ) + + assert response.status_code == HTTPStatus.OK + assert response.headers["content-encoding"] == "gzip" + assert response.headers["vary"] == "accept-encoding" + content = response.getvalue() + assert 
content.startswith(b"\x1f\x8b\x08") + decompressed = gzip.decompress(content) + assert decompressed == b"" + + def test_streaming_blanks_brotli(self): + response = self.client.get( + "/streaming/blanks/", headers={"accept-encoding": "br"} + ) + + assert response.status_code == HTTPStatus.OK + assert response.headers["content-encoding"] == "br" + assert response.headers["vary"] == "accept-encoding" + content = response.getvalue() + assert content == b"k\x00\x03" + decompressed = brotli_decompress(content) + assert decompressed == b"" + + @pytest.mark.skipif(sys.version_info < (3, 14), reason="Python 3.14+") + def test_streaming_blanks_zstd(self): + from compression.zstd import decompress + + response = self.client.get( + "/streaming/blanks/", headers={"accept-encoding": "zstd"} + ) + + assert response.status_code == HTTPStatus.OK + assert response.headers["content-encoding"] == "zstd" + assert response.headers["vary"] == "accept-encoding" + content = response.getvalue() + assert content.startswith(b"(\xb5/\xfd") + decompressed = decompress(content) + assert decompressed == b"" + async def test_async_identity(self): response = await self.async_client.get("/async/") assert response.status_code == HTTPStatus.OK diff --git a/tests/urls.py b/tests/urls.py index 08e38cc..eb02468 100644 --- a/tests/urls.py +++ b/tests/urls.py @@ -11,6 +11,7 @@ path("async/", views.async_), path("streaming/", views.streaming), path("streaming/empty/", views.streaming_empty), + path("streaming/blanks/", views.streaming_blanks), path("async/streaming/", views.async_streaming), path("binary/", views.binary), path("etag/", views.etag), diff --git a/tests/views.py b/tests/views.py index 2a76925..e19e08b 100644 --- a/tests/views.py +++ b/tests/views.py @@ -54,6 +54,14 @@ def empty() -> Generator[bytes]: return StreamingHttpResponse(empty()) +def streaming_blanks(request: HttpRequest) -> StreamingHttpResponse: + def empty() -> Generator[bytes]: + yield b"" + yield b"" + + return 
StreamingHttpResponse(empty()) + + async def async_streaming(request: HttpRequest) -> StreamingHttpResponse: async def lines() -> AsyncGenerator[str]: for line in basic_html.splitlines(keepends=True):