From 04fc69f2cf374f450f3f4dc5846a674d8e6b548a Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 3 Nov 2025 10:08:48 +0100 Subject: [PATCH] Fix streaming by no longer buffering whole response when compressing --- CHANGELOG.md | 7 ++ README.md | 3 +- flask_compress/flask_compress.py | 114 +++++++++++++++++++++++-------- tests/test_flask_compress.py | 20 ++++-- 4 files changed, 107 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 513d7be..7ba7392 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to `flask-compress` will be documented in this file. +## 1.21 (development) + +- streaming is now supported: + - the previous behavior was that if `COMPRESS_STREAMS` was `True` (the default), streaming responses were compressed in 1 sitting, meaning streaming responses were effectively not streamed. If `COMPRESS_STREAMS` was `False`, streaming responses were not compressed at all. + - now, if `COMPRESS_STREAMS` is `True`, streaming responses are compressed on-the-fly as data is streamed, which means that streaming responses are still streamed, but compressed. If `COMPRESS_STREAMS` is `False`, streaming responses are not compressed at all, as before. + - we have a new `COMPRESS_ALGORITHM_STREAMING` config option to specify the compression algorithms to use for streaming responses, which defaults to `["zstd", "br", "deflate"]`, as `"gzip"` is not suitable for streaming compression. + ## 1.20 (2025-10-20) - the new `COMPRESS_EVALUATE_CONDITIONAL_REQUEST` config option is now `True` by default diff --git a/README.md b/README.md index eecb3bd..aed6198 100644 --- a/README.md +++ b/README.md @@ -143,5 +143,6 @@ Within your Flask application's settings you can provide the following settings | `COMPRESS_CACHE_BACKEND` | Specified the backend for storing the cached response data. | `None` | | `COMPRESS_REGISTER` | Specifies if compression should be automatically registered. 
| `True` | | `COMPRESS_ALGORITHM` | Supported compression algorithms. | `['zstd', 'br', 'gzip', 'deflate']` | -| `COMPRESS_STREAMS` | Compress content streams. | `True` | +| `COMPRESS_ALGORITHM_STREAMING` | Supported compression algorithms for streaming. | `['zstd', 'br', 'deflate']` | +| `COMPRESS_STREAMS` | Compress streaming responses. | `True` | | `COMPRESS_EVALUATE_CONDITIONAL_REQUEST` | Compress evaluates conditional requests. | `True` | diff --git a/flask_compress/flask_compress.py b/flask_compress/flask_compress.py index 02d2504..a646b70 100644 --- a/flask_compress/flask_compress.py +++ b/flask_compress/flask_compress.py @@ -29,14 +29,14 @@ def set(self, key, value): @lru_cache(maxsize=128) -def _choose_algorithm(enabled_algorithms, accept_encoding): +def _choose_algorithm(algorithms, accept_encoding): """ Determine which compression algorithm we're going to use based on the client request. The `Accept-Encoding` header may list one or more desired algorithms, together with a "quality factor" for each one (higher quality means the client prefers that algorithm more). - :param enabled_algorithms: Tuple of supported compression algorithms + :param algorithms: Tuple of supported compression algorithms :param accept_encoding: Content of the `Accept-Encoding` header :return: name of a compression algorithm (`gzip`, `deflate`, `br`, 'zstd') or `None` if the client and server don't agree on any. 
@@ -49,7 +49,7 @@ def _choose_algorithm(enabled_algorithms, accept_encoding): algos_by_quality = defaultdict(set) # Set of supported algorithms - server_algos_set = set(enabled_algorithms) + server_algos_set = set(algorithms) for part in accept_encoding.lower().split(","): part = part.strip() @@ -87,15 +87,29 @@ def _choose_algorithm(enabled_algorithms, accept_encoding): if len(viable_algos) == 1: return viable_algos.pop() elif len(viable_algos) > 1: - for server_algo in enabled_algorithms: + for server_algo in algorithms: if server_algo in viable_algos: return server_algo if fallback_to_any: - return enabled_algorithms[0] + return algorithms[0] return None +def _format(algo): + """Format the algorithm configuration into a tuple of strings. + + >>> _format("gzip, deflate, br") + ('gzip', 'deflate', 'br') + >>> _format(["gzip", "deflate", "br"]) + ('gzip', 'deflate', 'br') + """ + if isinstance(algo, str): + return tuple(i.strip() for i in algo.split(",")) + else: + return tuple(algo) + + class Compress: """ The Compress object allows your application to use Flask-Compress. 
@@ -167,6 +181,7 @@ def init_app(self, app): ("COMPRESS_STREAMS", True), ("COMPRESS_EVALUATE_CONDITIONAL_REQUEST", True), ("COMPRESS_ALGORITHM", ["zstd", "br", "gzip", "deflate"]), + ("COMPRESS_ALGORITHM_STREAMING", ["zstd", "br", "deflate"]), # no gzip ] for k, v in defaults: @@ -177,12 +192,8 @@ def init_app(self, app): self.cache_key = app.config["COMPRESS_CACHE_KEY"] self.compress_mimetypes_set = set(app.config["COMPRESS_MIMETYPES"]) - - algo = app.config["COMPRESS_ALGORITHM"] - if isinstance(algo, str): - self.enabled_algorithms = tuple(i.strip() for i in algo.split(",")) - else: - self.enabled_algorithms = tuple(algo) + self.enabled_algorithms = _format(app.config["COMPRESS_ALGORITHM"]) + self.streaming_algorithms = _format(app.config["COMPRESS_ALGORITHM_STREAMING"]) if app.config["COMPRESS_REGISTER"] and app.config["COMPRESS_MIMETYPES"]: app.after_request(self.after_request) @@ -197,14 +208,16 @@ def after_request(self, response): response.headers["Vary"] = f"{vary}, Accept-Encoding" accept_encoding = request.headers.get("Accept-Encoding", "") - chosen_algorithm = _choose_algorithm(self.enabled_algorithms, accept_encoding) + streaming = response.is_streamed and app.config["COMPRESS_STREAMS"] + algorithms = self.streaming_algorithms if streaming else self.enabled_algorithms + chosen_algorithm = _choose_algorithm(algorithms, accept_encoding) if ( chosen_algorithm is None or response.mimetype not in self.compress_mimetypes_set or response.status_code < 200 or response.status_code >= 300 - or (response.is_streamed and app.config["COMPRESS_STREAMS"] is False) + or (response.is_streamed and not app.config["COMPRESS_STREAMS"]) or "Content-Encoding" in response.headers or ( response.content_length is not None @@ -214,20 +227,24 @@ def after_request(self, response): return response response.direct_passthrough = False + response.headers["Content-Encoding"] = chosen_algorithm - if self.cache is not None: - key = f"{chosen_algorithm};{self.cache_key(request)}" - 
compressed_content = self.cache.get(key) - if compressed_content is None: - compressed_content = self.compress(app, response, chosen_algorithm) - self.cache.set(key, compressed_content) + if streaming: + chunks = response.iter_encoded() + response.response = self.compress_chunks(app, chunks, chosen_algorithm) + response.headers.pop("Content-Length", None) else: - compressed_content = self.compress(app, response, chosen_algorithm) - - response.set_data(compressed_content) + if self.cache is not None: + key = f"{chosen_algorithm};{self.cache_key(request)}" + compressed_content = self.cache.get(key) + if compressed_content is None: + compressed_content = self.compress(app, response, chosen_algorithm) + self.cache.set(key, compressed_content) + else: + compressed_content = self.compress(app, response, chosen_algorithm) - response.headers["Content-Encoding"] = chosen_algorithm - response.headers["Content-Length"] = response.content_length + response.set_data(compressed_content) + response.headers["Content-Length"] = response.content_length # "123456789" => "123456789:gzip" - A strong ETag validator # W/"123456789" => W/"123456789:gzip" - A weak ETag validator @@ -236,12 +253,9 @@ def after_request(self, response): if etag and not is_weak: response.set_etag(f"{etag}:{chosen_algorithm}", weak=is_weak) - if ( - app.config["COMPRESS_EVALUATE_CONDITIONAL_REQUEST"] - and request.method in ("GET", "HEAD") - and (not response.is_streamed or app.config["COMPRESS_STREAMS"]) - ): - response.make_conditional(request) + if app.config["COMPRESS_EVALUATE_CONDITIONAL_REQUEST"]: + if request.method in ("GET", "HEAD"): + response.make_conditional(request) return response @@ -280,3 +294,43 @@ def compress(self, app, response, algorithm): return compression.zstd.compress( response.get_data(), app.config["COMPRESS_ZSTD_LEVEL"] ) + else: + raise ValueError(f"Unknown compression algorithm: {algorithm}") + + def compress_chunks(self, app, chunks, algorithm): + if algorithm == "deflate": + 
compressor = compression.zlib.compressobj( + level=app.config["COMPRESS_DEFLATE_LEVEL"] + ) + for data in chunks: + out = compressor.compress(data) + if out: + yield out + out = compressor.flush() + if out: + yield out + + elif algorithm == "br": + compressor = brotli.Compressor( + mode=app.config["COMPRESS_BR_MODE"], + quality=app.config["COMPRESS_BR_LEVEL"], + lgwin=app.config["COMPRESS_BR_WINDOW"], + lgblock=app.config["COMPRESS_BR_BLOCK"], + ) + for data in chunks: + out = compressor.process(data) + if out: + yield out + yield compressor.finish() + + elif algorithm == "zstd": + compressor = compression.zstd.ZstdCompressor( + level=app.config["COMPRESS_ZSTD_LEVEL"] + ) + for data in chunks: + out = compressor.compress(data) + if out: + yield out + yield compressor.flush() + else: + raise ValueError(f"Unsupported streaming algorithm: {algorithm}") diff --git a/tests/test_flask_compress.py b/tests/test_flask_compress.py index 2dc55eb..064b6f3 100644 --- a/tests/test_flask_compress.py +++ b/tests/test_flask_compress.py @@ -443,7 +443,6 @@ class StreamTests(unittest.TestCase): def setUp(self): self.app = Flask(__name__) self.app.testing = True - self.app.config["COMPRESS_STREAMS"] = False self.file_path = os.path.join(os.getcwd(), "tests", "templates", "large.html") self.file_size = os.path.getsize(self.file_path) @@ -457,8 +456,9 @@ def _stream(): return self.app.response_class(_stream(), mimetype="text/html") def test_no_compression_stream(self): - """Tests compression is skipped when response is streamed""" + """Tests compression is skipped when COMPRESS_STREAMS is False""" Compress(self.app) + self.app.config["COMPRESS_STREAMS"] = False client = self.app.test_client() for algorithm in ("gzip", "deflate", "br", "zstd", ""): headers = [("Accept-Encoding", algorithm)] @@ -467,17 +467,25 @@ def test_no_compression_stream(self): self.assertEqual(response.is_streamed, True) self.assertEqual(self.file_size, len(response.data)) - def test_disabled_stream(self): - 
"""Test that stream compression can be disabled.""" + def test_compression_stream(self): Compress(self.app) - self.app.config["COMPRESS_STREAMS"] = True client = self.app.test_client() - for algorithm in ("gzip", "deflate", "br", "zstd"): + for algorithm in ("deflate", "br", "zstd"): headers = [("Accept-Encoding", algorithm)] response = client.get("/stream/large", headers=headers) self.assertIn("Content-Encoding", response.headers) + self.assertEqual(response.status_code, 200) + self.assertEqual(response.is_streamed, True) self.assertGreater(self.file_size, len(response.data)) + # gzip is not supported for streamed responses + headers = [("Accept-Encoding", "gzip")] + response = client.get("/stream/large", headers=headers) + self.assertNotIn("Content-Encoding", response.headers) + self.assertEqual(response.status_code, 200) + self.assertEqual(response.is_streamed, True) + self.assertEqual(self.file_size, len(response.data)) + class CachingCompressionTests(unittest.TestCase): def setUp(self):