From 96a0879c0e97ec998ec14e5cc2d0b4a272a402f9 Mon Sep 17 00:00:00 2001 From: Elliot Courant Date: Wed, 15 Oct 2025 15:53:03 -0500 Subject: [PATCH] fix(client): Handle multiple layers of compression If a server responds with a `Content-Encoding` of something like `deflate, gzip`, clj-http would not be able to handle the response as the response body has been compressed using zlib and then compressed again using gzip. This patch adds a recursive wrapper to the decompression handling that allows for multiple layers of compression to be unwrapped when receiving responses like this. The content encoding header is split by commas, then in reverse order is decompressed layer by layer until the final body is produced which is then returned to the caller. --- src/clj_http/client.clj | 31 +++++++++++++++++++++++++++--- test/clj_http/test/client_test.clj | 14 ++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/clj_http/client.clj b/src/clj_http/client.clj index 7d01d886..52e31ffc 100644 --- a/src/clj_http/client.clj +++ b/src/clj_http/client.clj @@ -386,14 +386,12 @@ [resp] (-> resp (update :body util/gunzip) - (assoc :orig-content-encoding (get-in resp [:headers "content-encoding"])) (dissoc-in [:headers "content-encoding"]))) (defmethod decompress-body "deflate" [resp] (-> resp (update :body util/inflate) - (assoc :orig-content-encoding (get-in resp [:headers "content-encoding"])) (dissoc-in [:headers "content-encoding"]))) (defmethod decompress-body :default [resp] @@ -412,7 +410,34 @@ [req resp] (if (false? (opt req :decompress-body)) resp - (decompress-body resp))) + ;; If we are decompressing the body there might be multiple content + ;; encodings. + ;; https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Content-Encoding + ;; Take each content encoding (in reverse order) and try to decompress the + ;; body using that content encoding recursively. This way if content is + ;; compressed twice. 
Once with zlib and again with gzip, then we can + ;; properly decompress it in the correct order. + (loop [[encoding & rest-encodings] (some-> resp + (get-in [:headers "content-encoding"]) + (str/split #",") + (reverse)) + ;; Store the original content encoding somewhere so it can be + ;; referenced if needed. + resp (assoc resp + :orig-content-encoding + (get-in resp [:headers "content-encoding"]))] + (if encoding + ;; If there is an encoding then decompress the body using that encoding + ;; then recur with the remaining encodings. + (recur + rest-encodings + (decompress-body + ;; Force the content encoding to be a single encoding type for this. + (assoc-in resp + [:headers "content-encoding"] + (str/trim encoding)))) + ;; If there are no encodings left then we are done. + resp)))) (defn wrap-decompression "Middleware handling automatic decompression of responses from web servers. If diff --git a/test/clj_http/test/client_test.clj b/test/clj_http/test/client_test.clj index 1b111e99..a179a8e0 100644 --- a/test/clj_http/test/client_test.clj +++ b/test/clj_http/test/client_test.clj @@ -694,6 +694,20 @@ (is (= "gzip" (:orig-content-encoding resp))) (is (= nil (get-in resp [:headers "content-encoding"]))))) +(deftest apply-on-compressed-layered + (let [client (fn [req] + (is (= "gzip, deflate" + (get-in req [:headers "accept-encoding"]))) + {:body (util/gzip (util/deflate (util/utf8-bytes "foofoofoo"))) + :headers {"content-encoding" "deflate, gzip"}}) + c-client (client/wrap-decompression client) + resp (c-client {})] + ;; The response body here is going to be an input stream because of the + ;; nesting. So we need to slurp it, or force it to be a string of some sort. + (is (= "foofoofoo" (slurp (:body resp)))) + (is (= "deflate, gzip" (:orig-content-encoding resp))) + (is (= nil (get-in resp [:headers "content-encoding"]))))) + (deftest apply-on-compressed-async (let [client (fn [req respond raise] (is (= "gzip, deflate"