Skip to content

Commit

Permalink
remove "content-encoding" header if the body is automatically decompr…
Browse files Browse the repository at this point in the history
…essed to allow for pass-through. If header is removed, assoc :orig-content-encoding to response map.
  • Loading branch information
dakrone committed Jul 23, 2013
1 parent f73fae5 commit ede1a89
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 16 deletions.
21 changes: 11 additions & 10 deletions Readme.md
Expand Up @@ -247,21 +247,22 @@ can be specified:
;; Auto-decompression used: (google requires a user-agent to send gzip data)
(def h {"User-Agent" "Mozilla/5.0 (Windows NT 6.1;) Gecko/20100101 Firefox/13.0.1"})
(def resp (client/get "http://google.com" {:headers h}))
(:headers resp)
=> {"server" "gws",
"content-encoding" "gzip", ;; <= google sent response gzipped
"content-type" "text/html; charset=UTF-8",
"content-length" "26538",
"connection" "close"}
(:orig-content-encoding resp)
=> "gzip" ;; <= google sent response gzipped

;; and without decompression:
(def resp2 (client/get "http://google.com" {:headers h :decompress-body false}))
(:headers resp2)
=> {"server" "gws",
"content-type" "text/html; charset=UTF-8"
"connection" "close"}
(:orig-content-encoding resp2)
=> nil
```

If clj-http decompresses a response body, the "content-encoding" header
is removed from the headers (because the body is no longer encoded
that way). This allows clj-http to be used as a pass-through proxy with
ring. The original content-encoding is available as
`:orig-content-encoding` in the response map if auto-decompression is
enabled.

#### HTML Meta tag headers
HTML 4.01 allows using the tag `<meta http-equiv="..." />` and HTML 5
allows using the tag `<meta charset="..." />` to specify a header that
Expand Down
3 changes: 3 additions & 0 deletions changelog.org
Expand Up @@ -310,6 +310,9 @@
the changelog for the next release
** 2013-07-23
- add logging config for local testing only
- remove "content-encoding" header if the body is automatically
decompressed to allow for pass-through. If header is removed,
assoc :orig-content-encoding to response map.
** 2013-07-22
- merged https://github.com/dakrone/clj-http/pull/149 to fix
closing the stream when coerced to byte array
Expand Down
31 changes: 27 additions & 4 deletions src/clj_http/client.clj
Expand Up @@ -68,6 +68,20 @@
(defn when-pos
  "Returns v when it is truthy and positive; otherwise returns nil
  (covers nil, false, zero and negative numbers)."
  [v]
  (cond
    (not v)  nil
    (pos? v) v
    :else    nil))

(defn dissoc-in
  "Removes the entry found by following the key path `keys` through the
  nested associative structure `m`, returning the updated structure.
  Intermediate maps that become empty because of the removal are pruned
  from the result as well. If the path cannot be followed (an intermediate
  key is absent or maps to nil/false), `m` is returned unchanged."
  [m [k & ks :as keys]]
  ;; `clojure.core/get` is spelled out in full, exactly as in the original.
  (let [child (when ks (clojure.core/get m k))]
    (cond
      ;; Last key of the path: plain dissoc at this level.
      (nil? ks)   (dissoc m k)
      ;; Nothing to descend into: leave the structure alone.
      (not child) m
      :else
      (let [pruned (dissoc-in child ks)]
        (if (empty? pruned)
          ;; The child lost its only entry, so drop the child too.
          (dissoc m k)
          (assoc m k pruned))))))

(defn url-encode-illegal-characters
"Takes a raw url path or query and url-encodes any illegal characters.
Minimizes ambiguity by encoding space to %20."
Expand Down Expand Up @@ -215,13 +229,22 @@

(defmethod decompress-body "gzip"
[resp]
(update resp :body util/gunzip))
(-> resp
(update :body util/gunzip)
(assoc :orig-content-encoding (get-in resp [:headers "content-encoding"]))
(dissoc-in [:headers "content-encoding"])))

(defmethod decompress-body "deflate"
[resp]
(update resp :body util/inflate))

(defmethod decompress-body :default [resp] resp)
(-> resp
(update :body util/inflate)
(assoc :orig-content-encoding (get-in resp [:headers "content-encoding"]))
(dissoc-in [:headers "content-encoding"])))

(defmethod decompress-body :default
  [resp]
  ;; Fallback method: the body and headers are left untouched; only the
  ;; value of the "content-encoding" header (nil when absent) is recorded
  ;; under :orig-content-encoding.
  (let [encoding (get-in resp [:headers "content-encoding"])]
    (assoc resp :orig-content-encoding encoding)))

(defn wrap-decompression
"Middleware handling automatic decompression of responses from web servers. If
Expand Down
34 changes: 32 additions & 2 deletions test/clj_http/test/client.clj
Expand Up @@ -217,7 +217,9 @@
:headers {"content-encoding" "gzip"}})
c-client (client/wrap-decompression client)
resp (c-client {})]
(is (= "foofoofoo" (util/utf8-string (:body resp))))))
(is (= "foofoofoo" (util/utf8-string (:body resp))))
(is (= "gzip" (:orig-content-encoding resp)))
(is (= nil (get-in resp [:headers "content-encoding"])))))

(deftest apply-on-deflated
(let [client (fn [req]
Expand All @@ -228,7 +230,35 @@
c-client (client/wrap-decompression client)
resp (c-client {})]
(is (= "barbarbar" (-> resp :body util/force-byte-array util/utf8-string))
"string correctly inflated")))
"string correctly inflated")
(is (= "deflate" (:orig-content-encoding resp)))
(is (= nil (get-in resp [:headers "content-encoding"])))))

(deftest t-disabled-body-decompression
  ;; With :decompress-body false the request must not carry the
  ;; "gzip, deflate" accept-encoding value, the deflated body must come
  ;; back still deflated, and the content-encoding header must be kept.
  (let [handler (fn [request]
                  (is (not= "gzip, deflate"
                            (get-in request [:headers "accept-encoding"])))
                  {:headers {"content-encoding" "deflate"}
                   :body (-> "barbarbar" util/utf8-bytes util/deflate)})
        wrapped (client/wrap-decompression handler)
        response (wrapped {:decompress-body false})]
    ;; Inflating the returned body by hand must give the same text as
    ;; inflating a freshly deflated copy of the payload.
    (is (= (-> "barbarbar" util/utf8-bytes util/deflate util/inflate slurp)
           (-> response :body util/force-byte-array util/inflate slurp))
        "string not inflated")
    (is (= nil (:orig-content-encoding response)))
    (is (= "deflate" (get-in response [:headers "content-encoding"])))))

(deftest t-weird-non-known-compression
  ;; An unrecognised content-encoding must leave both the body and the
  ;; header alone while still being reported as :orig-content-encoding.
  (let [handler (fn [request]
                  (is (= "gzip, deflate"
                         (get-in request [:headers "accept-encoding"])))
                  {:headers {"content-encoding" "pig-latin"}
                   :body (util/utf8-bytes "foofoofoo")})
        wrapped (client/wrap-decompression handler)
        response (wrapped {})]
    (is (= "foofoofoo" (-> response :body util/utf8-string)))
    (is (= "pig-latin" (:orig-content-encoding response)))
    (is (= "pig-latin" (get-in response [:headers "content-encoding"])))))

(deftest pass-on-non-compressed
(let [c-client (client/wrap-decompression (fn [req] {:body "foo"}))
Expand Down

0 comments on commit ede1a89

Please sign in to comment.