Browse files

Merge pull request #1 from EugenDueck/patch-1

Use character set from "Content-Type" response header in response-str
  • Loading branch information...
2 parents d6c804e + cd32d4f commit b61dd15e9c0a31525293ec64ff8f1756cacb5d31 @heyZeus committed Jun 25, 2011
Showing with 19 additions and 9 deletions.
  1. +19 −9 clj_web_crawler.clj
@@ -2,8 +2,7 @@
(:import (org.apache.commons.httpclient HttpClient NameValuePair URI HttpStatus)
(org.apache.commons.httpclient.cookie CookiePolicy CookieSpec)
(org.apache.commons.httpclient.methods GetMethod PostMethod DeleteMethod
- TraceMethod HeadMethod PutMethod))
- (:use [clojure.contrib.duck-streams :only (slurp*)]))
+ TraceMethod HeadMethod PutMethod)))
(defn redirect-location
"Returns the redirection location string in the method, nil or false if
@@ -71,18 +70,29 @@
(finally (.releaseConnection ~method))))
([server] (crawl server (method "/"))))
-(defn response-str
- "Returns the response from the method as a string."
+(defn response-reader
+ "Returns the response from the method as a java.io.Reader.
+ Should be used inside with-open to ensure the Reader is properly
+ closed."
- ; uses slurp* here otherwise we get a annoying warning from commons-client
- (slurp* (.getResponseBodyAsStream method)))
+ (clojure.java.io/reader (.getResponseBodyAsStream method) :encoding (.getResponseCharSet method)))
([method client]
(let [redirect (redirect-location method)
new-method (if redirect (method redirect))]
- (if new-method
+ (if new-method
(crawl client new-method
- (response-str new-method))
- (response-str method)))))
+ (response-reader new-method))
+ (response-reader method)))))
+(defn response-str
+ "Returns the response from the method as a string."
+ ([method]
+ (with-open [reader (response-reader method)]
+ (slurp reader)))
+ ([method client]
+ (with-open [reader (response-reader method client)]
+ (slurp reader))))
(defmulti crawl-response (fn [server method] [(class server) (class method)]))

0 comments on commit b61dd15

Please sign in to comment.