Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

renamed scrape to crawl

  • Loading branch information...
commit f14b1879db5fca714421355c99ea2c3e484bbaf2 1 parent e2b58cf
@heyZeus authored
Showing with 40 additions and 45 deletions.
  1. +27 −38 clj_web_crawler.clj
  2. +13 −7 test/main.clj
View
65 clj_web_crawler.clj
@@ -5,8 +5,7 @@
TraceMethod HeadMethod PutMethod))
(:use [clojure.contrib.duck-streams :only (slurp*)]))
-
-(defmacro send-method
+(defmacro send-method
"Sends a request to the given method and client. The response from the server is
stored in the method and any cookies are stored in the client. The response and
any resources associated with the request are cleared from the method after this
@@ -23,34 +22,6 @@
; uses slurp* here otherwise we get an annoying warning from commons-client
(slurp* (.getResponseBodyAsStream method)))
-(defmacro scrape
- "Returns the HTML as a string. It will free up any resources associated
- with the method. If the resulting page is a redirect the redirect page
- will be returned. Also the optional body will be run against the
- redirected page."
- ([server http-method & body]
- `(let [s# (if (= String (class ~server)) (client ~server) ~server)
- m# (if (= String (class ~http-method)) (method ~http-method) ~http-method)]
- (send-method s# m#
- (let [location# (redirect-location m#)]
- (if location#
- (do
- (let [redirect-method# (method location#)]
- (send-method s# redirect-method#
- ~@body
- (response-str redirect-method#))))
- (do
- ~@body
- (response-str m#)))))))
- ([server] (scrape server "/")))
-
-(defn client
- "Creates a HttpClient for the given server."
- [host]
- (let [c (HttpClient.)]
- (.. c (getHostConfiguration) (setHost (URI. host true)))
- c))
-
(defn method
"Creates a commons-client method type object with the given path and type.
A type can be one of: :post, :get, :put, :delete, :trace or :head. If no
@@ -58,22 +29,40 @@
{:login \"foo\" :password \"bar\"}."
([path type url-params]
(let [key-type (cond
- (> (count url-params) 0) :post
- (nil? type) :get
- :else (keyword type))
+ (> (count url-params) 0) :post
+ (nil? type) :get
+ :else (keyword type))
+ p (if (.startsWith path "/") path (str \/ path))
m (cond
- (= :post key-type) (PostMethod. path)
- (= :delete key-type) (DeleteMethod. path)
+ (= :post key-type) (PostMethod. p)
+ (= :delete key-type) (DeleteMethod. p)
(= :put key-type) (PutMethod. path)
- (= :trace key-type) (TraceMethod. path)
- (= :head key-type) (HeadMethod. path)
- :else (GetMethod. path))]
+ (= :trace key-type) (TraceMethod. p)
+ (= :head key-type) (HeadMethod. p)
+ :else (GetMethod. p))]
(doseq [[k v] url-params]
(.addParameter m (name k) (str v)))
m))
([path type] (method path type nil))
([path] (method path nil nil)))
+(defmacro crawl
+ "Returns the HTML as a string. It will free up any resources associated
+ with the method. If the resulting page is a redirect the redirect page
+ will be returned. Also the optional body will be run against the
+ redirected page."
+ ([server method & body]
+ `(send-method ~server ~method
+ (do ~@body)))
+ ([server] (crawl server (method "/"))))
+
+(defn client
+ "Creates a HttpClient for the given server."
+ [host]
+ (let [c (HttpClient.)]
+ (.. c (getHostConfiguration) (setHost (URI. host true)))
+ c))
+
(defn cookies
"Convenience function to get the cookies from the client."
[client]
View
20 test/main.clj
@@ -21,15 +21,20 @@
(is (= "POST" (.getName path-post-method)))
(is (= "/api" (.getPath path-post-method)))
+ (is (= "/api" (.getPath (wc/method "api"))))
+
(is (= "POST" (.getName params-method)))
(is (= "clojure" (.. params-method (getParameter "language") (getValue))))
(is (= "yes" (.. params-method (getParameter "happy") (getValue))))))
-(deftest scrape
- (let [html (wc/scrape clj-ws home)]
- (is (.contains html clj-home-page-text)))
- (is (.contains (wc/scrape "http://www.clojure.org") clj-home-page-text))
- (is (.contains (wc/scrape "http://www.clojure.org" "/api") "API")))
+(deftest crawl
+ (wc/crawl clj-ws home
+ (is (.contains (wc/response-str home) clj-home-page-text)))
+ (wc/crawl clj-ws home
+ (is (.contains (wc/response-str home) clj-home-page-text)))
+ (let [api (wc/method "/api")]
+ (wc/crawl clj-ws api
+ (is (.contains (wc/response-str api) "API")))))
; this test depends on a website that i don't have any control over,
@@ -46,7 +51,8 @@
(deftest redirect
; i setup this redirect at shorturl.com
(let [redirect-site (wc/client "http://alturl.com/")
- home-page (wc/method "/yew")]
- (is (= (.contains (wc/scrape redirect-site home-page) clj-home-page-text)))))
+ home (wc/method "/yew")]
+ (wc/crawl redirect-site home
+ (is (.contains (wc/response-str home) clj-home-page-text)))))
(run-tests)
Please sign in to comment.
Something went wrong with that request. Please try again.