add the ability to stop the crawl #61

Closed · wants to merge 1 commit
20 changes: 19 additions & 1 deletion lib/anemone/core.rb
@@ -79,6 +79,7 @@ def initialize(urls, opts = {})
@skip_link_patterns = []
@after_crawl_blocks = []
@opts = opts
@stop_crawl = false

yield self if block_given?
end
@@ -142,6 +143,18 @@ def focus_crawl(&block)
self
end

#
# Signals the crawler that it should stop the crawl before visiting the
# next page.
#
# This method is expected to be called within a page block, and it signals
# the crawler that it must stop after the current page is completely
# processed. All pages and links currently on the queue are discarded.
#
def stop_crawl
@stop_crawl = true
end
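The mechanism can be sketched in plain Ruby (a minimal sketch with illustrative names such as `MiniCrawler`, not the actual Anemone internals): setting the flag inside a page callback lets the current page finish, then empties the queue so the loop exits on its own.

```ruby
# Minimal sketch of the flag-based stop: a crawl-like loop drains a queue
# and, once the stop flag is set from inside a page callback, discards
# whatever is still queued.
class MiniCrawler
  attr_reader :visited

  def initialize
    @queue = Queue.new
    @stop_crawl = false
    @visited = []
  end

  def enqueue(*urls)
    urls.each { |u| @queue << u }
  end

  # Same contract as the PR: signal the crawler to stop after the
  # current page is completely processed.
  def stop_crawl
    @stop_crawl = true
  end

  def run
    until @queue.empty?
      page = @queue.pop
      @visited << page
      yield page, self if block_given?
      @queue.clear if @stop_crawl # discard remaining pages and links
    end
    @visited
  end
end

crawler = MiniCrawler.new
crawler.enqueue('/a', '/b', '/c')
crawler.run { |page, c| c.stop_crawl if page == '/b' }
# crawler.visited == ['/a', '/b']; '/c' was discarded
```

Note the loop ends because the queue is empty, not via `break` — which is the design point debated in the comments below.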

#
# Perform the crawl
#
@@ -175,12 +188,17 @@ def run

@pages[page.url] = page

if @stop_crawl
page_queue.clear
Contributor:

@gnapse why did you clear the queue? What if you want to restart the processing? Would you submit this PR in the fork called Medusa?

Author:

Wow, I did this a long time ago, and I'm not currently in context to answer this correctly.

I recall that I clear the queue because of the way stop_crawl works: it empties the queue so that the crawl ends naturally (the loop processing the queue finds nothing else queued, so it exits on its own rather than via an abrupt break).

Anyway, good to know that there's a fork keeping anemone alive. I liked that gem and it was sad to see it discontinued. I'll try to work on adapting the PR for the fork.

Ernesto (@gnapse)


link_queue.clear
end

# if we are done with the crawl, tell the threads to end
if link_queue.empty? and page_queue.empty?
until link_queue.num_waiting == @tentacles.size
Thread.pass
end
if page_queue.empty?
if page_queue.empty? || @stop_crawl
@tentacles.size.times { link_queue << :END }
break
end
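The surrounding run loop shuts its worker threads ("tentacles") down with `:END` sentinels once every worker is blocked waiting on the queue. The handshake can be sketched with the stdlib `Queue` and `Thread` (a minimal sketch with illustrative names, not the actual tentacle code):

```ruby
# Sketch of the shutdown handshake in Core#run: the main loop waits until
# every worker is blocked on the queue (num_waiting), then pushes one :END
# sentinel per worker so each exits its pop loop.
link_queue = Queue.new
results = Queue.new

tentacles = 3.times.map do
  Thread.new do
    loop do
      link = link_queue.pop
      break if link == :END  # sentinel: this worker is done
      results << link.upcase # stand-in for fetching and parsing the page
    end
  end
end

%w[a b c].each { |l| link_queue << l }

# Wait for the workers to drain the queue and block on pop again.
Thread.pass until link_queue.num_waiting == tentacles.size

tentacles.size.times { link_queue << :END } # one sentinel per worker
tentacles.each(&:join)
```

With the PR applied, the same sentinel path is taken when `@stop_crawl` is set: the queues are cleared, `page_queue.empty? || @stop_crawl` holds, and each tentacle receives its `:END`.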