Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Adding crawl with block #2

Merged
merged 1 commit into felipecsl:master

2 participants

@danielnc

Adds the ability to override metadata values for a single crawler instance by passing a block to `crawl`:

crawler.crawl do
   list_page '/different/page'
end
@felipecsl
Owner

@danielnc Looks awesome to me! Merging it. Thanks for the contribution.

@felipecsl felipecsl merged commit ba857cb into felipecsl:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Apr 11, 2012
  1. @danielnc

    Adding crawl with block

    danielnc authored
This page is out of date. Refresh to see the latest.
Showing with 45 additions and 11 deletions.
  1. +13 −2 lib/wombat/crawler.rb
  2. +32 −9 spec/crawler_spec.rb
View
15 lib/wombat/crawler.rb
@@ -10,8 +10,19 @@ module Crawler
include Parser
extend ActiveSupport::Concern
- def crawl
- parse self.class.send(:metadata)
+ def crawl(&block)
+ if block
+ @metadata_dup = self.class.send(:metadata).clone
+ instance_eval do
+ def method_missing method, *args, &block
+ @metadata_dup.send method, *args, &block
+ end
+ end
+ self.instance_eval &block
+ parse @metadata_dup
+ else
+ parse self.class.send(:metadata)
+ end
end
module ClassMethods
View
41 spec/crawler_spec.rb
@@ -9,16 +9,16 @@
it 'should call the provided block' do
event_called = false
-
+
@crawler.event { event_called = true }
-
+
event_called.should be_true
end
it 'should provide metadata to yielded block' do
@crawler.event do |e|
e.should_not be_nil
- end
+ end
end
it 'should store assigned metadata information' do
@@ -38,7 +38,7 @@
arg["venue"]["name"].selector.should == "Scooba"
arg["location"]["latitude"].selector.should == -50.2323
end
-
+
@crawler_instance.crawl
end
@@ -57,8 +57,8 @@
end
it 'should be able to assign arbitrary plain text metadata' do
- @crawler.some_data("/event/list", :html, "geo") {|p| true }
-
+ @crawler.some_data("/event/list", :html, "geo") { |p| true }
+
@crawler_instance.should_receive(:parse) do |arg|
prop = arg['some_data']
prop.name.should == "some_data"
@@ -67,7 +67,7 @@
prop.namespaces.should == "geo"
prop.callback.should_not be_nil
end
-
+
@crawler_instance.crawl
end
@@ -107,16 +107,39 @@
it["title"].selector.should == "css=.title"
it["body"].selector.should == "css=.body"
it["event"]["all"].selector.should == "yeah"
- end
+ end
@crawler_instance.crawl
end
- it 'should assign metadata forma' do
+ it 'should assign metadata format' do
@crawler_instance.should_receive(:parse) do |arg|
arg[:format].should == :xml
end
@crawler.format :xml
@crawler_instance.crawl
end
+
+ it 'should crawl with block' do
+ @crawler.base_url "danielnc.com"
+ @crawler.list_page "/itens"
+
+ @crawler_instance.should_receive(:parse) do |arg|
+ arg[:base_url].should == "danielnc.com"
+ arg[:list_page].should == "/itens/1"
+ end
+
+ @crawler_instance.crawl do
+ list_page "/itens/1"
+ end
+
+ another_instance = @crawler.new
+
+ another_instance.should_receive(:parse) do |arg|
+ arg[:base_url].should == "danielnc.com"
+ arg[:list_page].should == "/itens"
+ end
+
+ another_instance.crawl
+ end
end
Something went wrong with that request. Please try again.