Skip to content

Commit

Permalink
Merge pull request #2 from danielnc/master
Browse files Browse the repository at this point in the history
Adding crawl with block
  • Loading branch information
felipecsl committed Apr 12, 2012
2 parents df8cf58 + 67a9bd8 commit ba857cb
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 11 deletions.
15 changes: 13 additions & 2 deletions lib/wombat/crawler.rb
Expand Up @@ -10,8 +10,19 @@ module Crawler
include Parser
extend ActiveSupport::Concern

def crawl
parse self.class.send(:metadata)
# Runs the crawler by handing metadata to +parse+.
#
# Without a block, the class-level metadata (self.class.metadata) is parsed
# unchanged. With a block, the block is instance_eval'd against this crawler
# while unknown method calls are forwarded to a per-call copy of the class
# metadata, so overrides made inside the block apply to this call only.
#
# @yield optional DSL block evaluated in the context of this instance
# @return [Object] whatever +parse+ returns
def crawl(&block)
  return parse(self.class.send(:metadata)) unless block

  # NOTE(review): clone is a shallow copy; nested values mutated in place by
  # the block would presumably still be shared with the class metadata.
  @metadata_dup = self.class.send(:metadata).clone

  # Forward everything the crawler itself does not define to the copy. The
  # hook lives on the singleton class so other instances are unaffected.
  define_singleton_method(:method_missing) do |method, *args, &dsl_block|
    @metadata_dup.send(method, *args, &dsl_block)
  end

  begin
    instance_eval(&block)
    parse(@metadata_dup)
  ensure
    # Drop the hook again so later unknown calls on this instance raise
    # NoMethodError instead of being silently sent to the stale copy.
    if singleton_methods(false).include?(:method_missing)
      singleton_class.send(:remove_method, :method_missing)
    end
  end
end

module ClassMethods
Expand Down
41 changes: 32 additions & 9 deletions spec/crawler_spec.rb
Expand Up @@ -9,16 +9,16 @@

# Checks that the block given to @crawler.event is actually invoked.
it 'should call the provided block' do
# Flag flipped from inside the block; it stays false if the block never runs.
event_called = false

@crawler.event { event_called = true }

event_called.should be_true
end

# Checks that the block given to @crawler.event receives a non-nil argument.
it 'should provide metadata to yielded block' do
@crawler.event do |e|
e.should_not be_nil
end
end
end

it 'should store assigned metadata information' do
Expand All @@ -38,7 +38,7 @@
arg["venue"]["name"].selector.should == "Scooba"
arg["location"]["latitude"].selector.should == -50.2323
end

@crawler_instance.crawl
end

Expand All @@ -57,8 +57,8 @@
end

it 'should be able to assign arbitrary plain text metadata' do
@crawler.some_data("/event/list", :html, "geo") {|p| true }
@crawler.some_data("/event/list", :html, "geo") { |p| true }

@crawler_instance.should_receive(:parse) do |arg|
prop = arg['some_data']
prop.name.should == "some_data"
Expand All @@ -67,7 +67,7 @@
prop.namespaces.should == "geo"
prop.callback.should_not be_nil
end

@crawler_instance.crawl
end

Expand Down Expand Up @@ -107,16 +107,39 @@
it["title"].selector.should == "css=.title"
it["body"].selector.should == "css=.body"
it["event"]["all"].selector.should == "yeah"
end
end

@crawler_instance.crawl
end

it 'should assign metadata forma' do
it 'should assign metadata format' do
@crawler_instance.should_receive(:parse) do |arg|
arg[:format].should == :xml
end
@crawler.format :xml
@crawler_instance.crawl
end

# Checks that a block passed to #crawl overrides metadata for that call only:
# the instance crawled with a block sees the overridden list_page, while a
# fresh instance crawled without a block still sees the class-level value.
it 'should crawl with block' do
# Class-level metadata configured on the crawler class itself.
@crawler.base_url "danielnc.com"
@crawler.list_page "/itens"

# parse must receive the block's list_page alongside the untouched base_url.
@crawler_instance.should_receive(:parse) do |arg|
arg[:base_url].should == "danielnc.com"
arg[:list_page].should == "/itens/1"
end

# Override list_page for this crawl via the DSL block.
@crawler_instance.crawl do
list_page "/itens/1"
end

# A second instance verifies the override did not change the class metadata.
another_instance = @crawler.new

another_instance.should_receive(:parse) do |arg|
arg[:base_url].should == "danielnc.com"
arg[:list_page].should == "/itens"
end

another_instance.crawl
end
end

0 comments on commit ba857cb

Please sign in to comment.