-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b433883
commit 76b4f69
Showing
9 changed files
with
259 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
.rvmrc | ||
|
||
# rcov generated | ||
coverage | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
GEM | ||
remote: http://rubygems.org/ | ||
specs: | ||
diff-lcs (1.1.3) | ||
git (1.2.5) | ||
jeweler (1.6.4) | ||
bundler (~> 1.0) | ||
git (>= 1.2.5) | ||
rake | ||
mechanize (2.0.1) | ||
net-http-digest_auth (~> 1.1, >= 1.1.1) | ||
net-http-persistent (~> 1.8) | ||
nokogiri (~> 1.4) | ||
webrobots (~> 0.0, >= 0.0.9) | ||
net-http-digest_auth (1.1.1) | ||
net-http-persistent (1.9) | ||
nokogiri (1.5.0) | ||
rack (1.3.5) | ||
rack-protection (1.1.4) | ||
rack | ||
rake (0.9.2.2) | ||
rcov (0.9.11) | ||
rspec (2.7.0) | ||
rspec-core (~> 2.7.0) | ||
rspec-expectations (~> 2.7.0) | ||
rspec-mocks (~> 2.7.0) | ||
rspec-core (2.7.1) | ||
rspec-expectations (2.7.0) | ||
diff-lcs (~> 1.1.2) | ||
rspec-mocks (2.7.0) | ||
sham_rack (1.3.3) | ||
rack | ||
sinatra (1.3.1) | ||
rack (~> 1.3, >= 1.3.4) | ||
rack-protection (~> 1.1, >= 1.1.2) | ||
tilt (~> 1.3, >= 1.3.3) | ||
tilt (1.3.3) | ||
webrobots (0.0.12) | ||
nokogiri (>= 1.4.4) | ||
|
||
PLATFORMS | ||
ruby | ||
|
||
DEPENDENCIES | ||
bundler (~> 1.0.0) | ||
jeweler (~> 1.6.4) | ||
mechanize | ||
rcov | ||
rspec | ||
sham_rack | ||
sinatra |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
require 'mechanize' | ||
|
||
# Declarative page scraper. A subclass declares a URL, an optional mapping of
# request parameters, an optional HTTP method and a set of attributes
# (name => page selector). +find+ then fetches the page with Mechanize and
# returns an instance whose attributes hold the inner HTML found for each
# selector.
#
# All settings are stored in instance variables of the class they are
# declared on.
class Scrapie
  # Base class for errors raised by Scrapie. It derives from StandardError
  # (not Exception) so that a plain +rescue+ is able to catch it.
  class ScrapieException < StandardError; end

  # Raised by +find+ when no attributes have been declared.
  class NoAttributesException < ScrapieException; end

  # Sets the URL that +find+ requests.
  def self.url(url)
    @url = url
  end

  # Sets the mapping from the option keys accepted by +find+ to the parameter
  # names that are sent with the request.
  def self.params(params)
    @params = params
  end

  # Sets the name of the Mechanize agent method used for the request.
  # +find+ falls back to :get when this was never called.
  def self.http_method(method)
    @http_method = method
  end

  # Declares the attributes to scrape as name => page selector pairs and
  # defines an accessor pair for every name.
  def self.attributes(attributes)
    @attributes = attributes
    attributes.each { |name, _page_selector| attr_accessor(name) }
  end

  # Fetches the configured URL and builds a new instance from the response.
  #
  #   find()
  #   find(:foo => bar)
  #   find(:foo => bar, :baz => bizzle)
  #
  # opts - Hash of option key => value. Only keys present in the declared
  #        +params+ mapping are sent; all other keys are ignored.
  #
  # Returns a new instance with every declared attribute assigned.
  # Raises NoAttributesException when no attributes have been declared.
  def self.find(opts = {})
    raise NoAttributesException if @attributes.nil? || @attributes.empty?

    agent = Mechanize.new
    page = agent.send(@http_method || :get, @url, request_params(opts))

    record = new
    @attributes.each do |name, page_selector|
      # Interpolation instead of `name + '='` so Symbol names work as well.
      record.send("#{name}=", page.search(page_selector).inner_html)
    end
    record
  end

  # Translates the options given to +find+ into request parameters using the
  # declared +params+ mapping. Options without a mapping are skipped, so the
  # result is always a Hash (empty when nothing is mapped).
  def self.request_params(opts)
    mapping = @params || {}
    translated = {}
    opts.each do |key, value|
      remote_name = mapping[key]
      translated[remote_name] = value if remote_name
    end
    translated
  end
  private_class_method :request_params

  # Callbacks. TODO: not implemented yet; nothing in this class calls them.

  def self.before_fetch
  end

  def self.after_fetch
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
require 'helper' | ||
|
||
# Sinatra application registered with ShamRack for the host "scrapietest";
# the fixture classes in this spec use http://scrapietest/... URLs.
ShamRack.at('scrapietest').sinatra do
  # Plain text page without any markup.
  get('/test1') { 'No attributes here chief' }

  # Echoes the test_param_for_getting parameter, once as given and once upcased.
  get('/test_with_params') do
    "<div id='param'>#{params[:test_param_for_getting]}</div>" \
      "<div id='param_upcased'>#{params[:test_param_for_getting].upcase}</div>"
  end

  # Single element carrying the class "foo".
  get('/test') { "<div class='foo'>example</div>" }

  # DERP is not defined in this file, so evaluating it makes the handler fail;
  # the spec for this route expects Mechanize::ResponseCodeError.
  get('/500') { DERP }

  # Echoes the le_post parameter of a POST request.
  post('/post') { "<div id='post_param'>#{params[:le_post]}</div>" }
end
|
||
# Fixture that sets a URL but declares no attributes.
class NoAttributeScrapie < Scrapie
  url('http://scrapietest/test1')
end
|
||
# Fixture with a single attribute, "foo", read from the ".foo" selector.
class BasicScrapie < Scrapie
  url('http://scrapietest/test')
  attributes('foo' => '.foo')
end
|
||
# Fixture that maps the :test_param option to the request parameter
# "test_param_for_getting" and reads two attributes from the response.
class ParamsScrapie < Scrapie
  url('http://scrapietest/test_with_params')
  params(:test_param => 'test_param_for_getting')
  attributes(
    'param' => 'div#param',
    'param_upcased' => 'div#param_upcased'
  )
end
|
||
# Fixture pointing at a path for which the test app in this spec defines no route.
class FourOhFourScrapie < Scrapie
  url('http://scrapietest/ends_of_the_earth')
  attributes('results' => 'div#post_param')
end
|
||
# Fixture pointing at the "/500" route of the test app.
class FiveHundredScrapie < Scrapie
  url('http://scrapietest/500')
  attributes('results' => 'div#post_param')
end
|
||
# Fixture that uses :post as its HTTP method and maps the :search option to
# the request parameter "le_post".
class PostScrapie < Scrapie
  http_method(:post)
  url('http://scrapietest/post')

  params(:search => 'le_post')
  attributes('results' => 'div#post_param')
end
|
||
# Specs for Scrapie, run against the fixture classes defined in this file.
describe Scrapie do
  it "whines if you don't specify any attributes" do
    attempt = lambda { NoAttributeScrapie.find(:har => 'heh') }
    attempt.should raise_error(Scrapie::NoAttributesException)
  end

  it 'does a basic fetch sans params' do
    BasicScrapie.find.foo.should == 'example'
  end

  it 'handles params' do
    sample = 'sdkfjhdsafjkladhfklzxcv123' # TODO: random string

    scraped = ParamsScrapie.find(:test_param => sample)
    scraped.param.should == sample
    scraped.param_upcased.should == sample.upcase
  end

  it 'handles 404s' do
    attempt = lambda { FourOhFourScrapie.find(:har => 'heh') }
    attempt.should raise_error(Mechanize::ResponseCodeError)
  end

  it 'handles 500s' do
    attempt = lambda { FiveHundredScrapie.find(:har => 'heh') }
    attempt.should raise_error(Mechanize::ResponseCodeError)
  end

  it 'uses different HTTP methods' do
    scraped = PostScrapie.find(:search => 'le_search')

    scraped.results.should == 'le_search'
  end

  # Pending examples (no body yet).
  it 'uses a before_fetch'
  it 'uses an after_fetch'
  it 'sets agent options'
end
This file was deleted.
Oops, something went wrong.