Skip to content

Commit

Permalink
Move to Hpricot (Thanks, Jason)
Browse files Browse the repository at this point in the history
  • Loading branch information
courtenay committed Apr 14, 2008
1 parent 6c4f27a commit 8056532
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
@@ -1,3 +1,7 @@
Added: Now uses Hpricot instead of HTML::Document for a ~10% speedup.

Moved to GitHub

Fixed: Workaround for inputs without a name (e.g. input type="button" onclick="...")
Fixed: Now accepts form method="GET" as well as "get"
Fixed: Better static file checking (checks if rails can generate it, THEN checks if the static file exists)
Expand Down
11 changes: 6 additions & 5 deletions lib/caboose/spider_integrator.rb
Expand Up @@ -78,6 +78,7 @@
# locations, and up until now this has been impossible to test in an automated fashion
# or without being strongly coupled to your code.
#
require 'hpricot'
module Caboose::SpiderIntegrator

# Begin spidering your application.
Expand Down Expand Up @@ -115,18 +116,18 @@ def spider( body, uri, options )
# todo: now that we parse with Hpricot, we may need to validate the html
# ourselves (HTML::Document did that by default)
def consume_page( html, url )
body = HTML::Document.new html
body.find_all(:tag=>'a').each do |tag|
body = Hpricot html
body.search('a').each do |tag|
queue_link( tag, url )
end
body.find_all(:tag=>'link').each do |tag|
body.search('link').each do |tag|
# Strip appended browser-caching numbers from asset paths like ?12341234
queue_link( tag, url )
end
body.find_all(:tag => 'input', :attributes => { :name => nil }) do |input|
body.search('input[name=""]') do |input|
queue_link( tag, url ) if tag['onclick']
end
body.find_all(:tag =>'form').each do |form|
body.search('form').each do |form|
form = SpiderableForm.new form
queue_form( form, url )
end
Expand Down
13 changes: 7 additions & 6 deletions lib/caboose/spider_integrator/spiderable_form.rb
@@ -1,3 +1,4 @@
require 'hpricot'
module Caboose::SpiderIntegrator

# This is an abstract representation of a form that we can spider.
Expand All @@ -15,14 +16,14 @@ def method=(m)
@method = m.downcase if m
end

def find_all(*args)
@form.find_all(*args)
# Delegates a search to the wrapped form element.
#
# All positional arguments are passed through unchanged to @form.search.
# Any block given by the caller is forwarded as well; previously it was
# silently dropped because the wrapper only passed *args along.
#
# Returns whatever @form.search returns.
def search(*args, &block)
  @form.search(*args, &block)
end

def mutate_inputs!(mutate_existing_values = false)
input_hash = mutate_existing_values ? { '_mutated' => true } : { '_modified' => true }

@form.find_all(:tag => 'input').each do |input|
@form.search('input').each do |input|
if input['name'] == '_method' # and value.in?['put','post',..] # rails is faking the post/put etc
self.method = input['value']
else
Expand Down Expand Up @@ -52,11 +53,11 @@ def mutate_inputs!(mutate_existing_values = false)
end
end
end
@form.find_all(:tag => 'textarea').each do |input|
@form.search('textarea').each do |input|
input_hash[ input['name'] ] = create_data(input, mutate_existing_values)
end
@form.find_all(:tag => 'select').each do |select|
options = select.find_all(:tag => 'option')
@form.search('select').each do |select|
options = select.search('option')
option = options[ rand(options.length) ]
input_hash[ select['name'] ] = option['value']
end
Expand Down

0 comments on commit 8056532

Please sign in to comment.