Skip to content
Browse files

Update with transaction and step code

  • Loading branch information...
1 parent 664a23a commit da34376832a299f302999bed0ac385ebd47bf9e3 eric.beland@nemoves.com committed
View
BIN .redcar/lucene/_7i.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7i_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7j.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7j_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7k.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7k_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7l.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7l_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7m.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7m_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7n.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7n_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7o.cfs
Binary file not shown.
View
BIN .redcar/lucene/_7o_1.del
Binary file not shown.
View
BIN .redcar/lucene/_7p.cfs
Binary file not shown.
View
BIN .redcar/lucene/segments.gen
Binary file not shown.
View
BIN .redcar/lucene/segments_70
Binary file not shown.
View
1 .redcar/lucene_last_updated
@@ -0,0 +1 @@
+1308605469
View
1 .redcar/redcar.lock
@@ -0,0 +1 @@
+24570: Locked by 24570 at Mon Jun 20 10:03:35 -0400 2011
View
19 .redcar/tags
@@ -0,0 +1,19 @@
+1308605469
+ErrorRecovery /home/ebeland/apps/scrapeybara/lib/scrapeybara/error_recovery.rb module ErrorRecovery
+Scrape /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb module Scrape
+Scraper /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb class Scraper
+Scrapeybara /home/ebeland/apps/scrapeybara/lib/scrapeybara.rb module Scrapeybara
+Scrapeybara /home/ebeland/apps/scrapeybara/lib/scrapeybara/version.rb module Scrapeybara
+ScrapeybaraGenerator /home/ebeland/apps/scrapeybara/lib/generators/scrapeybara_generator.rb class ScrapeybaraGenerator
+VERSION /home/ebeland/apps/scrapeybara/lib/scrapeybara/version.rb VERSION =
+create /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb def self.create(element = nil, options = {:default => :xpath})
+element /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb attr_accessor :options, :element
+extract /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb def extract(item_locator)
+file_name /home/ebeland/apps/scrapeybara/lib/generators/scrapeybara_generator.rb def file_name
+manifest /home/ebeland/apps/scrapeybara/lib/generators/scrapeybara_generator.rb def manifest
+method_missing /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb def method_missing(method, *args)
+next_migration_number /home/ebeland/apps/scrapeybara/lib/generators/scrapeybara_generator.rb def self.next_migration_number(path)
+options /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb attr_accessor :options, :element
+scrape /home/ebeland/apps/scrapeybara/lib/scrapeybara/scrape.rb def self.scrape(options, &block)
+source_root /home/ebeland/apps/scrapeybara/lib/generators/scrapeybara_generator.rb def self.source_root
+with_retry /home/ebeland/apps/scrapeybara/lib/scrapeybara/error_recovery.rb def with_retry(opts = {} , &block)
View
10 Gemfile
@@ -4,10 +4,14 @@ source "http://rubygems.org"
gem "hashie"
gem "rspec"
-gem "capybara", "~>0.4.1"
-# gem "capybara-webkit",:git => 'git://github.com/thoughtbot/capybara-webkit.git'
+gem "capybara", "~>0.4.1"
+gem "capybara-webkit", :git => 'git://github.com/thoughtbot/capybara-webkit.git'
gem 'headless'
+
+gem 'akephalos' #,:git => 'git://github.com/asceth/akephalos.git'
+
+
gem "pry_debug"
gem 'webmock'
@@ -17,4 +21,4 @@ gem 'autotest'
gem 'autotest-rails'
gem 'term-ansicolor'
gem 'turn'
-gem 'vcr'
+gem 'vcr'
View
21 Gemfile.lock
@@ -1,8 +1,18 @@
+GIT
+ remote: git://github.com/thoughtbot/capybara-webkit.git
+ revision: ec3a82706008d7002b7cf113bea231ef4a211345
+ specs:
+ capybara-webkit (0.5.0)
+ capybara (~> 0.4.1)
+
GEM
remote: http://rubygems.org/
specs:
ZenTest (4.5.0)
addressable (2.2.6)
+ akephalos (0.2.5)
+ capybara (~> 0.4.0)
+ jruby-jars
ansi (1.2.5)
archive-tar-minitar (0.5.2)
autotest (4.4.6)
@@ -29,13 +39,14 @@ GEM
ffi (1.0.9)
hashie (1.0.0)
headless (0.1.0)
- json_pure (1.5.2)
+ jruby-jars (1.6.2)
+ json_pure (1.5.3)
linecache19 (0.5.12)
ruby_core_source (>= 0.1.4)
method_source (0.6.0)
ruby_parser (>= 2.0.5)
mime-types (1.16)
- nokogiri (1.4.5)
+ nokogiri (1.4.6)
pry (0.9.1)
coderay (>= 0.9.8)
method_source (>= 0.6.0)
@@ -75,7 +86,7 @@ GEM
sexp_processor (3.0.5)
sinatra (1.2.6)
rack (~> 1.1)
- tilt (>= 1.2.2, < 2.0)
+ tilt (< 2.0, >= 1.2.2)
slop (1.9.1)
term-ansicolor (1.0.5)
tilt (1.3.2)
@@ -83,7 +94,7 @@ GEM
ansi (>= 1.2.2)
vcr (1.10.0)
webmock (1.6.4)
- addressable (~> 2.2, > 2.2.5)
+ addressable (> 2.2.5, ~> 2.2)
crack (>= 0.1.7)
xpath (0.1.4)
nokogiri (~> 1.3)
@@ -92,9 +103,11 @@ PLATFORMS
ruby
DEPENDENCIES
+ akephalos
autotest
autotest-rails
capybara (~> 0.4.1)
+ capybara-webkit!
hashie
headless
pry_debug
View
6 README
@@ -37,8 +37,10 @@ s = Scraper.new(:outputter => MyOutputter.new,
p listing.price
end
-
-
+
+ If you want to use the transaction/step capabilities within a Rails project, run:
+ ./script/generate scrapeybara
+
To Do:
View
38 lib/generators/scrapeybara_generator.rb
@@ -0,0 +1,38 @@
+# lib/generators/scrapeybara_generator.rb
+require 'rails/generators/migration'
+require 'rails/generators/active_record'
+
+
+class ScrapeybaraGenerator < Rails::Generator::Base
+
+ def self.source_root
+ @source_root ||= File.dirname(__FILE__) + '/templates'
+ end
+
+ def self.next_migration_number(path)
+ ActiveRecord::Generators::Base.next_migration_number(path)
+ end
+
+ def manifest
+ record do |m|
+ # Models
+ m.file "models/run.rb", "app/models/run.rb"
+ m.file "models/transaction_result.rb", "app/models/transaction_result.rb"
+ m.file "models/step.rb", "app/models/step.rb"
+
+ # Tests
+ # m.file "test/unit/run_test.rb", "test/unit/run_test.rb"
+
+ m.migration_template "migrations/create_runs.rb", "db/migrate"
+ m.migration_template "migrations/create_steps.rb", "db/migrate"
+ m.migration_template "migrations/create_transaction_results.rb", "db/migrate"
+
+ m.readme "INSTALL me for activerecord storage of transaction and step results"
+ end
+ end
+
+ def file_name
+ "create_transaction_results"
+ end
+
+end
View
14 lib/generators/templates/migrations/create_runs.rb
@@ -0,0 +1,14 @@
+class RunMigration < ActiveRecord::Migration
+
+ def self.up
+ create_table :runs do |t|
+ t.column :id, :integer
+ t.timestamps
+ end
+ end
+
+ def self.down
+ drop_table :runs
+ end
+
+end
View
21 lib/generators/templates/migrations/create_steps.rb
@@ -0,0 +1,21 @@
+# Creates the steps table: one row per named step of a transaction result.
+#
+# Named CreateSteps (matching create_steps.rb) rather than Step so that it
+# does not clash with the Step model class.
+class CreateSteps < ActiveRecord::Migration
+
+  def self.up
+    # create_table adds the integer primary key `id` itself.
+    create_table :steps do |t|
+      t.column :transaction_result_id, :integer
+      t.column :name, :string
+      t.column :error, :string
+      t.column :result, :string
+      t.column :http_results, :string
+      t.column :started_at, :datetime
+      t.column :ended_at, :datetime
+      t.timestamps
+    end
+  end
+
+  # Reverses #up: drops the table this migration created.
+  def self.down
+    drop_table :steps
+  end
+
+end
View
13 lib/generators/templates/migrations/create_transaction_results.rb
@@ -0,0 +1,13 @@
+class TransactionResult < ActiveRecord::Migration
+ def self.up
+ create_table :transaction_results do |t|
+ t.column :id, :integer
+ t.column :run_id, :integer
+ t.timestamps
+ end
+ end
+
+ def self.down
+ drop_table :transaction_result
+ end
+end
View
0 lib/generators/templates/models/run.rb
No changes.
View
11 lib/generators/templates/models/step.rb
@@ -0,0 +1,11 @@
+class Step > ActiveRecord::Base
+ belongs_to :transaction_result
+
+
+ def name
+
+ end
+
+
+
+end
View
31 lib/generators/templates/models/transaction_result.rb
@@ -0,0 +1,31 @@
+class TransactionResult < ActiveRecord::Base
+ has_many :steps
+
+ # captures information about a scraping attempt, such as failures, screen captures, etc for debugging
+
+ def step(name)
+ current_step = Step.new(name)
+ yield @current_step if block_given?
+ end
+
+ def step_begin(name)
+ steps << Step.new(name)
+ end
+
+ def step_end
+ current_step = Step.new(name)
+ end
+
+ def current_step
+ steps.last unless finished
+ end
+
+ def result
+ steps.last.result
+ end
+
+ def failed?
+ steps.last.error
+ end
+
+end
View
7 lib/scrapeybara.rb
@@ -1,5 +1,4 @@
require_relative 'scrapeybara/error_recovery'
-require_relative 'scrapeybara/result_formatters'
require_relative 'scrapeybara/scrape'
require_relative 'scrapeybara/steps'
require_relative 'scrapeybara/transaction_result'
@@ -12,13 +11,11 @@
#Capybara.app_host = 'http://www.google.com'
-
module Scrapeybara
include ErrorRecovery
- include ResultFormatters
include Scrape
include Steps
-# include Capybara::DSL
+ include Capybara
-end
+end
View
35 lib/scrapeybara/error_recovery.rb
@@ -1,25 +1,20 @@
-module ErrorRecovery
-
- def retry(opts = {} , &block)
- options = {:retries => 3, :wait => 10 }.merge(opts)
- result = []
- success = nil
- retries = options[:retries]
- until success || retries == 0
+module ErrorRecovery
+
+ def with_retry(opts = {} , &block)
+ options = {:limit => 3, :wait => 10, :reset =>nil }.merge(opts)
+ results = []
+ retries = 0
+ until results.length > 0 and !(results.last.is_a?(Exception)) || (retries == options[:limit])
begin
- retries -= 1
- success = block.call
+ retries += 1
+ results << yield
rescue Exception => e
- result << e
+ results << e
end
- if success
- result << success
- else
- sleep options[:wait]
- end
+ (options[:reset].call rescue nil) if options[:reset]
+ sleep options[:wait]
end
- result
- end
-
+ results
+ end
-end
+end
View
5 lib/scrapeybara/scrape.rb
@@ -1,5 +1,6 @@
require 'rubygems'
require 'hashie'
+require 'json'
module Scrape
@@ -16,8 +17,8 @@ def self.create(element = nil, options = {:default => :xpath})
s.element = element # context of where I am in the document, so I can be searched-within
s
end
-
- def extract(item_locator)
+
+ def extract(item_locator)
return item_locator # stub
if item_locator.is_a?(Hash)
finder_key = item_locator[:xpath] ? :xpath : :css
View
117 spec/scrapey_spec.rb
@@ -2,8 +2,11 @@
require_relative '../lib/scrapeybara'
describe 'Scrapebara' do
-
- let :string do
+ include Scrapeybara
+
+ context 'Scraping Results' do
+
+ let :string do
Capybara.string <<-STRING
<div id="page">
<div id="content">
@@ -23,52 +26,102 @@
STRING
end
- before :each do
- @result = Scrapeybara::Scraper::scrape(:default => :xpath) do |s|
- s.listings :xpath => '//tr[@class=listing]' do |listing|
- listing.hello 'td[@class=name]'
- listing.hello do |h|
- h.goodbye 'bye'
- end
- end
- end
- end
+
+ before :each do
+ @result = Scrapeybara::Scraper::scrape(:default => :xpath) do |s|
+ s.listings :xpath => '//tr[@class=listing]' do |listing|
+ listing.hello 'td[@class=name]'
+ listing.hello do |h|
+ h.goodbye 'bye'
+ end
+ end
+ end
+ end
- it 'should return a result for listing' do
- @result.listings.nil?.should be_false
- @result.made_up_thing.nil?.should be_true
- @result.listings.hello.nil?.should be_false
- end
-
- it 'should return the element for each item' do
- string.nil?.should be_false
+ it 'should return a result for listing' do
+ @result.listings.nil?.should be_false
+ @result.made_up_thing.nil?.should be_true
+ @result.listings.hello.nil?.should be_false
+ end
+
+ it 'should return the element for each item' do
+ string.nil?.should be_false
- end
+ end
- it 'should use capybara' do
- visit 'http://www.google.com'
- page.nil?.should be_false
- end
+ it 'should provide json output format' do
+ @result.to_json
+ end
- it 'should run retry blocks the correct number of times' do
-
+ it 'should provide xml output format' do
+ @result.to_xml
+ end
+
+ end
+
+
+ it 'should use capybara' do
+ # visit 'http://www.google.com'
+ # page.nil?.should be_false
end
- it 'should collect failing results of retries' do
+
+ it 'should keep track of scraping steps and results' do
+
+
end
- it 'should stop retrying after success' do
+ it 'should provide a global transaction result' do
end
- it 'should capture failure information' do
+ context 'retries' do
- end
+ it 'should collect results of retries' do
+ result = with_retry :limit => 3, :wait => 0.0001 do
+ 3
+ end
+ (result.last == 3).should be_true
+ end
- it 'should ' do
+ it 'should stop retrying after success' do
+ retry_count = 0
+ with_retry :limit => 3, :wait => 0.00001 do
+ retry_count += 1
+ raise "I am an exception" if retry_count == 1
+ end
+ (retry_count == 2).should be_true
+ end
+
+ it 'should capture failure information' do
+ result = with_retry :limit => 3, :wait => 0.00001 do
+ retry_count += 1
+ raise "I am an exception" if retry_count == 1
+ end
+ result.last.is_a?(Exception).should be_true
+ end
+
+ it 'should run the optional reset block upon each retry' do
+ @reset_called = 0
+ reset = lambda { @reset_called += 1 }
+ with_retry(:limit => 2, :wait => 0.00001, :reset => reset ) do
+ raise 'foo'
+ end
+ (@reset_called == 2).should be_true
+ end
+ it 'should run retry blocks the correct number of times' do
+ retry_count = 0
+ with_retry :limit => 3, :wait => 0.00001 do
+ retry_count += 1
+ raise 'Foo'
+ end
+ retry_count.should == 3
+ end
+
end
+
end
View
9 spec/spec_helper.rb
@@ -13,14 +13,13 @@
path = File.join(File.dirname(__FILE__), "spec/factories/**/*.rb")
RSpec.configure do |config|
- include Capybara::DSL
+ include Capybara
end
Capybara.current_driver = :selenium
-app_host = 'http://www.google.com'
-Capybara.app_host = app_host
-Capybara.run_server = false
-
+Capybara.run_server = false
+#Capybara.app_host = 'http://www.google.com'
+
#VCR.config do |c|
# c.cassette_library_dir = 'fixtures/vcr_cassettes'
# c.stub_with :webmock # or :fakeweb

0 comments on commit da34376

Please sign in to comment.
Something went wrong with that request. Please try again.