Skip to content

Commit

Permalink
added cli options for --trace & criteria mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
jaredfolkins committed Sep 23, 2011
1 parent ce2b13f commit 7d3bec7
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 29 deletions.
33 changes: 33 additions & 0 deletions lib/bendmaps.rb
@@ -0,0 +1,33 @@
# Fetches a page from bendmaps.com for a given volume/page identifier and
# records the lookup as a DialRecord row.
class BendMaps

  # Base URL of the lookup page; the volpage value is appended to it.
  URI = 'http://www.bendmaps.com/dialvolpage.php?vol_page='

  attr_accessor :browser

  def initialize
    setup_browser
  end

  # Creates the Mechanize agent and caps its page history at 10 entries.
  def setup_browser
    @browser = Mechanize.new
    @browser.max_history = 10
  end

  # Requests the lookup page for +volpage+ and stores a DialRecord for it.
  #
  # volpage        - value appended to URI (interpolated, so non-String values work).
  # account_number - optional value stored on the record.
  #
  # Returns whatever save_dial_record returns.
  def retrieve_address(volpage, account_number = nil)
    @browser.get("#{URI}#{volpage}")
    save_dial_record(@browser.page.body, volpage, account_number)
  end

  # Creates a DialRecord for +volpage+ unless one already exists.
  #
  # The previous implementation read an undefined local (`mortgage`) for both
  # the account number and the duplicate check, which raised NameError on
  # every call with a non-nil body. The duplicate check now uses +volpage+,
  # and the account number is passed in explicitly.
  # NOTE(review): the intended source of the account number is not visible
  # here; confirm what callers should pass.
  #
  # Returns the created record, or nil when +body+ is nil or the record exists.
  def save_dial_record(body, volpage, account_number = nil)
    return if body.nil?
    return if DialRecord.exists?(:volpage => volpage)

    DialRecord.create(
      :volpage => volpage,
      # TODO: :address => <address parsed from body>
      :account_number => account_number)
  end

  # Placeholder: response parsing is not implemented yet.
  def parse_response

  end
end
71 changes: 56 additions & 15 deletions lib/bot.rb
Expand Up @@ -9,17 +9,69 @@ class Bot < Dbconnection

# Boots the bot by running every start-up step, in this fixed order:
# option parsing, option validation, database setup, browser setup and the
# initial search-form submission.
def initialize
  [
    :setup_arguments,
    :check_for_criteria_mismatch,
    :setup_db,
    :setup_browser,
    :submit_search_form
  ].each { |step| __send__(step) }
end

# Public entry point: hands control to logic_path and returns its result.
def run
  logic_path()
end

# Picks the crawl strategy from the parsed command-line options.
#
# * :page given - crawl only that page's tree, then print 'Complete!'.
# * :skip given - skip that many result pages, then run the main loop.
# * otherwise   - run the main loop from the current page.
#
# :page is checked first, so it wins when both options are present.
def logic_path
  options = Choice.choices

  if options[:page]
    traverse_tree_from_page(go_to_page(options[:page]))
    puts 'Complete!'
  elsif options[:skip]
    skip_pages(options[:skip])
    run_loop
  else
    run_loop
  end
end

# Main crawl loop: runs one cycle per results page for as long as the
# current page has a next link, then starts the shutdown sequence on the
# final page. When the :trace option is set, each cycle is wrapped in
# Memprof.trace.
def run_loop
  while next_link?(@browser.page)
    if Choice.choices[:trace]
      Memprof.trace { cycle }
    else
      cycle
    end
  end

  shutdown_sequence(@browser.page)
end

# Processes one results page: iterates over the page the browser is
# currently on, then follows that same page's next link.
#
# The page is captured once up front, so the next link is always taken from
# the page that was iterated, even if the browser moves on in between.
def cycle
  current_page = @browser.page
  iterate_search_page(current_page)
  click_next_link(current_page)
end

# Aborts the program when mutually exclusive options are combined.
#
# PAGE and YEAR cannot be used together: if both are set, an error is
# printed and the process exits with status 1. Otherwise returns nil.
def check_for_criteria_mismatch
  return unless Choice.choices[:page] && Choice.choices[:year]

  puts 'ERROR (Criteria Mismatch): You cannot have both PAGE and YEAR specified'
  exit(1)
end


# Declares the command-line options understood by the crawler.
#
# All options are optional:
#   --page=PAGE   crawl a single page's tree (takes precedence over SKIP)
#   --skip=SKIP   skip forward this many search-result pages first
#   --year=YEAR   crawl documents by year
#   --trace=TRACE wrap each crawl cycle in Memprof profiling
#
# The superseded header line and the superseded :year description were
# removed so that each appears exactly once.
def setup_arguments
  Choice.options do
    header 'Deschutes County Records WebCrawler Options:'
    separator 'Optional:'

    option :page do
      long '--page=PAGE'
      desc 'Crawl a specific page\'s tree. PAGE takes precedence over SKIP.'
    end

    option :skip do
      long '--skip=SKIP'
      desc 'Skip forward a certain number of pages on the search results page.'
    end

    option :year do
      long '--year=YEAR'
      desc 'Crawl through documents based by year.'
    end

    option :trace do
      long '--trace=TRACE'
      desc 'Enable Memprof to profile code.'
    end
  end
end
Expand Down Expand Up @@ -56,8 +108,7 @@ def setup_browser

@browser.request_headers = headers
@browser_two.request_headers = headers

end
end

# In order for your cookie to be set correctly
# You first have to call /Login.asp
Expand Down Expand Up @@ -125,7 +176,7 @@ def no_records_found?(page)
end

def skip_pages(total)
total.times do
total.to_i.times do
if next_link?(@browser.page)
click_next_link(@browser.page)
else
Expand All @@ -134,16 +185,6 @@ def skip_pages(total)
end
end

# Main crawl loop: runs one cycle per results page while the current page
# has a next link, then starts the shutdown sequence on the final page.
#
# This definition comes later in the class than the other run_loop and
# therefore is the one that takes effect. It previously inlined the cycle
# body with the Memprof call commented out, which silently ignored the
# :trace option. It now delegates to the cycle helper and honours :trace.
def run_loop
  while next_link?(@browser.page)
    if Choice.choices[:trace]
      Memprof.trace { cycle }
    else
      cycle
    end
  end

  shutdown_sequence(@browser.page)
end

def shutdown_sequence(page)
puts 'Shutdown Activated!'
Expand Down
3 changes: 3 additions & 0 deletions required.rb
Expand Up @@ -22,3 +22,6 @@
require 'lib/mortgage_deed'
require 'lib/default_sale'
require 'lib/convert'
require 'lib/bendmaps'
require 'lib/dial_record'

2 changes: 1 addition & 1 deletion schema.sql
Expand Up @@ -37,7 +37,7 @@ instrument_id varchar(100)

-- dial_records: one row per volpage lookup; volpage is unique, so a given
-- volume/page value can be stored only once.
-- NOTE(review): `instrument` looks like the pre-rename name of
-- `account_number`. It is NOT NULL with no default, and the Ruby code that
-- creates DialRecord rows sets only volpage and account_number. Confirm
-- whether this column should still exist.
-- NOTE(review): `address` is also NOT NULL, but the writer does not populate
-- it yet (marked TODO there); verify before relying on inserts.
create table dial_records (
id int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
instrument int(11) NOT NULL,
account_number int(11) NOT NULL,
volpage varchar(100) unique NOT NULL,
address varchar(150) NOT NULL
);
7 changes: 7 additions & 0 deletions worker_bendmaps.rb
@@ -0,0 +1,7 @@
# setup root path: run from this script's directory so the relative
# requires below resolve
Dir.chdir File.dirname(__FILE__)

# setup file of required files
require 'required.rb'

# The class in lib/bendmaps.rb is named BendMaps (capital M); the previous
# spelling `Bendmaps` raised NameError at start-up.
bendmaps = BendMaps.new
6 changes: 2 additions & 4 deletions worker_bot.rb
Expand Up @@ -4,7 +4,5 @@
# setup file of required files
require 'required.rb'

# Run the bot. Bot#initialize already submits the search form and Bot#run
# dispatches to the crawl loop, so the earlier manual
# submit_search_form / run_loop calls were removed: they made the script
# submit the form twice and crawl twice.
Bot.new.run
4 changes: 2 additions & 2 deletions worker_convert.rb
Expand Up @@ -3,6 +3,6 @@

require 'required.rb'

# Run the converter once. The earlier `converter = Convert.new; converter.run`
# pair was removed so the conversion is not executed twice.
Convert.new.run

7 changes: 0 additions & 7 deletions worker_single_page_bot.rb

This file was deleted.

0 comments on commit 7d3bec7

Please sign in to comment.