Skip to content

Commit

Permalink
Use if rather than confusing guard clause
Browse files Browse the repository at this point in the history
  • Loading branch information
equivalentideas committed Feb 5, 2018
1 parent 483d57d commit b4b5671
Showing 1 changed file with 27 additions and 26 deletions.
53 changes: 27 additions & 26 deletions scraper.rb
Expand Up @@ -12,40 +12,41 @@ def web_archive(url)
archive_request_response.headers[:link].split(", ")[2][/\<.*\>/].gsub(/<|>/, "")
end

# Fetch the Wayback Machine JSON timemap listing every archived capture of
# the tenders.nsw.gov.au contract-search endpoint. Using a single variable
# for the URL keeps the log message and the actual request in sync (the old
# log string had drifted and was truncated to "...searc").
timemap_url = "https://web.archive.org/web/timemap/json/http://tenders.nsw.gov.au/?event=public.api.contract.search"
puts "Getting list of archived searches from #{timemap_url}"
timemap = JSON.parse(Net::HTTP.get(URI(timemap_url)))
# Drop the first row (presumably the timemap's column-header row — TODO
# confirm against the web.archive.org timemap format) and keep the second
# column of each remaining row: the capture timestamp.
archive_times = timemap[1..-1].map { |t| t[1] }

# FOR EACH IN THE TIMEMAP
archive_times.each do |archive_timestamp|
  # Skip captures already recorded in the local SQLite store. A failing
  # select (e.g. the "data" table does not exist yet on the very first run)
  # counts as "not yet scraped", so scraping proceeds — this preserves the
  # original deliberate best-effort `rescue`. NOTE(review): the timestamp is
  # interpolated straight into SQL; it comes from web.archive.org's timemap,
  # but parameterised queries would be safer if ScraperWiki supports them.
  already_scraped =
    begin
      !ScraperWiki.select(
        "archive_timestamp from data where archive_timestamp='#{archive_timestamp}'"
      ).empty?
    rescue StandardError
      false
    end
  # Guard clause: `next` skips just this timestamp instead of wrapping the
  # whole body in an `if` (and unlike the earlier `return`, it does not
  # abort the entire loop).
  next if already_scraped

  puts "Getting archived search data from #{archived_search_url(archive_timestamp)}"
  archived_search_data = JSON.parse(
    Net::HTTP.get(URI(archived_search_url(archive_timestamp)))
  )

  # One record per award in each OCDS release of the archived search result.
  archived_search_data["releases"].each do |release|
    release["awards"].each do |award|
      url = "https://tenders.nsw.gov.au/?event=public.api.contract.view&CNUUID=#{award["CNUUID"]}"

      puts "Getting contract data from #{url}"

      record = {
        scraped_at: Date.today.to_s,
        web_archive_url: web_archive(url),
        CNUUID: award["CNUUID"],
        ocid: release["ocid"],
        data_blob: Net::HTTP.get(URI(url)),
        archive_timestamp: archive_timestamp
      }

      puts "Saving contract data from #{url}"
      # Upsert keyed on (CNUUID, archive_timestamp) so re-runs don't duplicate.
      ScraperWiki.save_sqlite([:CNUUID, :archive_timestamp], record)

      # Be polite to the Wayback Machine / tenders site between requests.
      sleep 3
    end
  end
end

0 comments on commit b4b5671

Please sign in to comment.