Permalink
Browse files

Add images, a read timeout when fetching the URL, and a delay between requests

  • Loading branch information...
jennahowe committed Sep 13, 2015
1 parent dd9735e commit a2e8aec15f2848597caeccde06dd67108ef4b16c
Showing with 17 additions and 10 deletions.
  1. +17 −10 scraper.rb
View
@@ -7,6 +7,7 @@
# OpenURI::Cache.cache_path = '.cache'
require 'open-uri'
class String
def tidy
self.gsub(/[[:space:]]+/, ' ').strip
@@ -17,11 +18,14 @@ def noko_for(url)
Nokogiri::HTML(open(url).read)
end
# Run-mode switch: when `local` is truthy, the branch below loads `pry` and
# points `la_url` at a page served from localhost instead of the live site.
# NOTE(review): both `local = true` and `local = false` appear because this
# view shows diff lines without +/- markers; presumably the first is the
# pre-change line and the second its replacement -- confirm against the commit.
local = true
local = false
# local = true
# Number of seconds handed to `sleep` later in the script.
sleep_between_requests = 60 # (seconds) be kind to El Salvador's server!
# Starting URL on the live site; presumably the listing page that is parsed
# for per-person links -- the consumer is outside this view.
la_url = 'http://asamblea.gob.sv/pleno/pleno-legislativo'
# NOTE(review): with open-uri loaded, `open` on a URL performs the request
# (here with a 60-second read timeout) and returns an IO-like object, so this
# line rebinds `la_url` to that object rather than to a URL string. Confirm
# that whatever consumes `la_url` (not visible here) expects that.
la_url = open('http://asamblea.gob.sv/pleno/pleno-legislativo', read_timeout: 60)
# NOTE(review): `if local do` and `if local` are presumably the before/after
# forms of the same line; only one `if` is closed by the `end` below.
if local do
if local
require 'pry'
la_url = 'http://localhost:8000/pleno_legislativo.html'
end
@@ -30,7 +34,7 @@ def noko_for(url)
noko.css('dl dt a').each do |a|
person_url = a.xpath('./@href').text
if local do
if local
person_url.sub!('asamblea.gob.sv/pleno', 'localhost')
end
puts person_url
@@ -52,19 +56,22 @@ def noko_for(url)
personal_email = p.xpath("//a[.//img[contains(@src,'personal-emailicon.png')]]/span").text
puts personal_email
image = p.xpath("//h1/following-sibling::img[1]/@src").text.sub(/.*\//, "#{person_url}/")
puts image
data = {
id: id,
name: name,
group: group.tidy,
faction: group.tidy,
email: email,
email__personal: personal_email
email__personal: personal_email,
image: image,
}
if local do
ScraperWiki.save_sqlite([:id], data)
if local
puts data
break
else
ScraperWiki.save_sqlite([:id], data)
end
sleep(sleep_between_requests)
end

0 comments on commit a2e8aec

Please sign in to comment.