-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.rb
39 lines (32 loc) · 883 Bytes
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/env ruby
# encoding: utf-8
require 'scraperwiki'
require 'nokogiri'
require 'pry'
require 'open-uri/cached'
# Point the open-uri/cached cache at the relative path '.cache'.
OpenURI::Cache.cache_path = '.cache'
class String
  # Returns a copy with every run of whitespace squeezed down to a single
  # space and with leading/trailing whitespace removed.
  #
  # The POSIX [[:space:]] class is used (rather than \s) so that non-ASCII
  # whitespace such as a no-break space is squeezed as well.
  def tidy
    squeezed = gsub(/[[:space:]]+/, ' ')
    squeezed.strip
  end
end
# Fetch +url+ and parse the response body as HTML.
#
# URI.open is used instead of Kernel#open: Kernel#open no longer accepts URLs
# as of Ruby 3.0 (it was deprecated for that use in 2.7), and it would treat a
# string beginning with "|" as a command to execute.
#
# @param url [String] address of the page to download
# @return the document produced by Nokogiri::HTML for the downloaded body
def noko_for(url)
  # Block form hands back the body and closes the handle afterwards.
  body = URI.open(url, &:read)
  Nokogiri::HTML(body)
end
# Scrape the listing page at +url+ and save one record per matching table row.
#
# A row is taken when it contains a <div class="persona">. For each such row a
# hash of id, name, area, image, term and source is built, the source and
# image links are resolved against +url+, and the record is saved through
# ScraperWiki keyed on id and term.
#
# @param url [String] address of the listing page
def scrape_list(url)
  page = noko_for(url)
  rows = page.xpath('//tr[.//div[@class="persona"]]')

  rows.each do |row|
    link = row.css('.title a/@href').text

    record = {
      # id comes from the link as it appears in the page, before it is resolved
      id: File.basename(link),
      name: row.css('.title').text.tidy,
      area: row.text[/округ № (\d+)/, 1],
      image: row.css('a img/@src').text,
      term: 6,
      source: link,
    }

    # Resolve link fields against the page URL; blank values are left as they are.
    [:source, :image].each do |field|
      next if record[field].to_s.empty?

      record[field] = URI.join(url, record[field]).to_s
    end

    ScraperWiki.save_sqlite(%i(id term), record)
  end
end
# Script entry point: run the scraper against the deputies listing page.
scrape_list('http://vspmr.org/structure/deputies/')