Skip to content

Commit

Permalink
Add the scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed May 3, 2015
1 parent d7b3254 commit 0f987af
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@

*.swp

*.sqlite
37 changes: 37 additions & 0 deletions scraper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

require 'scraperwiki'
require 'open-uri'
require 'nokogiri'

# Downloads +url+ and parses the response body as an HTML document.
#
# The page is read through URI#read (mixed in by open-uri) instead of
# Kernel#open: opening URLs via Kernel#open was deprecated in Ruby 2.7 and
# removed in 3.0, and Kernel#open would run a string starting with "|" as a
# shell command. URI#read also closes the underlying handle once the body
# has been read.
#
# @param url [String] absolute http(s) URL of the page to fetch
# @return [Nokogiri::HTML::Document] the parsed page
# @raise [OpenURI::HTTPError] when the server answers with an error status
# @raise [URI::InvalidURIError] when +url+ is not a well-formed URI
def noko(url)
  Nokogiri::HTML(URI.parse(url).read)
end

# Scheme and host of the site being scraped; hrefs taken from its pages are
# appended to this to build absolute URLs.
BASE = 'http://www.parlam.kz'

# Deputy list page for the letter "A"; the scraper reads the alphabet
# navigation from here to find every per-letter page.
START = "#{BASE}/en/mazhilis/People/DeputyList/A"

# Party names keyed by the faction id (kept as a String) extracted from the
# `fid=` parameter on a deputy's profile page. '0' is the id the scraper
# falls back to when no faction can be determined.
@parties = {
  '0'  => 'Independent',
  '1'  => 'Nur Otan',
  '14' => "Communist People's Party",
  '17' => 'Ak Zhol'
}

# Returns the faction id (as a String) advertised on a deputy's profile page.
#
# The id is the number after `fid=` in the party logo's image URL. A profile
# without a party logo yields '0'; a logo URL without a `fid=` parameter
# yields ''.
#
# Only download failures are rescued. They are reported on stderr and fall
# back to '0' so that one unreachable profile does not abort the whole run.
# Any other error is allowed to surface instead of being silently recorded
# as faction '0'.
#
# @param person_url [String] absolute URL of the deputy's profile page
# @return [String] the faction id, '0' when none is shown
def faction_id_for(person_url)
  logo_src = noko(person_url).at_css('.party img/@src')
  return '0' if logo_src.nil?

  logo_src.value[/fid=(\d+)/, 1].to_s
rescue OpenURI::HTTPError, SocketError, SystemCallError, Timeout::Error => e
  warn "Could not fetch #{person_url} (#{e.class}: #{e.message}); assuming faction 0"
  '0'
end

# Walk every per-letter deputy list linked from the alphabet navigation on
# the start page and store one row per deputy, keyed on :id.
noko(START).css('.alphabets li a').each do |letter|
  letter_url = BASE + letter['href']
  puts "Fetching #{letter['href']}"

  noko(letter_url).css('.persons li').each do |person|
    person_url = BASE + person.at_css('a.links/@href').value
    # The portrait path is used twice: the numeric id is embedded in it, and
    # it is stored as the image URL.
    img_src    = person.at_css('img/@src').value
    faktion_id = faction_id_for(person_url)

    data = {
      id:       img_src[%r{PersonImage/(\d+)}, 1],
      name:     person.at_css('a.links').text,
      party_id: faktion_id,
      party:    @parties[faktion_id], # nil for ids missing from the table
      img:      BASE + img_src,
      website:  person_url,
      source:   letter_url,
    }
    ScraperWiki.save_sqlite([:id], data)
  end
end

0 comments on commit 0f987af

Please sign in to comment.