Skip to content
Permalink
Browse files

Initial scraper

  • Loading branch information...
tmtmtmtm committed Jun 1, 2018
1 parent f950cff commit abe71ccbb7598bfc1efc576586a0723668aed3ec
Showing with 63 additions and 0 deletions.
  1. +63 −0 scraper.rb
@@ -0,0 +1,63 @@
#!/bin/env ruby
# frozen_string_literal: true

require 'pry'
require 'scraped'
require 'scraperwiki'
require 'wikidata_ids_decorator'

require 'open-uri/cached'
# Point the open-uri cache at a local `.cache` directory (relative to the
# working directory). Presumably fetched pages are then reused from disk on
# later runs instead of being downloaded again — confirm against the
# open-uri/cached gem's documentation.
OpenURI::Cache.cache_path = '.cache'

# Page-level scraper: locates the table captioned "Conseillers nationaux"
# and exposes one hash per data row through the `members` field.
class MembersPage < Scraped::HTML
  # NOTE(review): presumably this decorator annotates links with a `wikidata`
  # attribute, which MemberRow reads via `a/@wikidata` — confirm against the
  # wikidata_ids_decorator gem.
  decorator WikidataIdsDecorator::Links

  # Array with one MemberRow hash for every data row of the table.
  field :members do
    councillor_rows.map do |tr|
      fragment(tr => MemberRow).to_h
    end
  end

  private

  # Rows that contain at least one <td> (header-only rows are skipped),
  # taken from every table whose caption mentions "Conseillers nationaux".
  def councillor_rows
    tables = noko.xpath('//table[.//caption[contains(., "Conseillers nationaux")]]')
    tables.xpath('.//tr[td]')
  end
end

# A single table row. The first three cells are read as, in order, the
# member, the party and the area; for each one the text and the `wikidata`
# attribute of the first link in the cell are exposed.
class MemberRow < Scraped::HTML
  field :name do
    first_link_text(0)
  end

  field :id do
    first_link_wikidata(0)
  end

  field :party do
    first_link_text(1)
  end

  field :party_id do
    first_link_wikidata(1)
  end

  field :area do
    first_link_text(2)
  end

  field :area_id do
    first_link_wikidata(2)
  end

  private

  # All <td> cells found under this row's node.
  def cells
    noko.css('td')
  end

  # Tidied text of the first link in the cell at `column`, or nil when the
  # cell holds no link. Raises if the row has no cell at that position.
  def first_link_text(column)
    cells[column].css('a').map { |link| link.text.tidy }.first
  end

  # Value of the `wikidata` attribute of the first link carrying one in the
  # cell at `column`, or nil when there is none. Raises if the row has no
  # cell at that position.
  def first_link_wikidata(column)
    cells[column].css('a/@wikidata').map(&:text).first
  end
end

# Source page on French Wikipedia. The title contains non-ASCII characters,
# so the URL is percent-encoded before being handed to the scraper.
# URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
# routine it delegated to and yields the same encoded string.
url = URI::DEFAULT_PARSER.escape(
  "https://fr.wikipedia.org/wiki/50e_législature_de_l'Assemblée_fédérale_suisse"
)

# Scrape the page with MembersPage and store its `members` records,
# indexed on the name and party columns.
Scraped::Scraper.new(url => MembersPage).store(:members, index: %i[name party])

0 comments on commit abe71cc

Please sign in to comment.
You can’t perform that action at this time.