Skip to content
Permalink
Browse files

Update to latest approach

  • Loading branch information...
tmtmtmtm committed Feb 27, 2019
1 parent 37bfa4f commit 3467e409e433ee57edc1beaf70f9c7fd414e450f
Showing with 114 additions and 73 deletions.
  1. +112 −0 lib/cabinet.rb
  2. +2 −73 scraper.rb
@@ -0,0 +1,112 @@
# frozen_string_literal: true

# TODO: extend Scraped::Scraper with ability to add Strategies
# Reopens Scraped's live-request strategy so that the value a scraper is
# given as its "url" is interpolated into a SPARQL query (after `wd:`), and
# the request actually goes to the Wikidata Query Service.
class Scraped::Request::Strategy::LiveRequest
  require 'rest-client'

  # Query-service URL template; `%s` is the slot for the CGI-escaped SPARQL text.
  SPARQL_URL = 'https://query.wikidata.org/sparql?format=json&query=%s'

  # SPARQL template. Both `%s` slots receive the same value (see #url).
  # For every item with a P39 statement whose value is wd:%s, it returns each
  # P39 statement (?ps) of that item whose value (?minister) is a transitive
  # subclass of wd:Q83307, plus optional ordinal and start/end time values
  # with their precisions.
  # NOTE(review): ?cabinet / ?cabinetLabel are selected but nothing in the
  # query binds ?cabinet, so those columns will always be empty — confirm
  # whether a qualifier lookup was meant to be included.
  QUERY = <<~SPARQL
    SELECT DISTINCT ?ps ?item ?itemLabel ?minister ?ministerLabel ?ordinal ?start ?startprecision ?end ?endprecision ?cabinet ?cabinetLabel {
    {
    SELECT DISTINCT ?ps ?item ?minister ?ordinal ?start ?startprecision ?end ?endprecision ?cabinet {
    ?item p:P39/ps:P39 wd:%s .
    ?item p:P39 ?ps .
    ?ps ps:P39 ?minister .
    ?minister wdt:P279* wd:Q83307 .
    OPTIONAL { ?ps pq:P1545 ?ordinal }
    OPTIONAL { ?ps pqv:P580 [wikibase:timeValue ?start ; wikibase:timePrecision ?startprecision ] }
    OPTIONAL { ?ps pqv:P582 [wikibase:timeValue ?end ; wikibase:timePrecision ?endprecision ] }
    # Ignore anything with a different jurisdiction
    OPTIONAL { wd:%s wdt:P1001 ?legislative_jurisdiction }
    OPTIONAL { ?minister wdt:P1001 ?executive_jurisdiction }
    FILTER (!BOUND(?legislative_jurisdiction) || !BOUND(?executive_jurisdiction) || (?legislative_jurisdiction = ?executive_jurisdiction))
    }
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
  SPARQL

  # @return [String] SPARQL_URL with the escaped query filled in; the query
  #   itself has @url substituted into both of its placeholders.
  def url
    SPARQL_URL % CGI.escape(QUERY % [@url, @url])
  end

  private

  # Runs +query+ against the query service and parses the reply as CSV.
  # No caller is visible in this file.
  #
  # @param query [String] SPARQL text
  # @return [CSV::Table] rows with symbolised headers
  # @raise [RuntimeError] when RestClient reports a failure
  def sparql(query)
    # Loaded lazily: only this helper needs CSV.
    require 'csv'

    # Use the bare endpoint: SPARQL_URL carries a literal `%s` slot and
    # `format=json`, neither of which belongs in a request that passes the
    # query via `params:` and asks for CSV.
    endpoint = SPARQL_URL.split('?').first
    result = RestClient.get endpoint, accept: 'text/csv', params: { query: query }
    CSV.parse(result, headers: true, header_converters: :symbol)
  rescue RestClient::Exception => e
    raise "Wikidata query #{query} failed: #{e.message}"
  end
end

class CabinetScraper < Scraped::JSON
# Every binding row of the SPARQL JSON response, rendered as a Hash by Membership.
field :memberships do
  rows = json[:results][:bindings]
  rows.map { |row| fragment(row => Membership).to_h }
end

# Renders a timestamp string at the granularity given by an accompanying
# precision code, by keeping only the leading "YYYY", "YYYY-MM" or
# "YYYY-MM-DD" portion.
class Wikidate
  # Characters of an unsigned "YYYY-MM-DD…" timestamp that are meaningful
  # at each supported precision code.
  PRECISION_LENGTH = {
    '9' => 4, # year
    '10' => 7, # month
    '11' => 10, # day
  }.freeze

  # @param date [String, nil] timestamp such as "2019-02-27T00:00:00Z"
  # @param precision [String, Integer, nil] precision code (9, 10 or 11)
  def initialize(date, precision)
    @date = date
    @precision = precision
  end

  # Deliberately not called to_s, because the result may be nil.
  #
  # @return [String, nil] the truncated date, or nil when the date or
  #   precision is missing or the precision code is not one we handle
  def as_string
    return unless date && precision
    return unless slice_point

    date.slice(0, slice_point)
  end

  private

  attr_reader :date, :precision

  # Number of characters to keep, or nil for an unsupported precision.
  # The precision is normalised to a String so Integer codes match too, and
  # a leading "-" (negative year) is counted so it does not eat a digit.
  def slice_point
    length = PRECISION_LENGTH[precision.to_s]
    return unless length

    date.start_with?('-') ? length + 1 : length
  end
end

# One binding row of the SPARQL result, exposed as named fields.
class Membership < Scraped::JSON
  field(:id) { last_path_segment(:item) }

  field(:name) { binding_value(:itemLabel) }

  field(:position_id) { last_path_segment(:ps) }

  field(:position) { last_path_segment(:minister) }

  field(:label) { binding_value(:ministerLabel) }

  field(:start_date) { truncated_date(:start, :startprecision) }

  field(:end_date) { truncated_date(:end, :endprecision) }

  # A missing ordinal becomes 0, because nil.to_i is 0.
  field(:ordinal) { binding_value(:ordinal).to_i }

  private

  # The :value entry of the named binding, or nil when the binding is absent.
  def binding_value(key)
    json.dig(key, :value)
  end

  # Final "/"-separated segment of the named binding's value (nil when absent).
  def last_path_segment(key)
    binding_value(key).to_s.split('/').last
  end

  # The named time binding cut down to the granularity its precision allows.
  def truncated_date(date_key, precision_key)
    Wikidate.new(binding_value(date_key), binding_value(precision_key)).as_string
  end
end
end
@@ -1,79 +1,8 @@
#!/bin/env ruby
# frozen_string_literal: true

require 'json'
require 'pry'
require 'rest-client'
require 'scraped'
require 'scraperwiki'
require_relative 'lib/cabinet'

# Wrapper around a SPARQL JSON response.
class Results < Scraped::JSON
  # Each binding row of the response, rendered as a Hash by Membership.
  field :memberships do
    rows = json[:results][:bindings]
    rows.map { |row| fragment(row => Membership).to_h }
  end
end

# One binding row of the SPARQL result, exposed as named fields.
class Membership < Scraped::JSON
  field(:id) { last_path_segment(:item) }

  field(:name) { binding_value(:itemLabel) }

  field(:position_id) { last_path_segment(:ps) }

  field(:position) { last_path_segment(:minister) }

  field(:label) { binding_value(:ministerLabel) }

  field(:start_date) { leading_ten_chars(:start) }

  field(:end_date) { leading_ten_chars(:end) }

  # A missing ordinal becomes 0, because nil.to_i is 0.
  field(:ordinal) { binding_value(:ordinal).to_i }

  private

  # The :value entry of the named binding, or nil when the binding is absent.
  def binding_value(key)
    json.dig(key, :value)
  end

  # Final "/"-separated segment of the named binding's value (nil when absent).
  def last_path_segment(key)
    binding_value(key).to_s.split('/').last
  end

  # First ten characters of the named binding's value ("" when absent).
  def leading_ten_chars(key)
    binding_value(key).to_s.slice(0, 10)
  end
end

# Query URL template requesting JSON output; `%s` is the slot for the
# CGI-escaped SPARQL query text.
WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql?format=json&query=%s'

# Runs +query+ against the Wikidata query service and parses the reply as CSV.
#
# @param query [String] SPARQL text
# @return [CSV::Table] rows with symbolised headers
# @raise [RuntimeError] when RestClient reports a failure
def sparql(query)
  # Loaded lazily: only this helper needs CSV.
  require 'csv'

  # Use the bare endpoint: WIKIDATA_SPARQL_URL carries a literal `%s` slot
  # and `format=json`, neither of which belongs in a request that passes the
  # query via `params:` and asks for CSV.
  endpoint = WIKIDATA_SPARQL_URL.split('?').first
  result = RestClient.get endpoint, accept: 'text/csv', params: { query: query }
  CSV.parse(result, headers: true, header_converters: :symbol)
rescue RestClient::Exception => e
  raise "Wikidata query #{query} failed: #{e.message}"
end

# SPARQL: for every item with a P39 statement whose value is wd:Q3044918,
# each P39 statement of that item whose value is a transitive subclass of
# wd:Q83307, with optional ordinal, start, end and cabinet qualifiers.
query = <<SPARQL
SELECT DISTINCT ?ps ?item ?itemLabel ?minister ?ministerLabel ?ordinal ?start ?end ?cabinet ?cabinetLabel
WHERE {
?item p:P39/ps:P39 wd:Q3044918 .
?item p:P39 ?ps .
?ps ps:P39 ?minister .
?minister wdt:P279* wd:Q83307 .
OPTIONAL { ?ps pq:P1545 ?ordinal }
OPTIONAL { ?ps pq:P580 ?start }
OPTIONAL { ?ps pq:P582 ?end }
OPTIONAL { ?ps pq:P5054 ?cabinet }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
SPARQL

# Fetch the result set and turn each binding row into a Hash.
endpoint = WIKIDATA_SPARQL_URL % CGI.escape(query)
response = Scraped::Request.new(url: endpoint).response
rows = Results.new(response: response).memberships

# Debug aid: print each row without nil values, keys sorted.
if ENV['MORPH_DEBUG']
  puts rows.map { |row| row.compact.sort.to_h }
end

# Best-effort drop of the previous table; any StandardError is ignored.
begin
  ScraperWiki.sqliteexecute('DROP TABLE data')
rescue StandardError
  nil
end

# NOTE(review): rows are saved directly here and then stored again through
# Scraped::Scraper, both indexed on position_id — confirm both are intended.
ScraperWiki.save_sqlite(%i[position_id], rows)
Scraped::Scraper.new('Q3044918' => CabinetScraper).store(:memberships, index: %i[position_id])

0 comments on commit 3467e40

Please sign in to comment.
You can’t perform that action at this time.