Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Mar 15, 2018
0 parents commit cdfed43
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
data.sqlite
29 changes: 29 additions & 0 deletions Gemfile
@@ -0,0 +1,29 @@
# frozen_string_literal: true

# Gem dependencies for this morph.io scraper. Every library declared below is
# installed and made available to the scraper; add more or pin different
# versions as needed.
# Find out more: https://morph.io/documentation/ruby

ruby '2.3.3'

source 'https://rubygems.org'

# Expand the `github: 'owner/repo'` shorthand into an HTTPS clone URL.
git_source(:github) do |repository|
  "https://github.com/#{repository}.git"
end

gem 'activesupport-inflector'
gem 'colorize'
gem 'everypolitician', github: 'everypolitician/everypolitician-ruby'
gem 'i18n'
gem 'mediawiki_api'
gem 'nokogiri'
gem 'open-uri-cached'
gem 'pry'
gem 'rest-client'
gem 'scraperwiki',
    github: 'openaustralia/scraperwiki-ruby',
    branch: 'morph_defaults'
gem 'sqlite_magic', github: 'openc/sqlite_magic'
gem 'wikidata-fetcher', '>=0.20.0', github: 'everypolitician/wikidata-fetcher'
gem 'wikisnakker', github: 'everypolitician/wikisnakker'

# Code-quality tooling lives in its own Bundler group.
gem 'rubocop', group: :quality
147 changes: 147 additions & 0 deletions Gemfile.lock
@@ -0,0 +1,147 @@
GIT
remote: https://github.com/everypolitician/everypolitician-ruby.git
revision: 363f7abb3bf2596d89cc048b98deb1a83ac163ff
specs:
everypolitician (0.20.0)
everypolitician-popolo (>= 0.8.0)
require_all

GIT
remote: https://github.com/everypolitician/wikidata-fetcher.git
revision: bbbd4826f0fd5dcbac93f0d0214a8cb960b526eb
specs:
wikidata-fetcher (0.21.0)
colorize
diskcached
json
mediawiki_api
nokogiri
require_all
rest-client
scraperwiki
wikidata-client (~> 0.0.7)
wikisnakker

GIT
remote: https://github.com/everypolitician/wikisnakker.git
revision: 72dbb8b13c9ec1c8512974fa8d86d93b01b90a15
specs:
wikisnakker (0.9.1)
require_all
yajl-ruby

GIT
remote: https://github.com/openaustralia/scraperwiki-ruby.git
revision: fc50176812505e463077d5c673d504a6a234aa78
branch: morph_defaults
specs:
scraperwiki (3.0.1)
httpclient
sqlite_magic

GIT
remote: https://github.com/openc/sqlite_magic.git
revision: 4df975eb4e9891de54f870077c83f63762af9bf9
specs:
sqlite_magic (0.0.6)
sqlite3

GEM
remote: https://rubygems.org/
specs:
activesupport-inflector (0.1.0)
ast (2.4.0)
coderay (1.1.2)
colorize (0.8.1)
concurrent-ruby (1.0.5)
diskcached (1.1.3)
domain_name (0.5.20170404)
unf (>= 0.0.5, < 1.0.0)
everypolitician-popolo (0.8.0)
require_all
excon (0.60.0)
faraday (0.14.0)
multipart-post (>= 1.2, < 3)
faraday-cookie_jar (0.0.6)
faraday (>= 0.7.4)
http-cookie (~> 1.0.0)
faraday_middleware (0.12.2)
faraday (>= 0.7.4, < 1.0)
hashie (3.5.7)
http-cookie (1.0.3)
domain_name (~> 0.5)
httpclient (2.8.3)
i18n (0.9.4)
concurrent-ruby (~> 1.0)
json (2.1.0)
mediawiki_api (0.7.1)
faraday (~> 0.9, >= 0.9.0)
faraday-cookie_jar (~> 0.0, >= 0.0.6)
faraday_middleware (~> 0.10, >= 0.10.0)
method_source (0.9.0)
mime-types (3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2016.0521)
mini_portile2 (2.3.0)
multipart-post (2.0.0)
netrc (0.11.0)
nokogiri (1.8.2)
mini_portile2 (~> 2.3.0)
open-uri-cached (0.0.5)
parallel (1.12.1)
parser (2.4.0.2)
ast (~> 2.3)
powerpack (0.1.1)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
rainbow (3.0.0)
require_all (1.5.0)
rest-client (2.0.2)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
rubocop (0.52.1)
parallel (~> 1.10)
parser (>= 2.4.0.2, < 3.0)
powerpack (~> 0.1)
rainbow (>= 2.2.2, < 4.0)
ruby-progressbar (~> 1.7)
unicode-display_width (~> 1.0, >= 1.0.1)
ruby-progressbar (1.9.0)
sqlite3 (1.3.13)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.5)
unicode-display_width (1.3.0)
wikidata-client (0.0.12)
excon (~> 0.40)
faraday (~> 0.9)
faraday_middleware (~> 0.9)
hashie (~> 3.3)
yajl-ruby (1.3.1)

PLATFORMS
ruby

DEPENDENCIES
activesupport-inflector
colorize
everypolitician!
i18n
mediawiki_api
nokogiri
open-uri-cached
pry
rest-client
rubocop
scraperwiki!
sqlite_magic!
wikidata-fetcher (>= 0.20.0)!
wikisnakker!

RUBY VERSION
ruby 2.3.3p222

BUNDLED WITH
1.16.0
62 changes: 62 additions & 0 deletions scraper.rb
@@ -0,0 +1,62 @@
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true

require 'csv'
require 'pry'
require 'scraperwiki'
require 'wikidata/fetcher'

WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql'

def sparql(query)
result = RestClient.get WIKIDATA_SPARQL_URL, accept: 'text/csv', params: { query: query }
CSV.parse(result, headers: true, header_converters: :symbol)
rescue RestClient::Exception => e
raise "Wikidata query #{query} failed: #{e.message}"
end

# Returns the final "/"-separated segment of +url+, e.g. "Q42" for
# "http://www.wikidata.org/entity/Q42".
#
# @param url [#to_s] value to split; nil is accepted
# @return [String, nil] the last segment, or nil when +url+ is nil or empty
def wikidata_id(url)
  segments = url.to_s.split('/')
  segments[-1]
end

# SPARQL query listing every item that has a P39 statement with value
# wd:Q33512801 and a P2937 qualifier of wd:Q29068722 (presumably the
# legislature position and one specific term -- confirm against Wikidata).
#
# Optional qualifiers on that statement supply ?start_date (P580),
# ?end_date (P582), ?constituency (P768), ?party (P4100), ?hasRole (P2868)
# and ?term (P2937, with ?termOrdinal taken from the term's P31/P1545).
# ?scraperName is the P1810 qualifier of a P973 statement on the item whose
# URL contains "www.na.gov.pk/". Labels are requested in English.
memberships_query = <<EOQ
SELECT DISTINCT ?item ?itemLabel ?start_date ?end_date ?hasRoleLabel ?constituency ?constituencyLabel ?party ?partyLabel ?term ?termLabel ?termOrdinal ?scraperName WHERE {
?item p:P39 ?statement.
?statement ps:P39 wd:Q33512801; pq:P2937 wd:Q29068722 .
OPTIONAL { ?statement pq:P580 ?start_date. }
OPTIONAL { ?statement pq:P582 ?end_date. }
OPTIONAL { ?statement pq:P768 ?constituency. }
OPTIONAL { ?statement pq:P4100 ?party. }
OPTIONAL { ?statement pq:P2868 ?hasRole. }
OPTIONAL {
?statement pq:P2937 ?term .
OPTIONAL { ?term p:P31/pq:P1545 ?termOrdinal . }
}
OPTIONAL {
?item p:P973 ?described .
?described ps:P973 ?url .
?described pq:P1810 ?scraperName .
FILTER(CONTAINS(LCASE(STR(?url)), "www.na.gov.pk/"))
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
EOQ

# Turn each row returned by the memberships query into one flat record.
#
# Column keys are the query's variable names as converted by the CSV header
# converter in `sparql` (lower-cased symbols), so ?scraperName arrives as
# :scrapername and ?itemLabel as :itemlabel.
#
# name: the query selects ?scraperName (the name attached to the item's
#   www.na.gov.pk URL). It was previously looked up under :officialname, a
#   column the query never returns, so the label was always used. Prefer the
#   site name when present and fall back to the item label otherwise.
# start_date / end_date: only the first ten characters are kept
#   (the YYYY-MM-DD part of a timestamp); missing values become ''.
# legislative_membership_type: '' when the statement has no role qualifier.
data = sparql(memberships_query).map(&:to_h).map do |row|
  site_name = row[:scrapername].to_s

  {
    id: wikidata_id(row[:item]),
    name: site_name.empty? ? row[:itemlabel] : site_name,
    start_date: row[:start_date].to_s[0..9],
    end_date: row[:end_date].to_s[0..9],
    legislative_membership_type: row[:hasrolelabel].to_s,
    constituency: row[:constituencylabel],
    constituency_id: wikidata_id(row[:constituency]),
    party: row[:partylabel],
    party_id: wikidata_id(row[:party]),
    term: row[:termordinal],
  }
end

# Start from an empty `data` table on every run. Any StandardError raised by
# the DROP is deliberately ignored (e.g. the table may not exist yet).
begin
  ScraperWiki.sqliteexecute('DROP TABLE data')
rescue StandardError
  nil
end

# Store the records, using :id as the unique key.
ScraperWiki.save_sqlite([:id], data)

0 comments on commit cdfed43

Please sign in to comment.