Permalink
Browse files

Modernise scraper

  • Loading branch information...
tmtmtmtm committed Mar 25, 2016
1 parent 1a097fb commit 8718a89c796082c3fc6244f396792dd8b26a6ea1
Showing with 2 additions and 27 deletions.
  1. +2 −27 scraper.rb
View
@@ -1,32 +1,7 @@
#!/bin/env ruby
# encoding: utf-8
require 'json'
require 'pry'
require 'rest-client'
require 'scraperwiki'
require 'wikidata/fetcher'
require 'mediawiki_api'
# Queries the morph.io data API for the distinct `wikiname` values stored by
# the tmtmtmtm/north-korea-assembly-wikipedia scraper.
#
# The API key is read from the MORPH_API_KEY environment variable.
#
# @return the JSON response body parsed with symbolized names (the caller in
#   this file treats it as a list of rows responding to `[:wikiname]`)
def members
  response = RestClient.get(
    'https://api.morph.io/tmtmtmtm/north-korea-assembly-wikipedia/data.json',
    params: {
      key: ENV["MORPH_API_KEY"],
      query: "select DISTINCT(wikiname) AS wikiname from data"
    }
  )
  JSON.parse(response, symbolize_names: true)
end
# Resolve each member's English page name via WikiData.ids_from_pages, fetch
# the data for every resulting entry (passing 'ko' to the fetcher) and save it
# to SQLite keyed on :id.
WikiData.ids_from_pages('en', members.map { |c| c[:wikiname] }).each do |page|
  # `page.last` is handed to the fetcher as the id — presumably the Wikidata
  # item id of a [name, id] pair; confirm against WikiData.ids_from_pages.
  data = begin
    WikiData::Fetcher.new(id: page.last).data('ko')
  rescue StandardError => e
    # Still best-effort, but say why the fetch failed rather than hiding it.
    warn "Fetch failed for #{page}: #{e.message}"
    nil
  end
  unless data
    warn "No data for #{page}"
    next
  end
  ScraperWiki.save_sqlite([:id], data)
end

# When a rebuilder URL is configured, POST an empty payload to it and log the
# response.
rebuilder_url = ENV['MORPH_REBUILDER_URL']
warn RestClient.post(rebuilder_url, {}) if rebuilder_url
# Collect the values of the 'wikiname' column from the
# tmtmtmtm/north-korea-assembly-wikipedia morph scraper and pass them to the
# Wikidata scraper as the English (:en) names.
EveryPolitician::Wikidata.scrape_wikidata(
  names: {
    en: EveryPolitician::Wikidata.morph_wikinames(
      source: 'tmtmtmtm/north-korea-assembly-wikipedia',
      column: 'wikiname'
    )
  }
)

0 comments on commit 8718a89

Please sign in to comment.