/
scraper.rb
84 lines (66 loc) · 1.43 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true
require 'set'

require 'pry'
require 'scraperwiki'
require 'wikidata/fetcher'
require 'wikisnakker'
module Wikisnakker
  # Adds a flat, row-style export (#data) to Wikisnakker::Item.
  #
  # The methods below call `id`, `label`, `properties` and one reader per
  # property id (invoked through `send`, each returning an object that
  # responds to `value`). None of those are defined in this file, so they
  # are expected to be provided by the rest of the class.
  class Item
    # Property ids that are ignored silently: neither exported nor warned about.
    # Frozen so the shared constant cannot be mutated at runtime.
    SKIP = %i[P17 P18 P910].to_set.freeze

    # Property id => key under which its value is exported in the row.
    WANT = {
      P31: :type,
      P571: :start_date,
      P576: :end_date,
    }.freeze

    # Builds the export row for this item.
    #
    # Side effect: emits one warning (Kernel#warn) for every property that
    # appears in neither SKIP nor WANT.
    #
    # @return [Hash] :id and :label, plus one entry per WANT property present
    #   on the item (value converted with #to_s)
    def data
      unknown_properties.each do |property|
        warn "Unknown property for #{id}: #{property} = #{send(property).value}"
      end
      base_data.merge(wanted_data)
    end

    private

    # Columns every row carries regardless of which properties exist.
    def base_data
      {
        id: id,
        label: label(:en),
      }
    end

    # Properties of this item that are listed in neither SKIP nor WANT.
    def unknown_properties
      properties.reject { |property| SKIP.include?(property) || WANT.key?(property) }
    end

    # Properties of this item that have an export key in WANT.
    def wanted_properties
      properties.select { |property| WANT.key?(property) }
    end

    # Export key => stringified value for each wanted property.
    def wanted_data
      wanted_properties.map { |property| [WANT[property], send(property).value.to_s] }.to_h
    end
  end
end
module Wikidata
  # Turns a list of item ids into export rows, one per item, by looking the
  # items up through Wikisnakker::Item.find and calling #data on each.
  class Areas
    # @param ids [Array] ids passed unchanged to Wikisnakker::Item.find
    def initialize(ids:)
      @ids = ids
    end

    # @return [Array] the #data result of every looked-up item, nils removed
    def areas
      # Item#data yields a single Hash per item, so a plain map is enough;
      # there is nothing to flatten.
      wikisnakker_items.map(&:data).compact
    end

    private

    attr_reader :ids

    # Looked up once and cached for the lifetime of this object.
    def wikisnakker_items
      @wikisnakker_items ||= Wikisnakker::Item.find(ids)
    end
  end
end
# SPARQL template: selects every ?item whose P31/P279* path reaches
# wd:Q192611 and whose P17 is the Q-id built from the %s placeholder.
sparql_template = <<QUERY
SELECT DISTINCT ?item
WHERE
{
?item wdt:P31/wdt:P279* wd:Q192611 .
?item wdt:P17 wd:Q%s .
}
QUERY

# Fill the placeholder with 191 and run the query; abort when it returns
# nothing rather than saving an empty data set.
area_ids = EveryPolitician::Wikidata.sparql(format(sparql_template, 191))
raise 'No ids' if area_ids.empty?

# One row per returned id, stored with :id as the unique key.
rows = Wikidata::Areas.new(ids: area_ids).areas
ScraperWiki.save_sqlite([:id], rows)