From bcc08cfc255d6625f6951d09f561e876c68a28f9 Mon Sep 17 00:00:00 2001
From: Tony Bowden
Date: Wed, 14 Dec 2016 09:54:28 +0000
Subject: [PATCH] initial scraper

---
Notes (free-form commentary; not part of the diff below):

  What this patch adds
  * A new file, scraper.rb.
  * It reopens Wikisnakker::Item and adds two constants plus a public
    `data` method:
      - SKIP is a Set of property ids (P17, P18, P910) that are neither
        reported nor exported.
      - WANT maps property ids to output keys
        (P31 => :type, P571 => :start_date, P576 => :end_date).
      - `data` first emits one `warn` line per property that is in neither
        SKIP nor WANT, then returns { id:, label: label(:en) } merged with
        one entry per wanted property, keyed by its WANT name and holding
        `send(property).value.to_s`.
  * It adds Wikidata::Areas, constructed with `ids:`. `areas` returns the
    `data` of every item from `Wikisnakker::Item.find(ids)` (the lookup is
    cached in @wsitems), with nil entries removed by `compact`.

  Review notes (assumptions to confirm; nothing below has been changed)
  * `id`, `label`, `properties` and the per-property readers used through
    `send(p)` are not defined in this patch; presumably they come from the
    wikisnakker gem's own Wikisnakker::Item -- verify.
  * SKIP is built with `to_set`, but no `require 'set'` is visible here;
    presumably one of the required gems loads it -- confirm.
  * WANT is frozen while SKIP is not; looks unintentional -- confirm.
  * The shebang is `/bin/env`; `/usr/bin/env` is the more commonly
    available location -- confirm for the target hosts.
  * `require 'wikisnakker'` appears inside `module Wikidata`, after
    Wikisnakker::Item has already been reopened; this presumably relies on
    'wikidata/fetcher' having loaded the gem first -- verify.
  * As seen here the hunk stops at `query = <` after 71 of the declared
    84 added lines; the remainder of the file is not visible.

 scraper.rb | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 scraper.rb

diff --git a/scraper.rb b/scraper.rb
new file mode 100644
index 0000000..18e7af5
--- /dev/null
+++ b/scraper.rb
@@ -0,0 +1,84 @@
+#!/bin/env ruby
+# encoding: utf-8
+# frozen_string_literal: true
+
+require 'pry'
+require 'scraperwiki'
+require 'wikidata/fetcher'
+
+module Wikisnakker
+  class Item
+    SKIP = %i(P17 P18 P910).to_set
+    WANT = {
+      P31: :type,
+      P571: :start_date,
+      P576: :end_date,
+    }.freeze
+
+    def data
+      unknown_properties.each do |p|
+        warn "Unknown property for #{id}: #{p} = #{send(p).value}"
+      end
+
+      base_data.merge(wanted_data)
+    end
+
+    private
+
+    def base_data
+      {
+        id: id,
+        label: label(:en),
+      }
+    end
+
+    def unknown_properties
+      properties.reject { |p| SKIP.include?(p) || WANT.key?(p) }
+    end
+
+    def wanted_properties
+      properties.select { |p| WANT.key?(p) }
+    end
+
+    def wanted_data
+      wanted_properties.map { |p| [WANT[p], send(p).value.to_s] }.to_h
+    end
+  end
+end
+
+module Wikidata
+  require 'wikisnakker'
+
+  class Areas
+    def initialize(ids:)
+      @ids = ids
+    end
+
+    def areas
+      wikisnakker_items.flat_map(&:data).compact
+    end
+
+    private
+
+    attr_reader :ids
+
+    def wikisnakker_items
+      @wsitems ||= Wikisnakker::Item.find(ids)
+    end
+  end
+end
+
+query = <