From 4f94dcf6968182e82b5e4677d279bcd7f1a1f41e Mon Sep 17 00:00:00 2001
From: Tony Bowden
Date: Tue, 16 Oct 2018 06:05:28 +0100
Subject: [PATCH] Initial scraper

---
 scraper.rb | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100755 scraper.rb

diff --git a/scraper.rb b/scraper.rb
new file mode 100755
index 0000000..4c68d76
--- /dev/null
+++ b/scraper.rb
@@ -0,0 +1,88 @@
+#!/bin/env ruby
+# frozen_string_literal: true
+
+require 'csv'
+require 'json'
+require 'rest-client'
+require 'scraped'
+require 'scraperwiki'
+
+# Wraps a SPARQL JSON result document: each entry under results.bindings is
+# handed to Term and exposed as a plain hash.
+class Results < Scraped::JSON
+  field :terms do
+    json[:results][:bindings].map { |result| fragment(result => Term).to_h }
+  end
+end
+
+# A single binding from the result set, reduced to simple scalar fields.
+class Term < Scraped::JSON
+  # Last '/'-separated segment of the :ps value (empty input gives nil).
+  field :statement do
+    json.dig(:ps, :value).to_s.split('/').last
+  end
+
+  # :ordinal value as an Integer; a missing value becomes 0 via nil.to_i.
+  field :id do
+    json.dig(:ordinal, :value).to_i
+  end
+
+  # :itemLabel value, passed through untouched.
+  field :name do
+    json.dig(:itemLabel, :value)
+  end
+
+  # First ten characters of :start_date (presumably the YYYY-MM-DD prefix).
+  field :start_date do
+    json.dig(:start_date, :value).to_s[0..9]
+  end
+
+  # First ten characters of :end_date (presumably the YYYY-MM-DD prefix).
+  field :end_date do
+    json.dig(:end_date, :value).to_s[0..9]
+  end
+
+  # Last '/'-separated segment of the :item value.
+  field :wikidata do
+    json.dig(:item, :value).to_s.split('/').last
+  end
+end
+
+# Format template for JSON queries: `%s` is a placeholder for the escaped query.
+WIKIDATA_SPARQL_URL = 'https://query.wikidata.org/sparql?format=json&query=%s'
+
+# Runs +query+ against the Wikidata SPARQL endpoint and parses the reply as CSV.
+#
+# @param query [String] SPARQL query text
+# @return [CSV::Table] rows keyed by symbolized column headers
+# @raise [RuntimeError] when the HTTP request fails
+def sparql(query)
+  # Strip the template's query string: sending it verbatim would pass a literal
+  # `%s` and `format=json`, which conflicts with the CSV parsing below.
+  endpoint = WIKIDATA_SPARQL_URL.split('?').first
+  result = RestClient.get endpoint, accept: 'text/csv', params: { query: query }
+  CSV.parse(result, headers: true, header_converters: :symbol)
+rescue RestClient::Exception => e
+  raise "Wikidata query #{query} failed: #{e.message}"
+end
+
+query = <