From c2410353cc225a52f8d9b19439ae207cc9a607db Mon Sep 17 00:00:00 2001
From: Tony Bowden
Date: Mon, 24 Apr 2017 09:57:01 +0100
Subject: [PATCH] Initial scraper

---
Review notes (ignored by `git am`):
- line structure of the patch restored; hunk sizes and diffstat updated
- lib/score.rb requires 'csv' itself; gender list is a frozen constant
- scraper.rb: portable shebang, require_relative instead of require_rel,
  explicit open-uri fetch, DROP TABLE IF EXISTS instead of `rescue nil`
- blob ids on the `index` lines are carried over unchanged

 lib/score.rb | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 scraper.rb   | 20 ++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 lib/score.rb
 create mode 100644 scraper.rb

diff --git a/lib/score.rb b/lib/score.rb
new file mode 100644
index 0000000..1092226
--- /dev/null
+++ b/lib/score.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+# Calculate results from a Gender-Balance API data file
+class GenderScore
+  # A single result (one row) from the CSV file
+  class Result
+    MIN_SELECTIONS = 5 # accept gender if at least this many votes
+    VOTE_THRESHOLD = 0.8 # and at least this ratio of votes were for it
+    GENDERS = %w(male female other).freeze # vote columns that can win
+
+    attr_reader :row
+
+    # row: one parsed CSV row, keyed by symbolised column header
+    def initialize(row)
+      @row = row
+    end
+
+    # Value of the row's uuid column
+    def uuid
+      row[:uuid]
+    end
+
+    # The gender whose share of the non-skipped votes reaches
+    # VOTE_THRESHOLD, or nil if there are too few votes or no consensus.
+    def gender
+      return if total < MIN_SELECTIONS
+      GENDERS.find { |g| percent(g) >= VOTE_THRESHOLD }
+    end
+
+    private
+
+    # Votes that expressed an opinion: skips are excluded
+    def total
+      row[:total].to_i - row[:skip].to_i
+    end
+
+    # Only reached after `gender` has checked total >= MIN_SELECTIONS,
+    # so the divisor is never zero.
+    def percent(gender)
+      row[gender.to_sym].to_f / total.to_f
+    end
+  end
+
+  # rawcsv: text of an entire CSV file (e.g. from the GenderBalance API)
+  def initialize(rawcsv)
+    @rawcsv = rawcsv
+  end
+
+  # One Hash per CSV row: the row's own columns plus a :gender key,
+  # which is nil when the votes are inconclusive.
+  def results
+    csv.map do |r|
+      r.to_h.merge(gender: Result.new(r).gender)
+    end
+  end
+
+  private
+
+  attr_reader :rawcsv
+
+  # Parsed once and memoised; headers become symbols, numeric cells numbers
+  def csv
+    @csv ||= CSV.parse(rawcsv, headers: true, converters: :numeric, header_converters: :symbol)
+  end
+end
diff --git a/scraper.rb b/scraper.rb
new file mode 100644
index 0000000..c9f6b2b
--- /dev/null
+++ b/scraper.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+# frozen_string_literal: true
+
+require 'csv'
+require 'everypolitician'
+require 'open-uri'
+require 'pry'
+require 'scraperwiki'
+
+require_relative 'lib/score'
+
+URL = 'http://www.gender-balance.org/export/Singapore/Parliament'
+# URI#read (from open-uri) fetches the body and closes the connection;
+# bare Kernel#open no longer accepts URLs on Ruby 3.0+.
+data = GenderScore.new(URI.parse(URL).read).results
+
+# Rebuild the table from scratch on every run
+ScraperWiki.sqliteexecute('DROP TABLE IF EXISTS data')
+ScraperWiki.save_sqlite(%i(uuid), data)