Skip to content

Commit

Permalink
Initial scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed Apr 11, 2017
1 parent 76897b1 commit f5a881a
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
53 changes: 53 additions & 0 deletions lib/score.rb
@@ -0,0 +1,53 @@
# frozen_string_literal: true
# Calculate results from a Gender-Balance API data file
class GenderScore
  # Wraps one row of the CSV file and decides which gender, if any, its votes support
  class Result
    MIN_SELECTIONS = 5 # fewest non-skipped votes required before any gender is accepted
    VOTE_THRESHOLD = 0.8 # share of those votes a single gender must reach

    attr_reader :row

    def initialize(row)
      @row = row
    end

    # The value stored under the row's :uuid key
    def uuid
      row[:uuid]
    end

    # The first of male/female/other whose share of the non-skipped votes is
    # at least VOTE_THRESHOLD; nil when there are fewer than MIN_SELECTIONS
    # such votes or when no option reaches the threshold.
    def gender
      return nil unless total >= MIN_SELECTIONS

      %w(male female other).each do |candidate|
        return candidate if percent(candidate) >= VOTE_THRESHOLD
      end
      nil
    end

    private

    # Votes that actually picked an option: everything except the skips
    def total
      all_votes = row[:total].to_i
      skipped = row[:skip].to_i
      all_votes - skipped
    end

    # Fraction of the non-skipped votes that went to the given option
    def percent(gender)
      votes_for = row[gender.to_sym].to_f
      votes_for / total.to_f
    end
  end

  # Scores every row of a CSV document (e.g. one fetched from the GenderBalance API)
  def initialize(rawcsv)
    @rawcsv = rawcsv
  end

  # One Hash per CSV row: the row's own columns plus a :gender entry holding
  # the verdict computed by Result (nil when undecided).
  def results
    csv.map do |record|
      columns = record.to_h
      columns.merge(gender: Result.new(record).gender)
    end
  end

  private

  attr_reader :rawcsv

  # The parsed CSV, built on first use and cached afterwards. Headers become
  # symbols and numeric-looking fields become numbers.
  def csv
    return @csv if @csv

    @csv = CSV.parse(
      rawcsv,
      headers: true,
      converters: :numeric,
      header_converters: :symbol
    )
  end
end
16 changes: 16 additions & 0 deletions scraper.rb
@@ -0,0 +1,16 @@
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true

require 'csv'
require 'everypolitician'
require 'open-uri'
require 'pry'
require 'scraperwiki'

require_rel 'lib/score'

# Address of the CSV export that is downloaded and scored below.
URL = 'http://www.gender-balance.org/export/Australia/Senate'

# Download through open-uri's URI#read instead of Kernel#open: Kernel#open
# only fetches URLs on Rubies older than 3.0 (and only once open-uri has been
# loaded), whereas URI#read works on old and new Rubies alike and closes the
# underlying stream as soon as the body has been read.
data = GenderScore.new(URI.parse(URL).read).results

# Best-effort reset of the table before saving. Any StandardError from the
# DROP is deliberately ignored -- presumably it fails when the table does not
# exist yet (e.g. on the first run); confirm against the ScraperWiki library.
ScraperWiki.sqliteexecute('DROP TABLE data') rescue nil
ScraperWiki.save_sqlite(%i(uuid), data)

0 comments on commit f5a881a

Please sign in to comment.