Skip to content

Commit

Permalink
Find discrepancies between minutes and vote data (154)
Browse files Browse the repository at this point in the history
  • Loading branch information
jarib committed Mar 10, 2013
1 parent 6310e91 commit bfe96d6
Showing 1 changed file with 216 additions and 61 deletions.
277 changes: 216 additions & 61 deletions tools/154_read_vote.rb
@@ -1,69 +1,125 @@
#!/usr/bin/env ruby

require 'pp'
require 'csv'
require 'json'
require 'time'
require 'pathname'
require 'pry'

class IssueFinder
def self.instance
@instance ||= new
end
class VoteReader
attr_reader :identifier

def self.find(kartnr, saknr)
instance.index[[kartnr, saknr]]
end
class << self
include Enumerable

def initialize
@data ||= CSV.parse(File.read(("./rawdata/Fra NSD/154_saksopplysninger.csv")))
end
# Prints, for every minutes reference, the time and tally of each vote that
# points at it.
#
# Output goes to stdout: one line with the minutes reference, followed by one
# tab-prefixed line per vote (ordered by vote time) showing the timestamp
# left-justified to 40 characters and the inspected counts.
#
# The column-name list that had ended up in the middle of this method is not
# repeated here: assigning a constant inside a method body is a SyntaxError,
# and IssueFinder already defines COLUMNS for its own use.
def print_counts
  by_minutes.each do |minutes, votes|
    puts "#{minutes}"

    votes.sort_by { |e| e.time }.each do |vote|
      puts "\t #{vote.time.to_s.ljust(40)}: #{vote.counts.inspect}"
    end
  end
end

def index
@index ||= @data.inject({}) do |mem, var|
issue = {}
# Groups every vote yielded by +each+ under the minutes reference it reports.
#
# Returns an Array of [minutes, votes] pairs ordered by the minutes value,
# where votes is the Array of vote objects sharing that reference.
#
# Leftover lines from the old CSV indexing code (an unterminated `do` and
# `if` touching undefined locals) are dropped; they were never part of this
# method and left it unparseable.
def by_minutes
  # Block default gives each minutes key its own fresh Array.
  groups = Hash.new { |hash, key| hash[key] = [] }

  each do |votes|
    votes.each { |vote| groups[vote.minutes] << vote }
  end

  groups.sort_by { |m, v| m }
end

# Compares the recorded for/against tallies with the numbers quoted in the
# minutes text and reports every vote where they disagree.
#
# For each minutes reference returned by by_minutes, the text is obtained via
# minutes_text_for and split into per-timestamp passages. A vote is flagged
# when a passage exists for its "%H.%M.%S" time and the integers in that
# passage do not include both counts[:for] and counts[:against]. Votes with
# no matching passage are not checked but still count towards the total.
#
# Prints one line per mismatch (an HTML <tr> when ENV['HTML'] is set),
# followed by a summary "errors / votes = percent%".
#
# Raises RuntimeError when a download or PDF conversion fails.
def find_errors
  cache = Pathname.new(File.expand_path('.minutes-cache'))
  cache.mkdir unless cache.exist?

  vote_count = 0
  error_count = 0

  by_minutes.each do |minutes, votes|
    vote_count += votes.size
    minute_votes = parse_minute_votes(minutes_text_for(cache, minutes).split("\n"))

    votes.sort_by { |e| e.time }.each do |vote|
      mvote = minute_votes[vote.time.strftime("%H.%M.%S")]
      counts = vote.counts
      next unless mvote

      nums = mvote.scan(/\d+/).map { |e| e.to_i }
      next if nums.include?(counts[:for]) && nums.include?(counts[:against])

      error_count += 1
      report_mismatch(minutes, vote, counts, mvote)
    end
  end

  # With no votes at all the float division would yield NaN; report 0.0 instead.
  percentage = vote_count.zero? ? 0.0 : error_count * 100 / vote_count.to_f
  puts "#{error_count} / #{vote_count} = #{percentage}%"
end

# Returns the extracted text of the minutes document at +minutes+, downloading
# and converting it into +cache+ (a Pathname) unless a text file is already there.
def minutes_text_for(cache, minutes)
  local_minutes = cache.join(File.basename(minutes))
  # Strip a trailing ".pdf" and always append ".txt", so the text file can
  # never share a path with the downloaded document.
  local_text = cache.join(File.basename(minutes, ".pdf") + ".txt")

  unless local_text.exist?
    unless local_minutes.exist?
      # Argument-list form: no shell is involved, so characters such as "&"
      # or ";" in the URL cannot be interpreted as shell syntax.
      ok = system("curl", "-s", "-o", local_minutes.to_s, minutes)
      raise "unable to download #{minutes}" unless ok
    end

    # Without a shell, "~" has to be expanded explicitly.
    pdfbox = File.expand_path("~/Downloads/pdfbox-app-1.7.1.jar")
    ok = system("java", "-jar", pdfbox, "ExtractText", local_minutes.to_s, local_text.to_s)
    raise "could not convert #{local_minutes} to text" unless ok
  end

  local_text.read
end

# Maps "HH.MM.SS" timestamps to the text found between a "Vo t e r i n g :"
# heading and the following "Voteringsutskrift kl. ..." line. A passage that
# contains "enstemmig bifalt" is discarded.
def parse_minute_votes(lines)
  minute_votes = {}
  current_vote = nil

  lines.each do |line|
    case line
    when "Vo t e r i n g :"
      current_vote = []
    when /Voteringsutskrift kl\. (\d{2}\.\d{2}\.\d{2})/
      next unless current_vote

      minute_votes[$1] = current_vote.join(" ")
      current_vote = nil
    when /enstemmig bifalt/
      current_vote = nil
    else
      current_vote << line if current_vote
    end
  end

  minute_votes
end

# Prints a single mismatch, as an HTML table row when ENV['HTML'] is set and
# as a plain "FEIL: ..." line otherwise.
def report_mismatch(minutes, vote, counts, mvote)
  if ENV['HTML']
    # The literal is kept flush-left so the emitted markup stays unchanged.
    puts %{
<tr>
<td><a href="#{minutes}">#{vote.time}</a></td>
<td>#{counts[:for]}</td>
<td>#{counts[:against]}</td>
<td>#{mvote}</td>
</tr>
}
  else
    puts "FEIL: #{vote.time} | for=#{counts[:for]}, mot=#{counts[:against]} | #{mvote}"
  end
end

private :minutes_text_for, :parse_minute_votes, :report_mismatch

mem
# Yields the results of every vote file matching
# ./rawdata/stortinget-voteringer-154/*.154, one file at a time.
#
# The two numbers captured from "SK<digits>S<digits>" in the file name are
# passed, as strings, to VoteReader.new; the value yielded is that reader's
# +results+.
#
# Returns an Enumerator when called without a block, so the method no longer
# fails on a block-less call.
#
# Raises RuntimeError when a matching file name does not contain the
# SK...S... pattern.
def each(&blk)
  return enum_for(:each) unless blk

  Dir['./rawdata/stortinget-voteringer-154/*.154'].each do |path|
    match = File.basename(path).match(/SK(\d+)S(\d+)/)
    raise "bad path: #{path.inspect}" unless match

    yield VoteReader.new(match[1], match[2]).results
  end
end
end
end

class VoteReader
attr_reader :identifier

def initialize(kartnr, saksnr)
def initialize(kartnr, saknr)
@kartnr = kartnr
@saksnr = saksnr
@identifier = "SK#{kartnr}S#{saksnr}"
@saknr = saknr
@identifier = "SK#{kartnr}S#{saknr}"
end

def results
Expand All @@ -76,25 +132,42 @@ def results
end
end

result.map do |time, results|
Vote.new(time, results, issue_for(time))
result = result.map do |time, results|
begin
Vote.new(time, results, issue_for(time))
rescue NoIssueFoundError => ex
# trololol
vote = Vote.allocate
vote.instance_variable_set("@results", results)
counts = vote.counts

STDERR.puts "#{ex.message}: #{counts.inspect}"
end
end

result.compact
end

private

class NoIssueFoundError < StandardError
end

# Returns the single issue row registered for +time+.
#
# time - the key to look up in the Hash returned by #issues.
#
# Raises NoIssueFoundError when nothing is registered for +time+; the message
# lists the known timestamps and the distinct :link values to help track down
# the mismatch. Raises RuntimeError when more than one row shares the
# timestamp.
#
# The stale generic `raise` statements are removed: they fired before the
# specific ones, so NoIssueFoundError could never reach the rescue in #results.
def issue_for(time)
  issue = issues[time]
  unless issue
    raise NoIssueFoundError, "no issue found for kartnr=#{@kartnr} saknr=#{@saknr} @ #{time}, found: #{issues.keys}\n #{issues.values.map { |e| e.first[:link] }.uniq}"
  end

  unless issue.size == 1
    raise "multiple issues for kartnr=#{@kartnr} saknr=#{@saknr} @ #{time}"
  end

  issue.first
end

# Returns the issue rows for this reader's kartnr/saknr, grouped by their
# :time value (Hash of time => Array of rows). Memoized in @issues.
#
# The stale lookup through @saksnr is removed: because of `||=` it memoized
# its own result and the corrected @saknr lookup never ran. A nil answer from
# IssueFinder.find (unknown kartnr/saknr pair) is treated as "no rows", so
# callers get an empty Hash instead of a NoMethodError.
def issues
  @issues ||= (IssueFinder.find(@kartnr, @saknr) || []).group_by { |data| data[:time] }
end

def representatives
Expand Down Expand Up @@ -134,12 +207,32 @@ def votes
end

class Vote
attr_reader :time

def initialize(time, results, issue)
@time = time
@results = results
@issue = issue

check_handicap_seat
unless time == issue[:time]
raise "time #{time.inspect} doesn't match issue: #{issue.inspect}"
end

@time = Time.parse(issue.values_at(:date, :time).join(' '))

fix_handicap_seat
end

# Returns the :link entry of the issue attached to this vote. Elsewhere in
# this file the value is used as the grouping key for votes and as the URL
# from which the minutes document is downloaded.
#
# Uses fetch, so a missing :link raises instead of returning nil
# (KeyError, assuming @issue is a Hash as built by IssueFinder).
def minutes
@issue.fetch(:link)
end

# Returns the :saknr entry of the issue attached to this vote.
#
# Uses fetch, so a missing :saknr raises instead of returning nil
# (KeyError, assuming @issue is a Hash as built by IssueFinder).
def saknr
@issue.fetch(:saknr)
end

# Returns the :kartnr entry of the issue attached to this vote.
#
# Uses fetch, so a missing :kartnr raises instead of returning nil
# (KeyError, assuming @issue is a Hash as built by IssueFinder).
def kartnr
@issue.fetch(:kartnr)
end

def counts
Expand All @@ -163,7 +256,7 @@ def counts
)
end

def print
def print(include_votes = true)
puts "Tidspunkt : #{@time.inspect}"
puts "For : #{counts[:for]}"
puts "Mot : #{counts[:against]}"
Expand All @@ -178,7 +271,7 @@ def print

private

def check_handicap_seat
def fix_handicap_seat
s62 = @results.find { |e| e[:seat] == 62 }
s172 = @results.find { |e| e[:seat] == 172 }

Expand All @@ -199,13 +292,75 @@ def check_handicap_seat
end
end

# Looks up issue metadata rows from ./rawdata/Fra NSD/154_saksopplysninger.csv.
#
# The CSV is parsed once per instance; rows are exposed as Hashes keyed by the
# symbolized COLUMNS names and indexed by their [kartnr, saknr] pair.
class IssueFinder
  # Column names, in the order the fields appear in each CSV row.
  COLUMNS = %w[
    period
    date
    time
    session
    room
    kartnr
    saknr
    votnr
    issue_type
    vote_type
    committee
    issue_reference
    issue_register
    topic
    president
    president_party
    internal_comment
    link
  ].freeze

  # Returns the shared, lazily created finder.
  def self.instance
    @instance ||= new
  end

  # Returns the Array of issue Hashes registered for the given kartnr/saknr
  # pair (both compared as the stripped strings read from the CSV), or nil
  # when the pair is unknown.
  def self.find(kartnr, saknr)
    instance.index[[kartnr, saknr]]
  end

  # Reads and parses the CSV file. Raises Errno::ENOENT when it is missing.
  def initialize
    @data = CSV.parse(File.read("./rawdata/Fra NSD/154_saksopplysninger.csv"))
  end

  # Returns a Hash mapping [kartnr, saknr] to the Array of issue Hashes for
  # that pair, in file order. Built on first use and memoized.
  #
  # Raises IndexError when a row has more fields than COLUMNS.
  def index
    @index ||= @data.each_with_object({}) do |row, by_key|
      issue = {}

      row.each_with_index do |col, idx|
        # CSV yields nil for an empty unquoted field; keep it as nil rather
        # than crashing on nil.strip.
        issue[COLUMNS.fetch(idx).to_sym] = col && col.strip
      end

      # Pad a single-digit zero hour ("0:05:00" -> "00:05:00").
      if issue[:time] =~ /^0:/
        issue[:time] = "0#{issue[:time]}"
      end

      (by_key[[issue[:kartnr], issue[:saknr]]] ||= []) << issue
    end
  end
end


if __FILE__ == $0
if ARGV.size == 2
kartnr, saksnr = ARGV
results = VoteReader.new(kartnr, saksnr).results
kartnr, saknr = ARGV
results = VoteReader.new(kartnr, saknr).results
results.first.print
else
# TODO: read all
abort "USAGE: #{$0} <kartnummer> <saksnummer>"
elsif ARGV.size == 1
cmd = ARGV.first
case cmd
when 'print-counts'
VoteReader.print_counts
when 'find-errors'
VoteReader.find_errors
else
raise "unknown command: #{cmd.inspect}"
end
end
end
end

0 comments on commit bfe96d6

Please sign in to comment.