In [1]:
import glob
import io
import json
import re
import time
import os
import urllib.parse
import logging
import zipfile

import IPython.core.display

from voteutil.irv import IRV
from voteutil.vrr import VRR
from voteutil.vrr2 import VRR2
from voteutil.pickone import PickOne
from voteutil.count import processFile

# Module-level logger (not referenced by any of the cells visible in this notebook).
logger = logging.getLogger(__name__)

# https://sfelections.sfgov.org/november-3-2020-election-results-detailed-reports
# Final Report
# https://www.sfelections.org/results/20201103/data/20201201/CVR_Export_20201201091840.zip
# The archive is looked up relative to the current working directory and is
# left open: later cells read members from it via zf.infolist() / zf.open().
zf = zipfile.ZipFile('CVR_Export_20201201091840.zip')
# Preliminary Report 16
# https://www.sfelections.org/results/20201103/data/20201124/CVR_Export_20201124150514.zip
# zf = zipfile.ZipFile('CVR_Export_20201124150514.zip')
# Preliminary Report 15
# https://www.sfelections.org/results/20201103/data/20201119/CVR_Export_20201119152920.zip
#zf = zipfile.ZipFile('CVR_Export_20201119152920.zip')
# Preliminary Report 14
# https://www.sfelections.org/results/20201103/data/20201117/CVR_Export_20201117160040.zip
#zf = zipfile.ZipFile('CVR_Export_20201117160040.zip')
# Preliminary Report 13
# https://www.sfelections.org/results/20201103/data/20201113/CVR_Export_20201113155705.zip
#zf = zipfile.ZipFile('CVR_Export_20201113155705.zip')
# Preliminary Report 12
# https://www.sfelections.org/results/20201103/data/20201112/CVR_Export_20201112161239.zip
#zf = zipfile.ZipFile('CVR_Export_20201112161239.zip')

In [70]:
def phtml(x):
    """Show ``x`` in the notebook as rendered HTML rather than plain text."""
    markup = IPython.core.display.HTML(x)
    IPython.core.display.display(markup)

In [2]:
# One ZipInfo entry per member of the CVR export archive.
files = zf.infolist()

In [3]:
# Split the archive members into ballot files (CvrExport_<n>.json, keyed by
# their integer <n>) and everything else (manifests / configuration).
# The dot is escaped and the whole member name must match, so look-alike names
# such as 'CvrExport_1xjson' or 'CvrExport_1.json.bak' are not treated as
# ballot files.
cvrpat = re.compile(r'CvrExport_(\d+)\.json')
cvrs = {}
otherFiles = []
for zi in files:
    m = cvrpat.fullmatch(zi.filename)
    if m:
        cvrs[int(m.group(1))] = zi
    else:
        otherFiles.append(zi)

In [4]:
# List the non-ballot members of the archive, one name per line, alphabetically.
print(*sorted(zi.filename for zi in otherFiles), sep='\n')

BallotTypeContestManifest.json
BallotTypeManifest.json
CandidateManifest.json
Configuration.json
ContestManifest.json
CountingGroupManifest.json
DistrictManifest.json
DistrictPrecinctPortionManifest.json
DistrictTypeManifest.json
ElectionEventManifest.json
OutstackConditionManifest.json
PartyManifest.json
PrecinctManifest.json
PrecinctPortionManifest.json
TabulatorManifest.json


In [5]:
# Build the candidate lookup: manifest 'Id' -> 'Description' (the display name).
candidates = {}
with zf.open('CandidateManifest.json') as fin:
    ob = json.load(fin)
for rec in ob['List']:
    cand = rec['Id']
    if cand in candidates:
        # A repeated Id is reported but not fatal; the later entry replaces
        # the earlier one in the lookup.
        print('dup candidate {}'.format(cand))
    candidates[cand] = rec['Description']

In [6]:
# Contest Id -> contest manifest record, restricted to contests that allow more
# than one rank (i.e. the ranked-choice contests).
rcvContests = {}
with zf.open('ContestManifest.json') as fin:
    ob = json.load(fin)
rcvContests.update(
    (contest['Id'], contest)
    for contest in ob['List']
    if contest['NumOfRanks'] > 1
)

In [7]:
# One line per ranked-choice contest: its Id and description.
print(
    *('(ContestId={}) {}'.format(rc['Id'], rc['Description']) for rc in rcvContests.values()),
    sep='\n'
)

(ContestId=12) BOARD OF SUPERVISORS DISTRICT 1
(ContestId=13) BOARD OF SUPERVISORS DISTRICT 3
(ContestId=14) BOARD OF SUPERVISORS DISTRICT 5
(ContestId=15) BOARD OF SUPERVISORS DISTRICT 7
(ContestId=16) BOARD OF SUPERVISORS DISTRICT 9
(ContestId=17) BOARD OF SUPERVISORS DISTRICT 11


In [8]:
# Load one ballot file as a sample so its top-level metadata can be inspected.
# `cvr` is reassigned for every file by the main extract loop later on.
with zf.open('CvrExport_1.json') as fin:
    cvr = json.load(fin)

In [9]:
# Export format version and election name recorded in the sample file.
print({'Version': cvr['Version'], 'ElectionId': cvr['ElectionId']})

{'Version': '5.10.50.85', 'ElectionId': 'San Francisco Consolidated General Election'}


In [10]:
# main extract
# Wall-clock start; the elapsed time is reported once the extract finishes.
start = time.time()
# Ids of the ranked-choice contests, as a set for membership tests in the
# extract loop.
rcvContestIds = set(rcvContests.keys())
# contestId -> open text file receiving that contest's url-encoded ballots
nameqouts = {}
def wnameq(contestId, line):
    """Write ``line`` to the per-contest ``<Description>.nameq`` file.

    The file is created (mode ``'wt'``) on the first write for ``contestId``
    and the handle is cached in ``nameqouts``; this function never closes it,
    so the caller must close the cached handles when done.
    """
    try:
        sink = nameqouts[contestId]
    except KeyError:
        description = rcvContests[contestId]['Description']
        sink = nameqouts[contestId] = open(description + '.nameq', 'wt')
    sink.write(line)
# contestId -> open text file receiving that contest's raw JSON records
rawouts = {}
def wrawcont(contestId, rec):
    """Append ``rec`` as one JSON line to the per-contest ``<Description>.json`` file.

    The file is created (mode ``'wt'``) on the first write for ``contestId``
    and the handle is cached in ``rawouts``; this function never closes it,
    so the caller must close the cached handles when done.
    """
    try:
        sink = rawouts[contestId]
    except KeyError:
        description = rcvContests[contestId]['Description']
        sink = rawouts[contestId] = open(description + '.json', 'wt')
    sink.write(json.dumps(rec) + '\n')
# Walk every CvrExport_<n>.json member. For each ranked-choice contest found on
# a card, save the raw contest record (wrawcont) and a url-encoded
# "name=rank&..." ballot line (wnameq) into that contest's files.
# NOTE(review): only ses['Original'] is read; if the export can also carry
# adjudicated/modified card data, it is ignored here — confirm.
count = 0
try:
    for path in cvrs.values():
        with zf.open(path) as fin:
            cvr = json.load(fin)
        for ses in cvr['Sessions']:
            for card in ses['Original']['Cards']:
                for cont in card['Contests']:
                    if cont['Id'] not in rcvContestIds:
                        continue
                    wrawcont(cont['Id'], cont)
                    # Only marks flagged IsVote make it into the ballot line;
                    # other marks are dropped silently.
                    # NOTE(review): a candidate marked at several ranks keeps
                    # only the rank of the last such mark in the list —
                    # confirm that is the intended resolution.
                    vote = {}
                    for mark in cont['Marks']:
                        if mark['IsVote']:
                            name = candidates[mark['CandidateId']]
                            vote[name] = mark['Rank']
                    line = urllib.parse.urlencode(vote) + '\n'
                    wnameq(cont['Id'], line)
                    count += 1
finally:
    # Close (and so flush) every per-contest output file even when the
    # extract fails part-way, instead of leaking the open handles.
    for fout in list(nameqouts.values()) + list(rawouts.values()):
        fout.close()
print('Done: {} votes ({:.1f} seconds)'.format(count, time.time() - start))

Done: 233872 votes (94.9 seconds)


In [11]:
# generate HTML reports
import voteutil.rcvmatters
import glob

# Feed every per-contest .nameq file in the current directory to
# voteutil.rcvmatters.testFile. glob.glob() returns names in arbitrary order,
# so the reports are not produced in contest order.
for fname in glob.glob('*.nameq'):
    voteutil.rcvmatters.testFile(fname)

BOARD OF SUPERVISORS DISTRICT 11.nameq.html
Virtual Round Robin (alt impl) [('CONNIE CHAN', 0), ('MARJAN PHILHOUR', -1), ('DAVID E. LEE', -2), ('VERONICA SHINZATO', -3), ('AMANDA INOCENCIO', -4), ("SHERMAN R. D'SILVA", -5), ('ANDREW N. MAJALYA', -6), ('Write-in', -7)]
Instant Runoff Vote [('MARJAN PHILHOUR', 17059), ('CONNIE CHAN', 16902), ('DAVID E. LEE', 7265), ("SHERMAN R. D'SILVA", 1957), ('VERONICA SHINZATO', 1514), ('Write-in', 791), ('AMANDA INOCENCIO', 722), ('ANDREW N. MAJALYA', 317)]
BOARD OF SUPERVISORS DISTRICT 1.nameq: Virtual Round Robin (alt impl) CONNIE CHAN != Instant Runoff Vote MARJAN PHILHOUR
BOARD OF SUPERVISORS DISTRICT 1.nameq.html
BOARD OF SUPERVISORS DISTRICT 3.nameq.html
BOARD OF SUPERVISORS DISTRICT 5.nameq.html
Instant Runoff Vote [('MYRNA MELGAR', 18499), ('JOEL ENGARDIO', 16329), ('VILASKA NGUYEN', 10811), ('STEPHEN W. MARTIN-PINTO', 6112), ('EMILY MURASE', 5419), ('BEN MATRANGA', 3594), ('KEN PIPER', 977), ('Write-in', 299)]
Pick One [('JOEL ENGARDIO', 93

In [None]:
# NOTE(review): this passes the '.nameq.html' report name, whereas the report
# loop earlier in the notebook passes '.nameq' ballot files to testFile —
# confirm which input is intended. This cell has no execution count.
voteutil.rcvmatters.testFile('BOARD OF SUPERVISORS DISTRICT 1.nameq.html')

In [12]:
# Read the District 1 raw contest records back in: one JSON object per line.
with open('BOARD OF SUPERVISORS DISTRICT 1.json') as fin:
    recs = [json.loads(raw_line) for raw_line in fin]

In [13]:
# Records whose 'Overvotes' / 'Undervotes' field is truthy. The two lists are
# built independently, so one record can appear in both.
ov = [rec for rec in recs if rec['Overvotes']]
uv = [rec for rec in recs if rec['Undervotes']]

In [14]:
# Print each group's label followed by its size.
for label, group in (('all votes', recs), ('"Overvotes"', ov), ('"Undervotes"', uv)):
    print(label)
    print(len(group))

all votes
39909
"Overvotes"
347
"Undervotes"
3573


In [15]:
# official round one continuing 36076, non-transferrable 3833
# official round one blanks 3726
# official round one overvotes 107
# Sanity check: continuing + non-transferable should equal the number of
# records loaded for this contest (blanks + overvotes = 3726 + 107 = 3833).
3833+36076

39909

In [16]:
# Records flagged with Overvotes plus records flagged with Undervotes (a record
# flagged with both is counted twice). Compare with the official
# non-transferable figure of 3833.
len(ov)+len(uv)

3920

In [17]:
# Count records with no marks at all (missing or empty 'Marks').
nomarks = sum(1 for rec in recs if not rec.get('Marks'))
print(nomarks)

3500


In [18]:
# Count records that carry no mark flagged IsVote; this includes records with
# no marks at all.
novotes = sum(
    1
    for rec in recs
    if not any(m['IsVote'] for m in (rec.get('Marks') or []))
)
print(novotes)

3509


In [19]:
# find highest duplicate rank
# Per-rank tallies over all records:
#   rdups  - records with more than one IsVote mark at that rank
#   rcount - IsVote marks at that rank
#   amnc   - IsAmbiguous marks that are not votes
#   amvc   - IsAmbiguous marks that are votes
rdups, rcount, amnc, amvc = {}, {}, {}, {}
for rec in recs:
    votes_at_rank = {}
    for mark in rec.get('Marks', []):
        rank = mark['Rank']
        is_vote = mark['IsVote']
        if mark['IsAmbiguous']:
            ambiguous_tally = amvc if is_vote else amnc
            ambiguous_tally[rank] = ambiguous_tally.get(rank, 0) + 1
        if is_vote:
            rcount[rank] = rcount.get(rank, 0) + 1
            votes_at_rank[rank] = votes_at_rank.get(rank, 0) + 1
    for rank in sorted(votes_at_rank):
        if votes_at_rank[rank] > 1:
            rdups[rank] = rdups.get(rank, 0) + 1
for title, tally in (
    ('(rank,dups)...', rdups),
    ('(rank, all votes at rank), ...', rcount),
    ('(rank, ambiguous non-vote at rank), ...', amnc),
    ('(rank, ambiguous vote at rank), ...', amvc),
):
    print(title)
    print(sorted(tally.items()))

(rank,dups)...
[(1, 347), (2, 204), (3, 135), (4, 106), (5, 97), (6, 102), (7, 122)]
(rank, all votes at rank), ...
[(1, 36768), (2, 28714), (3, 23425), (4, 17104), (5, 15107), (6, 14664), (7, 14484)]
(rank, ambiguous non-vote at rank), ...
[(1, 89), (2, 74), (3, 63), (4, 54), (5, 63), (6, 83), (7, 75)]
(rank, ambiguous vote at rank), ...
[]


In [20]:
# 347 overvotes == 347 votes with duplicate at rank=1

In [21]:
# https://www.sfelections.org/results/20201103/data/20201201/d1/20201201_d1_short.pdf
# Looking for official Round 1 counts:
# AMANDA INOCENCIO: 702
# ANDREW N. MAJALYA: 312
# CONNIE CHAN: 13508
# DAVID E. LEE: 6293
# MARJAN PHILHOUR: 12383
# SHERMAN R. D'SILVA: 1558
# VERONICA SHINZATO: 1320
# (candidate name, official round-1 count) pairs; this is the reference that
# the tallies computed below are compared against. There is no 'Write-in' entry.
expected = [
    ('AMANDA INOCENCIO', 702),
    ('ANDREW N. MAJALYA', 312),
    ('CONNIE CHAN', 13508),
    ('DAVID E. LEE', 6293),
    ('MARJAN PHILHOUR', 12383),
    ("SHERMAN R. D'SILVA", 1558),
    ('VERONICA SHINZATO', 1320),
]

In [22]:
# Hand-typed total of the official round-1 counts; it should equal the official
# "continuing" figure of 36076.
702+312+13508+6293+12383+1558+1320

36076

In [40]:
# Same total, computed from the `expected` list itself.
sum(votes for _name, votes in expected)

36076

In [71]:
dexpected = dict(expected)
def presult(rname, result):
    """Display a tally next to its per-candidate difference from ``dexpected``.

    ``result`` is anything ``dict()`` accepts that maps candidate name to a
    count. A bold HTML header shows ``rname``, the tally's total and the summed
    absolute difference; the sorted tally and the (name, actual - expected)
    pairs are then printed. A candidate missing from either side counts as 0
    there, so names absent from ``dexpected`` add their full count to the error.
    """
    tally = dict(result)
    names = sorted(set(dexpected) | set(tally))
    deltas = [(who, tally.get(who, 0) - dexpected.get(who, 0)) for who in names]
    total = sum(tally.values())
    abs_err = sum(abs(delta) for _, delta in deltas)
    phtml('<b>{}, ({}) (err {})</b>'.format(rname, total, abs_err))
    print('  ' + repr(sorted(tally.items())))
    print('  ' + repr(deltas))

In [72]:
# First-choice tallies using only rank-1 marks flagged IsVote, under three rules:
#   firsts            - every rank-1 vote counts
#   firstsNoDups      - only records with exactly one rank-1 vote
#   firstsNoOvervotes - only records whose 'Overvotes' field is falsy
firsts, firstsNoDups, firstsNoOvervotes = {}, {}, {}
frdups = 0
for rec in recs:
    fr = []
    for mark in rec['Marks']:
        if mark['Rank'] == 1:
            name = candidates[mark['CandidateId']]
            if mark['IsVote']:
                fr.append(name)
    # a record with several rank-1 votes is a "dup"
    frdups += len(fr) > 1
    for name in fr:
        tallies = [firsts]
        if len(fr) == 1:
            tallies.append(firstsNoDups)
        if not rec['Overvotes']:
            tallies.append(firstsNoOvervotes)
        for tally in tallies:
            tally[name] = tally.get(name, 0) + 1
print('official final')
print(expected)
presult('firsts, all', firsts)
presult('firsts, removing Overvotes', firstsNoOvervotes)
presult('firsts, removing dups at rank 1, frdups={}'.format(frdups), firstsNoDups)

official final
[('AMANDA INOCENCIO', 702), ('ANDREW N. MAJALYA', 312), ('CONNIE CHAN', 13508), ('DAVID E. LEE', 6293), ('MARJAN PHILHOUR', 12383), ("SHERMAN R. D'SILVA", 1558), ('VERONICA SHINZATO', 1320)]


  [('AMANDA INOCENCIO', 724), ('ANDREW N. MAJALYA', 324), ('CONNIE CHAN', 13449), ('DAVID E. LEE', 6149), ('MARJAN PHILHOUR', 12259), ("SHERMAN R. D'SILVA", 1569), ('VERONICA SHINZATO', 1316), ('Write-in', 978)]
  [('AMANDA INOCENCIO', 22), ('ANDREW N. MAJALYA', 12), ('CONNIE CHAN', -59), ('DAVID E. LEE', -144), ('MARJAN PHILHOUR', -124), ("SHERMAN R. D'SILVA", 11), ('VERONICA SHINZATO', -4), ('Write-in', 978)]


  [('AMANDA INOCENCIO', 685), ('ANDREW N. MAJALYA', 305), ('CONNIE CHAN', 13180), ('DAVID E. LEE', 6051), ('MARJAN PHILHOUR', 12176), ("SHERMAN R. D'SILVA", 1537), ('VERONICA SHINZATO', 1286), ('Write-in', 769)]
  [('AMANDA INOCENCIO', -17), ('ANDREW N. MAJALYA', -7), ('CONNIE CHAN', -328), ('DAVID E. LEE', -242), ('MARJAN PHILHOUR', -207), ("SHERMAN R. D'SILVA", -21), ('VERONICA SHINZATO', -34), ('Write-in', 769)]


  [('AMANDA INOCENCIO', 685), ('ANDREW N. MAJALYA', 305), ('CONNIE CHAN', 13180), ('DAVID E. LEE', 6051), ('MARJAN PHILHOUR', 12176), ("SHERMAN R. D'SILVA", 1537), ('VERONICA SHINZATO', 1286), ('Write-in', 769)]
  [('AMANDA INOCENCIO', -17), ('ANDREW N. MAJALYA', -7), ('CONNIE CHAN', -328), ('DAVID E. LEE', -242), ('MARJAN PHILHOUR', -207), ("SHERMAN R. D'SILVA", -21), ('VERONICA SHINZATO', -34), ('Write-in', 769)]


In [73]:
print('IsVote, but if no IsVote then IsAmbiguous')
# Same three first-choice tallies as the IsVote-only variant, except that a
# record with no rank-1 vote falls back to its rank-1 ambiguous marks.
firsts, firstsNoDups, firstsNoOvervotes = {}, {}, {}
frdups = 0
for rec in recs:
    fr, fra = [], []
    for mark in rec['Marks']:
        if mark['Rank'] != 1:
            continue
        name = candidates[mark['CandidateId']]
        if mark['IsVote']:
            fr.append(name)
        elif mark['IsAmbiguous']:
            fra.append(name)
    if not fr:
        # no clean rank-1 vote: use the ambiguous rank-1 marks instead
        fr = fra
    frdups += len(fr) > 1
    for name in fr:
        tallies = [firsts]
        if len(fr) == 1:
            tallies.append(firstsNoDups)
        if not rec['Overvotes']:
            tallies.append(firstsNoOvervotes)
        for tally in tallies:
            tally[name] = tally.get(name, 0) + 1
print('official final')
print(expected)
presult('firsts, all', firsts)
presult('firsts, removing Overvotes', firstsNoOvervotes)
presult('firsts, removing dups at rank 1, frdups={}'.format(frdups), firstsNoDups)

IsVote, but if no IsVote then IsAmbiguous
official final
[('AMANDA INOCENCIO', 702), ('ANDREW N. MAJALYA', 312), ('CONNIE CHAN', 13508), ('DAVID E. LEE', 6293), ('MARJAN PHILHOUR', 12383), ("SHERMAN R. D'SILVA", 1558), ('VERONICA SHINZATO', 1320)]


  [('AMANDA INOCENCIO', 725), ('ANDREW N. MAJALYA', 326), ('CONNIE CHAN', 13452), ('DAVID E. LEE', 6151), ('MARJAN PHILHOUR', 12263), ("SHERMAN R. D'SILVA", 1571), ('VERONICA SHINZATO', 1316), ('Write-in', 980)]
  [('AMANDA INOCENCIO', 23), ('ANDREW N. MAJALYA', 14), ('CONNIE CHAN', -56), ('DAVID E. LEE', -142), ('MARJAN PHILHOUR', -120), ("SHERMAN R. D'SILVA", 13), ('VERONICA SHINZATO', -4), ('Write-in', 980)]


  [('AMANDA INOCENCIO', 686), ('ANDREW N. MAJALYA', 307), ('CONNIE CHAN', 13183), ('DAVID E. LEE', 6053), ('MARJAN PHILHOUR', 12180), ("SHERMAN R. D'SILVA", 1539), ('VERONICA SHINZATO', 1286), ('Write-in', 771)]
  [('AMANDA INOCENCIO', -16), ('ANDREW N. MAJALYA', -5), ('CONNIE CHAN', -325), ('DAVID E. LEE', -240), ('MARJAN PHILHOUR', -203), ("SHERMAN R. D'SILVA", -19), ('VERONICA SHINZATO', -34), ('Write-in', 771)]


  [('AMANDA INOCENCIO', 685), ('ANDREW N. MAJALYA', 306), ('CONNIE CHAN', 13182), ('DAVID E. LEE', 6052), ('MARJAN PHILHOUR', 12179), ("SHERMAN R. D'SILVA", 1538), ('VERONICA SHINZATO', 1286), ('Write-in', 770)]
  [('AMANDA INOCENCIO', -17), ('ANDREW N. MAJALYA', -6), ('CONNIE CHAN', -326), ('DAVID E. LEE', -241), ('MARJAN PHILHOUR', -204), ("SHERMAN R. D'SILVA", -20), ('VERONICA SHINZATO', -34), ('Write-in', 770)]


In [74]:
print('highest rank (if not 1, then 2), and if no IsVote then IsAmbiguous')
# Accumulators for this cell's tally (reset here so the cell can be re-run):
firsts = {}             # name -> count, every name at the record's chosen rank
firstsNoDups = {}       # name -> count, only when the chosen rank holds one name
firstsNoOvervotes = {}  # name -> count, only when the record's 'Overvotes' is falsy
frdups = 0              # records with more than one name at the chosen rank
laterButNoFirst = 0     # records whose best marked rank is not 1
def da(d, k, v):
    """Dict append: add ``v`` to the list at ``d[k]``, creating the list if needed."""
    bucket = d.get(k)
    if bucket is not None:
        bucket.append(v)
        return
    d[k] = [v]
# For every record pick the candidates at its best (numerically lowest) marked
# rank, whatever that rank is, and add them to the three first-choice tallies.
for rec in recs:
    # rank -> [candidate names]: `v` collects marks flagged IsVote, `va`
    # collects non-vote marks flagged IsAmbiguous.
    v = {}
    va = {}
    for mark in rec['Marks']:
        rank = mark['Rank']
        name = candidates[mark['CandidateId']]
        if mark['IsVote']:
            da(v,rank,name)
        elif mark['IsAmbiguous']:
            da(va,rank,name)
    if (not v) and (not va):
        # no vote and no ambiguous mark: nothing to count for this record
        continue
    # Lowest rank present in each group, or None when the group is empty
    # (a rank of 0 would also collapse to None through the `or`).
    minv = (v and min(v.keys())) or None
    mina = (va and min(va.keys())) or None
    # At least one of the two is set here, so min() always gets an argument.
    if min(filter(None,[mina,minv])) != 1:
        laterButNoFirst += 1
    # Ambiguous marks are used when there is no vote at all OR when the best
    # ambiguous rank is strictly lower than the best vote rank; on a tie the
    # votes win.
    if mina and ((not minv) or (mina < minv)):
        fr = va[mina]
    elif minv and v:
        fr = v[minv]
    else:
        fr = []
    if len(fr) > 1:
        frdups += 1
    for name in fr:
        firsts[name] = firsts.get(name, 0) + 1
        if len(fr) == 1:
            firstsNoDups[name] = firstsNoDups.get(name, 0) + 1
        if not rec['Overvotes']:
            firstsNoOvervotes[name] = firstsNoOvervotes.get(name, 0) + 1
print('laterButNoFirst {}'.format(laterButNoFirst))
print('official final')
print(expected)
presult('firsts, all', firsts)
presult('firsts, removing Overvotes', firstsNoOvervotes)
presult('firsts, removing dups at rank 1, frdups={}'.format(frdups), firstsNoDups)

highest rank (if not 1, then 2), and if no IsVote then IsAmbiguous
laterButNoFirst 63
official final
[('AMANDA INOCENCIO', 702), ('ANDREW N. MAJALYA', 312), ('CONNIE CHAN', 13508), ('DAVID E. LEE', 6293), ('MARJAN PHILHOUR', 12383), ("SHERMAN R. D'SILVA", 1558), ('VERONICA SHINZATO', 1320)]


  [('AMANDA INOCENCIO', 736), ('ANDREW N. MAJALYA', 337), ('CONNIE CHAN', 13470), ('DAVID E. LEE', 6176), ('MARJAN PHILHOUR', 12285), ("SHERMAN R. D'SILVA", 1578), ('VERONICA SHINZATO', 1330), ('Write-in', 988)]
  [('AMANDA INOCENCIO', 34), ('ANDREW N. MAJALYA', 25), ('CONNIE CHAN', -38), ('DAVID E. LEE', -117), ('MARJAN PHILHOUR', -98), ("SHERMAN R. D'SILVA", 20), ('VERONICA SHINZATO', 10), ('Write-in', 988)]


  [('AMANDA INOCENCIO', 697), ('ANDREW N. MAJALYA', 318), ('CONNIE CHAN', 13201), ('DAVID E. LEE', 6078), ('MARJAN PHILHOUR', 12202), ("SHERMAN R. D'SILVA", 1546), ('VERONICA SHINZATO', 1300), ('Write-in', 779)]
  [('AMANDA INOCENCIO', -5), ('ANDREW N. MAJALYA', 6), ('CONNIE CHAN', -307), ('DAVID E. LEE', -215), ('MARJAN PHILHOUR', -181), ("SHERMAN R. D'SILVA", -12), ('VERONICA SHINZATO', -20), ('Write-in', 779)]


  [('AMANDA INOCENCIO', 687), ('ANDREW N. MAJALYA', 309), ('CONNIE CHAN', 13191), ('DAVID E. LEE', 6067), ('MARJAN PHILHOUR', 12192), ("SHERMAN R. D'SILVA", 1538), ('VERONICA SHINZATO', 1293), ('Write-in', 772)]
  [('AMANDA INOCENCIO', -15), ('ANDREW N. MAJALYA', -3), ('CONNIE CHAN', -317), ('DAVID E. LEE', -226), ('MARJAN PHILHOUR', -191), ("SHERMAN R. D'SILVA", -20), ('VERONICA SHINZATO', -27), ('Write-in', 772)]


In [76]:
print('highest rank (if not 1, then 2), and if no IsVote then IsAmbiguous, erase Write-in')
# Accumulators for this cell's tally (reset here so the cell can be re-run):
firsts = {}             # name -> count, every name at the record's chosen rank
firstsNoDups = {}       # name -> count, only when the chosen rank holds one name
firstsNoOvervotes = {}  # name -> count, only when the record's 'Overvotes' is falsy
frdups = 0              # records with more than one name at the chosen rank
laterButNoFirst = 0     # records whose best remaining marked rank is not 1
def da(d, k, v):
    """Dict append: add ``v`` to the list at ``d[k]``, creating the list if needed."""
    bucket = d.get(k)
    if bucket is not None:
        bucket.append(v)
        return
    d[k] = [v]
# Same best-rank tally as the previous cell, but marks for the 'Write-in'
# pseudo-candidate are discarded first, so a record whose rank 1 is a write-in
# is counted at its next marked rank.
for rec in recs:
    # rank -> [candidate names]: `v` collects marks flagged IsVote, `va`
    # collects non-vote marks flagged IsAmbiguous.
    v = {}
    va = {}
    for mark in rec['Marks']:
        rank = mark['Rank']
        name = candidates[mark['CandidateId']]
        if name == 'Write-in':
            # treat write-in marks as if they were not on the ballot
            continue
        if mark['IsVote']:
            da(v,rank,name)
        elif mark['IsAmbiguous']:
            da(va,rank,name)
    if (not v) and (not va):
        # nothing left to count for this record
        continue
    # Lowest rank present in each group, or None when the group is empty
    # (a rank of 0 would also collapse to None through the `or`).
    minv = (v and min(v.keys())) or None
    mina = (va and min(va.keys())) or None
    # At least one of the two is set here, so min() always gets an argument.
    if min(filter(None,[mina,minv])) != 1:
        laterButNoFirst += 1
    # Ambiguous marks are used when there is no vote at all OR when the best
    # ambiguous rank is strictly lower than the best vote rank; on a tie the
    # votes win.
    if mina and ((not minv) or (mina < minv)):
        fr = va[mina]
    elif minv and v:
        fr = v[minv]
    else:
        fr = []
    if len(fr) > 1:
        frdups += 1
    for name in fr:
        firsts[name] = firsts.get(name, 0) + 1
        if len(fr) == 1:
            firstsNoDups[name] = firstsNoDups.get(name, 0) + 1
        if not rec['Overvotes']:
            firstsNoOvervotes[name] = firstsNoOvervotes.get(name, 0) + 1
print('laterButNoFirst {}'.format(laterButNoFirst))
print('official final')
print(expected)
presult('firsts, all', firsts)
presult('firsts, removing Overvotes', firstsNoOvervotes)
presult('firsts, removing dups at rank 1, dups={}'.format(frdups), firstsNoDups)

highest rank (if not 1, then 2), and if no IsVote then IsAmbiguous, erase Write-in
laterButNoFirst 608
official final
[('AMANDA INOCENCIO', 702), ('ANDREW N. MAJALYA', 312), ('CONNIE CHAN', 13508), ('DAVID E. LEE', 6293), ('MARJAN PHILHOUR', 12383), ("SHERMAN R. D'SILVA", 1558), ('VERONICA SHINZATO', 1320)]


  [('AMANDA INOCENCIO', 750), ('ANDREW N. MAJALYA', 341), ('CONNIE CHAN', 13551), ('DAVID E. LEE', 6396), ('MARJAN PHILHOUR', 12467), ("SHERMAN R. D'SILVA", 1596), ('VERONICA SHINZATO', 1359)]
  [('AMANDA INOCENCIO', 48), ('ANDREW N. MAJALYA', 29), ('CONNIE CHAN', 43), ('DAVID E. LEE', 103), ('MARJAN PHILHOUR', 84), ("SHERMAN R. D'SILVA", 38), ('VERONICA SHINZATO', 39)]


  [('AMANDA INOCENCIO', 711), ('ANDREW N. MAJALYA', 322), ('CONNIE CHAN', 13282), ('DAVID E. LEE', 6298), ('MARJAN PHILHOUR', 12384), ("SHERMAN R. D'SILVA", 1564), ('VERONICA SHINZATO', 1329)]
  [('AMANDA INOCENCIO', 9), ('ANDREW N. MAJALYA', 10), ('CONNIE CHAN', -226), ('DAVID E. LEE', 5), ('MARJAN PHILHOUR', 1), ("SHERMAN R. D'SILVA", 6), ('VERONICA SHINZATO', 9)]


  [('AMANDA INOCENCIO', 703), ('ANDREW N. MAJALYA', 314), ('CONNIE CHAN', 13455), ('DAVID E. LEE', 6294), ('MARJAN PHILHOUR', 12379), ("SHERMAN R. D'SILVA", 1558), ('VERONICA SHINZATO', 1322)]
  [('AMANDA INOCENCIO', 1), ('ANDREW N. MAJALYA', 2), ('CONNIE CHAN', -53), ('DAVID E. LEE', 1), ('MARJAN PHILHOUR', -4), ("SHERMAN R. D'SILVA", 0), ('VERONICA SHINZATO', 2)]


In [27]:
# Pretty-print the second record (index 1) to show the shape of a raw contest
# record and its marks.
print('example record')
print(json.dumps(recs[1], indent=2))

example record
{
  "Id": 12,
  "ManifestationId": 58601,
  "Undervotes": 0,
  "Overvotes": 0,
  "OutstackConditionIds": [],
  "Marks": [
    {
      "CandidateId": 20,
      "ManifestationId": 218895,
      "PartyId": 0,
      "Rank": 1,
      "MarkDensity": 85,
      "IsAmbiguous": false,
      "IsVote": true,
      "OutstackConditionIds": []
    },
    {
      "CandidateId": 17,
      "ManifestationId": 218931,
      "PartyId": 0,
      "Rank": 2,
      "MarkDensity": 75,
      "IsAmbiguous": false,
      "IsVote": true,
      "OutstackConditionIds": []
    },
    {
      "CandidateId": 23,
      "ManifestationId": 218911,
      "PartyId": 0,
      "Rank": 3,
      "MarkDensity": 70,
      "IsAmbiguous": false,
      "IsVote": true,
      "OutstackConditionIds": []
    },
    {
      "CandidateId": 21,
      "ManifestationId": 218905,
      "PartyId": 0,
      "Rank": 4,
      "MarkDensity": 81,
      "IsAmbiguous": false,
      "IsVote": true,
      "OutstackConditionIds": []
    },

In [28]:
# Load the manifest that names the numeric OutstackConditionIds found on
# records and marks.
with zf.open('OutstackConditionManifest.json') as fin:
    ocm = json.load(fin)

In [29]:
# Outstack condition Id -> human-readable description.
outstackConditions = dict(
    (entry['Id'], entry['Description']) for entry in ocm['List']
)

In [30]:
# Display the Id -> description lookup.
outstackConditions

{0: 'Ambiguous',
 1: 'Writein',
 2: 'BlankBallot',
 5: 'Overvote',
 4: 'Undervote',
 6: 'BlankContest',
 9: 'OvervotedRanking',
 10: 'InconsistentRcvOrdering',
 11: 'SkippedRanking',
 12: 'DuplicatedRcvCandidate',
 13: 'UnvotedRcvContest',
 14: 'UnusedRanking'}

In [31]:
oci = {}   # OutstackConditionId -> occurrences at contest-record level
moci = {}  # OutstackConditionId -> occurrences at individual-mark level
def dil(d, l, v=1):
    """Dict increment over a list: add ``v`` to ``d[key]`` for each key in ``l``.

    Keys not yet in ``d`` start from 0; a key repeated in ``l`` is incremented
    once per occurrence.
    """
    for key in l:
        if key in d:
            d[key] = d[key] + v
        else:
            d[key] = 0 + v
# Count how often each outstack condition appears on records and on marks, then
# print both tallies with the condition names substituted for the Ids.
for rec in recs:
    dil(oci, rec['OutstackConditionIds'])
    for m in rec['Marks']:
        dil(moci, m['OutstackConditionIds'])
for template, tally in (
    ('record OutstackConditionIds: {}', oci),
    ('mark OutstackConditionIds: {}', moci),
):
    named = [(outstackConditions[cid], n) for cid, n in sorted(tally.items())]
    print(template.format(named))

record OutstackConditionIds: [('SkippedRanking', 456), ('UnvotedRcvContest', 3509), ('UnusedRanking', 22100)]
mark OutstackConditionIds: [('Ambiguous', 501), ('Writein', 2962), ('OvervotedRanking', 2458), ('InconsistentRcvOrdering', 980), ('DuplicatedRcvCandidate', 3105)]


In [87]:
# Column titles for the expected-vs-actual comparison table.
errt_header = ['','expected','actual','diff']
def smartsub(a,b):
    """Return ``a - b``, treating a missing (``None``) operand as 0.

    This keeps the sign and type of the difference consistent in every case:
    ``smartsub(None, b)`` is ``-b`` and ``smartsub(a, None)`` is ``a``, always
    as a number rather than a formatted string.
    """
    if a is None:
        a = 0
    if b is None:
        b = 0
    return a - b
def errt(expected, actual):
    """Build comparison rows ``[name, expected, actual, diff]`` sorted by name.

    Both arguments are anything ``dict()`` accepts. A name present on only one
    side gets ``None`` in the other column; ``diff`` is
    ``smartsub(actual, expected)``.
    """
    exp_map = dict(expected)
    act_map = dict(actual)
    rows = []
    for key in sorted(set(exp_map) | set(act_map)):
        exp_val = exp_map.get(key)
        act_val = act_map.get(key)
        rows.append([key, exp_val, act_val, smartsub(act_val, exp_val)])
    return rows
import csv
import io
def errtcsv(expected,actual):
    """Return the expected-vs-actual comparison table as CSV text (header row first)."""
    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerow(errt_header)
    writer.writerows(errt(expected,actual))
    return buf.getvalue()
def errthtml(expected,actual):
    """Return the expected-vs-actual comparison table as an HTML ``<table>`` string.

    Cell text is HTML-escaped (``&``, ``<``, ``>``) so a candidate name taken
    from the data files cannot break or inject markup. Quotes are left alone,
    so names containing an apostrophe are emitted unchanged.
    """
    import html  # local import keeps this definition self-contained

    def cells(tag, values):
        # One <tag>...</tag> element per value, concatenated.
        return ''.join(
            '<{0}>{1}</{0}>'.format(tag, html.escape(str(x), quote=False))
            for x in values
        )

    parts = ['<table><tr>', cells('th', errt_header), '</tr>']
    for row in errt(expected,actual):
        parts.append('<tr>{}</tr>'.format(cells('td', row)))
    parts.append('</table>')
    return ''.join(parts)

In [88]:
# Comparison of the official counts with the most recent firstsNoDups tally, as CSV.
print(errtcsv(expected, firstsNoDups))

,expected,actual,diff
AMANDA INOCENCIO,702,703,1
ANDREW N. MAJALYA,312,314,2
CONNIE CHAN,13508,13455,-53
DAVID E. LEE,6293,6294,1
MARJAN PHILHOUR,12383,12379,-4
SHERMAN R. D'SILVA,1558,1558,0
VERONICA SHINZATO,1320,1322,2



In [89]:
# The same comparison rendered as an HTML table in the notebook.
phtml(errthtml(expected, firstsNoDups))

Unnamed: 0,expected,actual,diff
AMANDA INOCENCIO,702,703,1
ANDREW N. MAJALYA,312,314,2
CONNIE CHAN,13508,13455,-53
DAVID E. LEE,6293,6294,1
MARJAN PHILHOUR,12383,12379,-4
SHERMAN R. D'SILVA,1558,1558,0
VERONICA SHINZATO,1320,1322,2


In [90]:
# Raw HTML source of that table (e.g. for pasting elsewhere).
print(errthtml(expected, firstsNoDups))

<table><tr><th></th><th>expected</th><th>actual</th><th>diff</th></tr><tr><td>AMANDA INOCENCIO</td><td>702</td><td>703</td><td>1</td></tr><tr><td>ANDREW N. MAJALYA</td><td>312</td><td>314</td><td>2</td></tr><tr><td>CONNIE CHAN</td><td>13508</td><td>13455</td><td>-53</td></tr><tr><td>DAVID E. LEE</td><td>6293</td><td>6294</td><td>1</td></tr><tr><td>MARJAN PHILHOUR</td><td>12383</td><td>12379</td><td>-4</td></tr><tr><td>SHERMAN R. D'SILVA</td><td>1558</td><td>1558</td><td>0</td></tr><tr><td>VERONICA SHINZATO</td><td>1320</td><td>1322</td><td>2</td></tr></table>
