# Compare Minerva audits from athena and r2b2 repos

In [1]:
# we either import locally (after the repo was cloned)
# or we first clone it and then use it (e.g., when run in Google Colab)
shell = get_ipython().__class__.__name__ 

if shell == 'Shell':
    # imports when launched in e.g., Google Colab
    !git clone https://github.com/filipzz/athena.git r2b2
    from r2b2.code.athena.athena import AthenaAudit
    from r2b2.code.athena.contest import Contest
    from r2b2.code.athena.audit import Audit
else: # shell ==  'ZMQInteractiveShell' or shell == 'TerminalInteractiveShell'
    # local imports if you run it with e.g., Jupyter
    from athena.athena import AthenaAudit
    from athena.contest import Contest
    from athena.election import Election
    from athena.audit import Audit

import pandas as pd
import json

In [2]:
import r2b2.contest as r2b2contest
import r2b2.minerva as r2b2minerva
import r2b2.election as r2b2election
import r2b2.tests.util as r2b2util

In [3]:
import json

# Helper functions

In [4]:
def compute_risk_r2b2(contest_dict, rounds, observations, risk_limit=0.1, max_fraction=0.5):
    """Run a single-round Minerva audit through r2b2 and report its key numbers.

    Args:
        contest_dict: keyword arguments forwarded to ``r2b2contest.Contest``.
        rounds: list holding exactly one round size.
        observations: list whose first entry is passed to ``compute_risk``.
        risk_limit: first positional argument of ``r2b2minerva.Minerva``.
        max_fraction: second positional argument of ``r2b2minerva.Minerva``.

    Returns:
        dict with ``'kmins'`` (the audit's ``min_winner_ballots``) and
        ``'p_values'`` (a one-element list holding the computed risk).

    Raises:
        AssertionError: if ``rounds`` does not contain exactly one entry.
    """
    assert len(rounds) == 1, "Can't handle more than one round yet"

    minerva = r2b2minerva.Minerva(risk_limit, max_fraction,
                                  r2b2contest.Contest(**contest_dict))

    # FIXME: driving r2b2's Minerva this way for an arbitrary compute_risk
    # calculation only works for a single round.
    minerva.compute_min_winner_ballots(rounds)
    minerva.current_dist_reported()
    minerva.current_dist_null()

    kmins = minerva.min_winner_ballots
    p_value = minerva.compute_risk(observations[0])
    return {'kmins': kmins, 'p_values': [p_value]}

In [5]:
def compute_risk_athena(contest_dict, rounds, observations, risk_limit=0.1, max_fraction=0.5):
    """Compute risk level via athena module, returning (for now) kmins and p_values.

    Args:
        contest_dict: contest description; its ``'tally'`` values are summed
            to obtain the election's ``total_ballots``, and the dict itself
            is registered as the single contest of a throwaway election.
        rounds: list holding exactly one round size.
        observations: list whose first entry is used as the first tally of
            the round; the remainder of the round is assigned to a second
            tally (so this assumes a two-candidate contest -- TODO confirm
            for other shapes).
        risk_limit: risk limit handed to athena's ``Audit``.
        max_fraction: accepted for signature parity with
            ``compute_risk_r2b2``; not used by this function.

    Returns:
        dict with ``'kmins'`` (first element of the status' ``min_kmins``)
        and ``'p_values'`` (a one-element list with the first risk).

    Raises:
        AssertionError: if ``rounds`` does not contain exactly one entry.
    """
    # Only rounds[0] / observations[0] are consumed below, so reject
    # multi-round input up front (as compute_risk_r2b2 does) rather than
    # silently ignoring the extra rounds.
    assert len(rounds) == 1, "Can't handle more than one round yet"

    total_ballots = sum(contest_dict['tally'].values())
    election_name = "hypothesis election"
    contest_name = "hypothesis_contest"
    election = {
        'name': election_name,
        'total_ballots': total_ballots,
        'contests': {contest_name: contest_dict}
    }
    audit = Audit("minerva", risk_limit)
    audit.add_election(election)
    audit.load_contest(contest_name)

    round_size = rounds[0]
    first_tally = observations[0]
    audit.set_observations(round_size, round_size, [first_tally, round_size - first_tally])

    status = audit.status[audit.active_contest]
    return {'kmins': status.min_kmins[:1],   # FIXME: why more than one kmin value?
            'p_values': [status.risks[0]]}

In [6]:
def dict_compare(d1, d2):
    """Return the entries whose values differ between two dicts.

    Only keys present in BOTH dicts are examined; a key that exists in just
    one of them is not reported.

    Args:
        d1: first mapping.
        d2: second mapping.

    Returns:
        dict mapping each shared key with unequal values to the pair
        ``(d1[key], d2[key])``; empty when every shared key agrees.
    """
    shared_keys = d1.keys() & d2.keys()
    return {key: (d1[key], d2[key]) for key in shared_keys if d1[key] != d2[key]}

# Compare various observations

In [7]:
# Contest shared by both helpers in the comparison below:
# 10000 ballots, A reported as the single winner with 7500 votes to B's 2500.
contest_data = {
    'contest_ballots': 10000,
    'tally': {
        'A': 7500,
        'B': 2500,
    },
    'num_winners': 1,
    'reported_winners': ['A'],
    'contest_type': r2b2contest.ContestType.PLURALITY,
}

In [8]:
# For a single round of 50 ballots, feed every observation count 0..49 to
# both implementations and report any field on which they disagree.
for obs in range(50):
    rr = compute_risk_r2b2(contest_data, [50], [obs])
    ar = compute_risk_athena(contest_data, [50], [obs])
    print(obs, ar)
    comp = dict_compare(rr, ar)
    # An empty comparison means both libraries returned identical values.
    if comp:
        print(obs, comp)

0 {'kmins': [31], 'p_values': [0.999999999999995]}
1 {'kmins': [31], 'p_values': [0.9999999999999941]}
2 {'kmins': [31], 'p_values': [0.9999999999999497]}
3 {'kmins': [31], 'p_values': [0.9999999999988617]}
4 {'kmins': [31], 'p_values': [0.9999999999814534]}
5 {'kmins': [31], 'p_values': [0.9999999997769059]}
6 {'kmins': [31], 'p_values': [0.999999997895069]}
7 {'kmins': [31], 'p_values': [0.9999999837812922]}
8 {'kmins': [31], 'p_values': [0.9999998950661236]}
9 {'kmins': [31], 'p_values': [0.9999994182220927]}
10 {'kmins': [31], 'p_values': [0.9999971929499484]}
11 {'kmins': [31], 'p_values': [0.9999880693341571]}
12 {'kmins': [31], 'p_values': [0.9999548925494649]}
13 {'kmins': [31], 'p_values': [0.9998470679992498]}
14 {'kmins': [31], 'p_values': [0.9995318885450718]}
15 {'kmins': [31], 'p_values': [0.9986989142756701]}
16 {'kmins': [31], 'p_values': [0.9966997760460113]}
17 {'kmins': [31], 'p_values': [0.9923266612788878]}
18 {'kmins': [31], 'p_values': [0.9835804323980646]}
19 {'

# Older, Misc

In [9]:
# with open(r2b2_contest_file, 'r') as f: contest_data = json.load(f)

In [10]:
# contest_data['contest_type'] = r2b2contest.ContestType.PLURALITY

In [11]:
rr = compute_risk_r2b2(contest_data, [50], [32])

In [12]:
ar = compute_risk_athena(contest_data, [50], [32])

In [13]:
ar

{'kmins': [31], 'p_values': [0.03341442556675801]}

In [14]:
dict_compare(rr, ar)

{}

## Formats

In [15]:
# NOTE(review): absolute, machine-specific path -- this cell (and the parse
# that follows) will not run on another checkout without editing it.
r2b2_election_file = '/srv/s/electionaudits/bayes/r2b2/src/r2b2/tests/data/election_template.json'

In [16]:
re = r2b2util.parse_election(r2b2_election_file)

In [17]:
# NOTE(review): absolute, machine-specific path -- adjust to the local r2b2
# checkout before running.
r2b2_contest_file = '/srv/s/electionaudits/bayes/r2b2/src/r2b2/tests/data/m50-contest.json'

In [18]:
rc = r2b2util.parse_contest(r2b2_contest_file)

In [19]:
with open(r2b2_contest_file, 'r') as f: print(f.read())

{
        "contest_ballots" : 10000,
        "tally" : {
                "A" : 7500,
                "B" : 2500
        },
        "num_winners" : 1,
        "reported_winners" : ["A"],
        "contest_type" : "PLURALITY"
}



In [20]:
athena_election_file = "athena/test_data/simple.json"

In [21]:
with open(athena_election_file, 'r') as f: print(f.read())

{
	"name": "x",
	"total_ballots": 10000,
	"contests": {
		"two_candidates": {
			"contest_ballots": 10000,
			"tally": {
				"A": 6000,
				"B": 4000
			},
			"num_winners": 1,
			"reported_winners": [
				"A"
			],
			"contest_type": "PLURALITY"
		}
    }
}



## Set parameters

In [22]:
risk_limit = 0.1
max_fraction = 0.5

In [23]:
num_winners = 1
election_name = "test0"
contest1_name = "margin50"
candidate1_name = 'A'
candidate2_name = 'B'
candidate1_tally = 7500
candidate2_tally = 2500
undervotes = 0
total_ballots = candidate1_tally + candidate2_tally + undervotes

## With r2b2

In [24]:
r2b2contest.Contest

r2b2.contest.Contest

In [25]:
# Two-candidate plurality contest built with positional arguments
# (presumably contest_ballots, tally, num_winners, reported_winners,
# contest_type, matching the keys used in contest_data -- TODO confirm).
rc = r2b2contest.Contest(
    10000,
    {'A': 7500, 'B': 2500},
    1,
    ['A'],
    r2b2contest.ContestType.PLURALITY,
)

In [26]:
rm = r2b2minerva.Minerva(risk_limit, max_fraction, rc)

In [27]:
rm.compute_min_winner_ballots([50])

In [28]:
rkmin = rm.min_winner_ballots

In [29]:
rkmin

[31]

FIXME: This approach to arbitrary `compute_risk` calculations with r2b2's Minerva only works for a single round.

In [30]:
rm.current_dist_reported()
rm.current_dist_null()

In [31]:
r_p_value = rm.compute_risk(32)

In [32]:
r_p_value

0.03341442556675801

In [33]:
rm.distribution_null

array([8.88178420e-16, 4.44089210e-14, 1.08801856e-12, 1.74082970e-11,
       2.04547490e-10, 1.88183691e-09, 1.41137768e-08, 8.87151685e-08,
       4.76844031e-07, 2.22527214e-06, 9.12361579e-06, 3.31767847e-05,
       1.07824550e-04, 3.15179455e-04, 8.32974273e-04, 1.99913826e-03,
       4.37311493e-03, 8.74622987e-03, 1.60347548e-02, 2.70059027e-02,
       4.18591493e-02, 5.97987846e-02, 7.88256707e-02, 9.59616860e-02,
       1.07956897e-01, 1.12275173e-01, 1.07956897e-01, 9.59616860e-02,
       7.88256707e-02, 5.97987846e-02, 4.18591493e-02, 2.70059027e-02,
       1.60347548e-02, 8.74622987e-03, 4.37311493e-03, 1.99913826e-03,
       8.32974273e-04, 3.15179455e-04, 1.07824550e-04, 3.31767847e-05,
       9.12361579e-06, 2.22527214e-06, 4.76844031e-07, 8.87151685e-08,
       1.41137768e-08, 1.88183691e-09, 2.04547490e-10, 1.74082970e-11,
       1.08801856e-12, 4.44089210e-14, 8.88178420e-16])

## With Athena

### Load from variables

In [34]:
# Athena contest description assembled from the parameter cell above;
# only the number of winners and the per-candidate tally are supplied.
contest = {
    'num_winners': num_winners,
    'tally': {candidate1_name: candidate1_tally, candidate2_name: candidate2_tally},
}

In [35]:
election = {
    'name': election_name,
    'total_ballots': total_ballots,
    'contests': {contest1_name: contest}
}

In [36]:
am = Audit("minerva", risk_limit)

In [37]:
am.add_election(election)

In [38]:
am.load_contest(contest1_name)

In [39]:
am.set_observations(50, 50, [32,18])

In [40]:
am.present_state()

Unnamed: 0,Candidates,Results,Round 1,Total,Required
0,A,7500,32.0,32.0,31.0
1,B,2500,18.0,18.0,
2,,Sum,50.0,,
3,,LR,1.6458,,
4,,P-Value,0.0334,,


In [41]:
a_p_value = am.status[am.active_contest].risks[0]

In [42]:
a_p_value == r_p_value

True

In [43]:
am.active_contest

'margin50'

In [44]:
cs = am.status[am.active_contest]

In [45]:
type(cs)

athena.audit.Status

In [46]:
# attributes of audit Status
[attr for attr in cs.__dir__() if not attr.startswith('_')]

['round_number',
 'params',
 'min_kmins',
 'risks',
 'deltas',
 'audit_pairs',
 'audit_completed',
 'ballots_sampled']

In [47]:
type(am.status[am.active_contest])

athena.audit.Status

In [48]:
am

audit type: minerva
alpha: 0.1
observations: [[32], [18]]
status: {'margin50': {"round_number": 2, "min_kmins": [31, 0], "risks": [0.03341442556675801]}}

In [49]:
cs.risks

[0.03341442556675801]

In [50]:
type(am.status[am.active_contest])

athena.audit.Status

In [51]:
# NOTE(review): this rebinds a_p_value -- assigned the float risks[0] in an
# earlier cell -- to the entire status mapping, so after this cell the name
# no longer holds a p-value. Confirm whether that is intended.
a_p_value = am.status

In [52]:
am.status

{'margin50': {"round_number": 2, "min_kmins": [31, 0], "risks": [0.03341442556675801]}}

### Load from json string

In [53]:
am = Audit("minerva", risk_limit)

In [54]:
am.add_election(json.loads("""{
	"name": "x",
	"total_ballots": 10000,
	"contests": {
		"two_candidates": {
			"contest_ballots": 10000,
			"tally": {
				"A": 6000,
				"B": 4000
			},
			"num_winners": 1,
			"reported_winners": [
				"A"
			],
			"contest_type": "PLURALITY"
		}
    }
}
"""))

In [55]:
am.load_contest("two_candidates")

In [56]:
am.set_observations(50, 50, [32,18])

In [57]:
am.present_state()

Unnamed: 0,Candidates,Results,Round 1,Total,Required
0,A,6000,32.0,32.0,32.0
1,B,4000,18.0,18.0,
2,,Sum,50.0,,
3,,LR,6.1577,,
4,,P-Value,0.0967,,


### Load from file

In [58]:
am = Audit("minerva", risk_limit)

In [59]:
am.read_election_results("athena/test_data/simple.json")

In [60]:
am.load_contest("two_candidates")

In [61]:
am.set_observations(50, 50, [32,18])

In [62]:
am.present_state()

Unnamed: 0,Candidates,Results,Round 1,Total,Required
0,A,6000,32.0,32.0,32.0
1,B,4000,18.0,18.0,
2,,Sum,50.0,,
3,,LR,6.1577,,
4,,P-Value,0.0967,,


### Read montgomery

In [63]:
am = Audit("minerva", risk_limit)

In [64]:
am.read_election_results("athena/test_data/2020_montgomery_formatted.json")

In [65]:
am.contest_list

['d_president',
 'd_congress',
 'd_senator',
 'd_cc_1_2_2021',
 'd_cc_1_3_2021',
 'r_10th',
 'r_senator',
 'r_42nd',
 'r_cc_1_2_2021']

In [66]:
am.load_contest('d_congress')

In [67]:
am.set_observations(50, 50, [32,18])

In [68]:
am.present_state()

Unnamed: 0,Candidates,Results,Round 1,Total,Required
0,Moyer,9799,32.0,32.0,
1,Tims,24178,18.0,18.0,31.0
2,,Sum,50.0,,
3,,LR,0.0,,
4,,P-Value,0.9836,,
