Skip to content

Commit

Permalink
additional test coverage
Browse files Browse the repository at this point in the history
added test coverage for athena, minerva, and abstract audit.
  • Loading branch information
sarahmorin committed Apr 7, 2021
1 parent 3a915f1 commit b863363
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 68 deletions.
12 changes: 6 additions & 6 deletions src/r2b2/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def next_sample_size(self, *args, **kwargs):

def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
"""Check, without finding the kmin, whether the audit is complete.
Args:
pair (str): Dictionary key referencing pairwise subaudit. Evaluate the stopping
condition for this subaudit.
Expand Down Expand Up @@ -126,7 +126,7 @@ def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:

def next_min_winner_ballots_pairwise(self, sub_audit: PairwiseAudit) -> int:
"""Compute stopping size for a given subaudit.
Args:
sub_audit (PairwiseAudit): Compute next stopping size for this subaudit.
Expand Down Expand Up @@ -260,18 +260,18 @@ def compute_all_min_winner_ballots(self, sub_audit: PairwiseAudit, max_sample_si
self._truncate_dist_null_pairwise(pair)
self._truncate_dist_reported_pairwise(pair)

def compute_risk(self, votes_for_winner: int, loser: str, *args, **kwargs):
def compute_risk(self, votes_for_winner: int, pair: str, *args, **kwargs):
"""Return the hypothetical (Minerva) p-value if votes_for_winner were obtained in the most recent
round."""

sub_audit = self.sub_audits[loser]
sub_audit = self.sub_audits[pair]
tail_null = sum(sub_audit.distribution_null[votes_for_winner:])
tail_reported = sum(sub_audit.distribution_reported_tally[votes_for_winner:])
return tail_null / tail_reported

def get_risk_level(self):
"""Return the risk level of an interactive Athena audit.
"""Return the risk level of an interactive Athena audit.
Non-interactive and bulk Athena audits are not considered here since the sampled number of
reported winner ballots is not available.
"""
Expand Down
34 changes: 24 additions & 10 deletions src/r2b2/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def current_dist_null(self):

def _current_dist_null_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_size=False):
"""Update distribution_null for a single PairwiseAudit
Args:
sub_audit (PairwiseAudit): Pairwise subaudit for which to update distribution.
bulk_use_round_size (bool): Optional argument used by bulk methods. Since the bulk
Expand All @@ -223,6 +223,13 @@ def _current_dist_null_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_s
else:
round_draw = self.rounds[-1] - self.rounds[-2]
elif len(self.rounds) == 1:
if len(self.sample_ballots[sub_audit.sub_contest.reported_loser]) != len(self.rounds):
raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
len(self.rounds), len(self.sample_ballots[sub_audit.sub_contest.reported_loser]), sub_audit.sub_contest.reported_loser))
if len(self.sample_ballots[sub_audit.sub_contest.reported_winner]) != len(self.rounds):
raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
len(self.rounds), len(self.sample_ballots[sub_audit.sub_contest.reported_winner]), sub_audit.sub_contest.reported_winner))

round_draw = self.sample_ballots[sub_audit.sub_contest.reported_loser][0] + self.sample_ballots[
sub_audit.sub_contest.reported_winner][0]
else:
Expand Down Expand Up @@ -283,7 +290,7 @@ def current_dist_reported(self):

def _current_dist_reported_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_size=False):
"""Update dist_reported for a single PairwiseAudit.
Args:
sub_audit (PairwiseAudit): Pairwise subaudit for which to update distribution.
bulk_use_round_size (bool): Optional argument used by bulk methods. Since the bulk
Expand All @@ -300,6 +307,13 @@ def _current_dist_reported_pairwise(self, sub_audit: PairwiseAudit, bulk_use_rou
else:
round_draw = self.rounds[-1] - self.rounds[-2]
elif len(self.rounds) == 1:
if len(self.sample_ballots[sub_audit.sub_contest.reported_loser]) != len(self.rounds):
raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
len(self.rounds), len(self.sample_ballots[sub_audit.sub_contest.reported_loser]), sub_audit.sub_contest.reported_loser))
if len(self.sample_ballots[sub_audit.sub_contest.reported_winner]) != len(self.rounds):
raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
len(self.rounds), len(self.sample_ballots[sub_audit.sub_contest.reported_winner]), sub_audit.sub_contest.reported_winner))

round_draw = self.sample_ballots[sub_audit.sub_contest.reported_loser][0] + self.sample_ballots[
sub_audit.sub_contest.reported_winner][0]
else:
Expand Down Expand Up @@ -356,7 +370,7 @@ def truncate_dist_null(self):

def _truncate_dist_null_pairwise(self, pair: str):
"""Update risk schedule and truncate null distribution for a single subaudit.
Args:
pair (str): Dictionary key for subaudit (within the audit's subaudits) to truncate
distribution and update risk schedule.
Expand All @@ -373,12 +387,12 @@ def truncate_dist_reported(self):

def _truncate_dist_reported_pairwise(self, pair):
"""Update stopping prob schedule and truncate reported distribution for a single subaudit.
Args:
pair (str): Dictionary key for subaudit (within the audit's subaudits) to truncate
distribution and update stopping prob schedule.
"""

self.sub_audits[pair].stopping_prob_schedule.append(
sum(self.sub_audits[pair].distribution_reported_tally[self.sub_audits[pair].min_winner_ballots[-1]:]))
self.sub_audits[pair].distribution_reported_tally = self.sub_audits[pair].distribution_reported_tally[:self.sub_audits[pair].
Expand Down Expand Up @@ -532,7 +546,7 @@ def run(self, verbose: bool = False):
for r in range(1, curr_round):
click.echo('|{:^24}|{:^25}|'.format(r, '{:.12f}'.format(self.pvalue_schedule[r - 1])))
click.echo('+--------------------------------------------------+')

# Get next round sample size given desired stopping probability
while click.confirm('Would you like to enter a desired stopping probability for this round?'):
desired_sprob = click.prompt('Enter desired stopping probability for this round (.9 recommended)',
Expand Down Expand Up @@ -573,15 +587,15 @@ def run(self, verbose: bool = False):
click.echo('\n\n+----------------------------------------+')
click.echo('|{:^40}|'.format('Stopping Condition Met? {}'.format(self.stopped)))
click.echo('+----------------------------------------+')

# Determine if the audit should proceed
if self.stopped:
click.echo('\n\nAudit Complete.')
return
elif click.confirm('\nWould you like to force stop the audit'):
click.echo('\n\nAudit Complete: User stopped.')
return

# Compute kmin if audit has not stopped and truncate distributions
self.next_min_winner_ballots(verbose)
self.truncate_dist_null()
Expand All @@ -602,7 +616,7 @@ def __reset(self):
@abstractmethod
def get_min_sample_size(self, sub_audit: PairwiseAudit):
"""Get the minimum valid sample size in a sub audit
Args:
sub_audit (PairwiseAudit): Get minimum sample size for this sub_audit.
"""
Expand Down Expand Up @@ -643,7 +657,7 @@ def stopping_condition(self, verbose: bool = False) -> bool:
@abstractmethod
def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
"""Determine if pairwise subcontest meets stopping condition.
Args:
pair (str): Dictionary key referencing pairwise audit in audit's sub_audits.
Expand Down
12 changes: 6 additions & 6 deletions src/r2b2/minerva.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_min_sample_size(self, sub_audit: PairwiseAudit, min_sprob: float = 10**(
Args:
sub_audit (PairwiseAudit): Get minimum sample size for this subaudit.
min_sprob (float): Round sizes with below min_sprob stopping probability are excluded.
Returns:
int: The minimum sample size of the audit, adherent to the min_sprob.
"""
Expand Down Expand Up @@ -228,7 +228,7 @@ def next_sample_size(self, sprob=.9, verbose=False, *args, **kwargs):

def _next_sample_size_pairwise(self, sub_audit: PairwiseAudit, sprob=0.9):
"""Compute next sample size for a single pairwise subaudit.
Args:
sub_audit (PairwiseAudit): Compute the sample size for this sub_audit.
sprob (float): Get the sample size for this stopping probability.
Expand Down Expand Up @@ -272,7 +272,7 @@ def get_upper_bound(self, n, start):

def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
"""Check, without finding the kmin, whether the subaudit is complete.
Args:
pair (str): Dictionary key referencing pairwise subaudit to evaluate.
Expand All @@ -296,7 +296,7 @@ def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:

def next_min_winner_ballots_pairwise(self, sub_audit: PairwiseAudit) -> int:
"""Compute stopping size for a given subaudit.
Args:
sub_audit (PairwiseAudit): Compute next stopping size for this subaudit.
Expand Down Expand Up @@ -437,8 +437,8 @@ def compute_risk(self, votes_for_winner: int, pair: str, *args, **kwargs):
return tail_null / tail_reported

def get_risk_level(self):
"""Return the risk level of an interactive Minerva audit.
"""Return the risk level of an interactive Minerva audit.
Non-interactive and bulk Minerva audits are not considered here since the sampled number of
reported winner ballots is not available.
"""
Expand Down
15 changes: 15 additions & 0 deletions src/r2b2/tests/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,18 @@ def test_exceptions():
athena.compute_min_winner_ballots(athena.sub_audits['A-B'], [19])
with pytest.raises(ValueError):
athena.compute_min_winner_ballots(athena.sub_audits['A-B'], [10001])

contest2 = Contest(100, {'A': 60, 'B': 30}, 1, ['A'], ContestType.MAJORITY)
athena2 = Athena(0.1, 1, 1.0, contest2)
with pytest.raises(ValueError):
athena2.compute_min_winner_ballots(athena2.sub_audits['A-B'], [91])
athena2.rounds.append(10)
with pytest.raises(Exception):
athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'])
athena2.rounds = []
with pytest.raises(ValueError):
athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 0)
with pytest.raises(ValueError):
athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 200)
with pytest.raises(ValueError):
athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 0)
22 changes: 20 additions & 2 deletions src/r2b2/tests/test_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,12 @@ def test_simple_audit_execution():
def test_simple_audit_execute_rounds():
"""Test execute_round method."""
simpleaudit1 = SimpleAudit(0.1, 0.05, 0.5, True, default_contest)
simpleaudit1.execute_round(10, {'a': 10, 'b': 0})
assert simpleaudit1.execute_round(10, {'a': 10, 'b': 0})
assert simpleaudit1.rounds == [10]
assert simpleaudit1.sample_ballots == {'a': [10], 'b': [0]}
assert simpleaudit1.sub_audits['a-b'].stopped
assert simpleaudit1.stopped
assert simpleaudit1.execute_round(20, {'a': 20, 'b': 0})


def test_repr():
Expand Down Expand Up @@ -140,6 +141,12 @@ def test_pairwise_str():
assert str(simpleaudit.sub_audits['a-b']) == pw_audit_str


def test_pairwise_repr():
    """Pairwise sub-audits of two identically constructed audits share the same repr."""
    first_audit = SimpleAudit(0.1, 0.01, 0.1, True, default_contest)
    second_audit = SimpleAudit(0.1, 0.01, 0.1, True, default_contest)
    first_repr = repr(first_audit.sub_audits['a-b'])
    second_repr = repr(second_audit.sub_audits['a-b'])
    assert first_repr == second_repr


def test_initialization_errors():
"""Tests exceptions are raised correctly by __init__()."""
# alpha TypeError tests
Expand Down Expand Up @@ -192,7 +199,7 @@ def test_initialization_errors():
SimpleAudit(0.1, 0.05, 1.5, True, default_contest)


def test_expections():
def test_exceptions():
simpleaudit = SimpleAudit(0.1, 0.05, 0.1, True, default_contest)
with pytest.raises(Exception):
simpleaudit.current_dist_null()
Expand All @@ -208,6 +215,17 @@ def test_expections():
simpleaudit.current_dist_null()
with pytest.raises(Exception):
simpleaudit.current_dist_reported()
simpleaudit.rounds.append(2)
with pytest.raises(Exception):
simpleaudit.current_dist_null()
with pytest.raises(Exception):
simpleaudit.current_dist_reported()
simpleaudit.sample_ballots['b'].append(0)
with pytest.raises(Exception):
simpleaudit.current_dist_null()
with pytest.raises(Exception):
simpleaudit.current_dist_reported()

simpleaudit = SimpleAudit(0.1, 0.05, 0.1, True, default_contest)
simpleaudit.rounds.append(10)
with pytest.raises(Exception):
Expand Down
93 changes: 49 additions & 44 deletions src/r2b2/tests/test_minerva.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ def test_simple_minerva():
assert len(simple_minerva.rounds) == 0
assert len(simple_minerva.sub_audits) == 1
assert simple_minerva.get_risk_level() is None
simple_minerva.rounds.append(10)
simple_minerva.stopped = True
assert simple_minerva.next_sample_size() == 10
assert simple_minerva.next_sample_size(verbose=True) == (10, 0, 1)


def test_min_sample_size():
Expand Down Expand Up @@ -81,50 +85,33 @@ def test_minerva_second_round_estimate():
assert minerva2.next_sample_size() == 111257


# def test_minerva_georgia_senate_2020():
# ga_senate_race = Contest(2453876 + 2358432, {'A': 2453876, 'B': 2358432}, 1, ['A'], ContestType.PLURALITY)
#
# ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
# irrelevant_scale_up = 1.0238785631
# estimates = []
# for sprob in [.7, .8, .9]:
# estimates.append(math.ceil(irrelevant_scale_up * ga_senate_audit.next_sample_size(sprob=sprob)))
# assert estimates == [10486, 13205, 18005]
# ga_senate_audit.rounds.append(9903)
# ga_senate_audit.current_dist_null()
# ga_senate_audit.current_dist_reported()
# assert abs(ga_senate_audit.compute_risk(4950) - 0.527638189598802) < .000001
# ga_senate_audit.find_kmin(True)
# ga_senate_audit.truncate_dist_null()
# ga_senate_audit.truncate_dist_reported()
# ga_senate_audit.rounds.append(24000)
# ga_senate_audit.current_dist_null()
# ga_senate_audit.current_dist_reported()
# assert abs(ga_senate_audit.compute_risk(11900) - 2.663358309286826) < .000001
# ga_senate_audit.find_kmin(True)
# ga_senate_audit.truncate_dist_null()
# ga_senate_audit.truncate_dist_reported()
# ga_senate_audit.rounds.append(45600)
# ga_senate_audit.current_dist_null()
# assert abs(ga_senate_audit.compute_risk(24000)) < .000001
#
# ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
# ga_senate_audit.rounds.append(17605)
# ga_senate_audit.current_dist_null()
# ga_senate_audit.current_dist_reported()
# assert abs(ga_senate_audit.compute_risk(8900) - 0.081750333563781) < .000001
#
# ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
# ga_senate_audit.rounds.append(17605)
# ga_senate_audit.current_dist_null()
# ga_senate_audit.current_dist_reported()
# assert ga_senate_audit.compute_risk(17605) == 0
#
# ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
# ga_senate_audit.rounds.append(17605)
# ga_senate_audit.current_dist_null()
# ga_senate_audit.current_dist_reported()
# assert abs(ga_senate_audit.compute_risk(0) - 1) < .000001
def test_minerva_georgia_senate_2020():
    """Exercise Minerva on a two-candidate plurality contest.

    The tallies are presumably those of the 2020 Georgia senate race (per the
    test name). Checks the next-sample-size estimates, the p-value recorded
    after each of three successive rounds, and the risk level reported by
    three independent single-round audits.
    """
    winner_tally, loser_tally = 2453876, 2358432
    ga_senate_race = Contest(winner_tally + loser_tally, {'A': winner_tally, 'B': loser_tally}, 1, ['A'],
                             ContestType.PLURALITY)
    tolerance = .000001

    # Sample-size estimates for several stopping probabilities, scaled and rounded up.
    multi_round_audit = Minerva(.1, 1.0, ga_senate_race)
    irrelevant_scale_up = 1.0238785631
    estimates = [
        math.ceil(irrelevant_scale_up * multi_round_audit.next_sample_size(sprob=sprob))
        for sprob in [.7, .8, .9]
    ]
    assert estimates == [10486, 13205, 18005]

    # Each entry: (round size, ballots for 'A', p-value expected after the round).
    round_expectations = [
        (9903, 4950, 0.527638189598802),
        (24000, 11900, 2.663358309286826),
        (45600, 24000, 0),
    ]
    for round_size, votes_a, expected_pvalue in round_expectations:
        multi_round_audit.execute_round(round_size, {'A': votes_a, 'B': round_size - votes_a})
        assert abs(multi_round_audit.pvalue_schedule[-1] - expected_pvalue) < tolerance

    # Fresh single-round audits with a fixed round size and varying winner tallies.
    single_round_size = 17605

    close_audit = Minerva(.1, 1.0, ga_senate_race)
    close_audit.execute_round(single_round_size, {'A': 8900, 'B': single_round_size - 8900})
    assert abs(close_audit.get_risk_level() - 0.081750333563781) < tolerance

    sweep_audit = Minerva(.1, 1.0, ga_senate_race)
    sweep_audit.execute_round(single_round_size, {'A': single_round_size, 'B': 0})
    assert sweep_audit.get_risk_level() == 0

    shutout_audit = Minerva(.1, 1.0, ga_senate_race)
    shutout_audit.execute_round(single_round_size, {'A': 0, 'B': single_round_size})
    assert abs(shutout_audit.get_risk_level() - 1) < tolerance


def test_minerva_kmins():
Expand Down Expand Up @@ -266,3 +253,21 @@ def test_exceptions():
minerva.compute_min_winner_ballots(minerva.sub_audits['A-B'], [19])
with pytest.raises(ValueError):
minerva.compute_min_winner_ballots(minerva.sub_audits['A-B'], [10001])

contest2 = Contest(100, {'A': 60, 'B': 30}, 1, ['A'], ContestType.MAJORITY)
minerva2 = Minerva(.1, 1.0, contest2)
with pytest.raises(ValueError):
minerva2.compute_min_winner_ballots(minerva2.sub_audits['A-B'], [91])
minerva2.rounds = [10]
with pytest.raises(Exception):
minerva2.compute_all_min_winner_ballots(minerva2.sub_audits['A-B'])
minerva2.rounds = []
with pytest.raises(Exception):
minerva2.compute_all_min_winner_ballots(minerva2.sub_audits['A-B'], 200)

minerva = Minerva(.1, .1, contest)
with pytest.raises(Exception):
minerva.stopping_condition_pairwise('A-B')
minerva.rounds.append(10)
with pytest.raises(ValueError):
minerva.stopping_condition_pairwise('x')

0 comments on commit b863363

Please sign in to comment.