additional test coverage

added test coverage for athena, minerva, and abstract audit.
gwexploratoryaudits · Apr 7, 2021 · b863363 · b863363
1 parent 3a915f1
commit b863363
Show file tree

Hide file tree

Showing 6 changed files with 120 additions and 68 deletions.
diff --git a/src/r2b2/athena.py b/src/r2b2/athena.py
@@ -96,7 +96,7 @@ def next_sample_size(self, *args, **kwargs):
 
     def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
         """Check, without finding the kmin, whether the audit is complete.
-            
+
         Args:
             pair (str): Dictionary key referencing pairwise subaudit. Evaluate the stopping
                 condition for this subaudit.
@@ -126,7 +126,7 @@ def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
 
     def next_min_winner_ballots_pairwise(self, sub_audit: PairwiseAudit) -> int:
         """Compute stopping size for a given subaudit.
-        
+
         Args:
             sub_audit (PairwiseAudit): Compute next stopping size for this subaudit.
 
@@ -260,18 +260,18 @@ def compute_all_min_winner_ballots(self, sub_audit: PairwiseAudit, max_sample_si
             self._truncate_dist_null_pairwise(pair)
             self._truncate_dist_reported_pairwise(pair)
 
-    def compute_risk(self, votes_for_winner: int, loser: str, *args, **kwargs):
+    def compute_risk(self, votes_for_winner: int, pair: str, *args, **kwargs):
         """Return the hypothetical (Minerva) p-value if votes_for_winner were obtained in the most recent
         round."""
 
-        sub_audit = self.sub_audits[loser]
+        sub_audit = self.sub_audits[pair]
         tail_null = sum(sub_audit.distribution_null[votes_for_winner:])
         tail_reported = sum(sub_audit.distribution_reported_tally[votes_for_winner:])
         return tail_null / tail_reported
 
     def get_risk_level(self):
-        """Return the risk level of an interactive Athena audit. 
-        
+        """Return the risk level of an interactive Athena audit.
+
         Non-interactive and bulk Athena audits are not considered here since the sampled number of
         reported winner ballots is not available.
         """

diff --git a/src/r2b2/audit.py b/src/r2b2/audit.py
@@ -207,7 +207,7 @@ def current_dist_null(self):
 
     def _current_dist_null_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_size=False):
         """Update distribution_null for a single PairwiseAudit
-        
+
         Args:
             sub_audit (PairwiseAudit): Pairwise subaudit for which to update distribution.
             bulk_use_round_size (bool): Optional argument used by bulk methods. Since the bulk
@@ -223,6 +223,13 @@ def _current_dist_null_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_s
             else:
                 round_draw = self.rounds[-1] - self.rounds[-2]
         elif len(self.rounds) == 1:
+            # Each candidate of the pair must have exactly one recorded tally per round, because the
+            # draw size computed just below reads index 0 of both candidates' sample lists.
+            for candidate in (sub_audit.sub_contest.reported_loser, sub_audit.sub_contest.reported_winner):
+                if len(self.sample_ballots[candidate]) != len(self.rounds):
+                    raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
+                        len(self.rounds), len(self.sample_ballots[candidate]), candidate))
+
             round_draw = self.sample_ballots[sub_audit.sub_contest.reported_loser][0] + self.sample_ballots[
                 sub_audit.sub_contest.reported_winner][0]
         else:
@@ -283,7 +290,7 @@ def current_dist_reported(self):
 
     def _current_dist_reported_pairwise(self, sub_audit: PairwiseAudit, bulk_use_round_size=False):
         """Update dist_reported for a single PairwiseAudit.
-        
+
         Args:
             sub_audit (PairwiseAudit): Pairwise subaudit for which to update distriution.
             bulk_use_round_size (bool): Optional argument used by bulk methods. Since the bulk
@@ -300,6 +307,13 @@ def _current_dist_reported_pairwise(self, sub_audit: PairwiseAudit, bulk_use_rou
             else:
                 round_draw = self.rounds[-1] - self.rounds[-2]
         elif len(self.rounds) == 1:
+            # Each candidate of the pair must have exactly one recorded tally per round, because the
+            # draw size computed just below reads index 0 of both candidates' sample lists.
+            for candidate in (sub_audit.sub_contest.reported_loser, sub_audit.sub_contest.reported_winner):
+                if len(self.sample_ballots[candidate]) != len(self.rounds):
+                    raise Exception('Currently {} rounds, but only {} samples for {}.'.format(
+                        len(self.rounds), len(self.sample_ballots[candidate]), candidate))
+
             round_draw = self.sample_ballots[sub_audit.sub_contest.reported_loser][0] + self.sample_ballots[
                 sub_audit.sub_contest.reported_winner][0]
         else:
@@ -356,7 +370,7 @@ def truncate_dist_null(self):
 
     def _truncate_dist_null_pairwise(self, pair: str):
         """Update risk schedule and truncate null distribution for a single subaudit.
-        
+
         Args:
             pair (str): Dictionary key for subaudit (within the audit's subaudits) to truncate
                 distribution and update risk schedule.
@@ -373,12 +387,12 @@ def truncate_dist_reported(self):
 
     def _truncate_dist_reported_pairwise(self, pair):
         """Update stopping prob schedule and truncate reported distribution for a single subaudit.
-        
+
         Args:
             pair (str): Dictionary key for subaudit (within the audit's subaudits) to truncate
                 distribution and update stopping prob schedule.
         """
-        
+
         self.sub_audits[pair].stopping_prob_schedule.append(
             sum(self.sub_audits[pair].distribution_reported_tally[self.sub_audits[pair].min_winner_ballots[-1]:]))
         self.sub_audits[pair].distribution_reported_tally = self.sub_audits[pair].distribution_reported_tally[:self.sub_audits[pair].
@@ -532,7 +546,7 @@ def run(self, verbose: bool = False):
                 for r in range(1, curr_round):
                     click.echo('|{:^24}|{:^25}|'.format(r, '{:.12f}'.format(self.pvalue_schedule[r - 1])))
                 click.echo('+--------------------------------------------------+')
-            
+
             # Get next round sample size given desired stopping probability
             while click.confirm('Would you like to enter a desired stopping probability for this round?'):
                 desired_sprob = click.prompt('Enter desired stopping probability for this round (.9 recommended)',
@@ -573,15 +587,15 @@ def run(self, verbose: bool = False):
             click.echo('\n\n+----------------------------------------+')
             click.echo('|{:^40}|'.format('Stopping Condition Met? {}'.format(self.stopped)))
             click.echo('+----------------------------------------+')
-            
+
             # Determine if the audit should proceed
             if self.stopped:
                 click.echo('\n\nAudit Complete.')
                 return
             elif click.confirm('\nWould you like to force stop the audit'):
                 click.echo('\n\nAudit Complete: User stopped.')
                 return
-            
+
             # Compute kmin if audit has not stopped and truncate distributions
             self.next_min_winner_ballots(verbose)
             self.truncate_dist_null()
@@ -602,7 +616,7 @@ def __reset(self):
     @abstractmethod
     def get_min_sample_size(self, sub_audit: PairwiseAudit):
         """Get the minimum valid sample size in a sub audit
-        
+
         Args:
             sub_audit (PairwiseAudit): Get minimum sample size for this sub_audit.
         """
@@ -643,7 +657,7 @@ def stopping_condition(self, verbose: bool = False) -> bool:
     @abstractmethod
     def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
         """Determine if pairwise subcontest meets stopping condition.
-        
+
         Args:
             pair (str): Dictionary key referencing pairwise audit in audit's sub_audits.
 

diff --git a/src/r2b2/minerva.py b/src/r2b2/minerva.py
@@ -41,7 +41,7 @@ def get_min_sample_size(self, sub_audit: PairwiseAudit, min_sprob: float = 10**(
         Args:
             sub_audit (PairwiseAudit): Get minimum sample size for this subaudit.
             min_sprob (float): Round sizes with below min_sprob stopping probability are excluded.
-        
+
         Returns:
             int: The minimum sample size of the audit, adherent to the min_sprob.
         """
@@ -228,7 +228,7 @@ def next_sample_size(self, sprob=.9, verbose=False, *args, **kwargs):
 
     def _next_sample_size_pairwise(self, sub_audit: PairwiseAudit, sprob=0.9):
         """Compute next sample size for a single pairwise subaudit.
-        
+
         Args:
             sub_audit (PairwiseAudit): Compute the sample size for this sub_audit.
             sprob (float): Get the sample size for this stopping probability.
@@ -272,7 +272,7 @@ def get_upper_bound(self, n, start):
 
     def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
         """Check, without finding the kmin, whether the subaudit is complete.
-        
+
         Args:
             pair (str): Dictionary key referencing pairwise subaudit to evaluate.
 
@@ -296,7 +296,7 @@ def stopping_condition_pairwise(self, pair: str, verbose: bool = False) -> bool:
 
     def next_min_winner_ballots_pairwise(self, sub_audit: PairwiseAudit) -> int:
         """Compute stopping size for a given subaudit.
-        
+
         Args:
             sub_audit (PairwiseAudit): Compute next stopping size for this subaudit.
 
@@ -437,8 +437,8 @@ def compute_risk(self, votes_for_winner: int, pair: str, *args, **kwargs):
         return tail_null / tail_reported
 
     def get_risk_level(self):
-        """Return the risk level of an interactive Minerva audit. 
-        
+        """Return the risk level of an interactive Minerva audit.
+
         Non-interactive and bulk Minerva audits are not considered here since the sampled number of
         reported winner ballots is not available.
         """

diff --git a/src/r2b2/tests/test_athena.py b/src/r2b2/tests/test_athena.py
@@ -123,3 +123,18 @@ def test_exceptions():
         athena.compute_min_winner_ballots(athena.sub_audits['A-B'], [19])
     with pytest.raises(ValueError):
         athena.compute_min_winner_ballots(athena.sub_audits['A-B'], [10001])
+
+    contest2 = Contest(100, {'A': 60, 'B': 30}, 1, ['A'], ContestType.MAJORITY)
+    athena2 = Athena(0.1, 1, 1.0, contest2)
+    with pytest.raises(ValueError):
+        athena2.compute_min_winner_ballots(athena2.sub_audits['A-B'], [91])
+    athena2.rounds.append(10)
+    with pytest.raises(Exception):
+        athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'])
+    athena2.rounds = []
+    with pytest.raises(ValueError):
+        athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 0)
+    with pytest.raises(ValueError):
+        athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 200)
+    with pytest.raises(ValueError):
+        athena2.compute_all_min_winner_ballots(athena2.sub_audits['A-B'], 0)
diff --git a/src/r2b2/tests/test_audit.py b/src/r2b2/tests/test_audit.py
@@ -105,11 +105,12 @@ def test_simple_audit_execution():
 def test_simple_audit_execute_rounds():
     """Test execute_round method."""
     simpleaudit1 = SimpleAudit(0.1, 0.05, 0.5, True, default_contest)
-    simpleaudit1.execute_round(10, {'a': 10, 'b': 0})
+    assert simpleaudit1.execute_round(10, {'a': 10, 'b': 0})
     assert simpleaudit1.rounds == [10]
     assert simpleaudit1.sample_ballots == {'a': [10], 'b': [0]}
     assert simpleaudit1.sub_audits['a-b'].stopped
     assert simpleaudit1.stopped
+    assert simpleaudit1.execute_round(20, {'a': 20, 'b': 0})
 
 
 def test_repr():
@@ -140,6 +141,12 @@ def test_pairwise_str():
     assert str(simpleaudit.sub_audits['a-b']) == pw_audit_str
 
 
+def test_pairwise_repr():
+    simpleaudit = SimpleAudit(0.1, 0.01, 0.1, True, default_contest)
+    simpleaudit2 = SimpleAudit(0.1, 0.01, 0.1, True, default_contest)
+    assert repr(simpleaudit.sub_audits['a-b']) == repr(simpleaudit2.sub_audits['a-b'])
+
+
 def test_initialization_errors():
     """Tests exceptions are raised correctly by __init__()."""
     # alpha TypeError tests
@@ -192,7 +199,7 @@ def test_initialization_errors():
         SimpleAudit(0.1, 0.05, 1.5, True, default_contest)
 
 
-def test_expections():
+def test_exceptions():
     simpleaudit = SimpleAudit(0.1, 0.05, 0.1, True, default_contest)
     with pytest.raises(Exception):
         simpleaudit.current_dist_null()
@@ -208,6 +215,17 @@ def test_expections():
         simpleaudit.current_dist_null()
     with pytest.raises(Exception):
         simpleaudit.current_dist_reported()
+    simpleaudit.rounds.append(2)
+    with pytest.raises(Exception):
+        simpleaudit.current_dist_null()
+    with pytest.raises(Exception):
+        simpleaudit.current_dist_reported()
+    simpleaudit.sample_ballots['b'].append(0)
+    with pytest.raises(Exception):
+        simpleaudit.current_dist_null()
+    with pytest.raises(Exception):
+        simpleaudit.current_dist_reported()
+
     simpleaudit = SimpleAudit(0.1, 0.05, 0.1, True, default_contest)
     simpleaudit.rounds.append(10)
     with pytest.raises(Exception):

diff --git a/src/r2b2/tests/test_minerva.py b/src/r2b2/tests/test_minerva.py
@@ -22,6 +22,10 @@ def test_simple_minerva():
     assert len(simple_minerva.rounds) == 0
     assert len(simple_minerva.sub_audits) == 1
     assert simple_minerva.get_risk_level() is None
+    simple_minerva.rounds.append(10)
+    simple_minerva.stopped = True
+    assert simple_minerva.next_sample_size() == 10
+    assert simple_minerva.next_sample_size(verbose=True) == (10, 0, 1)
 
 
 def test_min_sample_size():
@@ -81,50 +85,33 @@ def test_minerva_second_round_estimate():
     assert minerva2.next_sample_size() == 111257
 
 
-# def test_minerva_georgia_senate_2020():
-#     ga_senate_race = Contest(2453876 + 2358432, {'A': 2453876, 'B': 2358432}, 1, ['A'], ContestType.PLURALITY)
-#
-#     ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
-#     irrelevant_scale_up = 1.0238785631
-#     estimates = []
-#     for sprob in [.7, .8, .9]:
-#         estimates.append(math.ceil(irrelevant_scale_up * ga_senate_audit.next_sample_size(sprob=sprob)))
-#     assert estimates == [10486, 13205, 18005]
-#     ga_senate_audit.rounds.append(9903)
-#     ga_senate_audit.current_dist_null()
-#     ga_senate_audit.current_dist_reported()
-#     assert abs(ga_senate_audit.compute_risk(4950) - 0.527638189598802) < .000001
-#     ga_senate_audit.find_kmin(True)
-#     ga_senate_audit.truncate_dist_null()
-#     ga_senate_audit.truncate_dist_reported()
-#     ga_senate_audit.rounds.append(24000)
-#     ga_senate_audit.current_dist_null()
-#     ga_senate_audit.current_dist_reported()
-#     assert abs(ga_senate_audit.compute_risk(11900) - 2.663358309286826) < .000001
-#     ga_senate_audit.find_kmin(True)
-#     ga_senate_audit.truncate_dist_null()
-#     ga_senate_audit.truncate_dist_reported()
-#     ga_senate_audit.rounds.append(45600)
-#     ga_senate_audit.current_dist_null()
-#     assert abs(ga_senate_audit.compute_risk(24000)) < .000001
-#
-#     ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
-#     ga_senate_audit.rounds.append(17605)
-#     ga_senate_audit.current_dist_null()
-#     ga_senate_audit.current_dist_reported()
-#     assert abs(ga_senate_audit.compute_risk(8900) - 0.081750333563781) < .000001
-#
-#     ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
-#     ga_senate_audit.rounds.append(17605)
-#     ga_senate_audit.current_dist_null()
-#     ga_senate_audit.current_dist_reported()
-#     assert ga_senate_audit.compute_risk(17605) == 0
-#
-#     ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
-#     ga_senate_audit.rounds.append(17605)
-#     ga_senate_audit.current_dist_null()
-#     ga_senate_audit.current_dist_reported()
-#     assert abs(ga_senate_audit.compute_risk(0) - 1) < .000001
+def test_minerva_georgia_senate_2020():
+    ga_senate_race = Contest(2453876 + 2358432, {'A': 2453876, 'B': 2358432}, 1, ['A'], ContestType.PLURALITY)
+
+    ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
+    irrelevant_scale_up = 1.0238785631
+    estimates = []
+    for sprob in [.7, .8, .9]:
+        estimates.append(math.ceil(irrelevant_scale_up * ga_senate_audit.next_sample_size(sprob=sprob)))
+    assert estimates == [10486, 13205, 18005]
+    ga_senate_audit.execute_round(9903, {'A': 4950, 'B': 9903-4950})
+    assert abs(ga_senate_audit.pvalue_schedule[-1] - 0.527638189598802) < .000001
+    ga_senate_audit.execute_round(24000, {'A': 11900, 'B': 24000-11900})
+    assert abs(ga_senate_audit.pvalue_schedule[-1] - 2.663358309286826) < .000001
+    ga_senate_audit.execute_round(45600, {'A': 24000, 'B': 45600-24000})
+    assert abs(ga_senate_audit.pvalue_schedule[-1]) < 0.000001
+
+    ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
+    ga_senate_audit.execute_round(17605, {'A': 8900, 'B': 17605-8900})
+    assert abs(ga_senate_audit.get_risk_level() - 0.081750333563781) < .000001
+
+    ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
+    ga_senate_audit.execute_round(17605, {'A': 17605, 'B': 0})
+    assert ga_senate_audit.get_risk_level() == 0
+
+    ga_senate_audit = Minerva(.1, 1.0, ga_senate_race)
+    ga_senate_audit.execute_round(17605, {'A': 0, 'B': 17605})
+    assert abs(ga_senate_audit.get_risk_level() - 1) < 0.000001
 
 
 def test_minerva_kmins():
@@ -266,3 +253,21 @@ def test_exceptions():
         minerva.compute_min_winner_ballots(minerva.sub_audits['A-B'], [19])
     with pytest.raises(ValueError):
         minerva.compute_min_winner_ballots(minerva.sub_audits['A-B'], [10001])
+
+    contest2 = Contest(100, {'A': 60, 'B': 30}, 1, ['A'], ContestType.MAJORITY)
+    minerva2 = Minerva(.1, 1.0, contest2)
+    with pytest.raises(ValueError):
+        minerva2.compute_min_winner_ballots(minerva2.sub_audits['A-B'], [91])
+    minerva2.rounds = [10]
+    with pytest.raises(Exception):
+        minerva2.compute_all_min_winner_ballots(minerva2.sub_audits['A-B'])
+    minerva2.rounds = []
+    with pytest.raises(Exception):
+        minerva2.compute_all_min_winner_ballots(minerva2.sub_audits['A-B'], 200)
+
+    minerva = Minerva(.1, .1, contest)
+    with pytest.raises(Exception):
+        minerva.stopping_condition_pairwise('A-B')
+    minerva.rounds.append(10)
+    with pytest.raises(ValueError):
+        minerva.stopping_condition_pairwise('x')