# Maine Rank Choice Voting (RCV) Analysis 

* Data source: https://www.maine.gov/sos/cec/elec/results/results18.html#Nov6

In [1]:
import pandas as pd

ballot_df = pd.read_csv('data/ME_all.csv')
# ballot_df = ballot_df[ballot_df.index < 1000]
n_ballots = ballot_df.shape[0]
print("n_ballots:", n_ballots)
ballot_df.head()

n_ballots: 296077


Unnamed: 0,record,precinct,ballot_style,rank1,rank2,rank3,rank4,rank5
0,1,Fayette,CAN Ballot Style 130,"REP Poliquin, Bruce (5931)","REP Poliquin, Bruce","REP Poliquin, Bruce",undervote,undervote
1,2,Fayette,CAN Ballot Style 130,"REP Poliquin, Bruce (5931)",undervote,undervote,undervote,undervote
2,3,Fayette,CAN Ballot Style 130,"DEM Golden, Jared F. (5471)","Bond, Tiffany L.",undervote,undervote,undervote
3,4,Fayette,CAN Ballot Style 130,"REP Poliquin, Bruce (5931)","DEM Golden, Jared F.","Bond, Tiffany L.","DEM Golden, Jared F.","Hoar, William R.S."
4,6,Fayette,CAN Ballot Style 130,"REP Poliquin, Bruce (5931)",undervote,undervote,undervote,undervote


In [2]:
ballot_df['rank1'].value_counts()

REP Poliquin, Bruce (5931)     133793
DEM Golden, Jared F. (5471)    131003
Bond, Tiffany L.                16415
Hoar, William R.S.               6782
undervote                        6641
DEM Golden, Jared F.              819
overvote                          424
REP Poliquin, Bruce               200
Name: rank1, dtype: int64

## Data Cleaning

In [3]:
# remove ' (5931)'' from 'REP Poliquin, Bruce (5931)' and ' (5471)' from DEM Golden, Jared F. (5471)
# now each candidate has exaclty one string representing a vote for them.
ballot_df['rank1'] = ballot_df['rank1'].map(lambda choice: choice.split(' (')[0].strip())
ballot_df.rank1.value_counts()

REP Poliquin, Bruce     133993
DEM Golden, Jared F.    131822
Bond, Tiffany L.         16415
Hoar, William R.S.        6782
undervote                 6641
overvote                   424
Name: rank1, dtype: int64

In [4]:
# apply the same cleaning to all other ranks
for index in range(2, 6):
    rank = 'rank{}'.format(index)
    print("Cleaning", rank)
    ballot_df[rank] = ballot_df[rank].map(lambda choice: choice.split(' (')[0].strip())

Cleaning rank2
Cleaning rank3
Cleaning rank4
Cleaning rank5


In [5]:
# ensure strings are consistent across all columns
unique_candidate_set = set()
for index in range(1, 6):
    rank = 'rank{}'.format(index)
    rank_i_unique_candidate_set = set(ballot_df[rank].unique())
    # take the union of unique candidates so far and unique candidates in this round
    unique_candidate_set = unique_candidate_set | rank_i_unique_candidate_set 
unique_candidate_set

{'Bond, Tiffany L.',
 'DEM Golden, Jared F.',
 'Hoar, William R.S.',
 'REP Poliquin, Bruce',
 'overvote',
 'undervote'}

In [6]:
ballot_df.columns

Index(['record', 'precinct', 'ballot_style', 'rank1', 'rank2', 'rank3',
       'rank4', 'rank5'],
      dtype='object')

We now see that across all ranks, each candidate is represented by exaclty one unique string.

## Procedure

In [7]:
continuing_candidate_set = {
    'Bond, Tiffany L.',
    'DEM Golden, Jared F.',
    'Hoar, William R.S.',
    'REP Poliquin, Bruce'
}

>D. "Exhausted ballot" means a ballot that does not rank any continuing candidate, contains an overvote at the highest continuing ranking or contains 2 or more sequential skipped rankings before its highest continuing ranking. 

In [8]:
# Constants
OVERVOTE = 'overvote'
UNDERVOTE = 'undervote'

# Helper functions
def ballot_has_two_consecutive_skipped_rankings(ballot):
    highest_continuing_rank = ballot['highest_continuing_rank']
    if highest_continuing_rank < 3:
        return False
    else:
        rank_one_before = "rank{}".format(highest_continuing_rank - 1)
        rank_two_before = "rank{}".format(highest_continuing_rank - 2)
    return (ballot[rank_one_before] == UNDERVOTE) and (ballot[rank_two_before] == UNDERVOTE)

def ballot_exhausted_for_overvote(ballot):
    highest_continuing_rank = ballot['highest_continuing_rank']
    if highest_continuing_rank in {1,2,3,4,5}:
        return ballot["rank{}".format(highest_continuing_rank)] == OVERVOTE
    else:
        return False

def mark_exhausted_ballots(continuing_ballot_df, continuing_candidate_set):
    # determine which ballot rank a continuing candidate according to https://legislature.maine.gov/statutes/21-A/title21-Asec723-A.html
    ranks_a_continuing_candidate = (continuing_ballot_df['rank1'].isin(continuing_candidate_set)
        | continuing_ballot_df['rank2'].isin(continuing_candidate_set)
        | continuing_ballot_df['rank3'].isin(continuing_candidate_set)
        | continuing_ballot_df['rank4'].isin(continuing_candidate_set)
        | continuing_ballot_df['rank5'].isin(continuing_candidate_set))
    continuing_ballot_df['does_not_rank_any_continuing_candidate'] = ~ranks_a_continuing_candidate
    n_exhausted_no_rank = continuing_ballot_df['does_not_rank_any_continuing_candidate'].value_counts()[True]
    print(n_exhausted_no_rank, "ballots exhausted for not ranking any continuing candidate. ({:.2%} of total ballots)".format(n_exhausted_no_rank/n_ballots))
    
    # determine the highest continuing rank according to https://legislature.maine.gov/statutes/21-A/title21-Asec723-A.html
    # E. "Highest continuing ranking" means the highest ranking on a voter's ballot for a continuing candidate.   [IB 2015, c. 3, §5 (NEW).]
    continuing_ballot_df['highest_continuing_rank'] = 6 # defaults to 6 so that if the ballot is all undervotes, then ballot_has_two_consecutive_skipped_rankings will catch it
    for rank_idx in range(5, 0, -1):
        continuing_ballot_df['rank_choice_in_continuing_candidate_set'] = continuing_ballot_df["rank{}".format(rank_idx)].isin(continuing_candidate_set | {OVERVOTE})
        continuing_ballot_df.loc[continuing_ballot_df['rank_choice_in_continuing_candidate_set'], 'highest_continuing_rank'] = rank_idx
    
    # determine which ballots have an overvote at their highest continuing ranking according to https://legislature.maine.gov/statutes/21-A/title21-Asec723-A.html
    # H. "Overvote" means a circumstance in which a voter has ranked more than one candidate at the same ranking.   [IB 2015, c. 3, §5 (NEW).]
    continuing_ballot_df['overvote'] = continuing_ballot_df.apply(lambda ballot: ballot_exhausted_for_overvote(ballot), axis=1)
    n_exhausted_overvote = continuing_ballot_df['overvote'].value_counts()[True]
    print(n_exhausted_overvote, "ballots exhausted for overvotes. ({:.2%} of total ballots)".format(n_exhausted_overvote/n_ballots))
     
    continuing_ballot_df['two_consecutive_skipped_rankings'] = continuing_ballot_df.apply(lambda ballot: ballot_has_two_consecutive_skipped_rankings(ballot), axis=1)
    n_exhausted_skipped = continuing_ballot_df['two_consecutive_skipped_rankings'].value_counts()[True]    
    print(n_exhausted_skipped, "ballots exhausted for two consecutive skipped rankings. ({:.2%} of total ballots)".format(n_exhausted_skipped/n_ballots))
    
    continuing_ballot_df['exhausted'] = continuing_ballot_df['overvote'] | continuing_ballot_df['two_consecutive_skipped_rankings'] | continuing_ballot_df['does_not_rank_any_continuing_candidate']
    n_exhausted = continuing_ballot_df['exhausted'].value_counts()[True]
    print(n_exhausted, "total ballots exhausted.({:.2%} of total ballots)".format(n_exhausted/n_ballots))
    return continuing_ballot_df

## Procedure
Note: in its current form this script follows the "procedure" section of the [statute](https://legislature.maine.gov/statutes/21-A/title21-Asec723-A.html). Therefore it does not implement batch elimination, which seems to have been in 2018 used based on the Maine SOS [summary report](https://www.maine.gov/sos/cec/elec/results/2018/updated-summary-report-CD2.xls) having only two rounds. This doesn't effect the outcome, but if one wanted to make arguements based on number of ballots exhausted per round it would make sense to implement batch elimination.

>Except as provided in subsections 3 and 4, the following procedures are used to determine the winner of an election determined by ranked-choice voting. The ranked‑choice voting count must proceed in rounds. In each round, the number of votes for each continuing candidate must be counted. Each continuing ballot counts as one vote for its highest-ranked continuing candidate for that round. Exhausted ballots are not counted for any continuing candidate. The round then ends with one of the following 2 potential outcomes.

>A. If there are 2 or fewer continuing candidates, the candidate with the most votes is declared the winner of the election.   [IB 2015, c. 3, §5 (NEW).]

>B. If there are more than 2 continuing candidates, the last-place candidate is defeated and a new round begins.   [IB 2015, c. 3, §5 (NEW).]

In [9]:
decided = False
round_index = 1
votes_by_round_df = pd.DataFrame()
continuing_ballot_df = ballot_df
round_to_ballot_df = {}

def get_highest_ranked_candidate(ballot):
    rank_idx = ballot['highest_continuing_rank']
    if rank_idx in {1,2,3,4,5}:
        return ballot["rank{}".format(rank_idx)]
    else:
        return False

while not decided:
    print("\nStarting Round", round_index)

    # determine which ballots are exhausted
    print("Determining which ballots are exhausted")
    continuing_ballot_df_exhausted_marked = mark_exhausted_ballots(continuing_ballot_df, continuing_candidate_set)
    
    # determine the highest ranked continuing candidate for each ballot
    continuing_ballot_df['highest_ranked_continuing_candidate'] = continuing_ballot_df.apply(lambda ballot: get_highest_ranked_candidate(ballot), axis=1)  
    # record this round's ballot dataframe for analysis
    round_to_ballot_df[round_index] = continuing_ballot_df_exhausted_marked
    
    # remove exhausted ballots from consideration
    continuing_ballot_df = pd.DataFrame(continuing_ballot_df_exhausted_marked[~continuing_ballot_df_exhausted_marked['exhausted']])
    assert set(continuing_ballot_df['highest_ranked_continuing_candidate'].unique()) <= continuing_candidate_set
                                                                              
    # for each continuing ballot, assign one vote for its highest ranked continuing candidate                              
    votes_by_round_df[round_index] = continuing_ballot_df['highest_ranked_continuing_candidate'].value_counts()

    if len(continuing_candidate_set) <= 2:
        decided = True
        winning_candidate = votes_by_round_df[votes_by_round_df[round_index]==votes_by_round_df[round_index].max()].index[0]
        print(winning_candidate, "won!")
    else:
        # Eliminate the continuing candidate with the fewest
        eliminated_candidate = votes_by_round_df[votes_by_round_df[round_index]==votes_by_round_df[round_index].min()].index[0]
        continuing_candidate_set.remove(eliminated_candidate)
        print(eliminated_candidate, "was eliminated.")
    round_index += 1
                                                                              
votes_by_round_df


Starting Round 1
Determining which ballots are exhausted
5928 ballots exhausted for not ranking any continuing candidate. (2.00% of total ballots)
462 ballots exhausted for overvotes. (0.16% of total ballots)
6018 ballots exhausted for two consecutive skipped rankings. (2.03% of total ballots)
6453 total ballots exhausted.(2.18% of total ballots)
Hoar, William R.S. was eliminated.

Starting Round 2
Determining which ballots are exhausted
2130 ballots exhausted for not ranking any continuing candidate. (0.72% of total ballots)
27 ballots exhausted for overvotes. (0.01% of total ballots)
2044 ballots exhausted for two consecutive skipped rankings. (0.69% of total ballots)
2162 total ballots exhausted.(0.73% of total ballots)
Bond, Tiffany L. was eliminated.

Starting Round 3
Determining which ballots are exhausted
5991 ballots exhausted for not ranking any continuing candidate. (2.02% of total ballots)
123 ballots exhausted for overvotes. (0.04% of total ballots)
5728 ballots exhausted 

Unnamed: 0,1,2,3
"REP Poliquin, Bruce",134184,135073.0,138932.0
"DEM Golden, Jared F.",132013,133216.0,142442.0
"Bond, Tiffany L.",16552,19173.0,
"Hoar, William R.S.",6875,,


In [26]:
not_exhausted_idxs = continuing_ballot_df.index

In [10]:
# Constants
OVERVOTE = 'overvote'
UNDERVOTE = 'undervote'
RANKS = ['rank1', 'rank2', 'rank3','rank4', 'rank5']
CANDIDATE_SET = {
    'Bond, Tiffany L.',
    'DEM Golden, Jared F.',
    'Hoar, William R.S.',
    'REP Poliquin, Bruce'
}

# remove unnessecary columns
ballot_df = ballot_df[RANKS]
ballot_df.head(10)

Unnamed: 0,rank1,rank2,rank3,rank4,rank5
0,"REP Poliquin, Bruce","REP Poliquin, Bruce","REP Poliquin, Bruce",undervote,undervote
1,"REP Poliquin, Bruce",undervote,undervote,undervote,undervote
2,"DEM Golden, Jared F.","Bond, Tiffany L.",undervote,undervote,undervote
3,"REP Poliquin, Bruce","DEM Golden, Jared F.","Bond, Tiffany L.","DEM Golden, Jared F.","Hoar, William R.S."
4,"REP Poliquin, Bruce",undervote,undervote,undervote,undervote
5,"Hoar, William R.S.","REP Poliquin, Bruce","Bond, Tiffany L.","DEM Golden, Jared F.",undervote
6,undervote,undervote,undervote,undervote,undervote
7,"DEM Golden, Jared F.","Bond, Tiffany L.","Hoar, William R.S.","REP Poliquin, Bruce",undervote
8,"REP Poliquin, Bruce",undervote,undervote,undervote,undervote
9,"REP Poliquin, Bruce","Hoar, William R.S.","Bond, Tiffany L.","DEM Golden, Jared F.",undervote


## Table 2: Non-Strategic Votes

In [11]:
category_to_number_of_ballots = {}

Skipped at least one round of voting between candidates (e.g., Candidate A, blank, Candidate B)

In [12]:
def skipped_at_least_one_round(ballot):
    candidate_ranked = False
    for idx, rank in enumerate(RANKS):
        if candidate_ranked and ballot[rank] == UNDERVOTE and ballot[RANKS[idx+1]] in CANDIDATE_SET:
            return True
        if idx == 3: # next iteration would be an index out of bounds
            return False
        candidate_ranked = ballot[rank] in CANDIDATE_SET
        
skipped_at_least_one_round_mask = ballot_df.apply(skipped_at_least_one_round, axis=1)
skipped_at_least_one_round_ballots = ballot_df[skipped_at_least_one_round_mask]
n_skipped_at_least_one_round = skipped_at_least_one_round_ballots.shape[0]
category_to_number_of_ballots['1.) n_skipped_at_least_one_round'] = n_skipped_at_least_one_round
print("n_skipped_at_least_one_round:", n_skipped_at_least_one_round)
skipped_at_least_one_round_ballots.head(10)

n_skipped_at_least_one_round: 11569


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
31,"Bond, Tiffany L.",undervote,"Hoar, William R.S.","REP Poliquin, Bruce","DEM Golden, Jared F."
38,"Bond, Tiffany L.","DEM Golden, Jared F.","Hoar, William R.S.",undervote,"REP Poliquin, Bruce"
73,"REP Poliquin, Bruce",undervote,"Bond, Tiffany L.","DEM Golden, Jared F.",undervote
127,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,"Bond, Tiffany L."
186,"DEM Golden, Jared F.","Bond, Tiffany L.","Hoar, William R.S.",undervote,"REP Poliquin, Bruce"
189,"REP Poliquin, Bruce",undervote,"Hoar, William R.S.","Bond, Tiffany L.","DEM Golden, Jared F."
231,"REP Poliquin, Bruce","Bond, Tiffany L.",undervote,"DEM Golden, Jared F.","Hoar, William R.S."
273,"DEM Golden, Jared F.","Bond, Tiffany L.","Hoar, William R.S.",undervote,"REP Poliquin, Bruce"
290,"REP Poliquin, Bruce",undervote,"Hoar, William R.S.",undervote,"Bond, Tiffany L."
306,"DEM Golden, Jared F.","Bond, Tiffany L.",undervote,"Hoar, William R.S.","REP Poliquin, Bruce"


Filled out at least one round but left the first round blank (e.g.,blank, Candidate A, Candidate B)

In [29]:
def filled_out_at_least_one_round_but_left_the_first_blank(ballot):
    first_round_blank = ballot['rank1'] == UNDERVOTE
    filled_out_at_least_one_round = len(CANDIDATE_SET.intersection(ballot)) > 0
    return filled_out_at_least_one_round and first_round_blank

filled_out_at_least_one_round_but_left_the_first_blank_mask = ballot_df.apply(filled_out_at_least_one_round_but_left_the_first_blank, axis=1)
filled_out_at_least_one_round_but_left_the_first_blank_ballots = ballot_df[filled_out_at_least_one_round_but_left_the_first_blank_mask]
n_filled_out_at_least_one_round_but_left_the_first_blank = filled_out_at_least_one_round_but_left_the_first_blank_ballots.shape[0]
category_to_number_of_ballots['2.) n_filled_out_at_least_one_round_but_left_the_first_blank'] = n_filled_out_at_least_one_round_but_left_the_first_blank
print("n_filled_out_at_least_one_round_but_left_the_first_blank:", n_filled_out_at_least_one_round_but_left_the_first_blank)
filled_out_at_least_one_round_but_left_the_first_blank_ballots.head(10)

n_filled_out_at_least_one_round_but_left_the_first_blank: 902


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
269,undervote,undervote,undervote,undervote,"REP Poliquin, Bruce"
671,undervote,"Bond, Tiffany L.","Hoar, William R.S.","DEM Golden, Jared F.",undervote
743,undervote,undervote,undervote,"REP Poliquin, Bruce",undervote
846,undervote,"REP Poliquin, Bruce",undervote,undervote,undervote
1034,undervote,undervote,undervote,"Hoar, William R.S.",undervote
1225,undervote,undervote,"DEM Golden, Jared F.",undervote,undervote
1830,undervote,"DEM Golden, Jared F.",undervote,undervote,undervote
2191,undervote,undervote,"Hoar, William R.S.",undervote,undervote
2296,undervote,"DEM Golden, Jared F.",undervote,undervote,undervote
2354,undervote,"Hoar, William R.S.","Bond, Tiffany L.","DEM Golden, Jared F.",undervote


Ranked the same candidate in non-consecutive rounds (e.g., Candidate A, Candidate B, Candidate A)

In [30]:
def ranked_the_same_candidate_in_non_consecutive_rounds(ballot):
    ranked_candidate_set = set()
    for idx, rank in enumerate(RANKS):
        choice = ballot[rank]
        if choice in ranked_candidate_set and ballot[RANKS[idx-1]] != choice:
            return True
        elif choice in CANDIDATE_SET:
            ranked_candidate_set.add(choice)
    return False

ranked_the_same_candidate_in_non_consecutive_rounds_mask = ballot_df.apply(ranked_the_same_candidate_in_non_consecutive_rounds, axis=1)
ranked_the_same_candidate_in_non_consecutive_rounds_ballots = ballot_df[ranked_the_same_candidate_in_non_consecutive_rounds_mask]
n_ranked_the_same_candidate_in_non_consecutive_rounds = ranked_the_same_candidate_in_non_consecutive_rounds_ballots.shape[0]
category_to_number_of_ballots['3.) n_ranked_the_same_candidate_in_non_consecutive_rounds'] = n_ranked_the_same_candidate_in_non_consecutive_rounds
print("n_ranked_the_same_candidate_in_non_consecutive_rounds:", n_ranked_the_same_candidate_in_non_consecutive_rounds)
ranked_the_same_candidate_in_non_consecutive_rounds_ballots.head(10)


n_ranked_the_same_candidate_in_non_consecutive_rounds: 2053


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
3,"REP Poliquin, Bruce","DEM Golden, Jared F.","Bond, Tiffany L.","DEM Golden, Jared F.","Hoar, William R.S."
27,"REP Poliquin, Bruce","Hoar, William R.S.","Bond, Tiffany L.","REP Poliquin, Bruce",undervote
208,"DEM Golden, Jared F.","REP Poliquin, Bruce","Hoar, William R.S.","Bond, Tiffany L.","DEM Golden, Jared F."
330,"REP Poliquin, Bruce","DEM Golden, Jared F.","Bond, Tiffany L.","DEM Golden, Jared F.",undervote
362,"REP Poliquin, Bruce","Hoar, William R.S.","Bond, Tiffany L.","Hoar, William R.S.",undervote
997,"Bond, Tiffany L.","Hoar, William R.S.",undervote,"Hoar, William R.S.","REP Poliquin, Bruce"
1364,"REP Poliquin, Bruce","DEM Golden, Jared F.","Bond, Tiffany L.","REP Poliquin, Bruce",undervote
1549,"DEM Golden, Jared F.","Hoar, William R.S.","Bond, Tiffany L.","REP Poliquin, Bruce","Hoar, William R.S."
1960,"DEM Golden, Jared F.","REP Poliquin, Bruce","Bond, Tiffany L.","REP Poliquin, Bruce",undervote
2028,"REP Poliquin, Bruce","Bond, Tiffany L.","REP Poliquin, Bruce","Hoar, William R.S.","DEM Golden, Jared F."


Ranked one candidate consecutively and also ranked at least one other candidate on the ballot (e.g., Candidate A, Candidate A, Candidate B)

In [31]:
def ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate(ballot):
    ranked_candidate_set = CANDIDATE_SET.intersection(ballot)
    if len(ranked_candidate_set) < 2:
        return False
    else:
        for idx, rank in enumerate(RANKS):
            choice = ballot[rank]
            if choice in ranked_candidate_set and ballot[RANKS[idx+1]] == choice:
                return True
            elif idx == 3: # next iteration would be an index out of bounds
                return False
        return False
            
ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_mask = ballot_df.apply(ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate, axis=1)
ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_ballots = ballot_df[ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_mask]
n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate = ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_ballots.shape[0]
category_to_number_of_ballots['4.) n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate'] = n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate
print("n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate:", n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate)
ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_ballots.head(10)

n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate: 1858


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
127,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,"Bond, Tiffany L."
473,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.","Bond, Tiffany L.",undervote
843,"Hoar, William R.S.","Bond, Tiffany L.","REP Poliquin, Bruce","DEM Golden, Jared F.","DEM Golden, Jared F."
878,"REP Poliquin, Bruce","Bond, Tiffany L.","Bond, Tiffany L.","Hoar, William R.S.",undervote
995,"DEM Golden, Jared F.","DEM Golden, Jared F.","Bond, Tiffany L.","Hoar, William R.S.",undervote
1238,"REP Poliquin, Bruce","Bond, Tiffany L.","Hoar, William R.S.","DEM Golden, Jared F.","DEM Golden, Jared F."
1779,"REP Poliquin, Bruce","REP Poliquin, Bruce","REP Poliquin, Bruce","DEM Golden, Jared F.","Bond, Tiffany L."
1893,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.","REP Poliquin, Bruce"
3047,"DEM Golden, Jared F.","Bond, Tiffany L.","Hoar, William R.S.","REP Poliquin, Bruce","REP Poliquin, Bruce"
3054,"DEM Golden, Jared F.","REP Poliquin, Bruce","Hoar, William R.S.","Bond, Tiffany L.","Bond, Tiffany L."


Ranked (i) only one candidate (ii) more than once but (iii) left at least one round blank (e.g., Candidate A, Candidate A, blank)

In [32]:
def ranked_only_one_candidate_multiple_times_with_undervote(ballot):
    if UNDERVOTE not in set(ballot):
        return False
    else:
        ranked_candidate_set = CANDIDATE_SET.intersection(ballot)
        if len(ranked_candidate_set) == 1:
            choice = list(ranked_candidate_set)[0]
            return list(ballot).count(choice) > 1
        else:
            return False
        
ranked_only_one_candidate_multiple_times_with_undervote_mask = ballot_df.apply(ranked_only_one_candidate_multiple_times_with_undervote, axis=1)
ranked_only_one_candidate_multiple_times_with_undervote_ballots = ballot_df[ranked_only_one_candidate_multiple_times_with_undervote_mask]
n_ranked_only_one_candidate_multiple_times_with_undervote= ranked_only_one_candidate_multiple_times_with_undervote_ballots.shape[0]
category_to_number_of_ballots['5.) n_ranked_only_one_candidate_multiple_times_with_undervote'] = n_ranked_only_one_candidate_multiple_times_with_undervote
print("n_ranked_only_one_candidate_multiple_times_with_undervote:", n_ranked_only_one_candidate_multiple_times_with_undervote)
ranked_only_one_candidate_multiple_times_with_undervote_ballots.head(10)

n_ranked_only_one_candidate_multiple_times_with_undervote: 1434


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
0,"REP Poliquin, Bruce","REP Poliquin, Bruce","REP Poliquin, Bruce",undervote,undervote
66,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,undervote
82,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,undervote
100,"REP Poliquin, Bruce","REP Poliquin, Bruce","REP Poliquin, Bruce","REP Poliquin, Bruce",undervote
272,"REP Poliquin, Bruce","REP Poliquin, Bruce",undervote,undervote,undervote
286,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote
397,"DEM Golden, Jared F.","DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,undervote
410,"DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,undervote,undervote
460,"REP Poliquin, Bruce","REP Poliquin, Bruce",undervote,undervote,undervote
694,"DEM Golden, Jared F.","DEM Golden, Jared F.",undervote,undervote,undervote


Overvotes (e.g. more than one candidate at the same ranking)

In [33]:
def overvoted(ballot):
    return OVERVOTE in set(ballot)

overvoted_mask = ballot_df.apply(overvoted, axis=1)
overvoted_ballots = ballot_df[overvoted_mask]
n_overvoted = overvoted_ballots.shape[0]
category_to_number_of_ballots['6.) n_overvoted'] = n_overvoted
print("n_overvoted:", n_overvoted)
overvoted_ballots.head(10)

n_overvoted: 1994


Unnamed: 0,rank1,rank2,rank3,rank4,rank5
18,"DEM Golden, Jared F.",overvote,"Hoar, William R.S.","REP Poliquin, Bruce",undervote
160,"DEM Golden, Jared F.",undervote,undervote,undervote,overvote
198,overvote,undervote,undervote,undervote,undervote
438,"Hoar, William R.S.",undervote,"REP Poliquin, Bruce",undervote,overvote
692,"REP Poliquin, Bruce",overvote,"Hoar, William R.S.",undervote,"DEM Golden, Jared F."
886,overvote,undervote,undervote,undervote,undervote
907,"DEM Golden, Jared F.","REP Poliquin, Bruce","Hoar, William R.S.",overvote,undervote
919,"DEM Golden, Jared F.","Bond, Tiffany L.",overvote,undervote,undervote
1311,"DEM Golden, Jared F.",overvote,"REP Poliquin, Bruce","Hoar, William R.S.",undervote
1400,"Bond, Tiffany L.","Hoar, William R.S.",undervote,undervote,overvote


Total (Excluding Duplicates Between Categories):

In [34]:
non_strategic_index_set = set(skipped_at_least_one_round_ballots.index)
non_strategic_index_set = non_strategic_index_set.union(filled_out_at_least_one_round_but_left_the_first_blank_ballots.index)
non_strategic_index_set = non_strategic_index_set.union(ranked_the_same_candidate_in_non_consecutive_rounds_ballots.index)
non_strategic_index_set = non_strategic_index_set.union(ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate_ballots.index)
non_strategic_index_set = non_strategic_index_set.union(ranked_only_one_candidate_multiple_times_with_undervote_ballots.index)
non_strategic_index_set = non_strategic_index_set.union(overvoted_ballots.index)
category_to_number_of_ballots['Total (Excluding Duplicates Between Categories)'] = len(non_strategic_index_set)

In [46]:
df = pd.DataFrame.from_dict(category_to_number_of_ballots, orient='index', columns=['Number of Ballots'])
df['Percent of Total Ballots'] = df['Number of Ballots'].apply(lambda x: "{:.1%}".format(x/n_ballots))
df

Unnamed: 0,Number of Ballots,Percent of Total Ballots
1.) n_skipped_at_least_one_round,11569,3.9%
2.) n_filled_out_at_least_one_round_but_left_the_first_blank,902,0.3%
3.) n_ranked_the_same_candidate_in_non_consecutive_rounds,2053,0.7%
4.) n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate,1858,0.6%
5.) n_ranked_only_one_candidate_multiple_times_with_undervote,1434,0.5%
6.) n_overvoted,1994,0.7%
Total (Excluding Duplicates Between Categories),18835,6.4%


In [None]:
df['Percent of Ballots Exhausted'] = df.apply(lambda row: "{:.1%}".format(row['Number of Ballots Exhausted']/row['Number of Ballots']), axis=1)

In [40]:
df[~df['Number of Ballots'].index.isin(not_exhausted_idxs)]

Unnamed: 0,Number of Ballots,Percent of Total Ballots,Number of Ballots Exhausted
1.) n_skipped_at_least_one_round,11569,3.9%,7
2.) n_filled_out_at_least_one_round_but_left_the_first_blank,902,0.3%,7
3.) n_ranked_the_same_candidate_in_non_consecutive_rounds,2053,0.7%,7
4.) n_ranked_one_candidate_consecutively_and_also_ranked_at_least_one_other_candidate,1858,0.6%,7
5.) n_ranked_only_one_candidate_multiple_times_with_undervote,1434,0.5%,7
6.) n_overvoted,1994,0.7%,7
Total (Excluding Duplicates Between Categories),18835,6.4%,7
