# Ranked Choice Voting

In [1]:
import numpy as np
import random

In [2]:
import pandas as pd

## Generate Sample Data

This function generates random ranking of candidates (including randomizing number of candidates ranked).

In [4]:
# SET CONSTANTS

# CANDIDATE POOL
CANDIDATES = ['Zion', 'Yosemite', 'Mt Rainier', 'Yellowstone', 'Glacier', 'Rocky', 'Grand Canyon']
# VOTING POPULATION SIZE
N = 1000

In [49]:
def generate_ballots(n, candidates):
    max_ranking = len(candidates)

    ballots_dict = {}

    for i in range(0, n):
        # randomly decide how many of candidates to rank
        ranked = np.random.randint(0, max_ranking+1)
    
        # choose that many candidates randomly
        order = random.sample(candidates, ranked)

        print(f'Voter {i}: {order}')

        ballots_dict[i] = dict(zip(range(1, len(order)+1), order))

    return pd.DataFrame.from_dict(ballots_dict, orient='index').melt(ignore_index=False, var_name='ranking', value_name='candidate').reset_index(names='voter_id')

In [52]:
np_ballots = generate_ballots(10, CANDIDATES).sort_values('voter_id')

Voter 0: ['Zion', 'Yosemite', 'Mt Rainier']
Voter 1: []
Voter 2: ['Zion']
Voter 3: ['Glacier', 'Rocky', 'Grand Canyon', 'Zion']
Voter 4: ['Yellowstone']
Voter 5: ['Zion', 'Rocky', 'Grand Canyon', 'Mt Rainier', 'Yosemite']
Voter 6: ['Zion', 'Yellowstone']
Voter 7: ['Glacier', 'Zion', 'Mt Rainier', 'Grand Canyon', 'Yosemite', 'Yellowstone']
Voter 8: ['Mt Rainier', 'Grand Canyon', 'Rocky', 'Yosemite', 'Yellowstone']
Voter 9: ['Yosemite']


In [53]:
np_ballots['voter_id'].unique()

array([0, 2, 3, 4, 5, 6, 7, 8, 9])

## Tabulate Ballots

In [57]:
vote_share = np_ballots.loc[np_ballots['ranking'] == 1, 'candidate'].value_counts(normalize=True)

In [61]:
vote_share

candidate
Zion           0.444444
Glacier        0.222222
Yellowstone    0.111111
Mt Rainier     0.111111
Yosemite       0.111111
Name: proportion, dtype: float64

In [None]:
no_votes = set(CANDIDATES) - set(vote_share.index)

no_votes.add(vote_share.idxmin())

In [84]:
no_votes

{'Grand Canyon', 'Rocky', 'Yellowstone'}

In [85]:
set(CANDIDATES) - set(vote_share.index)

{'Grand Canyon', 'Rocky'}

In [62]:
np_ballots_w_o = np_ballots.loc[~np_ballots['candidate'].isin(no_votes)]

In [67]:
new_col_name = 'ranking_1'
np_ballots_w_o.loc[:, new_col_name] = np_ballots_w_o.loc[:, 'ranking']

# re-rank candidates for each voter
np_ballots_w_o.loc[:, 'ranking'] = np_ballots_w_o.groupby('voter_id')['ranking'].rank()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  np_ballots_w_o.loc[:, new_col_name] = np_ballots_w_o.loc[:, 'ranking']


In [68]:
np_ballots_w_o.sort_values(['voter_id', 'ranking_1'])

Unnamed: 0,voter_id,ranking,candidate,ranking_1
0,0,1,Zion,1
9,0,2,Yosemite,2
18,0,3,Mt Rainier,3
27,0,4,,4
36,0,5,,5
45,0,6,,6
1,2,1,Zion,1
10,2,2,,2
19,2,3,,3
28,2,4,,4


In [46]:
np_ballots[:10]

Unnamed: 0,voter_id,ranking,candidate
0,0,1,Grand Canyon
9,0,2,Mt Rainier
27,0,4,
18,0,3,
45,0,6,
54,0,7,
36,0,5,
19,2,3,Grand Canyon
46,2,6,
28,2,4,


In [115]:
def tabulate_ranked_choice_ballots(ballots, candidates, threshold):

    # drop all empty ranks (candidate is null)
    ballots = ballots.loc[ballots['candidate'].notnull()]
    
    # initialize vote share
    candidate_vote_share = pd.Series(0, index=candidates)

    # initialize remaining candidates list
    cand_remain = set(candidates)

    rnd = 1

    while candidate_vote_share.max() < threshold:

        # calculate vote share of current tier
        candidate_vote_share = ballots.loc[ballots['ranking'] == 1, 'candidate'].value_counts(normalize=True)

        print(f'\nRound {round} Candidate Vote Share:')
        print(candidate_vote_share)

        if candidate_vote_share.max() < threshold:
            # if no candidates meet threshold of voter share
            # eliminate candidate with lowest share, as well as candidates who got no vote share

            # get candidate with lowest share
            cand_eliminated = candidate_vote_share.idxmin()
            # store number of voters who ranked this candidate (in any slot)
            eliminated = ballots.loc[ballots['candidate'] == cand_eliminated].count()

            # get candidates who received 0 votes in this round
            cand_dq = cand_remain - set(candidate_vote_share.index)
            
            # eliminate votes for candidate in last place and candidates with 0 votes
            cand_dq.add(cand_eliminated)
            ballots = ballots.loc[~ballots['candidate'].isin(cand_dq)]

            new_col_name = 'ranking_' + str(rnd)
            ballots[new_col_name] = ballots['ranking']

            # re-rank candidates for each voter
            ballots['ranking'] = ballots.groupby('voter_id')['ranking'].rank()

            print(f'Round {rnd}: Eliminating Candidate {cand_eliminated} with {candidate_vote_share.min():.1%} and Redistributing {len(eliminated)} Votes')

            rnd += 1
            cands_remain = set(ballots['candidate'].unique())

    return candidate_vote_share

    

In [113]:
ballots = np_ballots

# drop all empty ranks (candidate is null)
ballots = ballots.loc[ballots['candidate'].notnull()]

# # initialize vote share
candidate_vote_share = pd.Series(0, index=CANDIDATES)

# # initialize remaining candidates list
cand_remain = set(CANDIDATES)

rnd = 1

# while candidate_vote_share.max() < threshold:

#     # calculate vote share of current tier
candidate_vote_share = ballots.loc[ballots['ranking'] == 1, 'candidate'].value_counts(normalize=True)

#     print(f'\nRound {round} Candidate Vote Share:')
#     print(candidate_vote_share)

#     if candidate_vote_share.max() < threshold:
#         # if no candidates meet threshold of voter share
#         # eliminate candidate with lowest share, as well as candidates who got no vote share

#         # get candidate with lowest share
cand_to_elim = candidate_vote_share.idxmin()
#         # store number of voters who ranked this candidate (in any slot)
eliminated = ballots.loc[ballots['candidate'] == cand_to_elim].shape[0]

#         # get candidates who received 0 votes in this round
cands_to_elim = cand_remain - set(candidate_vote_share.index)
        
#         # eliminate votes for candidate in last place and candidates with 0 votes
cands_to_elim.add(cand_to_elim)
ballots = ballots.loc[~ballots['candidate'].isin(cands_to_elim)]

#         new_col_name = 'ranking_' + str(rnd)
#         ballots[new_col_name] = ballots['ranking']

#         # re-rank candidates for each voter
#         ballots['ranking'] = ballots.groupby('voter_id')['ranking'].rank()

#         print(f'Round {rnd}: Eliminating Candidate {cand_to_elim} with {candidate_vote_share.min():.1%} and Redistributing {len(eliminated)} Votes')

#         rnd += 1
#         cands_remain = set(ballots['candidate'].unique())



In [116]:
tabulate_ranked_choice_ballots(np_ballots, CANDIDATES, 0.8)


Round <built-in function round> Candidate Vote Share:
candidate
Zion           0.444444
Glacier        0.222222
Yellowstone    0.111111
Mt Rainier     0.111111
Yosemite       0.111111
Name: proportion, dtype: float64
Round 1: Eliminating Candidate Yellowstone with 11.1% and Redistributing 3 Votes

Round <built-in function round> Candidate Vote Share:
candidate
Zion          0.500
Glacier       0.250
Mt Rainier    0.125
Yosemite      0.125
Name: proportion, dtype: float64
Round 2: Eliminating Candidate Mt Rainier with 12.5% and Redistributing 4 Votes

Round <built-in function round> Candidate Vote Share:
candidate
Zion        0.50
Glacier     0.25
Yosemite    0.25
Name: proportion, dtype: float64
Round 3: Eliminating Candidate Glacier with 25.0% and Redistributing 5 Votes

Round <built-in function round> Candidate Vote Share:
candidate
Zion        0.75
Yosemite    0.25
Name: proportion, dtype: float64
Round 4: Eliminating Candidate Yosemite with 25.0% and Redistributing 6 Votes

Round 

candidate
Zion    1.0
Name: proportion, dtype: float64

In [9]:
vote_share = np_ballots.loc[np_ballots['ranking'] == 1, 'candidate'].value_counts(normalize=True)

In [14]:
# slow way

def tabulate_slow(ballots, candidates, threshold):

    cand_ct = len(candidates)
    
    # initialize vote share
    candidate_vote_share = pd.Series(0, index=candidates)

    round = 1

    while candidate_vote_share.max() < threshold:

        # calculate vote share of current tier
        candidate_vote_share = ballots.loc[ballots['ranking'] == 1, 'candidate'].value_counts(normalize=True)

        print(f'\nRound {round} Candidate Vote Share:')
        print(candidate_vote_share)

        if candidate_vote_share.max() < threshold:
            # prep data for next round

            # check to see if any candidates got 0% of vote
            candidates_wo_votes = set(candidates) - set(candidate_vote_share.index)
            if size(candidates_wo_votes) > 0:
                # if there are, drop their votes first
                
            # candidate in last place
            cand_to_elim = candidate_vote_share.idxmin()

            # create filter of rows to drop
            elim = (ballots['candidate'] == cand_to_elim)
            # store affected voters
            voters_w_elim = ballots[elim]['voter_id'].unique()

            # iterate over every affected voter
            for voter in voters_w_elim:

                # get voter's ranking of eliminated candidate
                rank = ballots.loc[(ballots['voter_id'] == voter) & (ballots['candidate'] == cand_to_elim), 'ranking'].iloc[0]

                if rank == cand_ct:
                    # if voter ranked this candidate last, continue to next voter
                    # vote will simply be dropped
                    continue
                    
                for r in range(rank + 1, cand_ct + 1):
                    try:
                        # decrement ranking of all subsequent candidates on voter's ballot
                        temp_rank = ballots.loc[(ballots['voter_id'] == voter) & (ballots['ranking']), 'ranking'].iloc[0]
                        ballots.loc[(ballots['voter_id'] == voter) & (ballots['ranking']), 'ranking'] = temp_rank - 1
                    except IndexError as e:
                        # end of ranking, continue to next voter
                        continue    
            
            # drop rows with eliminated candidate ranked 1
            ballots = ballots[~elim]

            # iterate over every affected voter
            ballots.loc[ballots['voter_id'].isin(voters_w_elim), 'ranking'] = ballots.loc[ballots['voter_id'].isin(voters_w_elim), 'ranking'].apply(lambda x: x - 1)

            print(f'Round {round}: Eliminating Candidate {cand_to_elim} with {candidate_vote_share.min():.1%} and Redistributing {len(voters_w_elim)} Votes')

            round += 1

    return candidate_vote_share
