In [1]:
import pandas as pd 
from os import path
import numpy as np 

In [47]:
# Input files, relative to the notebook's working directory.
votes_filepath = "data/1976-2020-president.csv"
electoral_college_filepath = "data/electoral_votes.csv"

# Per-state, per-candidate presidential returns; the columns used later are
# year, state, candidate, candidatevotes and party_simplified.
data_total = pd.read_csv(votes_filepath)
# Electoral-college table; process_electoral_data reads its 'state' column and
# treats every other column label as a year or a hyphenated year range.
data_electoral = pd.read_csv(electoral_college_filepath)
# Peek at the last rows of the vote returns.
data_total.tail(5)


Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party_detailed,writein,candidatevotes,totalvotes,version,notes,party_simplified
4282,2020,WYOMING,WY,56,83,68,US PRESIDENT,"JORGENSEN, JO",LIBERTARIAN,False,5768,278503,20210113,,LIBERTARIAN
4283,2020,WYOMING,WY,56,83,68,US PRESIDENT,"PIERCE, BROCK",INDEPENDENT,False,2208,278503,20210113,,OTHER
4284,2020,WYOMING,WY,56,83,68,US PRESIDENT,,,True,1739,278503,20210113,,OTHER
4285,2020,WYOMING,WY,56,83,68,US PRESIDENT,OVERVOTES,,False,279,278503,20210113,,OTHER
4286,2020,WYOMING,WY,56,83,68,US PRESIDENT,UNDERVOTES,,False,1459,278503,20210113,,OTHER


In [48]:
def expand_electoral_year(text):
    """Expand a year label into the election years (multiples of 4) it covers.

    ``text`` is either a hyphenated inclusive range such as ``"1981-1990"`` or
    a single year such as ``"2020"``.  A single year ``Y`` is treated as the
    range ``Y``..``Y + 1``.  Surrounding whitespace is ignored.

    Returns a list of ints; it may be empty.
    """
    label = text.strip()
    if '-' in text:
        first, last = label.split('-')
    else:
        first = int(label)
        last = first + 1
    first, last = int(first), int(last)
    # The upper bound is inclusive, hence the +1.
    return [candidate for candidate in range(first, last + 1) if candidate % 4 == 0]

def process_electoral_data(electoral_data):
    """Reshape the raw electoral table into a state x election-year grid.

    ``electoral_data`` must have a ``state`` column; every other column label
    is parsed by ``expand_electoral_year`` into the election years it covers.
    The input frame is not modified.

    Returns a DataFrame indexed by lower-cased state name with one column per
    election year, holding the value of the column that covered that year.
    No special handling for the District of Columbia is applied here.
    """
    # Wide -> long: one row per (state, original column label).
    long_form = (
        electoral_data
        .assign(state=electoral_data['state'].apply(lambda name: name.lower()))
        .melt(id_vars=['state'])
        .rename(columns={'variable': 'year'})
    )
    # Each label becomes a list of election years ...
    long_form['year'] = long_form['year'].apply(expand_electoral_year)
    # ... which explode() turns into one row per (state, election year).
    per_year = long_form.explode(column='year')
    return per_year.pivot(index='state', columns='year', values='value')

def get_data_for_year(votes_data,electoral_ata, year,drop_under=0.05):
    """Return one row per (state, major candidate) for a single election year.

    Parameters
    ----------
    votes_data : DataFrame
        Vote returns with at least the columns ``year``, ``state``,
        ``candidate``, ``candidatevotes`` and ``party_simplified``.
    electoral_ata : DataFrame
        Electoral votes indexed by lower-cased state name, one column per
        election year (the output of ``process_electoral_data``).  The
        misspelled parameter name is kept so existing keyword callers keep
        working.
    year : int
        Election year; must be divisible by 4.
    drop_under : float, default 0.05
        Candidates whose nationwide share of the candidate-attributed vote is
        below this fraction are dropped.

    Returns
    -------
    DataFrame with columns ``state``, ``candidate``, ``party``, ``votes`` and
    ``electoral_votes``, or ``None`` (after printing a message) when ``year``
    is not divisible by 4.
    """
    # Input checking: only years divisible by 4 are accepted.
    if year % 4 != 0:
        print(f"invalid year: {year} is not divisible by 4")
        return None

    # Use the argument rather than a same-named notebook global, so the
    # function works on a fresh kernel and with any table passed in.
    electoral_data = electoral_ata

    # Select the year and normalise the column names used downstream.
    year_data = (
        votes_data[votes_data.year == year]
        .rename(columns={'candidatevotes': 'votes', 'party_simplified': 'party'})
        .copy()
    )
    year_data['state'] = year_data['state'].str.lower()

    # Drop candidates whose nationwide total is below `drop_under` (5% by
    # default) of all votes attributed to a named candidate.  groupby skips
    # rows with a missing candidate, so those rows are excluded from both the
    # totals and the result.
    votes_by_candidate = year_data.groupby('candidate')['votes'].sum()
    national_total = votes_by_candidate.sum()
    major_candidates = votes_by_candidate[
        votes_by_candidate >= national_total * drop_under
    ].index
    year_data = year_data[year_data.candidate.isin(major_candidates)]

    # Keep a single row per (state, candidate).
    # NOTE(review): if a candidate appears on several lines in one state, only
    # the first line's votes are kept - confirm that is intended.
    year_data = year_data.drop_duplicates(subset=['state', 'candidate'], keep='first')

    # Tack on each state's electoral votes for this year.  'state' is a column
    # on the left and the index name on the right; merge resolves both.
    electoral_data_year = (
        pd.DataFrame(electoral_data[year]).rename(columns={year: 'electoral_votes'})
    )
    year_data = year_data.merge(electoral_data_year, on='state')

    keep = ['state', 'candidate', 'party', 'votes', 'electoral_votes']
    return year_data[keep].copy()

def tabulate_votes(data_in, count_function, out_feild):
    """Apply an elector-allocation rule state by state.

    ``data_in`` needs ``state``, ``votes`` and ``electoral_votes`` columns.
    For each state, ``count_function(list_of_votes, electoral_votes)`` is
    called with that state's vote counts (in row order) and the
    ``electoral_votes`` value of the state's first row; the list it returns is
    written to the ``out_feild`` column of that state's rows.

    Returns a copy of ``data_in`` with the extra column; the input is left
    untouched.
    """
    tallied = data_in.copy()
    for state_name in list(tallied.state.unique()):
        in_state = tallied.state == state_name
        state_rows = tallied[in_state]
        seats = state_rows['electoral_votes'].iloc[0]
        allocation = count_function(list(state_rows['votes']), seats)
        tallied.loc[in_state, out_feild] = allocation
    return tallied

In [4]:
# Reshape the raw electoral-college table into a state x election-year grid.
electoral_data = process_electoral_data(data_electoral)
# Select the 1992 returns, keep the major candidates, and attach each state's
# electoral votes.
year_data = get_data_for_year(data_total,electoral_data,1992)
# Display the result.
year_data

  uniques = Index(uniques)


Unnamed: 0,state,candidate,party,votes,electoral_votes
0,alabama,"BUSH, GEORGE H.W.",REPUBLICAN,804283,9.0
1,alabama,"CLINTON, BILL",DEMOCRAT,690080,9.0
2,alabama,"PEROT, ROSS",OTHER,183109,9.0
3,alaska,"BUSH, GEORGE H.W.",REPUBLICAN,102000,3.0
4,alaska,"CLINTON, BILL",DEMOCRAT,78294,3.0
...,...,...,...,...,...
148,wisconsin,"BUSH, GEORGE H.W.",REPUBLICAN,930855,11.0
149,wisconsin,"PEROT, ROSS",OTHER,544479,11.0
150,wyoming,"BUSH, GEORGE H.W.",REPUBLICAN,79347,3.0
151,wyoming,"CLINTON, BILL",DEMOCRAT,68160,3.0


In [38]:
# Define the user functions that allocate a state's electoral votes.
#     Each function must follow this format:
#      func([list of vote totals], number of electoral votes) -> [list of electoral votes]

# First: current system 
def winner_takes_all(vote_totals,electoral_votes):
    """Give every elector to the candidate with the most votes.

    Returns a list the same length as ``vote_totals`` that is 0 everywhere
    except at the position of the highest total, which holds
    ``electoral_votes``.  On a tie the earliest position wins (np.argmax).
    """
    winner = np.argmax(vote_totals)
    return [electoral_votes if position == winner else 0
            for position in range(len(vote_totals))]

# Sanity check: the candidate with 22 votes (position 1) takes all 5 electors.
assert(winner_takes_all([13,22,10,17],5) == [0,5,0,0])


def parlimentary_style(vote_totals,electoral_seats): 
    """Split a state's electors proportionally among candidates above a threshold.

    Algorithm:
      1. Compute the threshold ``V // s + 1``, where V is the total number of
         votes still counted and s is the number of electoral seats, i.e. the
         smallest whole number of votes strictly greater than V/s.
      2. Find the remaining candidate with the fewest votes.
      3. If that candidate is below the threshold, zero their votes (remove
         them from the race) and return to step 1 with the reduced total.
         Elimination stops once a single candidate remains, so the last
         candidate standing is never removed.
      4. Assign each candidate E = (v_c / V) * s, their share of the
         remaining votes multiplied by the number of seats.
      5. Give each candidate the whole-number part of E in electors.
      6. Hand out any remaining electors one at a time in order of largest
         fractional part of E; ties go to the earliest candidate.

    Parameters
    ----------
    vote_totals : sequence of numbers
        Votes per candidate.  The sequence is not modified.
    electoral_seats : number
        Number of electors to distribute; must be positive.

    Returns
    -------
    list of int, one entry per candidate.  An empty input gives ``[]``; if no
    votes were cast at all, nobody receives an elector.

    Raises
    ------
    ValueError
        If ``vote_totals`` is non-empty and ``electoral_seats`` is not positive.
    """
    # np.array copies its input, so eliminations below never leak back into
    # the caller's list.
    votes = np.array(vote_totals)
    if votes.size == 0:
        return []
    if electoral_seats <= 0:
        raise ValueError(f"electoral_seats must be positive, got {electoral_seats}")

    # Steps 1-3: repeatedly drop the weakest candidate below the threshold.
    while True:
        contenders = np.flatnonzero(votes)
        # With one seat the threshold (V + 1) can never be met; stopping at a
        # single contender guarantees termination in that case and when all
        # totals are zero.
        if contenders.size <= 1:
            break
        threshold = votes.sum() // electoral_seats + 1
        # argmin picks the earliest of equally weak candidates.
        weakest = contenders[np.argmin(votes[contenders])]
        if votes[weakest] >= threshold:
            break
        votes[weakest] = 0

    total_votes = votes.sum()
    if total_votes == 0:
        # No votes were cast, so there is nothing to apportion.
        return [0] * votes.size

    # Steps 4-5: proportional share, whole-number part first.
    raw_electorates = (votes / total_votes) * electoral_seats
    results = np.floor(raw_electorates)

    # Step 6: distribute what is left by largest fractional part.
    left_overs = int(electoral_seats - results.sum())
    if left_overs != 0:
        decimals = raw_electorates % 1
        for _ in range(left_overs):
            last_vote_winner = np.argmax(decimals)
            decimals[last_vote_winner] = 0  # each candidate gets at most one extra
            results[last_vote_winner] += 1
    return [int(res) for res in results]

# Worked example: the 39-vote candidate is below the threshold and is
# eliminated; the 10 seats are then split 2/5/3 among the other three.
assert(parlimentary_style([181, 480, 339,39],10) == [2,5,3,0])
print(parlimentary_style([181, 480, 339,39],10))


[2, 5, 3, 0]


In [39]:
# Three-seat example: both trailing candidates fall below the threshold, so
# the leader receives every elector.
parlimentary_style([192619,20698,9681],3)

[3, 0, 0]

In [57]:
# Rebuild the 1992 per-state table.
year_data = get_data_for_year(data_total,electoral_data,1992)

# Allocate each state's electors under both rules.
out = tabulate_votes(year_data,winner_takes_all,'wta_votes')
out = tabulate_votes(out,parlimentary_style,'parl_votes')
# Popular-vote share rescaled so that it sums to 538 across all rows.
out['pop_votes_scaled'] = out['votes'] *538/(out['votes'].sum())
# Totals per candidate. Note that 'electoral_votes' here sums each state's
# elector count over the candidate's rows; it is not a count of electors won.
out.groupby('candidate').agg({'electoral_votes':'sum','wta_votes':'sum','parl_votes':'sum','pop_votes_scaled':'sum'})


Unnamed: 0_level_0,electoral_votes,wta_votes,parl_votes,pop_votes_scaled
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"BUSH, GEORGE H.W.",538.0,168.0,205.0,201.917965
"CLINTON, BILL",538.0,370.0,243.0,233.444248
"PEROT, ROSS",538.0,0.0,90.0,102.637787
