In [1]:
import pandas as pd 
from os import path
import numpy as np 

In [2]:
votes_filepath = "data/1976-2020-president.csv"
electoral_college_filepath = "data/electoral_votes.csv"

data_total = pd.read_csv(votes_filepath)
data_electoral = pd.read_csv(electoral_college_filepath)
data_total.tail(5)
data_electoral

Unnamed: 0,state,1788,1792,1796-1800,1804-1808,1812,1816,1820,1824-1828,1832,...,1932-1940,1944-1948,1952-1956,1960,1964-1968,1972-1980,1984-1988,1992-2000,2004-2008,2012-2020
0,Alabama,,,,,,,3.0,5.0,7.0,...,11.0,11.0,11.0,11.0,10,9,9,9,9,9
1,Alaska,,,,,,,,,,...,,,,3.0,3,3,3,3,3,3
2,Arizona,,,,,,,,,,...,3.0,4.0,4.0,4.0,5,6,7,8,10,11
3,Arkansas,,,,,,,,,,...,9.0,9.0,8.0,8.0,6,6,6,6,6,6
4,California,,,,,,,,,,...,22.0,25.0,32.0,32.0,40,45,47,54,55,55
5,Colorado,,,,,,,,,,...,6.0,6.0,6.0,6.0,6,7,8,8,9,9
6,Connecticut,7.0,9.0,9.0,9.0,9.0,9.0,9.0,8.0,8.0,...,8.0,8.0,8.0,8.0,8,8,8,8,7,7
7,D.C.,,,,,,,,,,...,,,,,3,3,3,3,3,3
8,Delaware,3.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3,3,3,3,3,3
9,Florida,,,,,,,,,,...,7.0,8.0,10.0,10.0,14,17,21,25,27,29


In [3]:
def expand_electoral_year(text):
    if ('-' in text): 
        start,end = text.strip().split('-')
    else: 
        start = int(text.strip())
        end = start + 1
    return [ year for year in range(int(start), int(end) +1) if year % 4 ==0] 
def process_electoral_data(electoral_data): 
    electoral_temp = electoral_data.copy()
    electoral_temp['state'] = electoral_temp['state'].apply(lambda x:x.lower())
    electoral_temp = (
            electoral_temp
            # .set_index('state_name')
            .melt(id_vars=['state'])
            .rename(columns={'variable':'year'})
        )
    electoral_temp['year'] = electoral_temp['year'].apply(expand_electoral_year)
    return electoral_temp.explode(column='year').pivot(index='state',columns='year',values='value')

def get_data_for_year(votes_data,electoral_ata, year,drop_under=0.05):
    # process data for a given year, selecting just major candidates, and dropping unimportant columns 
    # input checking: ensure that the year is usable 
    if year %4 != 0: 
        print(f"invalid year: {year} is not divisible by 4") 
        return None 
    # select year 
    year_data = (votes_data[votes_data.year == year]
                 .copy()
                 .rename(columns={
                     'candidatevotes':'votes', 
                     'party_simplified':'party'
                 })
                )
    year_data['state'] = year_data['state'].apply(lambda x:x.lower())

    # drop candidates whose votes are less than 10% of votes 
    votes_by_cand =(
            year_data
            .groupby("candidate")
            .agg({"votes":sum, "party":"first","office":"first"} )
            .reset_index()
    )
    total_votes = sum(votes_by_cand.votes)
    candidates = votes_by_cand.loc[votes_by_cand.votes >= total_votes*drop_under,'candidate']
    year_data = year_data[year_data.candidate.isin(set(candidates.to_list()))]
    year_data = year_data.drop_duplicates(subset=['state','candidate'],keep='first')
    keep = ['state','candidate','party','votes','electoral_votes']
    # tack on electoral votes
    electoral_data_year = pd.DataFrame(electoral_data[year]).rename(columns={year:'electoral_votes'})
                                            
    year_data = year_data.merge(on='state',right=electoral_data_year)
    
    return year_data[keep].copy()

def tabulate_votes(data, count_function, out_feild): 
    states = list(data.state.unique())
    for state in states: 
        state_votes = data[data.state == state]['electoral_votes'].iloc[0]
        data.loc[data.state == state, out_feild] = count_function(list(data[data.state == state]['votes']),state_votes)
    return data

In [4]:
#preprocess data from electoral college 
electoral_data = process_electoral_data(data_electoral)
# select data from year, and process 
year_data = get_data_for_year(data_total,electoral_data,1992)
# display 
year_data

  uniques = Index(uniques)


Unnamed: 0,state,candidate,party,votes,electoral_votes
0,alabama,"BUSH, GEORGE H.W.",REPUBLICAN,804283,9.0
1,alabama,"CLINTON, BILL",DEMOCRAT,690080,9.0
2,alabama,"PEROT, ROSS",OTHER,183109,9.0
3,alaska,"BUSH, GEORGE H.W.",REPUBLICAN,102000,3.0
4,alaska,"CLINTON, BILL",DEMOCRAT,78294,3.0
...,...,...,...,...,...
145,wisconsin,"BUSH, GEORGE H.W.",REPUBLICAN,930855,11.0
146,wisconsin,"PEROT, ROSS",OTHER,544479,11.0
147,wyoming,"BUSH, GEORGE H.W.",REPUBLICAN,79347,3.0
148,wyoming,"CLINTON, BILL",DEMOCRAT,68160,3.0


In [12]:
# define user-function for calculating votes: 
#     follow the following format: 
#      func( [list of vote totals], number of votes) -> [list of electoral votes] 

# First: current system 
def winner_takes_all(vote_totals,electoral_votes):
    # candidate who wins state gets all the votes 
    
    # make everyone have 0
    result = [0]*len(vote_totals)
    # find the guy who won and give him all the votes
    result[np.argmax(vote_totals)] = electoral_votes
    return result

assert(winner_takes_all([13,22,10,17],5) == [0,5,0,0])

# Congressional district method (not doing) 
# Proportional 
def parlimentary_style(vote_totals,electoral_seats):
    # null out votes until everyone is able to get at least one. 
    valid = False 
    while not valid: 
        # find the threshold at one vote more than total votes / electoral seats
        total_votes = sum(vote_totals)
        threshold = total_votes//electoral_seats+1
        if min(vote_totals,key = lambda x: x if x !=0 else float('inf')) < threshold: 
            # null out the loser's votes
            vote_totals[np.argmin(vote_totals)] = 0
        else: 
            valid = True 
    vote_totals = np.array(vote_totals)
    # now we've nulled out everyone who didn't meet the minimum threshold. 
    raw_electorates = (vote_totals/total_votes)*electoral_seats 
    # take whatever's after the decimal point 
    results = np.floor(raw_electorates)
    # If we need to split up a vote, give it to the person that won the greatest portion of the vote
    if (left_overs := int(electoral_seats - sum(results))) !=0: 
        decimals = list(raw_electorates%1)
        for _ in range(left_overs): 
            last_vote_winner = np.argmax(decimals)
            decimals[last_vote_winner] = 0 
            results[last_vote_winner] +=1 
    return [int(res) for res in results]

assert(parlimentary_style([181, 480, 339,39],10) == [2,5,3,0])
print(parlimentary_style([181, 480, 339,39],10))


[2, 5, 3, 0]


In [13]:
out = tabulate_votes(year_data,parlimentary_style,'wta_votes')
out = tabulate_votes(year_data,winner_takes_all,'parl_votes')
out.groupby('candidate').agg({'electoral_votes':'sum','wta_votes':'sum','parl_votes':'sum'})

Unnamed: 0_level_0,electoral_votes,wta_votes,parl_votes
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"BUSH, GEORGE H.W.",535.0,205.0,168.0
"CLINTON, BILL",535.0,240.0,367.0
"PEROT, ROSS",535.0,90.0,0.0
