# Examining Allocation of Electoral Votes Proportionally
### Brendan Leonard // GOVT 1356 // December 2020

In [1]:
import pandas as pd
import numpy as np
import requests
import json

pd.options.display.max_columns = None

%load_ext lab_black

In [2]:
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

In [3]:
FETCH_POLITICO_DATA = True
JSON_DIR = "./json"
DATA_DIR = "./data"

## Collect & Transform Data

### FIPS Data from Census Bureau

In [4]:
# Source: https://www2.census.gov/geo/docs/reference/state.txt
geo = pd.read_csv("https://www2.census.gov/geo/docs/reference/state.txt", sep="|")
geo.columns = ("fips", "state_abb", "state_name", "state_gnisid")
geo = geo.drop(columns=["state_gnisid"])

In [5]:
geo.head()

Unnamed: 0,fips,state_abb,state_name
0,1,AL,Alabama
1,2,AK,Alaska
2,4,AZ,Arizona
3,5,AR,Arkansas
4,6,CA,California


## Fetch and Load Elections Data from Politico

In [6]:
# Source: https://www.politico.com/2020-election/results/president/
DATA_SOURCES = {
    "president_meta": {
        "url": "https://www.politico.com/2020-national-metadata/potus.meta.json",
        "json_file": f"{JSON_DIR}/politico_president_meta.json",
    },
    "president_results": {
        "url": "https://www.politico.com/2020-national-results/president-overall.json",
        "json_file": f"{JSON_DIR}/politico_president_results.json",
    },
}

In [7]:
if FETCH_POLITICO_DATA:
    for k, v in DATA_SOURCES.items():
        with open(v["json_file"], "w") as f:
            json.dump(requests.get(v["url"]).json(), f)

In [8]:
with open(DATA_SOURCES["president_meta"]["json_file"], "r") as f:
    pres_meta = json.load(f)

with open(DATA_SOURCES["president_results"]["json_file"], "r") as f:
    pres = json.load(f)["races"]

### Wrangle and Transform Data

In [9]:
delete_pres_keys = True
pres_candidates = []
for i, state in enumerate(pres_meta):
    for candidate in state.get("candidates"):
        candidate["stateFips"] = state.get("stateFips")
        candidate["electors"] = state.get("electors")
        candidate["rating"] = state.get("rating")
        pres_candidates.append(candidate)

    for election in state.get("historical").get("forPresident"):
        year = str(election.get("year"))[-2:]
        pres_meta[i][f"dem_vote_{year}"] = election.get("demVote")
        pres_meta[i][f"dem_vote_pct_{year}"] = election.get("demVotePct")
        pres_meta[i][f"rep_vote_{year}"] = election.get("gopVote")
        pres_meta[i][f"rep_vote_pct_{year}"] = election.get("gopVotePct")
        pres_meta[i][f"win_party_{year}"] = election.get("winner")

    if delete_pres_keys:
        del state["candidates"]
        del state["historical"]

pres_meta = pd.DataFrame(pres_meta)
pres_candidates = pd.DataFrame(pres_candidates)

In [10]:
pres_results = []
for i, race in enumerate(pres):
    for i, candidate in enumerate(race["candidates"]):
        candidate["stateFips"] = race.get("stateFips")
        pres_results.append(candidate)

pres_results = pd.DataFrame(pres_results)

In [11]:
pres_meta = pres_meta.drop(
    columns=["holdingParty", "isBallot", "isSpecial", "isUnopposed"]
)
pres_candidates = pres_candidates.drop(columns=["aggregable", "ballotOrder"])

In [12]:
# Combine data sets
pres_results = pres_results.merge(pres_candidates)

In [13]:
pres_results = (
    pres_results.sort_values("electors", ascending=False)[
        [
            "stateFips",
            "vote",
            "shortName",
        ]
    ]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [14]:
pres_results = pres_results.pivot_table(
    values="vote", index="stateFips", columns="shortName"
)

In [15]:
third_party = [
    x
    for x in pres_results.columns
    if x not in ("Biden", "Trump", "stateFips", "electors", "party", "rating")
]

pres_results["third_party_vote"] = pres_results[third_party].sum(axis=1)
pres_results = pres_results.drop(columns=third_party)
pres_results = pres_results.reset_index().rename(
    columns={"Biden": "biden_vote", "Trump": "trump_vote", "stateFips": "state_fips"}
)

In [16]:
pres_results.state_fips = pres_results.state_fips.astype(int)
pres_results = pres_results.merge(geo, left_on="state_fips", right_on="fips").drop(
    columns=["fips", "state_name"]
)

In [17]:
pres_results["total_vote"] = pres_results[
    ["biden_vote", "trump_vote", "third_party_vote"]
].sum(axis=1)

In [18]:
for candidate in ["biden", "trump", "third_party"]:
    pres_results[f"{candidate}_pct"] = (
        pres_results[f"{candidate}_vote"] / pres_results["total_vote"]
    )

In [19]:
pres_meta = pres_meta[pres_meta.districtNumber.isna()].drop(
    columns=["districtNumber", "raceid"]
)

In [20]:
pres_meta.stateFips = pres_meta.stateFips.astype(int)
pres_results = pres_results.merge(pres_meta, left_on="state_fips", right_on="stateFips")

In [21]:
# Data has ME and NE split by districts, so combine for state totals
pres_results.loc[pres_results.state_abb == "ME", "electors"] = 4
pres_results.loc[pres_results.state_abb == "NE", "electors"] = 5

In [22]:
pres_results.head()

Unnamed: 0,state_fips,biden_vote,trump_vote,third_party_vote,state_abb,total_vote,biden_pct,trump_pct,third_party_pct,dem_vote_08,dem_vote_12,dem_vote_16,dem_vote_pct_08,dem_vote_pct_12,dem_vote_pct_16,electors,rating,rep_vote_08,rep_vote_12,rep_vote_16,rep_vote_pct_08,rep_vote_pct_12,rep_vote_pct_16,stateFips,win_party_08,win_party_12,win_party_16
0,1,849624.0,1441170.0,25176.0,AL,2315970.0,0.366854,0.622275,0.010871,813479.0,795696.0,729547.0,0.3911,0.3878,0.3563,9,solid-r,1266546.0,1255925.0,1318255.0,0.6089,0.6122,0.6437,1,gop,gop,gop
1,2,153778.0,189951.0,13840.0,AK,357569.0,0.430065,0.531229,0.038706,123594.0,122640.0,116454.0,0.3894,0.4268,0.4161,3,lean-r,193841.0,164676.0,163387.0,0.6106,0.5732,0.5839,2,gop,gop,gop
2,4,1672143.0,1661686.0,51465.0,AZ,3385294.0,0.493943,0.490854,0.015203,1034707.0,1025232.0,1161167.0,0.4569,0.4539,0.4811,11,toss-up,1230111.0,1233654.0,1252401.0,0.5431,0.5461,0.5189,4,gop,gop,gop
3,5,423932.0,760647.0,34490.0,AR,1219069.0,0.347751,0.623957,0.028292,422310.0,394409.0,380494.0,0.3983,0.3785,0.3571,6,solid-r,638017.0,647744.0,684872.0,0.6017,0.6215,0.6429,5,gop,gop,gop
4,6,11109764.0,6005961.0,380101.0,CA,17495826.0,0.634995,0.34328,0.021725,8274473.0,7854285.0,8753788.0,0.6228,0.6187,0.6613,55,solid-d,5011781.0,4839958.0,4483810.0,0.3772,0.3813,0.3387,6,dem,dem,dem


In [23]:
pres_results[["biden_vote", "trump_vote"]].sum() / pres_results[
    ["biden_vote", "trump_vote"]
].sum().sum()

biden_vote    0.523021
trump_vote    0.476979
dtype: float64

## Allocate Electors of Past Elections

In [24]:
vote_cols_20 = {
    "biden_vote": "dem_vote_20",
    "trump_vote": "rep_vote_20",
    "third_party_vote": "third_vote_20",
}

In [25]:
hist = pres_results.rename(columns=vote_cols_20)[
    ["state_abb", "electors", "rating"]
    + [
        f"{party}_vote_{year}"
        for party in ["dem", "rep"]
        for year in ["08", "12", "16", "20"]
    ]
]

In [26]:
hist = pd.wide_to_long(
    hist, stubnames=["dem_vote_", "rep_vote_", "third_vote_"], i="state_abb", j="year"
)

In [27]:
hist = hist.reset_index().rename(
    columns={
        "state_abb": "state",
        "dem_vote_": "dem_vote",
        "rep_vote_": "rep_vote",
        "third_vote_": "third_vote",
    }
)

In [28]:
hist["year"] = hist.year + 2000

In [29]:
hist = hist.drop(columns=["third_vote"])
hist["total_vote"] = hist.dem_vote + hist.rep_vote
hist["dem_pct"] = hist.dem_vote / hist.total_vote * 100
hist["rep_pct"] = hist.rep_vote / hist.total_vote * 100

In [30]:
hist.electors = hist.electors.astype(int)

In [31]:
def determine_prop_electors(x):
    votes_per_elector = x.total_vote / x.electors

    dem_electors = x.dem_vote // votes_per_elector
    dem_remainder = x.dem_vote % votes_per_elector

    rep_electors = x.rep_vote // votes_per_elector
    rep_remainder = x.rep_vote % votes_per_elector

    remaining_elector = x.electors - (dem_electors + rep_electors)

    if remaining_elector == 1:
        if dem_remainder > rep_remainder:
            dem_electors += 1
        elif dem_remainder < rep_remainder:
            rep_electors += 1
        elif dem_remainder == rep_remainder:
            raise ("Exact tie for remaining elector.")
    elif remaining_elector > 1:
        raise ("More than one elector unassigned.")

    x["prop_dem_electors"] = int(dem_electors)
    x["prop_rep_electors"] = int(rep_electors)

    return x

In [32]:
hist = hist.apply(determine_prop_electors, axis=1)

In [33]:
hist.head()

Unnamed: 0,state,year,rating,electors,dem_vote,rep_vote,total_vote,dem_pct,rep_pct,prop_dem_electors,prop_rep_electors
0,AL,2008,solid-r,9,813479.0,1266546.0,2080025.0,39.109097,60.890903,4,5
1,AK,2008,lean-r,3,123594.0,193841.0,317435.0,38.935215,61.064785,1,2
2,AZ,2008,toss-up,11,1034707.0,1230111.0,2264818.0,45.686099,54.313901,5,6
3,AR,2008,solid-r,6,422310.0,638017.0,1060327.0,39.828279,60.171721,2,4
4,CA,2008,solid-d,55,8274473.0,5011781.0,13286254.0,62.27845,37.72155,34,21


In [34]:
def electors_actual(x):
    if x.dem_pct > x.rep_pct:
        x["act_dem_electors"] = x.electors
        x["act_rep_electors"] = 0
    else:
        x["act_rep_electors"] = x.electors
        x["act_dem_electors"] = 0

    return x


hist = hist.apply(electors_actual, axis=1)

# Correct ME & NE split actual allocations
hist.loc[(hist.state == "ME") & (hist.year == 2020), "act_dem_electors"] = 3
hist.loc[(hist.state == "ME") & (hist.year == 2020), "act_rep_electors"] = 1
hist.loc[(hist.state == "NE") & (hist.year == 2020), "act_dem_electors"] = 1
hist.loc[(hist.state == "NE") & (hist.year == 2020), "act_rep_electors"] = 4

hist.loc[(hist.state == "ME") & (hist.year == 2016), "act_dem_electors"] = 3
hist.loc[(hist.state == "ME") & (hist.year == 2016), "act_rep_electors"] = 1

hist.loc[(hist.state == "NE") & (hist.year == 2016), "act_dem_electors"] = 1
hist.loc[(hist.state == "NE") & (hist.year == 2016), "act_rep_electors"] = 4

In [35]:
hist.rating = hist.rating.str.title()

In [36]:
hist.head()

Unnamed: 0,act_dem_electors,act_rep_electors,dem_pct,dem_vote,electors,prop_dem_electors,prop_rep_electors,rating,rep_pct,rep_vote,state,total_vote,year
0,0,9,39.109097,813479.0,9,4,5,Solid-R,60.890903,1266546.0,AL,2080025.0,2008
1,0,3,38.935215,123594.0,3,1,2,Lean-R,61.064785,193841.0,AK,317435.0,2008
2,0,11,45.686099,1034707.0,11,5,6,Toss-Up,54.313901,1230111.0,AZ,2264818.0,2008
3,0,6,39.828279,422310.0,6,2,4,Solid-R,60.171721,638017.0,AR,1060327.0,2008
4,55,0,62.27845,8274473.0,55,34,21,Solid-D,37.72155,5011781.0,CA,13286254.0,2008


In [37]:
hist.to_csv(f"{DATA_DIR}/presidential_elections_allocation.csv", index=False)

In [38]:
hist_national = hist.groupby("year").agg(
    {
        "dem_vote": sum,
        "rep_vote": sum,
        "total_vote": sum,
        "act_dem_electors": sum,
        "act_rep_electors": sum,
        "prop_dem_electors": sum,
        "prop_rep_electors": sum,
    }
)

In [39]:
hist_national["dem_pct"] = (hist_national.dem_vote / hist_national.total_vote) * 100
hist_national["rep_pct"] = (hist_national.rep_vote / hist_national.total_vote) * 100

In [40]:
hist_national

Unnamed: 0_level_0,dem_vote,rep_vote,total_vote,act_dem_electors,act_rep_electors,prop_dem_electors,prop_rep_electors,dem_pct,rep_pct
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2008,69338850.0,59613835.0,128952700.0,358,180,286,252,53.770767,46.229233
2012,65752020.0,60670117.0,126422100.0,332,206,276,262,52.009893,47.990107
2016,65677170.0,62692411.0,128369600.0,233,305,270,268,51.162564,48.837436
2020,80950720.0,73824626.0,154775300.0,306,232,276,262,52.302078,47.697922


In [41]:
# Correlation of actual allocation and percent of vote
np.corrcoef((hist_national.act_dem_electors / 538), hist_national.dem_pct)[0][1]

0.8654281362262602

In [42]:
# Correlation of proportional allocation and percent of vote
np.corrcoef((hist_national.prop_dem_electors / 538), hist_national.dem_pct)[0][1]

0.9939211689596883

In [43]:
# Copy for report
hist_national[
    [
        "dem_pct",
        "rep_pct",
        "act_dem_electors",
        "act_rep_electors",
        "prop_dem_electors",
        "prop_rep_electors",
    ]
].to_clipboard()

In [44]:
# Copy for report
hist[hist.year == 2020][
    [
        "state",
        "electors",
        "rating",
        "dem_pct",
        "rep_pct",
        "act_dem_electors",
        "act_rep_electors",
        "prop_dem_electors",
        "prop_rep_electors",
    ]
].to_clipboard(index=False)

## 2020 Election

In [45]:
allocation_2020 = hist[hist.year == 2020][
    [
        "state",
        "electors",
        "rating",
        "dem_pct",
        "rep_pct",
        "act_dem_electors",
        "act_rep_electors",
        "prop_dem_electors",
        "prop_rep_electors",
    ]
]

In [46]:
allocation_2020.to_csv(f"{DATA_DIR}/2020_election_allocation.csv", index=False)
allocation_2020.head()

Unnamed: 0,state,electors,rating,dem_pct,rep_pct,act_dem_electors,act_rep_electors,prop_dem_electors,prop_rep_electors
153,AL,9,Solid-R,37.088625,62.911375,0,9,3,6
154,AK,3,Lean-R,44.738151,55.261849,0,3,1,2
155,AZ,11,Toss-Up,50.156832,49.843168,11,0,6,5
156,AR,6,Solid-R,35.787567,64.212433,0,6,2,4
157,CA,55,Solid-D,64.90969,35.09031,55,0,36,19


## Correlations

In [47]:
# Correlation of actual allocation to percent of vote
dem_corr = np.corrcoef(
    allocation_2020.dem_pct,
    (allocation_2020.act_dem_electors / allocation_2020.electors),
)[0][1]
rep_corr = np.corrcoef(
    allocation_2020.rep_pct,
    (allocation_2020.act_rep_electors / allocation_2020.electors),
)[0][1]
print("Dem:", dem_corr)
print("Rep:", rep_corr)

Dem: 0.7898214664159974
Rep: 0.7898214664159974


In [48]:
# Correlation of proportional allocation to percent of vote
dem_corr = np.corrcoef(
    allocation_2020.dem_pct,
    (allocation_2020.prop_dem_electors / allocation_2020.electors),
)[0][1]
rep_corr = np.corrcoef(
    allocation_2020.rep_pct,
    (allocation_2020.prop_rep_electors / allocation_2020.electors),
)[0][1]
print("Dem:", dem_corr)
print("Rep:", rep_corr)

Dem: 0.9435143508678683
Rep: 0.9435143508678685


## Georgia 2020 Example

In [49]:
ga_2020 = pres_results[
    [
        "state_abb",
        "electors",
        "biden_vote",
        "trump_vote",
    ]
][pres_results.state_abb == "GA"]
ga_2020.head(20)

Unnamed: 0,state_abb,electors,biden_vote,trump_vote
10,GA,16,2473633.0,2461854.0


In [50]:
total_votes = 2473633 + 2461854
electors = 16
votes_per_elector = total_votes / electors

biden_vote = 2473633
trump_vote = 2461854

In [51]:
# Whole number of electoral votes for Biden
biden_vote // votes_per_elector

8.0

In [52]:
# Whole number of electoral votes for Trump
trump_vote // votes_per_elector

7.0

In [53]:
# Remaining votes towards next marginal EV for Biden
biden_vote % votes_per_elector

5889.5

In [54]:
# Remaining votes towards next marginal EV for Trump
trump_vote % votes_per_elector

302578.4375