# 2020 election results

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import matplotlib.pyplot as plt

In [3]:
# Notebook-wide display settings: show up to 50 columns and 1000 rows of a frame.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
# Turn off Altair's max-rows check; kept as the last expression so the cell
# still echoes the transformer registry.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

### Read data from elections repo

In [4]:
# Source repo: https://github.com/tonmcg/US_County_Level_Election_Results_08-20
# county_fips is read as text rather than being parsed as a number.
results_csv = "../../US_County_Level_Election_Results_08-20/2020_US_County_Level_Presidential_Results.csv"
df = pd.read_csv(results_csv, dtype={"county_fips": str})

In [5]:
# Preview the first five county rows as loaded.
df.head(n=5)

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.22409,0.537623
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.57728
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022


### Winners by county

In [6]:
# For each county, record which of the two vote columns holds the larger count
# (the value is the column label: "votes_gop" or "votes_dem").
vote_columns = ["votes_gop", "votes_dem"]
df["winner"] = df[vote_columns].idxmax(axis="columns")

In [7]:
# Turn the vote-column labels stored in "winner" into candidate names.
candidate_by_column = {"votes_gop": "Trump", "votes_dem": "Biden"}
winner_labels = df["winner"]
for column_label, candidate in candidate_by_column.items():
    winner_labels = winner_labels.str.replace(column_label, candidate)
df["winner"] = winner_labels

In [8]:
# Overwrite per_point_diff with the unsigned gap between the two vote shares.
share_gap = df["per_dem"].sub(df["per_gop"])
df["per_point_diff"] = share_gap.abs()

In [9]:
# Drop the literal text " County" from county names (plain-text match, not a regex).
county_names = df["county_name"]
df["county_name"] = county_names.str.replace(" County", "", regex=False)

In [10]:
# Display the county frame after adding "winner", recomputing per_point_diff
# and shortening county_name.
df

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner
0,Alabama,01001,Autauga,19838,7503,27770,12335,0.714368,0.270184,0.444184,Trump
1,Alabama,01003,Baldwin,83544,24578,109679,58966,0.761714,0.224090,0.537623,Trump
2,Alabama,01005,Barbour,5622,4816,10518,806,0.534512,0.457882,0.076631,Trump
3,Alabama,01007,Bibb,7525,1986,9595,5539,0.784263,0.206983,0.577280,Trump
4,Alabama,01009,Blount,24711,2640,27588,22071,0.895716,0.095694,0.800022,Trump
...,...,...,...,...,...,...,...,...,...,...,...
3147,Wyoming,56037,Sweetwater,12229,3823,16603,8406,0.736554,0.230260,0.506294,Trump
3148,Wyoming,56039,Teton,4341,9848,14677,-5507,0.295769,0.670982,0.375213,Biden
3149,Wyoming,56041,Uinta,7496,1591,9402,5905,0.797277,0.169219,0.628058,Trump
3150,Wyoming,56043,Washakie,3245,651,4012,2594,0.808824,0.162263,0.646560,Trump


In [11]:
# Reduce the five-character FIPS code (e.g. "01001") to its last three
# characters ("001"), i.e. drop the two leading state digits.
# Slicing from the right instead of dropping the first two characters keeps
# this cell idempotent: re-running it leaves an already-shortened code
# unchanged instead of truncating it a second time.
df["county_fips"] = df["county_fips"].str[-3:]

In [12]:
# Number of counties carried by each candidate.
df["winner"].value_counts()

Trump    2595
Biden     557
Name: winner, dtype: int64

---

### States

In [13]:
# Roll county vote counts up to one row per state (Democratic and GOP totals).
party_vote_columns = ["votes_dem", "votes_gop"]
states = df.groupby("state_name")[party_vote_columns].sum().reset_index()

In [14]:
# Unsigned statewide vote margin between the two parties.
state_margin = states["votes_dem"].sub(states["votes_gop"])
states["diff"] = state_margin.abs()

In [15]:
# Statewide total here is the sum of the Democratic and GOP columns only;
# no other vote counts are added in.
states["total_votes"] = states["votes_dem"].add(states["votes_gop"])

In [16]:
states["per_gop"] = (states["votes_dem"] / states["total_votes"]).round(3)

In [17]:
states["per_dem"] = (states["votes_gop"] / states["total_votes"]).round(3)

In [18]:
# Unsigned gap between the two statewide vote shares.
state_share_gap = states["per_dem"].sub(states["per_gop"])
states["per_point_diff"] = state_share_gap.abs()

In [19]:
# For each state, record which vote column is larger
# (the value is the column label: "votes_gop" or "votes_dem").
state_vote_columns = ["votes_gop", "votes_dem"]
states["winner"] = states[state_vote_columns].idxmax(axis="columns")

In [20]:
# Turn the vote-column labels stored in "winner" into candidate names.
state_candidate_by_column = {"votes_gop": "Trump", "votes_dem": "Biden"}
state_winner_labels = states["winner"]
for column_label, candidate in state_candidate_by_column.items():
    state_winner_labels = state_winner_labels.str.replace(column_label, candidate)
states["winner"] = state_winner_labels

In [21]:
# Preview the first five rows of the state-level summary.
states.head(n=5)

Unnamed: 0,state_name,votes_dem,votes_gop,diff,total_votes,per_gop,per_dem,per_point_diff,winner
0,Alabama,849648,1441168,591520,2290816,0.371,0.629,0.258,Trump
1,Alaska,153405,189892,36487,343297,0.447,0.553,0.106,Trump
2,Arizona,1672143,1661686,10457,3333829,0.502,0.498,0.004,Biden
3,Arkansas,423932,760647,336715,1184579,0.358,0.642,0.284,Trump
4,California,11109764,6005961,5103803,17115725,0.649,0.351,0.298,Biden


---

#### California counties only

In [22]:
# Keep only the county rows whose state_name is exactly "California".
is_california = df["state_name"].eq("California")
ca_df = df.loc[is_california]

---

## Export

In [23]:
# Write the county-level results for all states as JSON records and as CSV,
# then the California-only subset as CSV. Paths are relative to the
# notebook's working directory.
df.to_json(path_or_buf="data/election_results_2020.json", orient="records", indent=4)
df.to_csv(path_or_buf="data/election_results_2020.csv", index=False)
ca_df.to_csv(path_or_buf="data/ca_counties_election_results_2020.csv", index=False)