# Explore election data to determine the political leaning of each county

Source: 2024 presidential election results scraped from The New York Times (NYT).

In [None]:
import pandas as pd

# Load the previously compiled county-level dataset and preview its first rows.
COMPILED_DATA_PATH = "/workspaces/greener/static/county_score/compiled_data.parquet"
compiled = pd.read_parquet(COMPILED_DATA_PATH)
compiled.head()

Unnamed: 0,fips,StateName,RegionName,AverageHomeValue,HousingScore,Houses,WinterAvg,Winter_Low_Temp_Score,Winter_High_Temp_Score,SummerAvg,...,Jobs,Summary,Pop_Est_July_1_2024,Low_Pop_Score,High_Pop_Score,R_Votes,D_Votes,Total_Votes,R_Score,D_Score
0,6037,CA,Los Angeles County,898170.75,0.273441,https://www.zillow.com/Los-Angeles-County-CA,59.1,0.253504,0.746496,76.6,...,https://www.indeed.com/jobs?q=&l=Los+Angeles+C...,,9757179.0,0.0,1.0,1189862.0,2417109.0,3728427.0,0.319132,0.648292
1,17031,IL,Cook County,322945.46875,0.507035,https://www.zillow.com/Cook-County-IL,45.3,0.524383,0.475617,73.1,...,https://www.indeed.com/jobs?q=&l=Cook+County%2...,,5182617.0,0.051765,0.948235,583852.0,1447821.0,2056800.0,0.283864,0.703919
2,48201,TX,Harris County,287587.96875,0.533516,https://www.zillow.com/Harris-County-TX,68.4,0.104639,0.895361,84.6,...,https://www.indeed.com/jobs?q=&l=Harris+County...,,5009302.0,0.054548,0.945452,722695.0,808771.0,1556310.0,0.464364,0.519672
3,4013,AZ,Maricopa County,470511.5,0.421092,https://www.zillow.com/Maricopa-County-AZ,64.6,0.162863,0.837137,92.5,...,https://www.indeed.com/jobs?q=&l=Maricopa+Coun...,,4673096.0,0.060233,0.939767,1051531.0,980016.0,2053945.0,0.511957,0.477138
4,6073,CA,San Diego County,945139.125,0.261801,https://www.zillow.com/San-Diego-County-CA,60.7,0.226294,0.773706,77.1,...,https://www.indeed.com/jobs?q=&l=San+Diego+Cou...,,3298799.0,0.088727,0.911273,593270.0,841372.0,1477786.0,0.401459,0.569346


In [72]:
# Collect the distinct state abbreviations that appear in the compiled data,
# then confirm what container type pandas hands back.
state_column = compiled['StateName']
states = state_column.unique()
type(states)

numpy.ndarray

In [73]:
# convert abbreviations to full state name
# Lookup table keyed by full name, with the two-letter abbreviation as the
# value. It covers the states, the District of Columbia, and US territories.
# Note the direction: the code below uses it in reverse (abbreviation -> name).
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "Virgin Islands, U.S.": "VI",
}
# Invert the lookup once (abbreviation -> lower-case, hyphenated name such as
# 'new-york') instead of scanning the whole dict for every state.
abbrev_to_slug = {
    abbrev: full_name.lower().replace(' ', '-')
    for full_name, abbrev in us_state_to_abbrev.items()
}
# Derive the slugs from the source column rather than overwriting `states`
# element by element: the in-place version fails with StopIteration when the
# cell is re-run, because `states` then already holds slugs, not abbreviations.
state_abbrevs = compiled['StateName'].unique()
unknown_abbrevs = [abbrev for abbrev in state_abbrevs if abbrev not in abbrev_to_slug]
if unknown_abbrevs:
    raise ValueError(f"Unrecognized state abbreviations: {unknown_abbrevs}")
# Keep `states` a NumPy object array so later element-wise comparisons work.
states = pd.Series(state_abbrevs, dtype=object).map(abbrev_to_slug).to_numpy(dtype=object)
print(states)

['california' 'illinois' 'texas' 'arizona' 'new-york' 'florida'
 'washington' 'nevada' 'michigan' 'massachusetts' 'pennsylvania' 'ohio'
 'minnesota' 'utah' 'virginia' 'north-carolina' 'maryland' 'georgia'
 'hawaii' 'missouri' 'indiana' 'connecticut' 'new-jersey' 'wisconsin'
 'tennessee' 'oregon' 'oklahoma' 'kentucky' 'colorado'
 'district-of-columbia' 'new-mexico' 'alabama' 'rhode-island' 'kansas'
 'nebraska' 'delaware' 'south-carolina' 'iowa' 'idaho' 'louisiana'
 'new-hampshire' 'arkansas' 'maine' 'alaska' 'mississippi' 'south-dakota'
 'north-dakota' 'west-virginia' 'vermont' 'montana' 'wyoming']


In [74]:
# NYT does not serve DC under this slug (it is fetched separately below),
# so keep every entry except 'district-of-columbia'.
import numpy as np
states = states[states != 'district-of-columbia']

In [75]:
# Scrape NYT for county results for each state
import requests

# Accumulator for the scraped results: get_results() below appends one row
# per county/township reporting unit to this frame.
counties = pd.DataFrame(columns=['fips', "R_votes", "D_votes", "Total_votes"])
# define function to scrape page
def get_results(state):
    """Fetch NYT 2024 presidential results for one state and append them to ``counties``.

    Parameters
    ----------
    state : str
        State slug used in the NYT URL (lower-case, hyphenated, e.g. ``'new-york'``).

    Returns
    -------
    None
        Results are recorded as a side effect: one
        ``[fips, R_votes, D_votes, Total_votes]`` row is appended to the
        module-level ``counties`` DataFrame for every reporting unit whose
        level is ``'county'`` or ``'township'``.

    Raises
    ------
    requests.HTTPError
        If the NYT endpoint answers with an error status.
    """
    url = f"https://static01.nyt.com/elections-assets/pages/data/2024-11-05/results-{state}-president.json"
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    race = response.json()['races'][0]
    for unit in race['reporting_units']:
        if unit.get('level') not in ('county', 'township'):
            continue
        # Reset the tallies for every unit: without this, a unit that lacks one
        # of the two candidates would silently reuse the previous unit's count.
        party_votes = {'trump-d': 0, 'harris-k': 0}
        for candidate in unit.get('candidates') or []:
            candidate_id = candidate['nyt_id']
            if candidate_id in party_votes:
                party_votes[candidate_id] = candidate['votes']['total']
        fips = unit.get('fips_state') + unit.get('fips_county')
        counties.loc[len(counties)] = [
            fips,
            party_votes['trump-d'],
            party_votes['harris-k'],
            unit.get('total_votes'),
        ]
# call function on all selected states
# Each call appends rows to `counties` as a side effect; the shape shown is
# the number of county/township rows collected before DC is added.
for state in states:
    get_results(state)
counties.shape

(4587, 4)

In [76]:
# get DC results
# NYT serves DC under the slug "washington-dc". Only its state-level reporting
# unit is used, and it is stored under the hard-coded FIPS code '11001'.
url = "https://static01.nyt.com/elections-assets/pages/data/2024-11-05/results-washington-dc-president.json"
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
race = response.json()['races'][0]
for unit in race['reporting_units']:
    if unit.get('level') != 'state':
        continue
    # Tally per unit so a missing candidate yields 0 rather than an unbound or
    # stale variable left over from an earlier cell.
    party_votes = {'trump-d': 0, 'harris-k': 0}
    for candidate in unit.get('candidates') or []:
        candidate_id = candidate['nyt_id']  # not `id`: that would shadow the builtin notebook-wide
        if candidate_id in party_votes:
            party_votes[candidate_id] = candidate['votes']['total']
    counties.loc[len(counties)] = [
        '11001',
        party_votes['trump-d'],
        party_votes['harris-k'],
        unit.get('total_votes'),
    ]
counties.head()

Unnamed: 0,fips,R_votes,D_votes,Total_votes
0,6037,1189862,2417109,3728427
1,6073,593270,841372,1477786
2,6059,654815,691731,1390965
3,6065,463677,451782,940405
4,6071,378416,362114,761714


In [77]:
print(f"Before aggregation: {counties.shape}")
# Several reporting units (e.g. townships) can carry the same county FIPS, so
# collapse them to one row per FIPS by summing the vote counts. The grouping
# key is kept as a column via as_index=False, so it needs no aggregation.
counties = counties.groupby('fips', as_index=False).agg(
    {'R_votes': 'sum', 'D_votes': 'sum', 'Total_votes': 'sum'}
)
# Report the aggregated frame itself; the old `agg_counties` name is not
# defined anywhere and raised NameError on a fresh kernel.
print(f"After aggregation: {counties.shape}")

Before aggregation: (4588, 4)
After aggregation: (3113, 4)


In [78]:
# Each party's score is its share of the total votes cast in the county.
total_votes = counties['Total_votes']
for party in ('R', 'D'):
    counties[f'{party}_score'] = counties[f'{party}_votes'] / total_votes
counties.head()

Unnamed: 0,fips,R_votes,D_votes,Total_votes,R_score,D_score
0,1001,20484,7439,28190,0.726641,0.263888
1,1003,95798,24934,121808,0.786467,0.204699
2,1005,5606,4158,9832,0.570179,0.422905
3,1007,7572,1619,9241,0.819392,0.175197
4,1009,25354,2576,28115,0.901796,0.091624
