# Political parties
---

We record political party affiliation for counties (1988-2024) and states (1976-2024). Each county or state is assigned the party that received the **most votes** (the plurality winner) in that year's presidential election.

## Data sources


#### State

- **1976-2020:**
 MIT Election Data and Science Lab. (2017). U.S. President 1976-2020 (Version V8) [dataset]. Harvard Dataverse. https://doi.org/10.7910/DVN/42MVDX 
 
- **2024:**
Federal Election Commission. (2025, January 16). Official 2024 presidential general election results. https://www.fec.gov/resources/cms-content/documents/2024presgeresults.xlsx

#### County

- **1988-1996:**
 Dave Leip’s Atlas of U.S. Presidential Elections | Leip, David. Dave Leip’s Atlas of U.S. Presidential Elections. http://uselectionatlas.org

- **2000-2024:**
MIT Election Data and Science Lab. (2018). County Presidential Election Returns 2000-2024 (Version V16) [dataset]. Harvard Dataverse. https://doi.org/10.7910/DVN/VOQCHQ 




In [None]:
import ibis
from ibis import _
import ibis.expr.datatypes as dt  # Make sure to import the necessary module
from cng.utils import *
from cng.h3 import *

# Create the DuckDB-backed ibis connection that the cells below read from.
# NOTE(review): duckdb_install_h3 and set_secrets are not defined in this
# notebook (they arrive via the cng star-imports); presumably they install the
# DuckDB H3 extension and register S3 credentials on the connection — confirm.
duckdb_install_h3()
con = ibis.duckdb.connect(extensions = ["spatial"])  # request the "spatial" extension
set_secrets(con)

In [None]:
# State-level inputs: the 1976-2020 returns (CSV) and the 2024 results workbook (XLSX).
url_state_pt1 = 's3://shared-election/by_state/dataverse/1976-2020-president.csv'
url_state_pt2 = 's3://shared-election/by_state/2024presgeresults.xlsx'

# County-level inputs: one CSV each for 1988, 1992 and 1996, then one CSV for 2000-2024.
url_county_pt1 = 's3://shared-election/by_county/1988_0_0_2.csv'
url_county_pt2 = 's3://shared-election/by_county/1992_0_0_2.csv'
url_county_pt3 = 's3://shared-election/by_county/1996_0_0_2.csv'
url_county_pt4 = 's3://shared-election/by_county/dataverse/countypres_2000-2024.csv'

# State 

1976 to 2020

In [None]:
# Winning party per state and election year for 1976-2020.
state = (con
        .read_csv(url_state_pt1)
        .rename(state="state_po" , party = "party_simplified")  # rename columns
        .group_by(["year", "state"])
        .aggregate(party=_.party.argmax(_.candidatevotes))  # party on the row with the most votes
        # Normalise the label casing so these rows agree with the 2024 rows
        # ("Republican"/"Democrat") and with the county table, which is also
        # passed through capitalize(). The call is a no-op for labels that are
        # already written that way.
        .mutate(party=_.party.capitalize())
        .select("year", "state", "party")  # select only relevant columns
        )

Adding 2024 results

In [None]:
# Lookup from the candidate column names kept below to a party label.
party_map = ibis.memtable(
    [("TRUMP", "Republican"), ("HARRIS", "Democrat")],
    columns=["candidate", "party"],
)

# Load the 2024 workbook, discard the electoral-vote columns and the two
# summary rows, and line the state column name up with the 1976-2020 table.
t = con.read_xlsx(url_state_pt2)
t = t.drop('ELECTORAL VOTES', 'ELECTORAL VOTE: TRUMP (R)', 'ELECTORAL VOTE: HARRIS (D)')
t = t.filter(~_.STATE.isin(['Percentage:', 'Total:']))
t = t.rename(state="STATE")

# Every remaining column except the state key and the grand total is treated
# as a per-candidate vote column and unpivoted into (candidate, votes) rows.
candidate_cols = [name for name in t.columns if name not in ("state", "TOTAL VOTES")]
long = t.pivot_longer(candidate_cols, names_to="candidate", values_to="votes")

# Highest vote count observed in each state.
max_votes = long.group_by("state").aggregate(max_votes=_.votes.max())

# Keep the row(s) that reach the state maximum, then translate candidate -> party.
# The inner join drops rows whose candidate is not listed in party_map.
results = long.join(
    max_votes,
    [long.state == max_votes.state, long.votes == max_votes.max_votes],
)
results = results.join(party_map, "candidate", how="inner")
results = (
    results
    .mutate(year=2024)
    .cast({'year': 'int64'})
    .select('year', 'state', 'party')
    .order_by('state')
)

# Append the 2024 rows to the 1976-2020 table.
state = state.union(results)

Join with the state geometries and save to MinIO

In [None]:
# Attach a polygon to every (year, state) row and write the result to S3.
state_boundaries = 's3://public-census/2024/state/2024_us_state.parquet'

state_geom = con.read_parquet(state_boundaries)
# In the boundary file "state_id" becomes the join key "state"; the column that
# was called "state" is kept under the name "state_name".
state_geom = state_geom.rename(state_name = "state", state = "state_id")
state_geom = state_geom.select('state', 'geom', 'state_name')
# Inner join: only states present in both tables survive.
state_geom = state_geom.join(state, "state", how = "inner")
state_geom = state_geom.select("year", "state", "party", "geom")
state_geom = state_geom.order_by(['year', 'state'])

state_geom.to_parquet('s3://public-election/state/state_political_parties_1976-2024.parquet')

# County

In [None]:
# helper functions 
def calculate_county_party(candidates, year, county):
    """Return the top-voted candidate for every county in one election year.

    Uses the module-level connection ``con`` to download the Census state
    FIPS reference file on every call.

    Args:
        candidates: Names of the per-candidate vote columns in ``county``.
        year: Election year written to the ``year`` column of every row.
        county: ibis table with ``FIPS`` and ``Geographic Name`` columns plus
            one vote column per entry in ``candidates``.

    Returns:
        ibis table with columns ``year``, ``state``, ``county`` and
        ``candidate``; ``candidate`` is the name with the most votes in that
        county. ``state`` is the ``STATE`` column of the Census reference file
        (expected to be the postal abbreviation — confirm against the file).
    """
    fips_url = 'https://www2.census.gov/geo/docs/reference/codes2020/national_state2020.txt'
    fips_codes = con.read_csv(fips_url).rename(state="STATE").select("STATEFP","state")

    # The input has no state column, so derive it from the first two
    # characters of the zero-padded five-character FIPS code.
    with_state = (
        county
        .rename(county="Geographic Name")
        .filter(_.FIPS != "fips")  # drop rows whose FIPS is the literal "fips" (presumably stray header rows)
        .cast({"FIPS": "string"})
        .mutate(FIPS = _.FIPS.lpad(5, "0"))
        .mutate(STATEFP=_.FIPS.substr(0, 2),
            county=ibis.ifelse(
                _.county.endswith("County"),
                _.county,
                _.county + " County",
            ),
        )
        .join(fips_codes, 'STATEFP',how = 'left')
    )

    # One row per (county, candidate) with a positive integer vote count.
    votes_long = (
        with_state
        .pivot_longer(
            candidates,
            names_to="candidate",
            values_to="candidate_votes",
        )
        .mutate(
            year=year,
            candidate_votes=_.candidate_votes.cast("int64"),
        )
        .filter(
            _.candidate_votes.notnull()
            & (_.candidate_votes > 0)
        )
        .select("year","state","county","candidate","candidate_votes")
        .mutate(
            # Only Florida's Dade County was renamed Miami-Dade. Without the
            # state check the Dade Counties of other states (e.g. Georgia,
            # Missouri) would be renamed as well and then fail to match any
            # county boundary.
            county=ibis.ifelse(
                (_.state == "FL") & (_.county == "Dade County"),
                "Miami-Dade County",
                _.county
            )
        )
    )

    winners = (votes_long
        .group_by(["year", "state", "county"])
        .aggregate(candidate=_.candidate.argmax(_.candidate_votes))  # candidate with the most votes
        .select('year','state','county','candidate')
    )
    return winners
    

1988 - 1996

In [None]:
# 1988: vote columns to unpivot from the 1988 county file.
candidates = [
    'Michael Dukakis', 'George Bush', 'Ron Paul', 'Lenora Fulani', 'David Duke',
    'Eugene McCarthy', 'James Griffen', 'Lyndon LaRouche', 'William Marra',
    'Write-In', 'Edward Winn', 'James Warren', 'Herbert Lewin', 'Earl Dodge',
    'Larry Holmes', 'None o.t. Candidates', 'Willa Kenoyer', 'Delmar Dennis',
    'Jack Herer', 'Louis Youngkeit', 'John Martin',
]
year = 1988
county = con.read_csv(url_county_pt1)
year1 = calculate_county_party(candidates, year, county)

# Candidate -> party lookup. The inner join below keeps only counties whose
# winner appears in this table.
party_map = ibis.memtable(
    [("George Bush", "Republican"), ("Michael Dukakis", "Democrat")],
    columns=["candidate", "party"],
)

df_1988 = year1.join(party_map, "candidate", how="inner")
df_1988 = df_1988.drop('candidate')


#### 1992
# Vote columns to unpivot from the 1992 county file.
candidates = [
    'William Clinton', 'George Bush', 'H. Ross Perot', 'Andre Marrou',
    'James Bo Gritz', 'Lenore Fulani', 'Howard Phillips', 'Dr. John Hagelin',
    'Ron Daniels', 'Lyndon LaRouche Jr.', 'James Warren', 'Write-ins',
    'Drew Bradford', 'Jack Herer', 'J. Quinn Brisben', 'Helen Halyard',
    'None o.t. Candidates', 'John Yiamouyiannis', 'Ehlers', 'Earl Dodge',
    'Jim Boren', 'Hem', 'Isabell Masters', 'Smith', 'Gloria LaRiva',
]
year = 1992
county = con.read_csv(url_county_pt2)
year2 = calculate_county_party(candidates, year, county)

# Candidate -> party lookup. The inner join below keeps only counties whose
# winner appears in this table.
# NOTE(review): Perot is labelled "Reform" here although his 1992 run predates
# that party — confirm this is the intended label.
party_map = ibis.memtable(
    [
        ("George Bush", "Republican"),
        ("William Clinton", "Democrat"),
        ("H. Ross Perot", "Reform"),
    ],
    columns=["candidate", "party"],
)

df_1992 = year2.join(party_map, "candidate", how="inner")
df_1992 = df_1992.drop('candidate')

#### 1996
# Vote columns to unpivot from the 1996 county file.
candidates = [
    'William Clinton', 'Robert Dole', 'H. Ross Perot', 'Ralph Nader',
    'Harry Browne', 'Howard Phillips', 'Dr. John Hagelin', 'Monica Moorehead',
    'Marsha Feinland', 'Write-ins', 'Charles Collins', 'James Harris',
    'None o.t. Candidates', 'Dennis Peron', 'Mary Cal Hollis', 'Jerome White',
    'Diane Templin', 'Earl Dodge', 'A. Peter Crane', 'Just. Ralph Forbes',
    'John Birrenback', 'Isabell Masters pHD', 'Steve Michael',
]
year = 1996
county = con.read_csv(url_county_pt3)
year3 = calculate_county_party(candidates, year, county)

# Candidate -> party lookup. The inner join below keeps only counties whose
# winner appears in this table.
party_map = ibis.memtable(
    [("Robert Dole", "Republican"), ("William Clinton", "Democrat")],
    columns=["candidate", "party"],
)

df_1996 = year3.join(party_map, "candidate", how="inner")
df_1996 = df_1996.drop('candidate')


2000 - 2024

In [None]:
# Manual county-name corrections, keyed by the value compared against the
# `state` column and then by {generated name: replacement name}. The generated
# names are what the automatic capitalisation and " County" suffixing produce;
# the replacements are presumably the spellings used by the county boundary
# file that the result is joined to on (state, county).
county_vals = {
    "CA": {
        "San Luis obispo County": "San Luis Obispo County",
    },
    "FL": {
        "Miami-dade County": "Miami-Dade County",
    },
    "GA": {
        "Dekalb County": "DeKalb County",
    },
    "IL": {
        "Dupage County": "DuPage County",
        "Dekalb County": "DeKalb County",
    },
    "LA": {
        "East Baton rouge County": "East Baton Rouge Parish",
        "St. Helena County": "St. Helena Parish",
        "St. Tammany County": "St. Tammany Parish",
    },
    "MO": {
        "St. Louis city County": "St. Louis city",
        "St. Louis county County": "St. Louis County",
    },
    "MT": {
        "Lewis And clark County": "Lewis and Clark County",
    },
    "NV": {
        "Carson City County": "Carson City",
    },
    "VA": {
        "Fairfax City County": "Fairfax city",
        "Roanoke City County": "Roanoke city",
    },
}


# Fold every correction into one nested ifelse expression that can be applied
# with `.mutate(county=expr)`. The loop variable is deliberately NOT called
# `state`: that name is already bound to the state-level results table, and
# reusing it here would silently replace the table with the last dict key.
expr = _.county
for state_code, mapping in county_vals.items():
    for old, new in mapping.items():
        expr = ibis.ifelse((_.state == state_code) & (_.county == old), new, expr)

    
# Winning party per county and election year for 2000-2024.
county_post2000 = (
    con
    .read_csv(url_county_pt4)
    .filter(_.totalvotes > 0)  # filter empty votes
    .filter(~_.candidate.startswith("TOTAL"))  # filter non candidate votes
    .rename(state="state_po", state_name="state")
    .mutate(county_name=_.county_name.capitalize())
    .mutate(  # converts Mchenry -> McHenry
        county=ibis.ifelse(
            _.county_name.startswith("Mc"),
            _.county_name.re_replace(
                "Mc(.*)", 'Mc' + _.county_name.substr(2).capitalize()
            ),
            _.county_name,
        )
    )
    .mutate(  # capitalizes 2nd word (e.g., Santa cruz -> Santa Cruz)
        county=ibis.ifelse(
            # Explicit null test instead of comparing the expression to None.
            _.county.re_extract(" (.*)", 1).notnull(),
            _.county.re_replace(
                " (.*)", ' ' + _.county.re_extract(" (.*)", 1).capitalize()
            ),
            _.county,
        )
    )
    .mutate(county=_.county + ibis.literal(" County"))
    .mutate(county=expr)  # apply the manual name corrections
    .group_by(["year", "state", "county", "state_name", "party"])
    .aggregate(total_candidate_votes=_.candidatevotes.sum())  # total votes per party
    .group_by(["year", "state", "county", "state_name"])
    .aggregate(party=_.party.argmax(_.total_candidate_votes))  # party with the highest total votes
    .mutate(party=_.party.capitalize())
    .cast({'year': 'int16'})
    .select("year", "state", "county", "party")
)

# Stack all years together. The corrections are applied once more so that they
# also cover the 1988-1996 rows, which have not been through `expr` yet.
county = county_post2000.union(df_1988).union(df_1992).union(df_1996).mutate(county=expr)


In [None]:
# merging with county polygons

# Attach a polygon to every (year, county) row and write the result to S3.
county_boundaries = "s3://public-census/2024/county/2024_us_county.parquet"

county_geom = con.read_parquet(county_boundaries)
# Replace the boundary file's own "state" column with "state_id" so the join
# key has the same name as in the election table.
county_geom = county_geom.drop('state').rename(state='state_id')
# Inner join: a county is kept only if its (state, county) pair matches exactly.
county_geom = county_geom.join(county, ['state', 'county'], how='inner')
county_geom = county_geom.select('year', 'FIPS', 'state', 'county', 'party', 'geom')
county_geom = county_geom.order_by(['year', 'FIPS'])

county_geom.to_parquet('s3://public-election/county/county_political_parties_1988-2024.parquet')

# Save as PMTiles

In [None]:
# `os` and `re` are used below but are not imported anywhere else in this
# notebook; import them here rather than relying on a star-import to leak them.
import os
import re

# Materialise the state table to a local FlatGeobuf file, convert it to
# PMTiles and copy the tiles to S3.
# NOTE(review): to_pmtiles and s3_cp come from the cng star-imports; their
# exact behaviour is not visible here.
state_fgb_file = 'state_political_parties_1976-2024.fgb'
state_geom.execute().to_file(state_fgb_file)
pmtiles_file = 'state_political_parties_1976-2024.pmtiles'
pmtiles = f"s3://public-election/state/{pmtiles_file}"
# Layer name: the file stem with every run of non-word characters removed.
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])

options = [
    f'--layer={source_layer_name}',
    '--extend-zooms-if-still-dropping',
]
new_pmtiles = to_pmtiles(state_fgb_file, pmtiles_file, options=options)
s3_cp(new_pmtiles, pmtiles)

In [None]:
# `os` and `re` are used below but are not imported anywhere else in this
# notebook; import them here rather than relying on a star-import to leak them.
import os
import re

# Materialise the county table to a local FlatGeobuf file, convert it to
# PMTiles and copy the tiles to S3.
# NOTE(review): to_pmtiles and s3_cp come from the cng star-imports; their
# exact behaviour is not visible here.
county_fgb_file = '/tmp/county_political_parties_1988-2024.fgb'
county_geom.execute().to_file(county_fgb_file)
pmtiles_file = 'county_political_parties_1988-2024.pmtiles'
pmtiles = f"s3://public-election/county/{pmtiles_file}"
# Layer name: the file stem with every run of non-word characters removed.
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])

options = [
    f'--layer={source_layer_name}',
    '--extend-zooms-if-still-dropping',
]
new_pmtiles = to_pmtiles(county_fgb_file, pmtiles_file, options=options)
s3_cp(new_pmtiles, pmtiles)