In [80]:
import os 

# data manipulation
import numpy as np
import pandas as pd

# geospatial
import geopandas as gpd
from gerrychain import Graph
import networkx as nx
from networkx import is_connected

pd.options.mode.chained_assignment = None

## Data

In [81]:
state_dict = {
    '01': 'Alabama',
    '02': 'Alaska',
    '04': 'Arizona',
    '05': 'Arkansas',
    '06': 'California',
    '08': 'Colorado',
    '09': 'Connecticut',
    '10': 'Delaware',
    '11': 'District_of_Columbia',
    '12': 'Florida',
    '13': 'Georgia',
    '15': 'Hawaii',
    '16': 'Idaho',
    '17': 'Illinois',
    '18': 'Indiana',
    '19': 'Iowa',
    '20': 'Kansas',
    '21': 'Kentucky',
    '22': 'Louisiana',
    '23': 'Maine',
    '24': 'Maryland',
    '25': 'Massachusetts',
    '26': 'Michigan',
    '27': 'Minnesota',
    '28': 'Mississippi',
    '29': 'Missouri',
    '30': 'Montana',
    '31': 'Nebraska',
    '32': 'Nevada',
    '33': 'New_Hampshire',
    '34': 'New_Jersey',
    '35': 'New_Mexico',
    '36': 'New_York',
    '37': 'North_Carolina',
    '38': 'North_Dakota',
    '39': 'Ohio',
    '40': 'Oklahoma',
    '41': 'Oregon',
    '42': 'Pennsylvania',
    '44': 'Rhode_Island',
    '45': 'South_Carolina',
    '46': 'South_Dakota',
    '47': 'Tennessee',
    '48': 'Texas',
    '49': 'Utah',
    '50': 'Vermont',
    '51': 'Virginia',
    '53': 'Washington',
    '54': 'West_Virginia',
    '55': 'Wisconsin',
    '56': 'Wyoming'
}

###### Voting Data ######
# From: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ
    # county_fips column used to join election data to graph. Format is {state_fips}{county_fips}
    # We Drop NAN values. These are state write in ballots for Conneticut and Rhode Island. They account for less than 10,000 votes. 
    # We do not consider alaska because the county FIPS codes correspond to different districts than census population tracts
        # Note: County results in Alaska for 2004 are based on official Alaska data, but it is clear the district returns significantly overstate the number of votes cast. In Alaska, the county_fips field stores a combination of state FIPS code and district.


election_df = pd.read_csv('../dataIn/elections/countypres_2000-2024.csv')
election_df.dropna(subset = ['county_fips'], inplace=True)
election_df['county_fips'] = election_df['county_fips'].astype(str)


###### BVAP Data ######
# src: https://www.census.gov/programs-surveys/decennial-census/about/voting-rights/cvap.2020.html#list-tab-1518558936
    
    # Data joined using last 5 digits of geiod {STATEFIPS}{COUNTYFIPS} 
    # Columns Used:
        # cvap_est:      The rounded estimate of the total number of United States citizens 18 years of age or older for that geographic area or group.
        # lnnumber:      The line number of the record.
            # 1: Total Population
            # 5: Black or African American alone
            # 7: White Alone
            # 13: Hispanic or Latino Alone
            
acs_df = pd.read_csv('../dataIn/elections/CVAP_ACS_2016-2020_county.csv', encoding = 'latin-1')
acs_df['COUNTYFP'] = acs_df['geoid'].str[-5:].astype(str)


###### Tigerline Shapefiles ######
# src: https://www.nhgis.org/gis-files
    # used to generate Json graph using gerrychain
gdf = gpd.read_file("../dataIn/elections/shapefiles/US_county_2020.shp")


### Generate Graphs and Load Data

In [86]:
###### Out Dir ###### 
outDir = "../dataIn/elections/state_graphs_2020/"
os.makedirs(outDir, exist_ok = True)

elections = [2000, 2004, 2008, 2012, 2016, 2020]

###### Graph Generation ######
## Iterate through each state
for stateFP in state_dict.keys():
    print('-' * 50)
    print(f"Constructing graph for: {state_dict[stateFP]}")
    # Remove States:
        # Alaska removed because the Harvard Election Dataset does not have county-level results
        # Hawaii removed because we restrict analysis to continental U.S. 
        # DC removed because it only has one node 
    if stateFP in ['02', '15', '11']:
        continue

    # filter out state shapefile
    state_gdf = gdf[gdf['STATEFP'] == stateFP]
    # Generate graph using Gerrychain
    G = Graph.from_geodataframe(state_gdf, adjacency = 'queen', ignore_errors = True)


###### Add BVAP, DEM%, and REP% Graph Nodes ######
    ## Iterate through nodes
    for node in G.nodes:

        # FIPS Code from graph node
        countyFIPS = ('{}{}'.format(G.nodes[node]['STATEFP'], G.nodes[node]['COUNTYFP']))
        # Filter election data by county
        election_slice = election_df[election_df['county_fips'] == countyFIPS]
        # Filter ACS data by county
        acs_slice = acs_df[acs_df['COUNTYFP'] == countyFIPS].set_index('lnnumber')

        ## Iterate through Elections
        for election in elections:
            
            ###### Add Voting Data ######
            try:
                total_votes = election_slice[election_slice['year'] == election]['totalvotes'].iloc[0]
                G.nodes[node][f'{election}_V'] = total_votes
                # DEM%
                G.nodes[node][f'{election}_D'] = election_slice[(election_slice['party'] == 'DEMOCRAT') & (election_slice['year'] == election)]['candidatevotes'].sum(skipna = True, numeric_only = True) / total_votes
                # REP%
                G.nodes[node][f'{election}_R'] = election_slice[(election_slice['party'] == 'REPUBLICAN') & (election_slice['year'] == election)]['candidatevotes'].sum(skipna = True, numeric_only = True) / total_votes
                # No Fill Metadata
                G.nodes[node][f'{election}_filled_from'] = {
                    "filled": False
                }
            # Assign highest DEM% of neighbors if no data is available for that node
            except IndexError:
                # init dem% = 0 and best neighbor
                G.nodes[node][f'{election}_D'] = 0 
                best_neighbor = None

                ## Iterate neighbors
                for neighbor in G.neighbors(node):
                    try:
                        neighbor_dem = G.nodes[neighbor][f'{election}_D']

                        # if neighbor value > current max, set as new max and record neighbor
                        if neighbor_dem > G.nodes[node][f'{election}_D']:
                            best_neighbor = neighbor
                            G.nodes[node][f'{election}_D'] = neighbor_dem

                    except KeyError:
                        pass

                # Node gets best neighbor's data
                print("-" * 10)
                print("County election data missing for: ")
                print(f"\t State            : {state_dict[stateFP]}")
                print(f"\t County FIPS      : {countyFIPS}")
                print(f"\t Election         : {election}")
                print(f"Filling with max value from neighboring nodes:")
                print(f"\t County FIPS      : {G.nodes[best_neighbor]['COUNTYFP']}{G.nodes[best_neighbor]['STATEFP']}")
                print(f"\t Fill Value Dem%  : {G.nodes[best_neighbor][f'{election}_D']}")
                print(f"\t Fill Value Rep%  : {G.nodes[best_neighbor][f'{election}_R']}")
                print(f"\t Fill Value Votes : {G.nodes[best_neighbor][f'{election}_V']}")
                print("-" * 10)

                G.nodes[node][f'{election}_D'] = G.nodes[best_neighbor][f'{election}_D']      
                G.nodes[node][f'{election}_R'] = G.nodes[best_neighbor][f'{election}_R']
                G.nodes[node][f'{election}_V'] = G.nodes[best_neighbor][f'{election}_V']

                # Fill Metadata
                G.nodes[node][f'{election}_filled_from'] = {
                "filled": True,
                "used_neighbor": best_neighbor,
                "values_used": 
                    {
                    "D": G.nodes[best_neighbor][f'{election}_D'],
                    "R": G.nodes[best_neighbor][f'{election}_R'],
                    "V": G.nodes[best_neighbor][f'{election}_V']
                    }
                }

            ###### Add ACS Data ######
            total_vap = acs_slice.loc[1]['cvap_est']
            G.nodes[node]['VAP'] = total_vap
            G.nodes[node]['BVAP'] = acs_slice.loc[5]['cvap_est'] / total_vap
            G.nodes[node]['HVAP'] = acs_slice.loc[13]['cvap_est'] / total_vap

    ###### Save Graph ######
    out_file = os.path.join(outDir, f'{state_dict[stateFP]}.json')
    G.to_json(out_file)
    print(f'{state_dict[stateFP]} graph completed and saved to: "{out_file}"', sep = '\t')
    print('-' * 50)

--------------------------------------------------
Constructing graph for: Alabama




Alabama graph completed and saved to: "../dataIn/elections/state_graphs_2020/Alabama.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Alaska
--------------------------------------------------
Constructing graph for: Arizona




Arizona graph completed and saved to: "../dataIn/elections/state_graphs_2020/Arizona.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Arkansas




Arkansas graph completed and saved to: "../dataIn/elections/state_graphs_2020/Arkansas.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: California




California graph completed and saved to: "../dataIn/elections/state_graphs_2020/California.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Colorado




----------
County election data missing for: 
	 State            : Colorado
	 County FIPS      : 08014
	 Election         : 2000
Filling with max value from neighboring nodes:
	 County FIPS      : 00108
	 Fill Value Dem%  : 0.5019100248488669
	 Fill Value Rep%  : 0.4409839409561251
	 Fill Value Votes : 107852
----------
Colorado graph completed and saved to: "../dataIn/elections/state_graphs_2020/Colorado.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Connecticut




Connecticut graph completed and saved to: "../dataIn/elections/state_graphs_2020/Connecticut.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Delaware
Delaware graph completed and saved to: "../dataIn/elections/state_graphs_2020/Delaware.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: District_of_Columbia
--------------------------------------------------
Constructing graph for: Florida




Florida graph completed and saved to: "../dataIn/elections/state_graphs_2020/Florida.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Georgia




Georgia graph completed and saved to: "../dataIn/elections/state_graphs_2020/Georgia.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Hawaii
--------------------------------------------------
Constructing graph for: Idaho




Idaho graph completed and saved to: "../dataIn/elections/state_graphs_2020/Idaho.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Illinois




Illinois graph completed and saved to: "../dataIn/elections/state_graphs_2020/Illinois.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Indiana




Indiana graph completed and saved to: "../dataIn/elections/state_graphs_2020/Indiana.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Iowa




Iowa graph completed and saved to: "../dataIn/elections/state_graphs_2020/Iowa.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Kansas




Kansas graph completed and saved to: "../dataIn/elections/state_graphs_2020/Kansas.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Kentucky




Kentucky graph completed and saved to: "../dataIn/elections/state_graphs_2020/Kentucky.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Louisiana




Louisiana graph completed and saved to: "../dataIn/elections/state_graphs_2020/Louisiana.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Maine




Maine graph completed and saved to: "../dataIn/elections/state_graphs_2020/Maine.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Maryland




Maryland graph completed and saved to: "../dataIn/elections/state_graphs_2020/Maryland.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Massachusetts




Massachusetts graph completed and saved to: "../dataIn/elections/state_graphs_2020/Massachusetts.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Michigan




Michigan graph completed and saved to: "../dataIn/elections/state_graphs_2020/Michigan.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Minnesota




Minnesota graph completed and saved to: "../dataIn/elections/state_graphs_2020/Minnesota.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Mississippi




Mississippi graph completed and saved to: "../dataIn/elections/state_graphs_2020/Mississippi.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Missouri




Missouri graph completed and saved to: "../dataIn/elections/state_graphs_2020/Missouri.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Montana




Montana graph completed and saved to: "../dataIn/elections/state_graphs_2020/Montana.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Nebraska




Nebraska graph completed and saved to: "../dataIn/elections/state_graphs_2020/Nebraska.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Nevada




Nevada graph completed and saved to: "../dataIn/elections/state_graphs_2020/Nevada.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: New_Hampshire




New_Hampshire graph completed and saved to: "../dataIn/elections/state_graphs_2020/New_Hampshire.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: New_Jersey




New_Jersey graph completed and saved to: "../dataIn/elections/state_graphs_2020/New_Jersey.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: New_Mexico




New_Mexico graph completed and saved to: "../dataIn/elections/state_graphs_2020/New_Mexico.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: New_York




New_York graph completed and saved to: "../dataIn/elections/state_graphs_2020/New_York.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: North_Carolina




North_Carolina graph completed and saved to: "../dataIn/elections/state_graphs_2020/North_Carolina.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: North_Dakota




North_Dakota graph completed and saved to: "../dataIn/elections/state_graphs_2020/North_Dakota.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Ohio




Ohio graph completed and saved to: "../dataIn/elections/state_graphs_2020/Ohio.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Oklahoma




Oklahoma graph completed and saved to: "../dataIn/elections/state_graphs_2020/Oklahoma.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Oregon




Oregon graph completed and saved to: "../dataIn/elections/state_graphs_2020/Oregon.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Pennsylvania




Pennsylvania graph completed and saved to: "../dataIn/elections/state_graphs_2020/Pennsylvania.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Rhode_Island
Rhode_Island graph completed and saved to: "../dataIn/elections/state_graphs_2020/Rhode_Island.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: South_Carolina




South_Carolina graph completed and saved to: "../dataIn/elections/state_graphs_2020/South_Carolina.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: South_Dakota




----------
County election data missing for: 
	 State            : South_Dakota
	 County FIPS      : 46102
	 Election         : 2000
Filling with max value from neighboring nodes:
	 County FIPS      : 00746
	 Fill Value Dem%  : 0.3378136200716846
	 Fill Value Rep%  : 0.6379928315412187
	 Fill Value Votes : 1116
----------
----------
County election data missing for: 
	 State            : South_Dakota
	 County FIPS      : 46102
	 Election         : 2004
Filling with max value from neighboring nodes:
	 County FIPS      : 00746
	 Fill Value Dem%  : 0.4656441717791411
	 Fill Value Rep%  : 0.5110429447852761
	 Fill Value Votes : 1630
----------
----------
County election data missing for: 
	 State            : South_Dakota
	 County FIPS      : 46102
	 Election         : 2008
Filling with max value from neighboring nodes:
	 County FIPS      : 00746
	 Fill Value Dem%  : 0.46846089150546677
	 Fill Value Rep%  : 0.5164003364171573
	 Fill Value Votes : 1189
----------
----------
County election 



Tennessee graph completed and saved to: "../dataIn/elections/state_graphs_2020/Tennessee.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Texas




Texas graph completed and saved to: "../dataIn/elections/state_graphs_2020/Texas.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Utah




Utah graph completed and saved to: "../dataIn/elections/state_graphs_2020/Utah.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Vermont




Vermont graph completed and saved to: "../dataIn/elections/state_graphs_2020/Vermont.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Virginia




Virginia graph completed and saved to: "../dataIn/elections/state_graphs_2020/Virginia.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Washington




Washington graph completed and saved to: "../dataIn/elections/state_graphs_2020/Washington.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: West_Virginia




West_Virginia graph completed and saved to: "../dataIn/elections/state_graphs_2020/West_Virginia.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Wisconsin




Wisconsin graph completed and saved to: "../dataIn/elections/state_graphs_2020/Wisconsin.json"
--------------------------------------------------
--------------------------------------------------
Constructing graph for: Wyoming




Wyoming graph completed and saved to: "../dataIn/elections/state_graphs_2020/Wyoming.json"
--------------------------------------------------
