In [1]:
import pandas as pd
import geopandas as gpd
import maup
import time
from maup import smart_repair
from gerrychain import Graph

maup.progress.enabled = True

In [2]:
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and data-quality
# warnings from pandas/geopandas/maup — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [3]:
# state = Pennsylvania
# Lower-case state abbreviation; it is used below to build the input data paths
# and the output directory / file names.
state_ab = "pa"

## Data
1. Download all the data files into the directory "pa_data".
2. Extract every downloaded archive inside that directory.

In [4]:
# Folder that holds every extracted download, e.g. "pa_data/".
data_folder = f"{state_ab}_data/"

# Census 2020 shapefiles (tables P1, P2 and P4); all three live in the same sub-folder.
population1_data = f"./{data_folder}{state_ab}_pl2020_b/{state_ab}_pl2020_p1_b.shp"
population2_data = f"./{data_folder}{state_ab}_pl2020_b/{state_ab}_pl2020_p2_b.shp"
vap_data = f"./{data_folder}{state_ab}_pl2020_b/{state_ab}_pl2020_p4_b.shp"

# VEST precinct-level election results, one shapefile per election year.
vest20_data = f"./{data_folder}{state_ab}_vest_20/{state_ab}_vest_20.shp"
vest18_data = f"./{data_folder}{state_ab}_vest_18/{state_ab}_vest_18.shp"
vest16_data = f"./{data_folder}{state_ab}_vest_16/{state_ab}_vest_16.shp"

# District plans adopted in 2022: congressional, state senate, state house.
cd_data = f"./{data_folder}{state_ab}_cong_adopted_2022/carter_boundaries.shp"
send_data = f"./{data_folder}{state_ab}_sldu_adopted_2022/2022 LRC-Senate-Final.shp"
hdist_data = f"./{data_folder}{state_ab}_sldl_adopted_2022/2022 LRC-House-Final.shp"

In [5]:
def do_smart_repair(df, snap_precision=None):
    """Repair ``df`` with ``maup.smart_repair`` and return it in EPSG:4269.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Layer to repair.
    snap_precision : int, optional
        Forwarded to ``smart_repair`` when given. ``None`` (the default) does
        not pass the argument at all, so maup's own default applies and
        existing callers see no change.

    Returns
    -------
    geopandas.GeoDataFrame
        The repaired layer, reprojected to EPSG:4269.

    Raises
    ------
    Exception
        If ``maup.doctor`` still reports problems after the repair.
    """
    # Reproject to the estimated UTM zone before repairing.
    projected = df.to_crs(df.estimate_utm_crs())

    repair_kwargs = {} if snap_precision is None else {"snap_precision": snap_precision}
    repaired = smart_repair(projected, **repair_kwargs)

    if not maup.doctor(repaired):
        raise Exception('smart_repair failed')
    print('smart_repair successful')

    # Go back to EPSG:4269 (geographic NAD83, not a UTM zone), the CRS used
    # by the rest of this notebook.
    return repaired.to_crs('EPSG:4269')

In [6]:
def add_district(dist_df, dist_name, election_df, col_name):
    """Label every precinct in ``election_df`` with the district it falls in.

    Parameters
    ----------
    dist_df : geopandas.GeoDataFrame
        District layer; repaired with ``do_smart_repair`` if ``maup.doctor``
        reports problems.
    dist_name : str
        Name of the column to create in ``election_df``.
    election_df : geopandas.GeoDataFrame
        Precinct layer (treated as EPSG:4269).
    col_name : str
        Column of ``dist_df`` holding the district label to copy.

    Returns
    -------
    geopandas.GeoDataFrame
        ``election_df`` with the extra ``dist_name`` column. Precincts that
        ``maup.assign`` could not place get NaN instead of raising KeyError.
    """
    # check if it needs to be smart_repair
    if not maup.doctor(dist_df):
        dist_df = do_smart_repair(dist_df)

    election_df = gpd.GeoDataFrame(election_df, crs="EPSG:4269")

    # NOTE(review): maup.assign is assumed to need both layers in the same CRS
    # (the notebook reprojects hdist by hand before calling this); align here
    # so callers cannot forget.
    if dist_df.crs is not None and dist_df.crs != election_df.crs:
        dist_df = dist_df.to_crs(election_df.crs)

    # assign the precincts: the result holds, per precinct, the index label of
    # the matching row in dist_df
    precincts_to_district_assignment = maup.assign(election_df.geometry, dist_df.geometry)

    # Translate index labels to district labels in one vectorised step. This
    # replaces a row-by-row .at loop that assumed a 0..n-1 index on election_df
    # and rewrote the column in place.
    election_df[dist_name] = precincts_to_district_assignment.map(dist_df[col_name])

    return election_df

In [7]:
def rename(original, year):
    """Shorten a VEST column name to ``<office><year><party>``.

    Characters 3-5 of ``original`` are kept as the office code and character 6
    is read as the party letter: 'R' and 'D' are kept, anything else becomes
    'O'. Example: ``rename('G20PREDBID', '20') -> 'PRE20D'``.

    Raises IndexError if ``original`` has fewer than 7 characters.
    """
    office = original[3:6]
    party_letter = original[6]
    suffix = party_letter if party_letter in ('R', 'D') else 'O'
    return office + year + suffix

In [8]:
# Demographic columns carried on every precinct; check_population compares
# their totals between the block layer and the precinct layer.
pop_col = [
    # total population and Hispanic / non-Hispanic breakdown
    'TOTPOP', 'HISP',
    'NH_WHITE', 'NH_BLACK', 'NH_AMIN', 'NH_ASIAN', 'NH_NHPI', 'NH_OTHER', 'NH_2MORE',
    'H_WHITE', 'H_BLACK', 'H_AMIN', 'H_ASIAN', 'H_NHPI', 'H_OTHER', 'H_2MORE',
    # voting-age population
    'VAP', 'HVAP', 'WVAP', 'BVAP', 'AMINVAP', 'ASIANVAP', 'NHPIVAP', 'OTHERVAP', '2MOREVAP',
]

In [9]:
def check_population(population, df):
    """Verify that ``df`` and ``population`` have the same totals for every ``pop_col`` column.

    Prints "population agrees" when all column sums match exactly; otherwise
    prints the comparison table and raises ``Exception``.
    """
    block_totals = population[pop_col].sum()
    precinct_totals = df[pop_col].sum()

    pop_check = pd.DataFrame({
        'pop_col': pop_col,
        'population_df': block_totals,
        'vest_base': precinct_totals,
        'equal': [a == b for a, b in zip(block_totals, precinct_totals)],
    })

    if not pop_check['equal'].all():
        print(pop_check)
        raise Exception("population doesn't agree")

    print("population agrees")

In [10]:
def add_vest(vest, df, year, population, start_col):
    """Add the election results of one VEST file to the base precinct table ``df``.

    Precinct votes in ``vest`` are spread over the census blocks of
    ``population`` in proportion to each block's share of its precinct's VAP,
    then summed back up to the precincts of ``df``.

    Parameters
    ----------
    vest : geopandas.GeoDataFrame
        VEST precinct layer. Election columns are ``vest.columns[start_col:-1]``
        (the last column is skipped — presumably the geometry; confirm).
    df : geopandas.GeoDataFrame
        Base precinct table that receives the new columns.
    year : str
        Two-digit year inserted into the new column names by ``rename``.
    population : geopandas.GeoDataFrame
        Block-level layer with "VAP", "geometry" and every ``pop_col`` column.
    start_col : int
        Position of the first election column in ``vest``.

    Returns
    -------
    ``df`` with the prorated election columns added.

    Raises
    ------
    Exception
        From ``do_smart_repair`` if the repair fails, or from
        ``check_population`` if the population totals no longer agree.
    """
    # check if it needs to be smart_repair
    if not maup.doctor(vest):
        vest = do_smart_repair(vest)

    # rename the columns
    original_col = vest.columns[start_col:-1]
    new_col = [rename(i, year) for i in original_col]
    rename_dict = dict(zip(original_col, new_col))
    vest = vest.rename(columns=rename_dict)
    # several source columns can map to the same "...O" name; sum them together
    vest = vest.groupby(level=0, axis=1).sum()
    col_name = sorted(set(new_col))

    # make the blocks from precincts by weight
    vest = gpd.GeoDataFrame(vest, crs="EPSG:4269")
    # Explicit copy: new columns are written below, and writing onto a slice of
    # `population` is chained assignment (only hidden by the warnings filter).
    election_in_block = population[["VAP", 'geometry']].copy()  # block scale
    blocks_to_precincts_assignment = maup.assign(election_in_block.geometry, vest.geometry)

    # weight of a block = its VAP / total VAP of the precinct it was assigned to
    precinct_vap = election_in_block["VAP"].groupby(blocks_to_precincts_assignment).sum()
    weights = election_in_block["VAP"] / blocks_to_precincts_assignment.map(precinct_vap)
    weights = weights.fillna(0)  # division by a zero/missing precinct total
    prorated = maup.prorate(blocks_to_precincts_assignment, vest[col_name], weights)
    election_in_block[col_name] = prorated

    # assign blocks to the base precincts and sum the prorated votes
    election_in_block = gpd.GeoDataFrame(election_in_block, crs="EPSG:4269")
    df = gpd.GeoDataFrame(df, crs="EPSG:4269")
    blocks_to_base_assignment = maup.assign(election_in_block.geometry, df.geometry)
    df[col_name] = election_in_block[col_name].groupby(blocks_to_base_assignment).sum()
    df = df.groupby(level=0, axis=1).sum()

    # check if population agrees
    check_population(population, df)

    return df

### Read the census data

In [11]:
# Load the three census shapefiles (P1, P2, P4) in one pass, in that order.
population1_df, population2_df, vap_df = (
    gpd.read_file(path)
    for path in (population1_data, population2_data, vap_data)
)

In [12]:
# Remove the columns that would otherwise be duplicated when these two tables
# are merged onto population1_df by GEOID20.
population2_df = population2_df.drop(
    ['SUMLEV', 'LOGRECNO', 'GEOID', 'COUNTY', 'geometry'], axis='columns'
)
vap_df = vap_df.drop(
    ['SUMLEV', 'LOGRECNO', 'GEOID', 'COUNTY', 'geometry'], axis='columns'
)

In [13]:
# Join the three census tables on the block identifier; population1_df stays
# on the left so its geometry column is the one that survives.
population_df = (
    population1_df
    .merge(population2_df, on='GEOID20')
    .merge(vap_df, on='GEOID20')
)

In [14]:
# Run maup's geometry check on the merged block layer; the cell output is the
# boolean result (the recorded run took about 16 minutes).
maup.doctor(population_df)

100%|██████████| 336985/336985 [16:26<00:00, 341.46it/s]  


True

In [15]:
# Hispanic count per race = P001 column minus the matching P002 column (the
# P002 columns are renamed to NH_* further down).
# NOTE(review): assumes the P001 columns are the all-ethnicity race totals —
# confirm against the Census table definitions.
# Plain column arithmetic gives the same values as the former row-wise
# apply(axis=1) without calling a Python lambda once per block.
population_df['H_WHITE'] = population_df['P0010003'] - population_df['P0020005']
population_df['H_BLACK'] = population_df['P0010004'] - population_df['P0020006']
population_df['H_AMIN'] = population_df['P0010005'] - population_df['P0020007']
population_df['H_ASIAN'] = population_df['P0010006'] - population_df['P0020008']
population_df['H_NHPI'] = population_df['P0010007'] - population_df['P0020009']
population_df['H_OTHER'] = population_df['P0010008'] - population_df['P0020010']
population_df['H_2MORE'] = population_df['P0010009'] - population_df['P0020011']

In [16]:
# Census field code -> readable column name used in the rest of the notebook.
rename_dict = {
    # P002 columns: total, Hispanic, and non-Hispanic by race
    'P0020001': 'TOTPOP',
    'P0020002': 'HISP',
    'P0020005': 'NH_WHITE',
    'P0020006': 'NH_BLACK',
    'P0020007': 'NH_AMIN',
    'P0020008': 'NH_ASIAN',
    'P0020009': 'NH_NHPI',
    'P0020010': 'NH_OTHER',
    'P0020011': 'NH_2MORE',
    # P004 columns: voting-age population
    'P0040001': 'VAP',
    'P0040002': 'HVAP',
    'P0040005': 'WVAP',
    'P0040006': 'BVAP',
    'P0040007': 'AMINVAP',
    'P0040008': 'ASIANVAP',
    'P0040009': 'NHPIVAP',
    'P0040010': 'OTHERVAP',
    'P0040011': '2MOREVAP',
}

In [17]:
# Apply the census-code -> readable-name mapping to the block table.
population_df = population_df.rename(columns=rename_dict)

In [18]:
# Load the congressional plan; repair it only when maup.doctor reports problems.
cong_df = gpd.read_file(cd_data)
if not maup.doctor(cong_df):
    cong_df = do_smart_repair(cong_df)

100%|██████████| 17/17 [00:00<00:00, 38.03it/s]


## Read the vest 20 data

We now use it as the "base precinct" layer.

In [19]:
def add_vest_base(vest, start_col, year):
    """Prepare a VEST layer for use as the base precinct table.

    The election columns ``vest.columns[start_col:-1]`` are renamed with
    ``rename``; columns that end up sharing a name are summed together. The
    result is returned as a GeoDataFrame tagged EPSG:4269.
    """
    election_cols = vest.columns[start_col:-1]
    short_names = {old: rename(old, year) for old in election_cols}
    merged = vest.rename(columns=short_names).groupby(level=0, axis=1).sum()
    return gpd.GeoDataFrame(merged, crs="EPSG:4269")

## Check if vest20 can be used as base

In [20]:
# Load the 2020 VEST precincts and repair them with the default settings.
# NOTE(review): vest20 is re-read and repaired again with snap_precision=8 in a
# later cell, so this repair (hours in the recorded run) looks superseded —
# confirm before re-running.
vest20 = gpd.read_file(vest20_data)
if not maup.doctor(vest20):
    vest20 = do_smart_repair(vest20)

100%|██████████| 9150/9150 [00:19<00:00, 459.02it/s]


There are 709 overlaps.
There are 1872 holes.
Snapping all geometries to a grid with precision 10^( -5 ) to avoid GEOS errors.
Identifying overlaps...


100%|██████████| 11883/11883 [00:12<00:00, 940.21it/s] 


Resolving overlaps...
Assigning order 2 pieces...
Assigning order 3 pieces...
Filling gaps...


Gaps to simplify: 100%|██████████| 979/979 [6:41:42<00:00, 24.62s/it]      
Gaps to fill: 100%|██████████| 357/357 [30:41<00:00,  5.16s/it] 
100%|██████████| 9150/9150 [00:19<00:00, 469.19it/s]


smart_repair successful


In [21]:
vest20.columns

Index(['STATEFP', 'COUNTYFP', 'VTDST', 'NAME', 'G20PREDBID', 'G20PRERTRU',
       'G20PRELJOR', 'G20ATGDSHA', 'G20ATGRHEI', 'G20ATGLWAS', 'G20ATGGWEI',
       'G20AUDDAHM', 'G20AUDRDEF', 'G20AUDLMOO', 'G20AUDGFAI', 'G20TREDTOR',
       'G20TRERGAR', 'G20TRELSOL', 'G20TREGRUN', 'geometry'],
      dtype='object')

In [22]:
# Parameters for building the base precinct layer from the 2020 VEST file.
start_col = 4  # position of the first election column in vest20.columns (right after 'NAME')
vest_base_data = vest20
year = '20'  # two-digit year that rename() inserts into the column names

In [23]:
# Rename and merge the election columns of the chosen VEST layer.
vest_base = add_vest_base(vest_base_data, start_col, year)

In [24]:
# vap and population have the same GEOID20
# Map every census block to a base precinct.
# The recorded run of this cell stopped with a GEOS TopologyException.
blocks_to_precincts_assignment = maup.assign(population_df.geometry, vest_base.geometry)

100%|██████████| 9150/9150 [00:25<00:00, 358.00it/s]
 75%|███████▌  | 6889/9150 [00:59<00:19, 116.56it/s]


GEOSException: TopologyException: Input geom 1 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

### Re-running smart_repair with a custom snap_precision because of the above error

In [25]:
def do_smart_repair_with_snap_precision(df):
    """Same as ``do_smart_repair`` but calls ``smart_repair`` with ``snap_precision=8``.

    Returns the repaired layer in EPSG:4269; raises ``Exception`` when
    ``maup.doctor`` still reports problems after the repair.
    """
    # Reproject to the estimated UTM zone before repairing.
    projected = df.to_crs(df.estimate_utm_crs())
    repaired = smart_repair(projected, snap_precision=8)

    if not maup.doctor(repaired):
        raise Exception('smart_repair failed')

    print('smart_repair successful')
    # Back to EPSG:4269 (geographic NAD83), the CRS used by the rest of the notebook.
    return repaired.to_crs('EPSG:4269')

In [26]:
# Start again from the raw 2020 VEST file and repair it with snap_precision=8.
vest20 = gpd.read_file(vest20_data)
if not maup.doctor(vest20):
    vest20 = do_smart_repair_with_snap_precision(vest20)

100%|██████████| 9150/9150 [00:20<00:00, 457.06it/s]


There are 709 overlaps.
There are 1872 holes.
Snapping all geometries to a grid with precision 10^( -3 ) to avoid GEOS errors.
Identifying overlaps...


100%|██████████| 11697/11697 [00:12<00:00, 950.59it/s] 


Resolving overlaps...
Assigning order 2 pieces...
Assigning order 3 pieces...
Filling gaps...


Gaps to simplify: 100%|██████████| 953/953 [57:48<00:00,  3.64s/it]  
Gaps to fill: 100%|██████████| 365/365 [18:52<00:00,  3.10s/it]
100%|██████████| 9150/9150 [00:11<00:00, 817.35it/s] 


smart_repair successful


In [27]:
vest20.columns

Index(['STATEFP', 'COUNTYFP', 'VTDST', 'NAME', 'G20PREDBID', 'G20PRERTRU',
       'G20PRELJOR', 'G20ATGDSHA', 'G20ATGRHEI', 'G20ATGLWAS', 'G20ATGGWEI',
       'G20AUDDAHM', 'G20AUDRDEF', 'G20AUDLMOO', 'G20AUDGFAI', 'G20TREDTOR',
       'G20TRERGAR', 'G20TRELSOL', 'G20TREGRUN', 'geometry'],
      dtype='object')

In [28]:
# Parameters for building the base precinct layer from the re-repaired 2020 VEST file.
start_col = 4  # position of the first election column in vest20.columns (right after 'NAME')
vest_base_data = vest20
year = '20'  # two-digit year that rename() inserts into the column names

In [29]:
# Rebuild the base precinct layer from the re-repaired 2020 data.
vest_base = add_vest_base(vest_base_data, start_col, year)

In [30]:
# vap and population have the same GEOID20
# Map every census block to a base precinct (this run completed without error).
blocks_to_precincts_assignment = maup.assign(population_df.geometry, vest_base.geometry)

100%|██████████| 9150/9150 [00:14<00:00, 623.28it/s] 
100%|██████████| 9150/9150 [00:46<00:00, 195.42it/s]


In [38]:
# Sum the block-level demographic columns up to the precinct each block was assigned to.
# NOTE(review): the execution counts show this cell ran as In [38], i.e. after the
# 2018 cells (In [31]-[35]) had reassigned `vest_base`. The recorded result may
# therefore be built on the 2018 layer instead of the 2020 one — re-run top to
# bottom on a fresh kernel to confirm.
vest_base[pop_col] = population_df[pop_col].groupby(blocks_to_precincts_assignment).sum()

In [39]:
# election_df is the working precinct table that later cells add elections and districts to.
election_df = gpd.GeoDataFrame(vest_base, crs="EPSG:4269")

In [40]:
# Show, per demographic column, the block-level total next to the precinct-level
# total and whether the two are exactly equal (same comparison as check_population).
pd.DataFrame({
    'pop_col': pop_col,
    'population_df': population_df[pop_col].sum(), 
    'vest_base': vest_base[pop_col].sum(),
    'equal': [x == y for x, y in zip(population_df[pop_col].sum(), vest_base[pop_col].sum())]
})

Unnamed: 0,pop_col,population_df,vest_base,equal
TOTPOP,TOTPOP,13002700,13002700.0,True
HISP,HISP,1049615,1049615.0,True
NH_WHITE,NH_WHITE,9553417,9553417.0,True
NH_BLACK,NH_BLACK,1368978,1368978.0,True
NH_AMIN,NH_AMIN,15028,15028.0,True
NH_ASIAN,NH_ASIAN,506674,506674.0,True
NH_NHPI,NH_NHPI,3162,3162.0,True
NH_OTHER,NH_OTHER,54541,54541.0,True
NH_2MORE,NH_2MORE,451285,451285.0,True
H_WHITE,H_WHITE,197270,197270.0,True


In [31]:
# Load the 2018 VEST precincts; repair with the default settings when needed.
vest18 = gpd.read_file(vest18_data)
if not maup.doctor(vest18):
    vest18 = do_smart_repair(vest18)

100%|██████████| 9160/9160 [00:12<00:00, 734.88it/s] 


There are 697 overlaps.
There are 1923 holes.
Snapping all geometries to a grid with precision 10^( -5 ) to avoid GEOS errors.
Identifying overlaps...


100%|██████████| 11951/11951 [00:07<00:00, 1582.88it/s]


Resolving overlaps...
Assigning order 2 pieces...
Assigning order 3 pieces...
Filling gaps...


Gaps to simplify: 100%|██████████| 1000/1000 [51:20<00:00,  3.08s/it]
Gaps to fill: 100%|██████████| 370/370 [20:05<00:00,  3.26s/it]
100%|██████████| 9160/9160 [00:11<00:00, 785.43it/s] 


smart_repair successful


In [32]:
vest18.columns

Index(['STATEFP', 'COUNTYFP', 'VTDST', 'NAME', 'G18USSDCAS', 'G18USSRBAR',
       'G18USSLKER', 'G18USSGGAL', 'G18GOVDWOL', 'G18GOVRWAG', 'G18GOVLKRA',
       'G18GOVGGLO', 'geometry'],
      dtype='object')

In [33]:
# Parameters for trying the 2018 VEST file as a precinct layer.
# NOTE(review): this overwrites start_col / vest_base_data / year set by the 2020
# base cells; any later cell that reads `year` sees '18' from here on.
start_col = 4
vest_base_data = vest18
year = '18'

In [34]:
# NOTE(review): rebinds `vest_base`, which previously held the 2020 base layer.
vest_base = add_vest_base(vest_base_data, start_col, year)

In [35]:
# vap and population have the same GEOID20
# Map every census block to a 2018 precinct.
# The recorded run of this cell stopped with a GEOS TopologyException.
blocks_to_precincts_assignment = maup.assign(population_df.geometry, vest_base.geometry)

100%|██████████| 9160/9160 [00:16<00:00, 563.86it/s] 
 78%|███████▊  | 7123/9160 [00:37<00:10, 189.01it/s]


GEOSException: TopologyException: Input geom 1 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

In [36]:
def add_vest_with_snap_precision(vest, df, year, population, start_col):
    """Add one VEST election year to ``df``, repairing ``vest`` with ``snap_precision=8``.

    Same procedure as ``add_vest``: precinct votes are spread over the census
    blocks of ``population`` in proportion to each block's share of its
    precinct's VAP, then summed back up to the precincts of ``df``. The only
    difference is that a needed repair goes through
    ``do_smart_repair_with_snap_precision``.

    Parameters
    ----------
    vest : geopandas.GeoDataFrame
        VEST precinct layer. Election columns are ``vest.columns[start_col:-1]``
        (the last column is skipped — presumably the geometry; confirm).
    df : geopandas.GeoDataFrame
        Base precinct table that receives the new columns.
    year : str
        Two-digit year inserted into the new column names by ``rename``.
    population : geopandas.GeoDataFrame
        Block-level layer with "VAP", "geometry" and every ``pop_col`` column.
    start_col : int
        Position of the first election column in ``vest``.

    Returns
    -------
    ``df`` with the prorated election columns added.

    Raises
    ------
    Exception
        From the repair helper if the repair fails, or from
        ``check_population`` if the population totals no longer agree.
    """
    # check if it needs to be smart_repair
    if not maup.doctor(vest):
        vest = do_smart_repair_with_snap_precision(vest)

    # rename the columns
    original_col = vest.columns[start_col:-1]
    new_col = [rename(i, year) for i in original_col]
    rename_dict = dict(zip(original_col, new_col))
    vest = vest.rename(columns=rename_dict)
    # several source columns can map to the same "...O" name; sum them together
    vest = vest.groupby(level=0, axis=1).sum()
    col_name = sorted(set(new_col))

    # make the blocks from precincts by weight
    vest = gpd.GeoDataFrame(vest, crs="EPSG:4269")
    # Explicit copy: new columns are written below, and writing onto a slice of
    # `population` is chained assignment (only hidden by the warnings filter).
    election_in_block = population[["VAP", 'geometry']].copy()  # block scale
    blocks_to_precincts_assignment = maup.assign(election_in_block.geometry, vest.geometry)

    # weight of a block = its VAP / total VAP of the precinct it was assigned to
    precinct_vap = election_in_block["VAP"].groupby(blocks_to_precincts_assignment).sum()
    weights = election_in_block["VAP"] / blocks_to_precincts_assignment.map(precinct_vap)
    weights = weights.fillna(0)  # division by a zero/missing precinct total
    prorated = maup.prorate(blocks_to_precincts_assignment, vest[col_name], weights)
    election_in_block[col_name] = prorated

    # assign blocks to the base precincts and sum the prorated votes
    election_in_block = gpd.GeoDataFrame(election_in_block, crs="EPSG:4269")
    df = gpd.GeoDataFrame(df, crs="EPSG:4269")
    blocks_to_base_assignment = maup.assign(election_in_block.geometry, df.geometry)
    df[col_name] = election_in_block[col_name].groupby(blocks_to_base_assignment).sum()
    df = df.groupby(level=0, axis=1).sum()

    # check if population agrees
    check_population(population, df)

    return df

In [None]:
# Re-read the raw 2018 file so the snap-precision repair starts from unrepaired geometry.
vest18 = gpd.read_file(vest18_data)

In [None]:
# Prorate the 2018 results onto the base precincts (first election column at position 4).
# The recorded run of this cell stopped with a GEOS TopologyException.
election_df = add_vest_with_snap_precision(vest18, election_df, '18', population_df, 4)

GEOSException: TopologyException: side location conflict at -75.328815953537884 40.595119145679654. This can occur if the input geometry is invalid.

In [43]:
# Load the 2016 VEST precincts; repair with the default settings when needed.
vest16 = gpd.read_file(vest16_data)
if not maup.doctor(vest16):
    vest16 = do_smart_repair(vest16)

100%|██████████| 9167/9167 [00:14<00:00, 650.58it/s] 


There are 723 overlaps.
There are 2070 holes.
Snapping all geometries to a grid with precision 10^( -5 ) to avoid GEOS errors.
Identifying overlaps...


100%|██████████| 12277/12277 [00:08<00:00, 1370.97it/s]


Resolving overlaps...
Assigning order 2 pieces...
Assigning order 3 pieces...
Filling gaps...


Gaps to simplify: 100%|██████████| 1153/1153 [59:25<00:00,  3.09s/it]
Gaps to fill: 100%|██████████| 393/393 [22:16<00:00,  3.40s/it]
100%|██████████| 9167/9167 [00:11<00:00, 784.98it/s] 


smart_repair successful


In [44]:
vest16.columns

Index(['STATEFP', 'COUNTYFP', 'VTDST', 'NAME', 'G16PREDCLI', 'G16PRERTRU',
       'G16PRELJOH', 'G16PREGSTE', 'G16PRECCAS', 'G16USSDMCG', 'G16USSRTOO',
       'G16USSLCLI', 'G16ATGDSHA', 'G16ATGRRAF', 'G16AUDDDEP', 'G16AUDRBRO',
       'G16AUDLMIN', 'G16AUDGSWE', 'G16TREDTOR', 'G16TRERVOI', 'G16TRELBAB',
       'G16TREGCOM', 'geometry'],
      dtype='object')

In [45]:
# Parameters for trying the 2016 VEST file as a precinct layer.
# NOTE(review): this overwrites `year`; the column-renaming and reordering cells
# near the end of the notebook read `year` and will see '16' unless it is reset
# to the base year first.
start_col = 4
vest_base_data = vest16
year = '16'

In [46]:
# NOTE(review): rebinds `vest_base` again, this time to the 2016 layer.
vest_base = add_vest_base(vest_base_data, start_col, year)

In [47]:
# vap and population have the same GEOID20
# Map every census block to a 2016 precinct.
# The recorded run of this cell stopped with a GEOS TopologyException.
blocks_to_precincts_assignment = maup.assign(population_df.geometry, vest_base.geometry)

100%|██████████| 9167/9167 [00:14<00:00, 617.07it/s] 
 78%|███████▊  | 7126/9167 [00:36<00:10, 194.81it/s]


GEOSException: TopologyException: Input geom 1 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

In [49]:
# Re-read the raw 2016 file so the snap-precision repair starts from unrepaired geometry.
vest16 = gpd.read_file(vest16_data)

In [50]:
# Prorate the 2016 results onto the base precincts.
# In the recorded run the repair succeeded, but the final block-to-precinct
# assignment against election_df stopped with a GEOS TopologyException.
election_df = add_vest_with_snap_precision(vest16, election_df, '16', population_df, start_col)

100%|██████████| 9167/9167 [00:20<00:00, 447.18it/s]


There are 723 overlaps.
There are 2070 holes.
Snapping all geometries to a grid with precision 10^( -3 ) to avoid GEOS errors.
Identifying overlaps...


100%|██████████| 11831/11831 [00:12<00:00, 952.33it/s] 


Resolving overlaps...
Assigning order 2 pieces...
Assigning order 3 pieces...
Filling gaps...


Gaps to simplify: 100%|██████████| 1004/1004 [1:10:11<00:00,  4.19s/it]
Gaps to fill: 100%|██████████| 388/388 [20:24<00:00,  3.16s/it]
100%|██████████| 9167/9167 [00:11<00:00, 796.32it/s] 


smart_repair successful


100%|██████████| 9167/9167 [00:14<00:00, 621.90it/s] 
100%|██████████| 9167/9167 [00:48<00:00, 190.11it/s]
100%|██████████| 9160/9160 [00:14<00:00, 639.86it/s] 
 78%|███████▊  | 7123/9160 [00:36<00:10, 195.80it/s]


GEOSException: TopologyException: Input geom 1 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

##  Add the district data

In [51]:
# Load the state senate and state house plans.
send, hdist = (gpd.read_file(path) for path in (send_data, hdist_data))

In [52]:
cong_df.head()

Unnamed: 0,ID,AREA,DISTRICT,geometry
0,2,67.39,2,"POLYGON ((-74.97398 40.04859, -74.97743 40.045..."
1,3,2836.43,8,"POLYGON ((-75.33468 41.97021, -75.33425 41.970..."
2,5,1183.0,7,"POLYGON ((-75.58984 40.48466, -75.59184 40.485..."
3,8,54.74,3,"POLYGON ((-75.22066 40.00394, -75.21923 40.004..."
4,10,717.3,1,"POLYGON ((-75.19680 40.60858, -75.19671 40.608..."


In [53]:
# Tag every precinct with its congressional district (value of cong_df["DISTRICT"]).
# The recorded run of this cell stopped with a GEOS TopologyException.
election_df = add_district(cong_df, "CD", election_df, "DISTRICT")

100%|██████████| 17/17 [00:00<00:00, 33.41it/s]
100%|██████████| 17/17 [00:00<00:00, 35.95it/s]
 12%|█▏        | 2/17 [00:00<00:02,  6.26it/s]


GEOSException: TopologyException: Input geom 0 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

In [56]:
# Re-read the raw congressional plan before retrying the district assignment.
cong_df = gpd.read_file(cd_data)

In [57]:
def add_district_with_snap_precision(dist_df, dist_name, election_df, col_name):
    """Label every precinct with its district, repairing ``dist_df`` with ``snap_precision=8``.

    Parameters
    ----------
    dist_df : geopandas.GeoDataFrame
        District layer; repaired with ``do_smart_repair_with_snap_precision``
        if ``maup.doctor`` reports problems.
    dist_name : str
        Name of the column to create in ``election_df``.
    election_df : geopandas.GeoDataFrame
        Precinct layer (treated as EPSG:4269).
    col_name : str
        Column of ``dist_df`` holding the district label to copy.

    Returns
    -------
    geopandas.GeoDataFrame
        ``election_df`` with the extra ``dist_name`` column. Precincts that
        ``maup.assign`` could not place get NaN instead of raising KeyError.
    """
    # check if it needs to be smart_repair
    if not maup.doctor(dist_df):
        dist_df = do_smart_repair_with_snap_precision(dist_df)

    election_df = gpd.GeoDataFrame(election_df, crs="EPSG:4269")

    # NOTE(review): maup.assign is assumed to need both layers in the same CRS
    # (the notebook reprojects hdist by hand before assigning); align here so
    # callers cannot forget.
    if dist_df.crs is not None and dist_df.crs != election_df.crs:
        dist_df = dist_df.to_crs(election_df.crs)

    # assign the precincts: the result holds, per precinct, the index label of
    # the matching row in dist_df
    precincts_to_district_assignment = maup.assign(election_df.geometry, dist_df.geometry)

    # Translate index labels to district labels in one vectorised step. This
    # replaces a row-by-row .at loop that assumed a 0..n-1 index on election_df
    # and rewrote the column in place.
    election_df[dist_name] = precincts_to_district_assignment.map(dist_df[col_name])

    return election_df

In [58]:
# Retry with the snap-precision variant defined in the previous cell; the
# original retry called plain add_district again and so repeated the first attempt.
# NOTE(review): the recorded TopologyException reports an invalid input geometry;
# if the invalid geometry is in election_df rather than cong_df, the precinct
# layer must be repaired as well — confirm.
election_df = add_district_with_snap_precision(cong_df, "CD", election_df, "DISTRICT")

100%|██████████| 17/17 [00:00<00:00, 40.25it/s]
100%|██████████| 17/17 [00:00<00:00, 36.93it/s]
 12%|█▏        | 2/17 [00:00<00:02,  7.30it/s]


GEOSException: TopologyException: Input geom 0 is invalid: Self-intersection at -75.328815953537884 40.595119145679654

In [None]:
send.head()

In [None]:
# Tag every precinct with its state senate district (value of send["DISTRICT"]).
# NOTE(review): unlike hdist, `send` is not reprojected to EPSG:4269 before this
# call — confirm its CRS matches election_df.
election_df = add_district(send, "SEND", election_df, "DISTRICT")

In [None]:
hdist.head()

In [None]:
# Bring the house plan into the CRS used by election_df, then tag every
# precinct with its state house district (value of hdist["Name"]).
hdist = hdist.to_crs("EPSG:4269")
election_df = add_district(hdist, "HDIST", election_df, "Name")

In [None]:
election_df.columns

### Append the base precinct year to the precinct information column names

In [None]:
# Suffix the precinct identifier columns of the base layer with the base year.
# The PA VEST layer carries STATEFP / COUNTYFP / VTDST / NAME (see vest20.columns);
# the previous mapping renamed COUNTY / PCODE / CODE_NAME, which do not exist
# here, so the rename did nothing.
# NOTE(review): `year` is reassigned by the 2018 and 2016 cells above; make sure
# it holds the base year ('20') when this cell runs.
base_columns = {}
if 'COUNTYFP' + year not in election_df.columns:  # skip if already renamed
    base_columns = {
        col: col + year
        for col in ('STATEFP', 'COUNTYFP', 'VTDST', 'NAME')
    }
election_df.rename(columns=base_columns, inplace=True)

In [None]:
election_df.columns

In [None]:
# reorder the columns
# Precinct identifier columns of the PA VEST base layer come first. Each is
# accepted with or without the year suffix, whichever election_df has; the
# previous list began with 'COUNTY'/'PCODE'/'CODE_NAME' + year, which are not
# columns of this data, so election_df[final_col] raised KeyError.
id_columns = [
    name
    for base in ('STATEFP', 'COUNTYFP', 'VTDST', 'NAME')
    for name in (base + year, base)
    if name in election_df.columns
]

# District and demographic columns follow in a fixed order; a missing one still
# raises KeyError below, which signals that an earlier step did not run.
fixed_columns = id_columns + [
    'CD',
    'SEND',
    'HDIST',
    'TOTPOP',
    'NH_2MORE',
    'NH_AMIN',
    'NH_ASIAN',
    'NH_BLACK',
    'NH_NHPI',
    'NH_OTHER',
    'NH_WHITE',
    'HISP',
    'H_AMIN',
    'H_ASIAN',
    'H_BLACK',
    'H_NHPI',
    'H_OTHER',
    'H_WHITE',
    'H_2MORE',
    'VAP',
    'HVAP',
    'WVAP',
    'BVAP',
    'AMINVAP',
    'ASIANVAP',
    'NHPIVAP',
    'OTHERVAP',
    '2MOREVAP']

# Everything else (election results, geometry) keeps its current relative order.
election_columns = [col for col in election_df.columns if col not in fixed_columns]
final_col = fixed_columns + election_columns
election_df = election_df[final_col]

In [None]:
import os

# Store the result in a directory named after the state abbreviation (e.g. "./pa").
directory = "./{}".format(state_ab)
os.makedirs(directory, exist_ok=True)  # no separate exists() check needed

shapefile_path = "./{}/{}.shp".format(state_ab, state_ab)
geojson_path = './{}/{}.geojson'.format(state_ab, state_ab)
json_path = "./{}/{}.json".format(state_ab, state_ab)

# Remove outputs of a previous run before writing new ones
if os.path.exists(shapefile_path):
    os.remove(shapefile_path)
if os.path.exists(geojson_path):
    os.remove(geojson_path)

election_df.to_file(shapefile_path)
election_df.to_file(geojson_path, driver='GeoJSON')

# Only do once to build json and read from file when generating ensembles
# NOTE(review): ignore_errors=True presumably lets the graph build despite
# geometry problems — confirm that is intended for the final output.
graph = Graph.from_file(shapefile_path, ignore_errors=True)
graph.to_json(json_path)

In [None]:
# Read the saved shapefile back and draw it as a quick visual check.
shapefile_path = f"./{state_ab}/{state_ab}.shp"
shape = gpd.read_file(shapefile_path)
shape.plot()

In [None]:
# Ad-hoc lookup: print the 2018 precinct row(s) named 'Burton 1A', if any.
# NOTE(review): looks like leftover debugging — consider removing.
if (vest18.NAME == 'Burton 1A').any():
    print(vest18.loc[vest18.NAME == 'Burton 1A'])


In [None]:
vest20.head()

In [None]:
vest18.head()