In [64]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
from shapely.validation import make_valid
from shapely.geometry import Point

from tqdm import tqdm #&&&#


In [2]:
# Root directory that all dataset paths in this notebook are resolved against.
# The commented-out line is the variant based on this file's own location.
#data_dir_path = Path(__file__).parent.resolve() #&&&#
data_dir_path = Path('.') #&&&#

# Scratch directory for cached intermediates; only the leaf directory is
# created, and an already-existing directory is not an error.
Path(data_dir_path, '_temp').mkdir(exist_ok=True)


In [15]:
# Coarse locale categories, keyed by the LOCALE code integer-divided by 10.
# Defined before the cache check so the name exists on both the cached and
# the rebuild path (it used to be created only when the cache was missing).
locale_dict = {1: 'City', 2: 'Suburban', 3: 'Town', 4: 'Rural'}

# Cached result of the expensive per-state union computed below.
state_level_locale_cache_path = data_dir_path / '_temp' / '_temp_state_level_locale_polygons.shp'

if state_level_locale_cache_path.exists():
    state_level_locales_dataframe = gpd.read_file(state_level_locale_cache_path)

else:
    locales_dataframe = gpd.read_file(data_dir_path / 'edge_locales' / 'edge_locale21_nces_all_us.shp')
    # NOTE(review): not used anywhere in the visible cells; kept in case a
    # later cell relies on it — confirm and drop the read if it is dead.
    locations_dataframe = gpd.read_file(data_dir_path / 'edge_locations' / 'EDGE_GEOCODE_PUBLICSCH_2021.shp')

    locales_dataframe['localetype'] = locales_dataframe.LOCALE.apply(lambda l: locale_dict[int(l) // 10])

    state_level_locale_polygon_data = []

    for state in tqdm(locales_dataframe.STATEFP.unique()):
        locale_state_dataframe = locales_dataframe[locales_dataframe.STATEFP == state]

        for localetype in locale_dict.values():
            localetype_dataframe = locale_state_dataframe[locale_state_dataframe.localetype == localetype]

            # Union of every polygon of this locale type within the state.
            # Stays None when the state has no polygon of this type.
            merged_geo = None
            for geo in localetype_dataframe.geometry:
                if geo is None:
                    # A null geometry contributes nothing to the union.
                    continue

                # Repair invalid polygons first so that union() does not fail.
                locale_geo = geo if geo.is_valid else make_valid(geo)
                merged_geo = locale_geo if merged_geo is None else merged_geo.union(locale_geo)

            state_level_locale_polygon_data.append({'STATEFP': state,
                                                    'LOCALETYPE': localetype,
                                                    'geometry': merged_geo})

    # Carry the source CRS over so the cached shapefile keeps its coordinate
    # reference system instead of being written without one.
    state_level_locales_dataframe = gpd.GeoDataFrame(state_level_locale_polygon_data,
                                                     crs=locales_dataframe.crs)
    state_level_locales_dataframe.to_file(state_level_locale_cache_path)
    # Release the full-resolution locale layer; only the merged frame is needed.
    del locales_dataframe
    

In [38]:
# Polygon layers: one for states and one for ZIP code areas.
states_dataframe = gpd.read_file(data_dir_path / 'states' / 'tl_2022_us_state.shp')
zipcodes_dataframe = gpd.read_file(data_dir_path / 'zip_codes' / 'tl_2022_us_zcta520.shp')

# Tabular lookup of FIPS state codes.
fips_state_code_dataframe = pd.read_csv(data_dir_path.joinpath('fips_state_codes.csv'))


In [48]:
# Tag every ZIP code area with the list of state GEOIDs whose polygon it
# intersects. The result is stored in the 'STATEFP' column as one list per row.

# Pair each state GEOID with its polygon once, instead of re-iterating the
# states dataframe row by row for every single ZIP code area.
state_shapes = list(zip(states_dataframe.GEOID, states_dataframe.geometry))

zip_state_fips = []
for zip_geometry in tqdm(zipcodes_dataframe.geometry, total=len(zipcodes_dataframe)):
    touching_states = [geoid
                       for geoid, state_geometry in state_shapes
                       if zip_geometry.intersects(state_geometry)]
    # dict.fromkeys drops repeated GEOIDs while keeping a stable order.
    zip_state_fips.append(list(dict.fromkeys(touching_states)))

# Assign the whole column at once. An object Series keeps each list intact as
# a single cell value, and reusing the frame's index keeps rows aligned.
zipcodes_dataframe['STATEFP'] = pd.Series(zip_state_fips,
                                          index=zipcodes_dataframe.index,
                                          dtype=object)
            

33791it [04:34, 122.94it/s]


In [81]:
# For every (state, locale type) polygon, write a CSV of area-overlap weights
# against each ZIP code area that intersects the state:
#   ALLOC_AREA_FRACTARGET = overlap area / locale polygon area
#   ALLOC_AREA_FRACSOURCE = overlap area / ZIP code polygon area

# Fixed column layout so that a file with no rows still gets a header line.
zip_weight_columns = ['ZCTA5CE20', 'ALLOC_AREA_FRACTARGET', 'ALLOC_AREA_FRACSOURCE']

with tqdm(state_level_locales_dataframe.STATEFP.unique(), position=0, leave=False) as progress:
    for state in progress:
        # Human-readable state name, used only for the progress display.
        state_string = fips_state_code_dataframe[fips_state_code_dataframe.STATEFP == int(state)].iloc[0].STATENAME

        # Select the ZIP code areas touching this state once per state rather
        # than rescanning the whole ZIP table for every locale type.
        touches_state = zipcodes_dataframe.STATEFP.apply(lambda fips: state in fips).astype(bool)
        state_zipcodes_dataframe = zipcodes_dataframe[touches_state]

        state_locale_rows = state_level_locales_dataframe[state_level_locales_dataframe.STATEFP == state]

        # Iterate the locale types actually present in the dataframe, so this
        # cell also works when the polygons were loaded from the cache.
        for localetype in state_locale_rows.LOCALETYPE.unique():
            progress.set_postfix({'state': state_string, 'locale_type': localetype}, refresh=True)
            state_level_locale_geo = state_locale_rows[state_locale_rows.LOCALETYPE == localetype].iloc[0].geometry

            # Nothing to allocate for a missing or zero-area polygon; skipping
            # it also avoids dividing by zero below.
            if state_level_locale_geo is None or state_level_locale_geo.area == 0:
                continue

            # Invariant across ZIP codes, so compute it once.
            locale_area = state_level_locale_geo.area

            zip_weights = []
            for zcta, zip_geometry in zip(state_zipcodes_dataframe.ZCTA5CE20,
                                          state_zipcodes_dataframe.geometry):
                inter_area = state_level_locale_geo.intersection(zip_geometry).area
                zip_weights.append({'ZCTA5CE20': zcta,
                                    'ALLOC_AREA_FRACTARGET': inter_area / locale_area,
                                    'ALLOC_AREA_FRACSOURCE': inter_area / zip_geometry.area,
                                   })
            pd.DataFrame(zip_weights, columns=zip_weight_columns).to_csv(data_dir_path / '_temp' / '_temp_{}-{}_zipcode_weights.csv'.format(state, localetype), index=False)


                                                                                                                         

----------------