In [69]:
import pandas as pd, geopandas as gpd, h3, shapely
import urllib, time, requests, psycopg2, urllib.parse, glob, os, us
from sqlalchemy import create_engine  
from h3 import h3
from shapely.ops import unary_union, cascaded_union
from shapely.geometry import mapping, Polygon

from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

from pathlib import Path

import warnings
warnings.filterwarnings("ignore")

### 1. Generate US state mapping and geoDataframe

In [70]:
def generate_state_abbreviations():
    return {state.name: state.abbr for state in us.STATES_AND_TERRITORIES}

state_abbreviations = generate_state_abbreviations()
print(state_abbreviations)

{'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY', 'American Samoa': 'AS', 'Guam': 'GU', 'Northern Mariana Islands': 'MP', 'Puer

In [71]:
# Load US states GeoDataFrame
us_states = gpd.read_file("https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_5m.json")
us_states['area'] = us_states.geometry.area
us_states = us_states.sort_values(by='area', ascending=True)
us_states['st'] = us_states['NAME'].map(state_abbreviations)
us_states.head(5)

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry,area,st
8,0400000US11,11,District of Columbia,,61.048,"POLYGON ((-77.03860 38.79151, -77.03890 38.800...",0.01838,
39,0400000US44,44,Rhode Island,,1033.814,"MULTIPOLYGON (((-71.38359 41.46478, -71.38928 ...",0.309236,RI
7,0400000US10,10,Delaware,,1948.543,"MULTIPOLYGON (((-75.56493 39.58325, -75.57627 ...",0.545358,DE
51,0400000US72,72,Puerto Rico,,3423.775,"MULTIPOLYGON (((-65.32770 18.29584, -65.33745 ...",0.76508,PR
6,0400000US09,9,Connecticut,,4842.355,"POLYGON ((-71.79924 42.00807, -71.79792 41.935...",1.395161,CT


In [72]:
# Filter data to include only rows where the state is Arkansas
state = us_states[us_states['NAME'] == 'Arkansas']
state.head()

# Assuming 'arkansas_row' is the row for Arkansas from your original DataFrame
state_name = state['NAME'].values[0]

### 2. Generate H3 by State -- Set the Resolution: 1 - 15

In [73]:
def h3_id_to_geometry(h3_id):
    """
    Convert an H3 ID to a Shapely geometry.
    :param h3_id: The H3 ID
    :return: A Shapely Polygon representing the H3 hexagon
    """
    geo_boundary = h3.h3_to_geo_boundary(h3_id, geo_json=True)
    polygon = Polygon(geo_boundary)
    return polygon

def generate_h3_hexagons(state_geometry, resolution=resolution):
    """
    Generate H3 hexagons for a given state geometry.
    :param state_geometry: A Shapely geometry representing the state
    :param resolution: The resolution of the H3 hexagons
    :return: A DataFrame with H3 hexagons and their geometries
    """
    polygons = []

    if state_geometry.geom_type == 'Polygon':
        polygons.append(state_geometry)
    elif state_geometry.geom_type == 'MultiPolygon':
        for polygon in state_geometry:
            polygons.append(polygon)
    else:
        raise ValueError('Invalid geometry type. Only Polygon and MultiPolygon are supported.')

    hexagons = set()

    for polygon in polygons:
        # Get the bounding box of the polygon
        min_lng, min_lat, max_lng, max_lat = polygon.bounds

        # Convert the polygon to GeoJSON format
        polygon_geojson = gpd.GeoSeries([polygon]).__geo_interface__["features"][0]["geometry"]

        # Generate a list of H3 hexagons within the bounding box
        hexagons_in_bbox = h3.polyfill(
            polygon_geojson,
            res=resolution,
            geo_json_conformant=True
        )

        # Filter out hexagons that are not within the polygon
        for hexagon in hexagons_in_bbox:
            hexagon_boundary = Polygon(h3.h3_to_geo_boundary(hexagon, geo_json=True))
            if polygon.contains(hexagon_boundary):
                hexagons.add(hexagon)

    # Convert the list of hexagons into a DataFrame
    hexagon_df = pd.DataFrame(list(hexagons), columns=['h3_hexagon'])

    # Add the geometry column to the DataFrame
    hexagon_df['geometry'] = hexagon_df['h3_hexagon'].apply(h3_id_to_geometry)

    return hexagon_df


In [74]:
resolution = 4

In [75]:
# Generate the H3 hexagons
gdf = generate_h3_hexagons(state['geometry'].values[0])
gdf = gpd.GeoDataFrame(gdf)

# Call the state name
state_name = state['NAME'].values[0]

# Add the state name to the output DataFrame
gdf['state'] = state_name


In [76]:
gdf.head(10)

Unnamed: 0,h3_hexagon,geometry,state
0,8444497ffffffff,"POLYGON ((-94.00079 34.24337, -93.75144 34.363...",Arkansas
1,842658bffffffff,"POLYGON ((-91.00257 35.63646, -90.75238 35.747...",Arkansas
2,84265a1ffffffff,"POLYGON ((-92.76347 35.32404, -92.51247 35.439...",Arkansas
3,8426587ffffffff,"POLYGON ((-91.75301 35.29910, -91.50289 35.412...",Arkansas
4,84265bdffffffff,"POLYGON ((-91.99951 34.94257, -91.75001 35.057...",Arkansas
5,84444a9ffffffff,"POLYGON ((-92.22060 33.11856, -91.97503 33.236...",Arkansas
6,84265a7ffffffff,"POLYGON ((-93.00852 34.96421, -92.75817 35.081...",Arkansas
7,84444a5ffffffff,"POLYGON ((-91.48375 33.46990, -91.23805 33.585...",Arkansas
8,84444bdffffffff,"POLYGON ((-92.73203 33.86163, -92.48440 33.979...",Arkansas
9,8426e99ffffffff,"POLYGON ((-93.78177 35.34066, -93.53003 35.458...",Arkansas


In [77]:
gdf.to_file(f"data/h3_{state_name}_{resolution}.gpkg", driver="GPKG")