In [1]:
import itertools
import json
import h3
import pandas as pd
import psycopg2
import psycopg2.extras
from inspect import cleandoc
from utils.h3_transformation import H3Transformation
from utils.pg_client import PgClient

## Setup

In [None]:
# PgClient is a project wrapper class around psycopg2 for talking to PostgreSQL.
# This single client targets the "spatial_dwh" database and is reused by the cells below.
pg = PgClient(database="spatial_dwh")

## Functions

### Administrative

In [None]:
def query_administrative_df(
    pg: PgClient,
    table: str,
    geom_col: str = "geometry",
    centroid_col: str = "centroid",
    as_geojson: bool = True,
) -> pd.DataFrame:
    """Load every row of an administrative table into a DataFrame.

    On top of ``SELECT *`` the query appends three derived columns:

    * the geometry serialised as text -- ``ST_AsGeoJSON`` aliased ``geojson``
      when ``as_geojson`` is true, otherwise ``ST_AsText`` aliased ``geometry``;
    * ``<centroid_col>_lng`` (``ST_X`` of the centroid);
    * ``<centroid_col>_lat`` (``ST_Y`` of the centroid).

    Args:
        pg: Client used to run the query (``fetchall``) and to read the
            result column names (``cursor().description``).
        table: Table to read from.
        geom_col: Name of the geometry column to serialise.
        centroid_col: Name of the point column holding the centroid.
        as_geojson: Choose GeoJSON (True) or WKT text (False) serialisation.

    Returns:
        A DataFrame with one row per record and the column names reported
        by the cursor description.

    Note:
        ``table``, ``geom_col`` and ``centroid_col`` are interpolated into the
        SQL text as-is, so they must come from trusted code, not user input.
    """
    st_function, geom_alias = (
        ("ST_AsGeoJSON", "geojson") if as_geojson else ("ST_AsText", "geometry")
    )

    # Spelled out line by line so the whitespace of the statement is explicit
    query = (
        "\n"
        "    SELECT \n"
        "        *,\n"
        f"        {st_function}({geom_col}) AS {geom_alias},\n"
        f"        ST_X({centroid_col}) AS {centroid_col}_lng,\n"
        f"        ST_Y({centroid_col}) AS {centroid_col}_lat\n"
        f"    FROM {table}\n"
        "    "
    )

    # Run the query first; the column names are read from the cursor afterwards
    rows: list = pg.fetchall(query)
    column_names = [column[0] for column in pg.cursor().description]
    return pd.DataFrame(rows, columns=column_names)

### GeoJSON -> H3

In [None]:
# Transform MultiPolygon GeoJSON into several Polygon GeoJSONs
def geojson_multipolygon_to_polygons(geojson_multipolygon: dict) -> list[dict]:
    """Split a MultiPolygon GeoJSON geometry into individual Polygon geometries.

    Args:
        geojson_multipolygon: GeoJSON geometry dict with a ``"coordinates"``
            entry. It is treated as a MultiPolygon unless its ``"type"`` is
            exactly ``"Polygon"``.

    Returns:
        A list of ``{"type": "Polygon", "coordinates": ...}`` dicts, one per
        member polygon. A Polygon input yields a single-element list; an
        empty MultiPolygon yields an empty list.

    Raises:
        KeyError: If the geometry has no ``"coordinates"`` entry.
    """
    coordinates: list = geojson_multipolygon["coordinates"]

    # A Polygon's coordinates are already the rings of ONE polygon. Iterating
    # over them as if they were MultiPolygon members would turn every ring
    # (including holes) into its own malformed "Polygon".
    if geojson_multipolygon.get("type") == "Polygon":
        return [{"type": "Polygon", "coordinates": coordinates}]

    return [{"type": "Polygon", "coordinates": polygon} for polygon in coordinates]

# Make sure your GeoJSON is of type Polygon (h3.polyfill() only accepts Polygon GeoJSON)
def geojson_polygons_to_h3(geojson_polygons: list[dict] | None, h3_res: int) -> set[str]:
    if geojson_polygons is None:
        raise Exception("Polygon GeoJSONs must not be None")
    if h3_res < 0 or h3_res > 15: # resolution must be within [0, 15]
        raise Exception("Resolution must be in range [0, 15]")

    # Transform Polygons into sets of H3 cells
    h3_cells_sets: list[set] = [
        h3.polyfill(geojson=polygon, res=h3_res, geo_json_conformant=True)
        for polygon in geojson_polygons
    ]

    # Union sets into a single set of H3 cells
    h3_cells: set[str] = set().union(*h3_cells_sets)
    return h3_cells

### PostgreSQL Operations

In [None]:
# Helper functions
def _generate_sql_columns(schema: dict[str, str]) -> str:
    """Return the schema's column names, in key order, joined by ``", "``."""
    return ", ".join(schema.keys())

def _generate_h3_table_name(adm_name: str, h3_res: int) -> str:
    """Build the table name ``h3_<adm_name>_r<h3_res>`` for an area and resolution."""
    return "h3_{}_r{}".format(adm_name, h3_res)

# Main functions
def create_h3_table(pg: PgClient, table: str, schema: dict) -> None:
    """Create the H3 table described by ``schema`` plus a resolution CHECK.

    Args:
        pg: Client whose ``execute`` runs the generated statement.
        table: Name of the table to create.
        schema: Mapping of column name -> column definition, emitted in key
            order. It must contain a ``resolution`` column, because the
            table-level constraint ``ck_resolution`` restricts it to [0, 15].

    Note:
        ``table`` and the schema entries are interpolated into the SQL text
        as-is, so they must come from trusted code.
    """
    # Pad short names for readable DDL, but always keep an explicit space so a
    # name of 18+ characters is never fused with its type.
    definitions = [f"{name:<18} {definition}" for name, definition in schema.items()]

    # Added as its own comma-separated entry so it is a real table constraint
    # rather than a column constraint attached to whichever column comes last.
    definitions.append("CONSTRAINT ck_resolution CHECK (resolution >= 0 AND resolution <= 15)")

    table_definitions = ",\n        ".join(definitions)
    create_table_sql = f"""
    CREATE TABLE {table} (
        {table_definitions}
    );
    """

    pg.execute(create_table_sql)
    print("Successfully created table", table)

def insert_h3_cells_to_table(
    pg: PgClient,
    h3_cells: set[str],
    table: str,
    schema: dict[str, str],
) -> None:
    """Bulk-insert H3 cells into ``table`` and commit.

    Each cell becomes one row of six values, supplied positionally: the
    integer form of the cell index, its resolution, ``h3.edge_length`` for
    that resolution in metres, the cell area in m^2, the cell centre as a
    SRID 4326 point, and the cell boundary as a SRID 4326 geometry parsed
    from GeoJSON.

    Args:
        pg: Client providing the cursor and the ``commit``.
        h3_cells: H3 cell indexes (strings) to insert.
        table: Target table name.
        schema: Column name -> definition mapping; its keys, in order, form
            the INSERT column list, so they must line up with the six values.

    Note:
        ``table`` and the schema keys are interpolated into the SQL text
        as-is, so they must come from trusted code.
    """
    rows_per_statement = 1000
    column_list = _generate_sql_columns(schema=schema)

    # Whitespace is written out explicitly; execute_values expands the single %s
    insert_sql = (
        "\n"
        f"        INSERT INTO {table} ({column_list})\n"
        "        VALUES\n"
        "        %s;"
    )
    row_template = (
        "(\n"
        "                %s, \n"
        "                %s, \n"
        "                %s, \n"
        "                %s,  \n"
        "                ST_SetSRID(ST_Point(%s, %s), 4326), \n"
        "                ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326)\n"
        "            )"
    )

    def _row_values(cell: str) -> tuple:
        """Compute the positional values for one cell's row."""
        return (
            h3.string_to_h3(cell),
            h3.h3_get_resolution(cell),
            h3.edge_length(h3.h3_get_resolution(cell), unit="m"),
            h3.cell_area(cell, unit="m^2"),
            # ST_Point takes (x, y): element [1] then [0] of h3_to_geo,
            # presumably (lng, lat) given h3's (lat, lng) return order
            h3.h3_to_geo(h=cell)[1],
            h3.h3_to_geo(h=cell)[0],
            H3Transformation.cell_to_geojson(
                h3_cell=cell,
                default_properties=False,
                as_geometry=True,
            ),
        )

    cursor = pg.cursor()
    print("Begin execute_values")
    psycopg2.extras.execute_values(
        cursor,
        sql=insert_sql,
        # Generator: rows are built lazily as execute_values consumes them
        argslist=(_row_values(cell) for cell in h3_cells),
        template=row_template,
        page_size=rows_per_statement,
    )

    # Persist the inserted rows
    pg.commit()
    print("Successfully inserted H3 cells into table", table)

def add_primary_key_to_table(pg: PgClient, table: str, column: str = "idx") -> None:
    """Add a primary key on ``column`` (default ``idx``) to ``table``.

    ``table`` and ``column`` are interpolated into the SQL text as-is.
    """
    statement = "ALTER TABLE {} ADD PRIMARY KEY ({});".format(table, column)
    pg.execute(statement)
    print("Successfully added primary key to table", table)

def create_gist_idx_to_table(pg: PgClient, table: str) -> None:
    """Create a GIST index named ``gidx_<table>`` on the table's ``geometry`` column.

    ``table`` is interpolated into the SQL text as-is (also into the index name).
    """
    # Written line by line so the statement's whitespace is explicit
    statement = (
        "\n"
        f"    CREATE INDEX gidx_{table}\n"
        f"    ON {table} USING GIST (geometry);\n"
        "    "
    )
    pg.execute(statement)
    print("Successfully created GIST index for table", table)

## H3 Polyfill Vietnam Pipeline

### Configurations

In [None]:
# Administrative area to polyfill and the H3 resolution to generate cells at
adm_name = "vietnam"
h3_res = 7

# Target table name, built as "h3_<adm_name>_r<h3_res>"
h3_table_name = _generate_h3_table_name(adm_name=adm_name, h3_res=h3_res)
# Column name -> PostgreSQL column definition.
# Key order matters: insert_h3_cells_to_table() uses the keys as the INSERT column
# list and supplies its values positionally in this same order.
# NOTE: "circumradius_m" is populated from h3.edge_length(resolution, unit="m").
h3_table_schema = {
    "idx": "INT8 NOT NULL",
    "resolution": "INT2 NOT NULL",
    "circumradius_m": "FLOAT8 NOT NULL",
    "area_m2": "FLOAT8 NOT NULL",
    "centroid": "GEOMETRY (POINT, 4326) NOT NULL",
    "geometry": "GEOMETRY (POLYGON, 4326) NOT NULL",
}

### Extract Vietnam's border Polygons (GeoJSON)

In [None]:
# Get Vietnam border: every column of "vietnam_border" plus the geometry serialised
# as GeoJSON ("geojson") and the centroid split into "centroid_lng" / "centroid_lat"
vietnam_border_df = query_administrative_df(
    pg=pg,
    table="vietnam_border",
    geom_col="geometry",
    centroid_col="centroid",
    as_geojson=True,
)

# Rich display of the full result for a visual check
display(vietnam_border_df)

In [None]:
# Pull the first row's centroid coordinates and border geometry out of the DataFrame
vn_centroid_lng = vietnam_border_df["centroid_lng"].iloc[0]
vn_centroid_lat = vietnam_border_df["centroid_lat"].iloc[0]
# The "geojson" column holds the geometry as a JSON string; parse it into a dict
vn_multipolygon_geojson = json.loads(vietnam_border_df["geojson"].iloc[0])
print(vn_centroid_lng, vn_centroid_lat)
print(type(vn_multipolygon_geojson))

In [None]:
# Convert Vietnam border MultiPolygon to Polygons
# (geojson_polygons_to_h3() fills one Polygon GeoJSON at a time)
vn_polygon_geojsons: list[dict] = geojson_multipolygon_to_polygons(vn_multipolygon_geojson)
# Print the first polygon as a sanity check of the output structure
print(vn_polygon_geojsons[0])

### Generate H3 & save to PostgreSQL

In [None]:
# Generate H3 cells: union of the cells filling every border polygon at h3_res
vietnam_h3_cells: set[str] = geojson_polygons_to_h3(
    geojson_polygons=vn_polygon_geojsons, h3_res=h3_res
)
# Number of distinct cells generated
print(len(vietnam_h3_cells))

# H3 subset in case of Testing (h3_cells could grow very large)
# Takes the first 10 cells in the set's iteration order
vietnam_border_cells_subset = set(itertools.islice(vietnam_h3_cells, 10))

In [None]:
# PostgreSQL operations
# Cells to load; swap in `vietnam_border_cells_subset` for a quick test run
h3_cells = vietnam_h3_cells

try:
    # Create and fill the table first, then add the primary key and the GIST index
    create_h3_table(pg=pg, table=h3_table_name, schema=h3_table_schema)
    insert_h3_cells_to_table(pg=pg, h3_cells=h3_cells, table=h3_table_name, schema=h3_table_schema)
    add_primary_key_to_table(pg=pg, table=h3_table_name)
    create_gist_idx_to_table(pg=pg, table=h3_table_name)
except Exception as error:  # psycopg2's error classes are Exception subclasses too
    # Report the failure without a traceback; later steps are skipped
    print(error)
finally:
    # Always close the client, whether or not the load succeeded
    pg.close()