In [None]:
import itertools
import json
import h3
import pandas as pd
import psycopg2
import psycopg2.extras
from inspect import cleandoc
from h3_transformation import H3Transformation
from pg_client import PgClient

## Setup

In [None]:
# PgClient is a project wrapper class around psycopg2 for talking to PostgreSQL.
# This client targets the "spatial_dwh" database and is shared by the cells below.
pg = PgClient(database="spatial_dwh")

## Functions

### Administrative

In [None]:
def get_administrative_df(
    pg: PgClient,
    table: str,
    center_column: str = "center",
    geometry_column: str = "geometry",
    as_geojson: bool = True,
) -> pd.DataFrame:
    """Load an administrative-boundary table into a DataFrame.

    The query selects every column of ``table`` plus a serialised copy of the
    geometry column and the X / Y coordinates of the center column.

    Args:
        pg: Database client; only ``fetchall(query)`` and ``cursor()`` are used.
        table: Name of the table to read.
        center_column: Column passed to ``ST_X`` / ``ST_Y``; the results are
            aliased ``<center_column>_lng`` and ``<center_column>_lat``.
        geometry_column: Column passed to the serialisation function.
        as_geojson: When True the geometry is serialised with
            ``ST_AsGeoJSON`` and aliased ``geojson``; otherwise with
            ``ST_AsText`` and aliased ``geometry`` (which repeats the name of
            any ``geometry`` column already returned by ``*``).

    Returns:
        A DataFrame of the fetched rows. If the query fails, the error is
        printed and the frame has no rows.

    Note:
        Table and column names are interpolated into the SQL text unescaped,
        so only trusted identifiers should be passed.
    """
    if as_geojson:
        st_function, geometry_alias = "ST_AsGeoJSON", "geojson"
    else:
        st_function, geometry_alias = "ST_AsText", "geometry"

    # Every placeholder is filled in ONE str.format() call: formatting the
    # template in two passes raises KeyError on the first pass, because
    # str.format() requires a value for each replacement field.
    query = """
    SELECT
        *,
        {st_function}({geometry}) AS {geometry_alias},
        ST_X({center}) AS {center}_lng,
        ST_Y({center}) AS {center}_lat
    FROM {table}
    """.format(
        st_function=st_function,
        geometry=geometry_column,
        geometry_alias=geometry_alias,
        center=center_column,
        table=table,
    )

    # Fetch database (best effort: a failed query is reported, not raised)
    records: list = []
    try:
        records = pg.fetchall(query)
    except Exception as error:  # psycopg2.DatabaseError is an Exception subclass
        print(error)

    # NOTE(review): assumes pg.cursor() exposes the description of the query
    # that fetchall() just ran -- confirm against PgClient. The description is
    # None when no query produced a result set, hence the guard.
    description = pg.cursor().description
    columns = [desc[0] for desc in description] if description else None

    return pd.DataFrame(records, columns=columns)

### GeoJSON -> H3

In [None]:
# Split one MultiPolygon GeoJSON geometry into a list of Polygon GeoJSON geometries
def multipolygon_to_polygons(multipolygon_geojson: dict) -> list:
    """Return one Polygon GeoJSON dict per member of a MultiPolygon.

    Args:
        multipolygon_geojson: Mapping with a ``"coordinates"`` entry whose
            items are the coordinate arrays of the individual polygons.

    Returns:
        A list of ``{"type": "Polygon", "coordinates": ...}`` dicts, in the
        same order as the input. The coordinate arrays are reused, not copied.
    """
    return [
        {"type": "Polygon", "coordinates": rings}
        for rings in multipolygon_geojson["coordinates"]
    ]


# Make sure your GeoJSON is of type Polygon (h3.polyfill() only accepts Polygon GeoJSON)
def polygon_geojsons_to_h3(polygon_geojsons: list[dict] | None, res: int) -> set[str]:
    """Fill Polygon GeoJSON geometries with H3 cells and merge the results.

    Args:
        polygon_geojsons: Polygon GeoJSON geometries (dicts), e.g. the output
            of ``multipolygon_to_polygons``. An empty list yields an empty set.
        res: H3 resolution, from 0 to 15 inclusive.

    Returns:
        The union of the cells returned by ``h3.polyfill`` for every polygon.

    Raises:
        ValueError: If ``polygon_geojsons`` is None or ``res`` is outside
            [0, 15]. ``ValueError`` derives from ``Exception``, so callers
            catching ``Exception`` are unaffected.
    """
    if polygon_geojsons is None:
        raise ValueError("Polygon GeoJSONs must not be None")

    # resolution must be within [0, 15]
    if not 0 <= res <= 15:
        raise ValueError("Resolution must be in range [0, 15]")

    # Accumulate into a single set instead of building one set per polygon
    h_cells: set[str] = set()
    for polygon_geojson in polygon_geojsons:
        # geo_json_conformant=True: coordinates are read in GeoJSON (lng, lat) order
        h_cells.update(
            h3.polyfill(geojson=polygon_geojson, res=res, geo_json_conformant=True)
        )

    return h_cells

### PostgreSQL Operations

In [None]:
def get_h3_table_name(adm: str, res: int) -> str:
    """Build the H3 table name for an administrative unit and a resolution.

    Example: ``get_h3_table_name("vietnam", 7)`` gives ``"h3_vietnam_r7"``.
    """
    return "h3_{}_r{}".format(adm, res)


def get_sql_columns(schema: dict[str, str]) -> str:
    """Return the schema's column names as a comma-separated SQL column list.

    Args:
        schema: Column name -> column definition; only the keys are used, in
            insertion order.

    Returns:
        The names joined with ``", "``, or an empty string for an empty
        schema (indexing the last key used to raise ``IndexError`` there).
    """
    return ", ".join(schema)


def create_h3_table(pg: PgClient, table: str, schema: dict) -> None:
    """Create ``table`` with the columns described by ``schema``.

    Args:
        pg: Database client; only ``execute(sql)`` is used.
        table: Name of the table to create (interpolated into the SQL as-is).
        schema: Column name -> column definition (type and constraints), in
            the order the columns should be created. It must contain a
            ``resolution`` column, because the statement always adds a CHECK
            constraint on it.
    """
    COLUMN_MAX_LENGTH = 18
    SEP = ",\n    "

    # Pad "<name> " (name plus one space) rather than the bare name: names
    # shorter than the pad width render exactly as before, while names of
    # COLUMN_MAX_LENGTH characters or more still get a space before the type
    # instead of being glued to it.
    columns_definition = "".join(
        f"{name + ' ':{COLUMN_MAX_LENGTH}}{definition}{SEP}"
        for name, definition in schema.items()
    )

    # Every column line ends with a comma, so the table-level constraint can
    # follow directly.
    create_table_sql = f"""
    CREATE TABLE {table} (
        {columns_definition}
        CONSTRAINT ck_resolution CHECK (resolution >= 0 AND resolution <= 15)
    );
    """

    # Execute CREATE TABLE SQL
    pg.execute(create_table_sql)
    print("Successfully created table", table)

    return None


def insert_h3_to_table(
    pg: PgClient,
    h3_cells: set[str],
    table: str,
    schema: dict[str, str],
    multi_values_exec: bool = False,
) -> None:
    """Insert one row per H3 cell into ``table``.

    Each row holds, in this order: the integer H3 index, the resolution, the
    edge length in metres, the cell area in square metres, the centroid point
    and the cell polygon (both tagged with SRID 4326). ``schema`` must list
    its columns in that same order.

    Args:
        pg: Database client. Multi-values mode uses ``execute(query=...)``;
            the default mode uses ``cursor()`` and ``commit()``.
        h3_cells: H3 cell identifiers in string form.
        table: Target table name (interpolated into the SQL text as-is).
        schema: Column name -> column definition; only the keys are used, to
            build the INSERT column list.
        multi_values_exec: When True, build literal multi-row INSERT
            statements of at most 1000 rows each and execute them one by one.
            When False (default, faster), stream the rows through
            ``psycopg2.extras.execute_values`` and commit once at the end.

    Note:
        Multi-values mode never calls ``pg.commit()`` itself; presumably
        ``pg.execute`` persists each statement -- verify against PgClient.
    """
    # Constants
    TABLE = table
    COLUMNS = get_sql_columns(schema=schema)
    TOTAL_HEXAGONS = len(h3_cells)
    POSTGRES_STATEMENT_MAX_RECORDS = 1000
    INSERT_STATEMENT = f"""
        INSERT INTO {TABLE} ({COLUMNS})
        VALUES
        """

    def _cell_row(cell: str) -> tuple:
        """Compute the values stored for one cell, shared by both insert modes."""
        resolution = h3.h3_get_resolution(cell)
        # The centroid_lat, centroid_lng unpacking order matters; the row
        # stores lng before lat because ST_Point takes (x, y).
        centroid_lat, centroid_lng = h3.h3_to_geo(h=cell)
        return (
            h3.string_to_h3(cell),
            resolution,
            h3.edge_length(resolution, unit="m"),
            h3.cell_area(cell, unit="m^2"),
            centroid_lng,
            centroid_lat,
            H3Transformation.cell_to_geojson(
                h3_cell=cell, include_default_properties=False, geometry_only=True
            ),
        )

    # Batch INSERT methods
    def _multi_values_execute(pg: PgClient, h3_cells: set[str]) -> None:
        print("WARN: Using multi-values execute mode.")

        # Print the running record count every N_EXECUTIONS statements
        N_EXECUTIONS = 10

        execution_count = 0
        record_count = 0
        pending_values: list[str] = []
        for cell in h3_cells:
            record_count += 1
            (
                idx,
                resolution,
                circumradius_m,
                area_m2,
                centroid_lng,
                centroid_lat,
                geometry_geojson,
            ) = _cell_row(cell)

            # NOTE(review): geometry_geojson is spliced into a quoted SQL
            # literal; assumes it is JSON text without single quotes -- confirm
            # against H3Transformation.cell_to_geojson.
            # The polygon is tagged with SRID 4326 explicitly, matching the
            # execute_values template below.
            pending_values.append(
                f"""
            (
                {idx},
                {resolution},
                {circumradius_m},
                {area_m2},
                ST_SetSRID(ST_Point({centroid_lng}, {centroid_lat}), 4326),
                ST_SetSRID(ST_GeomFromGeoJSON('{geometry_geojson}'), 4326)
            )"""
            )

            # Flush when the batch is full or the last cell has been reached.
            # The batch is emptied after every flush, so it can never exceed
            # POSTGRES_STATEMENT_MAX_RECORDS.
            if (len(pending_values) == POSTGRES_STATEMENT_MAX_RECORDS) or (
                record_count == TOTAL_HEXAGONS
            ):
                insert_query = INSERT_STATEMENT + ",".join(pending_values)
                pg.execute(query=insert_query)

                execution_count += 1
                if execution_count % N_EXECUTIONS == 0:
                    print("--- Inserted records:", record_count)

                # Reset
                pending_values = []

    def _execute_values(pg: PgClient, h3_cells: set[str]) -> None:
        DEFAULT_PAGE_SIZE = 1000

        cursor = pg.cursor()
        print("Begin execute_values")
        psycopg2.extras.execute_values(
            cursor,
            sql=INSERT_STATEMENT + "%s;",
            # Lazily yields one tuple per cell; each cell is evaluated once
            argslist=(_cell_row(cell) for cell in h3_cells),
            template="""(
                %s, 
                %s, 
                %s, 
                %s,  
                ST_SetSRID(ST_Point(%s, %s), 4326), 
                ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326)
            )""",
            page_size=DEFAULT_PAGE_SIZE,
        )

        # Commit changes
        pg.commit()
        print("Successfully inserted H3 cells into table", TABLE)

    # EXECUTION METHODS
    if multi_values_exec:
        _multi_values_execute(pg=pg, h3_cells=h3_cells)
    else:
        # Default execution method (faster)
        _execute_values(pg=pg, h3_cells=h3_cells)

    return None


def add_primary_key(pg: PgClient, table: str, column: str = "idx") -> None:
    """Declare ``column`` as the primary key of ``table``.

    Args:
        pg: Database client; only ``execute(sql)`` is used.
        table: Table to alter.
        column: Column to promote to primary key (``"idx"`` by default).
    """
    statement = "ALTER TABLE {} ADD PRIMARY KEY ({});".format(table, column)
    pg.execute(statement)
    print("Successfully added primary key to table", table)


def create_gist_index_sql(pg: PgClient, table: str) -> None:
    """Create a GIST index named ``gidx_<table>`` on the table's ``geometry`` column.

    Args:
        pg: Database client; only ``execute(sql)`` is used.
        table: Table to index; also used as the suffix of the index name.
    """
    # Assembled line by line; the resulting text matches a triple-quoted block
    statement = (
        "\n"
        f"    CREATE INDEX gidx_{table}\n"
        f"    ON {table} USING GIST (geometry);\n"
        "    "
    )

    pg.execute(statement)
    print("Successfully created GIST index for table", table)

## H3 Polyfill Vietnam Pipeline

### Configurations

In [None]:
# Administrative unit to polyfill and the H3 resolution to use
administrative = "vietnam"
resolution = 7

# Target table name derived from the two settings above
table = get_h3_table_name(adm=administrative, res=resolution)

# Column name -> PostgreSQL column definition. The key order defines the column
# order of CREATE TABLE and of the INSERT column list, so it must match the
# order in which insert_h3_to_table produces its values.
h3_table_schema = {
    "idx": "INT8 NOT NULL",
    "resolution": "INT2 NOT NULL",
    "circumradius_m": "FLOAT8 NOT NULL",
    "area_m2": "FLOAT8 NOT NULL",
    "centroid": "GEOMETRY (POINT, 4326) NOT NULL",
    "geometry": "GEOMETRY (POLYGON, 4326) NOT NULL",
}

### Extract Vietnam's border Polygons (GeoJSON)

In [None]:
# Get Vietnam border.
# as_geojson=True adds a "geojson" column, and center_column="centroid" adds
# "centroid_lng" / "centroid_lat" columns; the next cell reads all three.
vietnam_border_df = get_administrative_df(
    pg=pg,
    table="vietnam_border",
    geometry_column="geometry",
    center_column="centroid",
    as_geojson=True,
)

# Render the frame with the notebook's rich display
display(vietnam_border_df)

In [None]:
# Extract data from PostgreSQL (with Pandas).
# Only the first row is read -- assumes the border table holds a single row
# for Vietnam (TODO confirm).
vn_centroid_lng = vietnam_border_df["centroid_lng"].values[0]
vn_centroid_lat = vietnam_border_df["centroid_lat"].values[0]
# The "geojson" column holds JSON text; parse it into a Python object
vn_multipolygon_geojson = json.loads(vietnam_border_df["geojson"].values[0])
print(vn_centroid_lng, vn_centroid_lat)
print(type(vn_multipolygon_geojson))

In [None]:
# Convert Vietnam border MultiPolygon to Polygons, because the H3 fill step
# works on one Polygon GeoJSON at a time
vn_polygon_geojsons = multipolygon_to_polygons(vn_multipolygon_geojson)
# Show the first polygon as a quick sanity check
print(vn_polygon_geojsons[0])

### Generate H3 & save to PostgreSQL

In [None]:
# Generate H3 cells covering every border polygon at the configured resolution
vietnam_border_cells = polygon_geojsons_to_h3(
    polygon_geojsons=vn_polygon_geojsons, res=resolution
)
# Number of distinct cells produced
print(len(vietnam_border_cells))

# H3 subset in case of Testing (h3_cells could grow very large).
# Takes 10 cells; which ones is arbitrary because sets are unordered.
vietnam_border_cells_subset = set(itertools.islice(vietnam_border_cells, 10))

In [None]:
# PostgreSQL operations: create the table, load the cells, then add the
# primary key and the spatial index.
h3_cells = vietnam_border_cells

try:
    create_h3_table(pg=pg, table=table, schema=h3_table_schema)
    insert_h3_to_table(
        pg=pg,
        h3_cells=h3_cells,
        table=table,
        schema=h3_table_schema,
        multi_values_exec=True,
    )
    add_primary_key(pg=pg, table=table)
    create_gist_index_sql(pg=pg, table=table)
except Exception as error:
    # psycopg2 errors derive from Exception, so this one clause also covers
    # database failures; the error is reported and the cell finishes normally.
    print(error)
finally:
    # The client is closed whatever the outcome
    pg.close()