In [1]:
import os
import h3
import psycopg2
import json
import itertools
import time
import pandas as pd
from dotenv import load_dotenv
from h3_transformation import H3Transformation
from pg_client import PgClient

## PostgreSQL

In [2]:
# Shared PostgreSQL client for the 'spatial_dwh' database, used by the query cell below.
pg = PgClient(database="spatial_dwh")

Connected to PostgreSQL database 'spatial_dwh'


In [3]:
# Load the country border row(s) from PostgreSQL into a DataFrame.
table = "vietnam_border"
limit = 100  # NOTE(review): defined here but not referenced by the query below

conn = None
try:
    cur = pg.cursor()

    # Geometry is returned as GeoJSON text; the centroid as a "lng,lat" string.
    border_query = f"""
        SELECT 
            country_id, 
            country_name, 
            ST_AsGeoJSON(geometry) AS geojson, 
            CONCAT_WS(',', ST_X(centroid), ST_Y(centroid)) AS centroid
        FROM {table}
        """
    cur.execute(border_query)
    values_arguments = cur.fetchall()

    # Column names come from the cursor metadata so they follow the SELECT list.
    column_names = [column[0] for column in cur.description]
    records_df = pd.DataFrame(values_arguments, columns=column_names)
    display(records_df.head())

except Exception as error:
    # Best-effort cell: report the problem instead of raising.
    print(error)

finally:
    # The client is closed whether or not the query succeeded.
    pg.close()
    

Unnamed: 0,country_id,country_name,geojson,centroid
0,1,Vietnam,"{""type"":""MultiPolygon"",""coordinates"":[[[[113.8...","106.4063821609223,16.57755915233502"


In [None]:
# Read the first record of the query result: centroid "lng,lat" text and the border GeoJSON.
vn_centroid_lng, vn_centroid_lat = records_df["centroid"].iloc[0].split(',')
vn_multipolygon_geojson = json.loads(records_df["geojson"].iloc[0])

# Split the MultiPolygon into standalone Polygon GeoJSON objects,
# one per member of its "coordinates" array.
vn_polygon_geojsons = [
    {"type": "Polygon", "coordinates": polygon_coordinates}
    for polygon_coordinates in vn_multipolygon_geojson["coordinates"]
]

# Show the last polygon as a quick check of the conversion.
print(vn_polygon_geojsons[-1])

## H3

In [None]:
# H3 resolution used to fill the border polygons.
# r9 averages: area = 0.1053325 km^2, edge = 0.174375668 km, unique_hex = 4,842,432,842
resolution = 9

# One collection of H3 cell ids per polygon. geo_json_conformant=True tells H3
# that the coordinates are in GeoJSON (lng, lat) order.
h_cell_sets = [
    h3.polyfill(geojson=polygon_geojson, res=resolution, geo_json_conformant=True)
    for polygon_geojson in vn_polygon_geojsons
]

# polyfill_geojson = H3Transformation.h3_to_geojson(h_cells)
# print(polyfill_geojson)

In [None]:
# Merge the per-polygon cell collections into one de-duplicated set.
h_cells = set()
for polygon_cells in h_cell_sets:
    h_cells.update(polygon_cells)

# Number of distinct H3 cells covering all polygons.
len(h_cells)

In [None]:
# Create the H3 hexagon table for one administrative area, bulk-insert one row
# per cell in `h_cells`, then add the primary key and the spatial index.
#
# NOTE(review): PG_CONF is not defined in any visible cell; it must hold the
# psycopg2 connection keyword arguments (including "database") before this runs.
administrative = "vietnam"
table = f"h3_{administrative}_r{resolution}"

# Maximum number of rows sent in a single INSERT statement.
POSTGRES_STATEMENT_MAX_RECORDS = 1000
TOTAL_HEXAGONS = len(h_cells)

# CREATE TABLE
create_table_sql = f"""
CREATE TABLE {table} (
    idx             INT8 NOT NULL,
    resolution      INT2 NOT NULL,
    circumradius_m  FLOAT8 NOT NULL,
    area_m2         FLOAT8 NOT NULL,
    centroid        GEOMETRY (POINT, 4326) NOT NULL,
    geometry        GEOMETRY (POLYGON, 4326) NOT NULL,

    CONSTRAINT  ck_resolution CHECK (resolution >= 0 AND resolution <= 15)
);
"""

# ADD PRIMARY KEY and CREATE INDEX (run only after the load has finished)
alter_table_pk_sql = f"""
ALTER TABLE {table} ADD PRIMARY KEY (idx);
"""
create_gist_index_sql = f"""
CREATE INDEX gidx_{table}
ON {table} USING GIST (geometry);
"""

# INSERT prefix; the VALUES tuples of each batch are appended to it.
insert_sql = f"""
INSERT INTO {table} (idx, resolution, circumradius_m, area_m2, centroid, geometry)
VALUES
"""

conn = None
try:
    # Connect to database
    conn = psycopg2.connect(**PG_CONF)
    print(f'Connection established to database: "{PG_CONF["database"]}".')

    # Get database cursor
    cur = conn.cursor()

    ## Create table
    cur.execute(create_table_sql)
    conn.commit()

    ## Batch insert
    record_count = 0
    batch_rows = []
    for h_cell in h_cells:
        record_count += 1

        # Extract H3 data for each hexagon. A local name is used for the cell
        # resolution so the notebook-level `resolution` is not overwritten.
        idx = h3.string_to_h3(h_cell)
        cell_resolution = h3.h3_get_resolution(h_cell)
        # The edge length at this resolution is stored as the circumradius.
        circumradius_m = h3.edge_length(cell_resolution, unit='m')
        area_m2 = h3.cell_area(h_cell, unit='m^2')
        centroid_lat, centroid_lng = h3.h3_to_geo(h=h_cell)
        geojson = H3Transformation.h3cell_to_geojson(h3_cell=h_cell, include_properties=False, geometry_only=True)

        # One VALUES tuple per hexagon. ST_Point takes (x, y) = (lng, lat).
        # All interpolated values are derived from H3, not from user input.
        batch_rows.append(f"""
        (
            {idx},
            {cell_resolution},
            {circumradius_m},
            {area_m2},
            ST_SetSRID(ST_Point({centroid_lng}, {centroid_lat}), 4326),
            ST_GeomFromGeoJSON('{geojson}')
        )""")

        # Flush when the batch is full or the last hexagon has been reached.
        if (record_count % POSTGRES_STATEMENT_MAX_RECORDS == 0) or (record_count == TOTAL_HEXAGONS):
            print("--- Executing record: ", record_count)

            cur.execute(insert_sql + ",".join(batch_rows))
            conn.commit()
            print(f'--- Executed {len(batch_rows)} values_arguments.')

            # Reset
            batch_rows = []

    # Add the primary key and the GIST index only after every batch was
    # inserted successfully. Running them in `finally` would execute against an
    # aborted transaction (or a missing connection) and hide the original error.
    cur.execute(alter_table_pk_sql)
    conn.commit()
    cur.execute(create_gist_index_sql)
    conn.commit()

except Exception as error:
    # Report the failure; uncommitted work is discarded when the connection closes.
    print(error)

finally:
    # The connection may never have been opened if connect() itself failed.
    if conn is not None:
        conn.close()

In [None]:
# Preview the GeoJSON geometry of up to 10 cells taken from h_cells.
h_cells_subset = set(itertools.islice(h_cells, 10))

# # sample hexagons
# hex = h3.geo_to_h3(lat=10.823098, lng=106.629663, resolution=5)
# children = h3.h3_to_children(hex)

# insert_into_sql = f"""
# INSERT INTO sample_table (idx, resolution, geometry, centroid, area_m2, area_km2)
# VALUES
# """

# Convert the cell of the current iteration, not a variable left over from
# another cell's loop.
for subset_cell in h_cells_subset:
    print(H3Transformation.h3cell_to_geojson(h3_cell=subset_cell, include_properties=False, geometry_only=True))

In [None]:
# polyfill_geojson = H3Transformation.h3_to_geojson(h_cells_subset)
# print(polyfill_geojson)

In [None]:
# Print the centroid (latitude, then longitude) of one sample cell.
# The sample is taken from h_cells directly so this cell does not depend on a
# loop variable leaked by an earlier cell.
sample_cell = next(iter(h_cells))
lat, lng = h3.h3_to_geo(h=sample_cell)
print(lat)
print(lng)