In [3]:
from overturemapsdownloader.utils import write_geospatial_data

In [4]:
import duckdb
from typing import Any
import json
import geopandas as gpd
from osgeo import ogr

In [5]:
# Open a DuckDB connection and run a trivial query as a smoke test.
con = duckdb.connect()
smoke_test_rows = con.execute('SELECT 42').fetchall()
print(smoke_test_rows)

[(42,)]


In [6]:
# Install (usually needed only once) and load the DuckDB extensions this
# notebook uses, then set the S3 region for the remote reads.
# Every step has its own try/except so the printed message names the step
# that actually failed, and one failing step does not skip the others.
for extension_name in ('httpfs', 'spatial'):
    try:
        con.install_extension(extension_name)
    except Exception as e:
        print(f"Failed to install extension '{extension_name}': {e}")
    try:
        con.load_extension(extension_name)
    except Exception as e:
        print(f"Failed to load extension '{extension_name}': {e}")

try:
    con.execute("SET s3_region='us-west-2'")
except Exception as e:
    print(f"Failed to set s3_region: {e}")

In [None]:
# Define the query to read from S3 and filter the data.
# The geometry column is returned exactly as stored. The WHERE clause feeds
# that same column to ST_GeomFromWkb, so it is WKB, which Python-side tools
# can parse directly. Selecting ST_GeomFromWkb(geometry) instead would hand
# Python DuckDB's GEOMETRY value, whose byte form is not standard WKB
# (see the DuckDB spatial extension docs).
query = '''
SELECT
    type,
    subType,
    localityType,
    adminLevel,
    isoCountryCodeAlpha2,
    JSON(names) AS names,
    JSON(sources) AS sources,
    geometry
FROM read_parquet(
    's3://overturemaps-us-west-2/release/2023-07-26-alpha.0/theme=admins/type=*/*',
    filename=true,
    hive_partitioning=1
)
WHERE adminLevel = 2
    AND ST_GeometryType(ST_GeomFromWkb(geometry)) IN ('POLYGON','MULTIPOLYGON')
LIMIT 5
'''

In [None]:
# Execute the query once and keep the result in two shapes.
# Running it a second time just to call fetchall() would repeat the remote
# scan, and a query that uses LIMIT without ORDER BY is not guaranteed to
# return the same rows on both runs.
df = con.execute(query).fetchdf()
# Plain-tuple view of the same rows, in the same column order as df.
# NOTE(review): values carry the types pandas holds them in, which may differ
# from fetchall()'s native Python types (e.g. for BLOB columns) — confirm
# nothing downstream depends on that.
result = list(df.itertuples(index=False, name=None))

In [None]:
# Peek at the first few values of the fetched geometry column.
print(df.loc[:, 'geometry'].head())

In [None]:
# Column labels for the tuple rows, listed in the query's SELECT order.
columns = [
    'type',
    'subType',
    'localityType',
    'adminLevel',
    'isoCountryCodeAlpha2',
    'names',
    'sources',
    'geometry',
]
gdf = gpd.GeoDataFrame(data=result, columns=columns)

In [None]:
# Preview the first rows of the frame built from the tuple rows.
gdf.head(n=5)

In [None]:
# Keep the first row's geometry value for inspection.
first_geometry = gdf['geometry'].iat[0]

In [None]:
# Try to convert it to a Shapely geometry object
def try_wkb_loads(data):
    """Parse one WKB value into a geometry object, or return None on failure.

    Args:
        data: A single WKB value as a bytes-like object.

    Returns:
        The parsed geometry, or None if parsing raised. The error is printed
        rather than propagated so the function can be mapped over a column.
    """
    try:
        # No `wkb` name is imported in this notebook, so parsing goes through
        # geopandas (imported as `gpd`). bytes(...) normalizes
        # bytearray/memoryview input to bytes before parsing.
        return gpd.GeoSeries.from_wkb([bytes(data)]).iloc[0]
    except Exception as e:
        print(f"Failed to convert geometry: {e}")
        return None

In [None]:
# Replace each geometry value with the result of try_wkb_loads.
raw_geometries = gdf['geometry']
gdf['geometry'] = raw_geometries.apply(try_wkb_loads)

In [None]:
# Show the first 10 characters of the first geometry's text form.
# At this point the column holds whatever the conversion step returned (a
# parsed geometry, or None when conversion failed), not a sliceable byte
# string, so the value is converted to text before slicing.
print(str(gdf['geometry'].iloc[0])[:10])

In [None]:
# Convert any remaining raw WKB values to Shapely geometry objects.
# Values that are not bytes-like (already-parsed geometries, None) are kept
# as they are, so running this cell after an earlier conversion is harmless.
# bytes(...) normalizes bytearray/memoryview input to bytes before parsing.
# NOTE(review): assumes bytes-like values are standard WKB — confirm against
# what the query returns.
gdf['geometry'] = gdf['geometry'].apply(
    lambda x: gpd.GeoSeries.from_wkb([bytes(x)]).iloc[0]
    if isinstance(x, (bytes, bytearray, memoryview))
    else x
)

In [None]:
# Preview the first rows after the geometry conversion.
gdf.head(n=5)

In [None]:
# Create a GeoDataFrame from the fetched frame.
# GeoDataFrame(..., geometry='geometry') needs geometry objects in that
# column, so the fetched values are parsed from WKB first.
# NOTE(review): assumes df['geometry'] holds standard WKB as bytes-like
# values — confirm against what the query returns.
# NOTE(review): no CRS is assigned; presumably the data is WGS84 (EPSG:4326)
# — confirm and pass crs= if so.
geometry_series = gpd.GeoSeries.from_wkb(
    df['geometry'].map(
        # Normalize bytearray/memoryview to bytes; leave anything else as is
        # so unexpected values fail loudly in from_wkb.
        lambda raw: bytes(raw) if isinstance(raw, (bytearray, memoryview)) else raw
    )
)
gdf = gpd.GeoDataFrame(df.assign(geometry=geometry_series), geometry='geometry')

In [None]:
# Preview the first rows of the GeoDataFrame built from df.
gdf.head(n=5)

In [None]:
# Serialize the GeoDataFrame to GeoJSON text, then parse it into Python
# objects so the data is also available as `geojson_data`.
geojson_text = gdf.to_json()
geojson_data = json.loads(geojson_text)

# Re-serialize the parsed data and write it to disk.
with open('countries.geojson', mode='w') as geojson_file:
    geojson_file.write(json.dumps(geojson_data))

In [None]:
# Hand the GeoDataFrame to the project's writer helper.
# NOTE(review): presumably this writes a GeoPackage with a 'countries' layer,
# judging by the file name and the layer argument — confirm against the helper.
gpkg_path = 'countries.gpkg'
write_geospatial_data(gdf, gpkg_path, layer='countries')

In [None]:
# Close the DuckDB connection opened at the top of the notebook; no later
# cell in view uses `con`.
con.close()