## extracting overture data using the API

In [12]:
import duckdb

duckdb.install_extension('httpfs')
duckdb.load_extension('httpfs')

duckdb.sql("""
SET s3_region='us-west-2';
SET s3_access_key_id='';
SET s3_secret_access_key='';
SET s3_session_token='';
SET s3_url_style='path';
SET s3_use_ssl=true;
""")

# Try to read file names from the public Overture S3 bucket
df_files = duckdb.sql("""
SELECT DISTINCT filename
FROM read_parquet(
    's3://overturemaps-us-west-2/release/2025-10-22.0/theme=base/type=infrastructure/*.parquet',
    filename=true
)
LIMIT 10;
""").df()

print(df_files)


                                            filename
0  s3://overturemaps-us-west-2/release/2025-10-22...
1  s3://overturemaps-us-west-2/release/2025-10-22...
2  s3://overturemaps-us-west-2/release/2025-10-22...
3  s3://overturemaps-us-west-2/release/2025-10-22...
4  s3://overturemaps-us-west-2/release/2025-10-22...
5  s3://overturemaps-us-west-2/release/2025-10-22...
6  s3://overturemaps-us-west-2/release/2025-10-22...
7  s3://overturemaps-us-west-2/release/2025-10-22...
8  s3://overturemaps-us-west-2/release/2025-10-22...
9  s3://overturemaps-us-west-2/release/2025-10-22...


In [22]:
import duckdb
import geopandas as gpd
from shapely import from_wkb

# 1️⃣ Connect + load extensions
duckdb.install_extension('httpfs')
duckdb.install_extension('spatial')
duckdb.load_extension('httpfs')
duckdb.load_extension('spatial')

# 2️⃣ S3 config
duckdb.sql("""
SET s3_region='us-west-2';
SET s3_url_style='path';
SET s3_use_ssl=true;
""")

# 3️⃣ Bounding box
xmin, ymin = 2.10, 41.35
xmax, ymax = 2.25, 41.45

# 4️⃣ Query Overture "places"
query = f"""
SELECT
    basic_category AS category,
    geometry
FROM read_parquet(
  's3://overturemaps-us-west-2/release/2025-10-22.0/theme=places/type=place/*.parquet'
)
WHERE geometry IS NOT NULL
  AND ST_Intersects(
      ST_GeomFromText('POLYGON(({xmin} {ymin}, {xmax} {ymin}, {xmax} {ymax}, {xmin} {ymax}, {xmin} {ymin}))'),
      geometry
  )
LIMIT 1000;
"""




In [26]:
from shapely import wkb

# 5️⃣ Read into Pandas DataFrame
df = duckdb.sql(query).df()
df["geometry"] = df["geometry"].apply(wkb.loads)
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")  # or your CRS


TypeError: Expected bytes or string, got int

In [25]:
df

Unnamed: 0,category,geometry
0,gym,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1,sport_fitness_facility,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
3,sport_recreation_club,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,sport_recreation_club,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
...,...,...
995,specialty_store,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
996,restaurant,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
997,human_resource_service,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
998,b2b_service,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."


In [24]:

# 6️⃣ Convert WKB geometry to Shapely
df["geometry"] = df["geometry"].apply(lambda g: from_wkb(g) if g is not None else None)

# 7️⃣ Wrap in GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# 8️⃣ Inspect and plot
print(gdf.head())
gdf.plot(figsize=(8,8))

TypeError: Expected bytes or string, got int