# SedonaDB Overture Examples

This notebook shows how to query Overture Maps data (buildings and divisions) with SedonaDB.

In [None]:
import sedona.db
import os

# Skip AWS request signing so the S3 reads in this notebook work without
# credentials, and use the region named in the `overturemaps-us-west-2` bucket.
os.environ.update(
    {
        "AWS_SKIP_SIGNATURE": "true",
        "AWS_DEFAULT_REGION": "us-west-2",
    }
)

# SedonaDB context used by every query in this notebook
sd = sedona.db.connect()

## Overture buildings table

In [7]:
# Overture buildings for the 2025-08-20.0 release (Parquet files on S3)
buildings_path = (
    "s3://overturemaps-us-west-2/release/2025-08-20.0/theme=buildings/type=building/"
)
df = sd.read_parquet(buildings_path)

In [8]:
# preview 10 rows of the buildings data
df.show(10)

┌──────────────────────────────────────┬─────────────────────────────────────────┬───┬─────────────┐
│                  id                  ┆                 geometry                ┆ … ┆ roof_height │
│               utf8view               ┆           wkb_view <ogc:crs84>          ┆   ┆   float64   │
╞══════════════════════════════════════╪═════════════════════════════════════════╪═══╪═════════════╡
│ 06533301-f2ec-42e0-8138-732ac25a7497 ┆ POLYGON((-58.4757066 -34.7389169,-58.4… ┆ … ┆             │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ cc0c048c-088d-4cb3-9982-3961edfdf416 ┆ POLYGON((-58.4755777 -34.7389131,-58.4… ┆ … ┆             │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ e52a0dbc-fb93-40e2-b1df-03626855299c ┆ POLYGON((-58.4754112 -34.7394253,-58.4… ┆ … ┆             │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌

In [9]:
# register the DataFrame under the name "buildings" so SQL queries can reference it
df.to_view("buildings")

In [10]:
# the buildings table is very large: it contains billions of rows
sd.sql("""
SELECT
    COUNT(*)
FROM
    buildings
""").show()

┌────────────┐
│  count(*)  │
│    int64   │
╞════════════╡
│ 2539170484 │
└────────────┘


In [11]:
# inspect the schema of the buildings table to see its columns and their types
df.schema

SedonaSchema with 24 fields:
  id: Utf8View
  geometry: wkb_view <ogc:crs84>
  bbox: Struct(xmin Float32, xmax Float32, ymin Float32, ymax Float32)
  version: Int32
  sources: List(Field { name: "element", data_type: Struct([Field { name: "property", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "dataset", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "record_id", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "update_time", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "confidence", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "between", data_type: List(Field { name: "element", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metad

In [12]:
# Show five above-ground buildings taller than 20 meters that intersect a
# bounding box around New York City (a rectangle, not the city boundary).
nyc_bbox_wkt = (
    "POLYGON((-74.2591 40.4774, -74.2591 40.9176, "
    "-73.7004 40.9176, -73.7004 40.4774, -74.2591 40.4774))"
)
tall_nyc_buildings_sql = f"""
SELECT
    id,
    height,
    num_floors,
    roof_shape,
    ST_Centroid(geometry) as centroid
FROM
    buildings
WHERE
    is_underground = FALSE
    AND height IS NOT NULL
    AND height > 20
    AND ST_Intersects(geometry, ST_SetSRID(ST_GeomFromText('{nyc_bbox_wkt}'), 4326))
LIMIT 5;
"""
sd.sql(tall_nyc_buildings_sql).show()

┌─────────────────────────┬────────────────────┬────────────┬────────────┬─────────────────────────┐
│            id           ┆       height       ┆ num_floors ┆ roof_shape ┆         centroid        │
│         utf8view        ┆       float64      ┆    int32   ┆  utf8view  ┆     wkb <ogc:crs84>     │
╞═════════════════════════╪════════════════════╪════════════╪════════════╪═════════════════════════╡
│ 1b9040c2-2e79-4f56-aba… ┆               22.4 ┆            ┆            ┆ POINT(-74.230407502993… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 1b5e1cd2-d697-489e-892… ┆               21.5 ┆            ┆            ┆ POINT(-74.231451103592… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ c1afdf78-bf84-4b8f-ae1… ┆               20.9 ┆            ┆            ┆ POINT(-74.232593032240… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌

## Overture divisions table

In [13]:
# Overture division areas for the 2025-08-20.0 release (Parquet files on S3)
division_area_path = (
    "s3://overturemaps-us-west-2/release/2025-08-20.0/theme=divisions/type=division_area/"
)
df = sd.read_parquet(division_area_path)

In [14]:
# preview up to 10 rows of the division_area data
df.show(10)

┌────────────────┬────────────────┬────────────────┬───┬────────────────┬──────────┬───────────────┐
│       id       ┆    geometry    ┆      bbox      ┆ … ┆ is_territorial ┆  region  ┆  division_id  │
│    utf8view    ┆ wkb_view <ogc… ┆ struct(xmin f… ┆   ┆     boolean    ┆ utf8view ┆    utf8view   │
╞════════════════╪════════════════╪════════════════╪═══╪════════════════╪══════════╪═══════════════╡
│ 61912ffd-060b… ┆ POLYGON((23.3… ┆ {xmin: 22.735… ┆ … ┆ true           ┆ ZA-EC    ┆ 2711d6ca-ac1… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 7647b992-e0d6… ┆ POLYGON((26.5… ┆ {xmin: 26.521… ┆ … ┆ true           ┆ ZA-EC    ┆ 0e8a08eb-6f2… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 4058785b-82c9… ┆ MULTIPOLYGON(… ┆ {xmin: 22.735… ┆ … ┆ false          ┆ ZA-EC    ┆ 2711d6ca-ac1… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌

In [15]:
# register the DataFrame under the name "division_area" so SQL queries can reference it
df.to_view("division_area")

In [None]:
# count the rows in the division_area view
sd.sql("""
SELECT
    COUNT(*)
FROM division_area
""").show()

┌──────────┐
│ count(*) │
│   int64  │
╞══════════╡
│  1035749 │
└──────────┘


In [17]:
# inspect the schema of the division_area table to see its columns and their types
df.schema

SedonaSchema with 13 fields:
  id: Utf8View
  geometry: wkb_view <ogc:crs84>
  bbox: Struct(xmin Float32, xmax Float32, ymin Float32, ymax Float32)
  country: Utf8View
  version: Int32
  sources: List(Field { name: "element", data_type: Struct([Field { name: "property", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "dataset", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "record_id", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "update_time", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "confidence", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "between", data_type: List(Field { name: "element", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_o

In [None]:
# Fetch every division whose geometry intersects a bounding box around
# Nova Scotia, and keep the result in memory with collect().
nova_scotia_bbox_wkt = (
    "POLYGON((-66.5 43.4, -66.5 47.1, -59.8 47.1, -59.8 43.4, -66.5 43.4))"
)
ns_divisions_sql = f"""
SELECT
    country, region, names, geometry
FROM division_area
WHERE
    ST_Intersects(geometry, ST_SetSRID(ST_GeomFromText('{nova_scotia_bbox_wkt}'), 4326))
"""
ns = sd.sql(ns_divisions_sql).collect()

In [19]:
# register the collected result under the name "ns_divisions" so SQL queries can reference it
ns.to_view("ns_divisions")

In [20]:
# Keep only rows whose region is 'CA-NS' and expand `names` with UNNEST
# alongside the geometry column.
ns_names_sql = """
SELECT UNNEST(names), geometry
FROM ns_divisions
WHERE region = 'CA-NS'
"""
df = sd.sql(ns_names_sql)

In [24]:
%%time
# %%time reports how long this cell takes to run.
# It executes quickly because the Nova Scotia data was persisted in memory with collect().
df.show(2)

┌────────────────────────┬────────────────────────┬────────────────────────┬───────────────────────┐
│ __unnest_placeholder(n ┆ __unnest_placeholder(n ┆ __unnest_placeholder(n ┆        geometry       │
│ s_divisions.names).pr… ┆ s_divisions.names).co… ┆ s_divisions.names).ru… ┆  wkb_view <ogc:crs84> │
╞════════════════════════╪════════════════════════╪════════════════════════╪═══════════════════════╡
│ Seal Island            ┆                        ┆                        ┆ POLYGON((-66.0528452… │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ Mud Island             ┆                        ┆                        ┆ POLYGON((-66.0222822… │
└────────────────────────┴────────────────────────┴────────────────────────┴───────────────────────┘
CPU times: user 8.75 ms, sys: 2.41 ms, total: 11.2 ms
Wall time: 8.47 ms
