In [None]:
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "geoarrow-rust-core",
#     "geoarrow-rust-io",
#     "lonboard",
#     "obstore",
# ]
# ///

# Visualizing Overture Data in Lonboard with Obstore and GeoArrow-Rust

This demo will show how to inspect the spatial partitioning of [GeoParquet](https://geoparquet.org/) datasets and visualize spatial extracts in Lonboard.

This notebook uses [Obstore](https://developmentseed.org/obstore/latest/) to inspect data on S3. 

This notebook uses the [Python bindings of GeoArrow Rust](https://geoarrow.org/geoarrow-rs/python/latest/) to load spatially-filtered GeoParquet data.

## Dependencies

First install [`uv`](https://docs.astral.sh/uv) and then run this notebook with

```
uvx juv run overture-geoparquet.ipynb
```

That will automatically create a temporary `uv` environment with the dependencies required by this notebook.

## Imports

In [1]:
from geoarrow.rust.io import GeoParquetDataset
from obstore.store import S3Store

from lonboard import Map, viz
from lonboard.basemap import CartoBasemap

We'll create an [S3Store](https://developmentseed.org/obstore/latest/api/store/aws/#obstore.store.S3Store) to access Overture data on AWS S3.

Alternatively, we could create an [`AzureStore`](https://developmentseed.org/obstore/latest/api/store/azure/#obstore.store.AzureStore) to access the copy of Overture data stored on Azure and all the following steps would be the same.

In [2]:
# Root prefix of a single Overture release in the public AWS bucket. All paths
# passed to `store` later in the notebook are relative to this prefix.
OVERTURE_RELEASE_URL = "s3://overturemaps-us-west-2/release/2025-06-25.0/"

# skip_signature=True makes the store send unsigned requests, so no AWS
# credentials are needed to read this public bucket.
store = S3Store.from_url(
    OVERTURE_RELEASE_URL,
    skip_signature=True,
    region="us-west-2",
)

Let's inspect the directories under this release. `list_with_delimiter` shows us the `common_prefixes` (i.e. directories).

In [4]:
# Called without a prefix, so this lists the top level of the release that
# `store` points at.
store.list_with_delimiter()

{'common_prefixes': ['theme=addresses',
  'theme=base',
  'theme=buildings',
  'theme=divisions',
  'theme=places',
  'theme=transportation'],
 'objects': []}

Now we can inspect the directories within `theme=buildings`:

In [5]:
# Passing a prefix restricts the listing to that "directory" of the release.
store.list_with_delimiter("theme=buildings")

{'common_prefixes': ['theme=buildings/type=building',
  'theme=buildings/type=building_part'],
 'objects': []}

Next, list the contents of `theme=buildings/type=building`.

The `objects` entry of the result gives us the object metadata for each of the raw Parquet files stored there:

In [6]:
# List the building Parquet files, keeping only the per-file metadata records
# (path, size, ...) and dropping the `common_prefixes` part of the listing.
building_listing = store.list_with_delimiter("theme=buildings/type=building")
objects = building_listing["objects"]

# Preview just the first two records; the full listing is long.
objects[:2]

[{'path': 'theme=buildings/type=building/part-00000-8a741876-e04d-4e66-bc96-0171910fa1b1-c000.zstd.parquet',
  'last_modified': datetime.datetime(2025, 6, 25, 10, 19, 2, tzinfo=datetime.timezone.utc),
  'size': 1057456618,
  'e_tag': '"d02e00f5c225cd5fb55db8c57074a6f7-202"',
  'version': None},
 {'path': 'theme=buildings/type=building/part-00001-8a741876-e04d-4e66-bc96-0171910fa1b1-c000.zstd.parquet',
  'last_modified': datetime.datetime(2025, 6, 25, 10, 19, 2, tzinfo=datetime.timezone.utc),
  'size': 1039045476,
  'e_tag': '"c0bc02d43c98db116791ac54b9eac087-199"',
  'version': None}]

Now we can open a `GeoParquetDataset` with these objects:

In [7]:
# Open every listed Parquet file as one logical dataset. `objects` supplies the
# file records from the listing and `store` is the bucket they are fetched from.
# NOTE(review): what `open` reads eagerly is not visible here — confirm
# against the geoarrow-rust documentation.
dataset = GeoParquetDataset.open(objects, store=store)

  dataset = GeoParquetDataset.open(objects, store=store)


In [8]:
# Number of Parquet row groups in the dataset (presumably summed across all
# files — confirm).
dataset.num_row_groups

16861

In [9]:
# Total row count of the dataset; the saved output reports roughly 2.5 billion
# rows, which is why the notebook only ever downloads a small spatial extract.
dataset.num_rows

2534560183

We can use the `dataset.fragments` attribute to access the raw underlying GeoParquet files:

In [10]:
# Slice to the first five fragments so the output stays short.
dataset.fragments[:5]

[<geoarrow.rust.io.GeoParquetFile at 0x11b077330>,
 <geoarrow.rust.io.GeoParquetFile at 0x11b0776f0>,
 <geoarrow.rust.io.GeoParquetFile at 0x11b0778d0>,
 <geoarrow.rust.io.GeoParquetFile at 0x11b077930>,
 <geoarrow.rust.io.GeoParquetFile at 0x11b077990>]

Each fragment is a `GeoParquetFile` with a `row_groups_bounds` method to access the bounding box information from the GeoParquet covering column:

In [11]:
# One bounds array per file, in the same order as `dataset.fragments`; each
# array holds the bounding box of every row group in that file.
row_groups_bounds = [
    parquet_file.row_groups_bounds() for parquet_file in dataset.fragments
]

So for example, the first GeoParquet file has these bounds for each GeoParquet row group.

In [12]:
# Bounds array for the first file: one (xmin, ymin, xmax, ymax) struct per row
# group.
row_groups_bounds[0]

arro3.core.Array<Struct(xmin Float64, ymin Float64, xmax Float64, ymax Float64)>
[
  {xmin: -180.0, ymin: -84.29460906982422, xmax: -135.00332641601562, ymax: -13.186331748962402},
  {xmin: -172.5196075439453, ymin: -22.49992561340332, xmax: -148.06675720214844, ymax: -2.7685303688049316},
  {xmin: -155.93409729003906, ymin: -77.65557861328125, xmax: -46.41456985473633, ymax: -0.0031376995611935854},
  {xmin: -73.12500762939453, ymin: -53.43754577636719, xmax: -68.20381927490234, ymax: -50.6250114440918},
  {xmin: -75.63844299316406, ymin: -53.43662643432617, xmax: -67.66509246826172, ymax: -44.99996566772461},
  {xmin: -73.12500762939453, ymin: -49.33192825317383, xmax: -67.50086975097656, ymax: -44.999977111816406},
  {xmin: -71.71881103515625, ymin: -47.812496185302734, xmax: -67.49979400634766, ymax: -45.000179290771484},
  {xmin: -80.82723236083984, ymin: -77.87444305419922, xmax: -2.8229823112487793, ymax: -33.76310729980469},
  {xmin: -74.19165802001953, ymin: -42.92656707763672

Create one Lonboard layer per file from the row-group bounds of the first 10 files.

Calling `viz` separately on each file's bounds is a quick hack to give each file's boxes a different color.

In [13]:
# Only the first ten files are drawn, to keep the overview map light.
preview_bounds = row_groups_bounds[:10]

# `viz` is called once per file and only the first layer of each result is
# kept, so every file's row-group boxes end up in a separate layer.
layers = [viz(file_bounds).layers[0] for file_bounds in preview_bounds]

  warn(


In [14]:
# Make every layer nearly transparent so overlapping row-group boxes remain
# distinguishable when stacked on the map.
for layer in layers:
    layer.opacity = 0.05

In [15]:
# Overview map of the row-group bounding boxes on a dark basemap. The
# bounding-box selection made on this map is read back later through
# `m.selected_bounds`.
# NOTE(review): `_height` is an underscore-prefixed (private) argument —
# confirm whether the installed lonboard version exposes a public equivalent.
m = Map(layers, _height=600, basemap_style=CartoBasemap.DarkMatter)

In [16]:
# Render the map. Use its bounding-box selection tool to pick a small area
# before running the cells that follow.
m

Map(basemap_style=<CartoBasemap.DarkMatter: 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json'…

In [17]:
# Current bounding-box selection on map `m`; the assertion further down expects
# this not to be None.
# NOTE(review): judging by the saved output the tuple order looks like
# (xmin, ymin, xmax, ymax) — confirm against the lonboard documentation.
m.selected_bounds

(-70.71328781156221,
 -33.48881085205176,
 -70.59366078909468,
 -33.39647614556973)

Before continuing, make sure you have selected a bounding box area on the map above, so that `m.selected_bounds` is no longer `None`. Keep in mind that selecting too large an area will make the data download take a long time.

In [18]:
# Stop here with a readable hint if no area has been selected on the map yet,
# rather than letting the read below run without a bounding box.
selection_hint = "Use the map's bounding box selection to select a small download area"
assert m.selected_bounds is not None, selection_hint

In [19]:
# IPython-only help syntax (not valid plain Python): shows the signature of
# `dataset.read`, whose parameters are all keyword-only and include the `bbox`
# filter used in the next cell.
dataset.read?

[31mSignature:[39m
dataset.read(
    *,
    batch_size=[38;5;28;01mNone[39;00m,
    bbox=[38;5;28;01mNone[39;00m,
    parse_to_native=[38;5;28;01mTrue[39;00m,
    coord_type=[38;5;28;01mNone[39;00m,
)
[31mDocstring:[39m <no docstring>
[31mType:[39m      builtin_function_or_method

In [20]:
# Pass the selection drawn on map `m` as the bounding-box filter for the read.
# Larger selections take longer to download.
data = dataset.read(bbox=m.selected_bounds)

In [21]:
# Number of rows returned for the selected area.
data.num_rows

165684

In [22]:
# Render the downloaded buildings on their own map. A separate name is used on
# purpose: `m` must keep pointing at the overview map, because
# `m.selected_bounds` is what the `dataset.read(bbox=...)` cell and the
# selection assert read. Rebinding `m` here would make re-running those cells
# read the selection from this new map instead of the one the user drew on.
buildings_map = viz(data)

# NOTE(review): `_height` is an underscore-prefixed (private) attribute —
# confirm whether the installed lonboard version exposes a public equivalent.
buildings_map._height = 600
buildings_map

Map(basemap_style=<CartoBasemap.DarkMatter: 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json'…