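# Overture buildings example

This notebook reads the Parquet row-group statistics of a buildings dataset stored on S3, draws one bounding box per row group on a lonboard map, and then loads and visualizes the single row group selected on that map.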


In [33]:
from lonboard import viz, SolidPolygonLayer, Map
import pyarrow.parquet as pq
import shapely
import numpy as np
import s3fs
import geopandas as gpd

In [2]:
# Create an S3 filesystem handle with default settings and point a
# ParquetDataset at the hard-coded bucket prefix below.
# NOTE(review): the bucket name suggests an internal location, so readers
# presumably need their own credentials/access for this cell to run — confirm.
fs = s3fs.S3FileSystem()
dataset = pq.ParquetDataset(
    "omf-internal-usw2/testing/jwasserman-test-spatial-partition/test-buildings13/",
    filesystem=fs,
)

In [3]:
# Tally the row groups across every fragment in the dataset.
# Expect this to be slow (~two minutes): the .metadata property of each
# fragment is lazily loaded.
total_num_chunks = sum(fragment.num_row_groups for fragment in dataset.fragments)

In [30]:
# Per-row-group bookkeeping: the file each row group lives in and its index
# within that file. Paths are collected in a plain list; the indexes are
# preallocated with one slot per row group.
data_columns = {
    "path": [],
    "row_group_idx": np.zeros(total_num_chunks, dtype=np.uint16),
}

# One preallocated float64 array per bounding-box bound, each with one slot
# per row group.
geom_columns = {
    bound: np.zeros(total_num_chunks, dtype=np.float64)
    for bound in ("minx", "miny", "maxx", "maxy")
}

In [31]:
# Column index, within a row group's metadata, of each bounding-box bound.
# For now hard-coded, (minx, miny, maxx, maxy)
bbox_column_indexes = [("minx", 2), ("miny", 4), ("maxx", 3), ("maxy", 5)]

# Start from an empty path list so that re-running this cell does not append a
# second copy of every path (the numpy columns below are overwritten in place).
data_columns["path"].clear()

# Fill one slot per row group: which file it lives in, its index within that
# file, and the bounding box taken from the column-chunk statistics.
row_group_counter = 0
for fragment in dataset.fragments:
    # Look the file metadata up once per fragment rather than once per row group.
    file_meta = fragment.metadata
    for file_row_group_idx in range(fragment.num_row_groups):
        data_columns["path"].append(fragment.path)
        data_columns["row_group_idx"][row_group_counter] = file_row_group_idx

        row_group_meta = file_meta.row_group(file_row_group_idx)
        # Drive the lookups from bbox_column_indexes so the table above is the
        # single place where the column positions are defined.
        for bound_name, column_idx in bbox_column_indexes:
            stats = row_group_meta.column(column_idx).statistics
            # Lower bounds (minx/miny) use the chunk minimum; upper bounds
            # (maxx/maxy) use the chunk maximum.
            geom_columns[bound_name][row_group_counter] = (
                stats.min if bound_name.startswith("min") else stats.max
            )

        row_group_counter += 1

In [56]:
from lonboard.basemap import CartoBasemap

In [35]:
# Turn the four per-row-group bound arrays into rectangle geometries; the key
# order matches shapely.box's (xmin, ymin, xmax, ymax) positional arguments.
box_geoms = shapely.box(
    *(geom_columns[bound] for bound in ("minx", "miny", "maxx", "maxy"))
)

# Attach the file path and in-file row-group index to each rectangle.
gdf = gpd.GeoDataFrame(
    {column: data_columns[column] for column in ("path", "row_group_idx")},
    geometry=box_geoms,
    crs="epsg:4326",
)

In [36]:
# Build a polygon layer from the GeoDataFrame of row-group boxes.
# NOTE(review): the fill color is presumably RGBA with a low alpha so that
# overlapping boxes stay visible — confirm against the lonboard docs.
layer = SolidPolygonLayer.from_geopandas(
    gdf, get_fill_color=[0, 50, 10, 40], auto_highlight=True
)

In [57]:
# Wrap the layer in a Map that uses the Carto Voyager basemap style.
m = Map(layer, basemap_style=CartoBasemap.Voyager)

In [58]:
# Display the map widget as this cell's output.
m

Map(basemap_style=<CartoBasemap.Voyager: 'https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json'>, laye…

In [46]:
# Set the layer's fill color and auto-highlight on the existing layer object.
# These are the same values that were passed when the layer was constructed.
layer.get_fill_color = [0, 50, 10, 40]
layer.auto_highlight = True

In [47]:
# Inspect the layer's current selected_index.
# NOTE(review): presumably the positional index of the box last clicked on the
# map — confirm against the lonboard docs.
layer.selected_index

4277

In [49]:
# Read the widget state once so both lookups below refer to the same row even
# if the selection changes while this cell is running.
selected_index = layer.selected_index
if selected_index is None:
    # NOTE(review): per the lonboard docs the index is None until a feature has
    # been clicked; fail with an actionable message instead of an opaque
    # indexing error (e.g. after Restart & Run All).
    raise ValueError(
        "No row group is selected: click a bounding box on the map, "
        "then re-run this cell."
    )

# Look up which file and which row group within it the selected box represents.
selected_path = gdf["path"].iloc[selected_index]
selected_row_group_idx = gdf["row_group_idx"].iloc[selected_index]

In [51]:
# Open the selected file through the same S3 filesystem handle and read only
# the chosen row group rather than the whole file.
selected_pq_file = pq.ParquetFile(selected_path, filesystem=fs)
small_data = selected_pq_file.read_row_group(selected_row_group_idx)

In [55]:
# Render the contents of the selected row group with lonboard's viz helper.
viz(small_data)

Map(basemap_style=<CartoBasemap.DarkMatter: 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json'…

In [27]:
# Inspect the file-level Parquet metadata of the first fragment.
dataset.fragments[0].metadata

<pyarrow._parquet.FileMetaData object at 0x2a6250720>
  created_by: parquet-mr version 1.12.3-databricks-0002 (build 2484a95dbe16a0023e3eb29c201f99ff9ea771ee)
  num_columns: 36
  num_rows: 9555507
  num_row_groups: 82
  format_version: 1.0
  serialized_size: 443501

In [10]:
# Show the dataset schema.
# NOTE(review): the bbox struct's child order here (minx, maxx, miny, maxy,
# after id and geometry) is presumably what the hard-coded column indexes 2-5
# used when reading row-group statistics correspond to — confirm.
dataset.schema

id: string
geometry: binary
bbox: struct<minx: double, maxx: double, miny: double, maxy: double>
  child 0, minx: double
  child 1, maxx: double
  child 2, miny: double
  child 3, maxy: double
names: struct<common: list<element: struct<value: string, language: string>>, official: list<element: struct<value: string, language: string>>, alternate: list<element: struct<value: string, language: string>>, short: list<element: struct<value: string, language: string>>>
  child 0, common: list<element: struct<value: string, language: string>>
      child 0, element: struct<value: string, language: string>
          child 0, value: string
          child 1, language: string
  child 1, official: list<element: struct<value: string, language: string>>
      child 0, element: struct<value: string, language: string>
          child 0, value: string
          child 1, language: string
  child 2, alternate: list<element: struct<value: string, language: string>>
      child 0, element: struct<value: st