diff --git a/src/utils/stac-geoparquet.ts b/src/utils/stac-geoparquet.ts
index 97d3b35..2ce8e92 100644
--- a/src/utils/stac-geoparquet.ts
+++ b/src/utils/stac-geoparquet.ts
@@ -9,6 +9,7 @@ import {
   vectorFromArray,
 } from "apache-arrow";
 import * as stacWasm from "stac-wasm";
+import { toaster } from "../components/ui/toaster";
 import type { StacItemCollection } from "../types/stac";
 
 const SUPPORTED_GEOMETRY_TYPES = ["point", "polygon", "linestring"] as const;
@@ -43,14 +44,11 @@ export async function fetchStacGeoparquetValue({
   connection: AsyncDuckDBConnection;
   hivePartitioning: boolean;
 }): Promise {
-  const result = await executeDuckdbQuery({
-    connection,
+  const { count, bbox } = await fetchStacGeoparquetSummary({
     href,
+    connection,
     hivePartitioning,
-    select:
-      "COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax",
   });
-  const row = result.toArray().map((row) => row.toJSON())[0];
   const datetimeExtent = await fetchStacGeoparquetDatetimeExtent({
     href,
     connection,
@@ -59,8 +57,8 @@ export async function fetchStacGeoparquetValue({
   return {
     type: "FeatureCollection",
     id: href.split("/").pop(),
-    description: `A stac-geoparquet file with ${row.count} item${row.count === 1 ? "" : "s"}`,
-    bbox: [row.xmin, row.ymin, row.xmax, row.ymax],
+    description: `A stac-geoparquet file with ${count} item${count === 1 ? "" : "s"}`,
+    bbox,
     features: [],
     datetimeExtent,
     assets: {
@@ -72,6 +70,125 @@ export async function fetchStacGeoparquetValue({
   };
 }
 
+/**
+ * Computes the item count and overall bounding box of a stac-geoparquet file.
+ *
+ * The aggregate query depends on the DuckDB type reported for the `bbox`
+ * column:
+ * - a type starting with `STRUCT`: reads the `xmin`/`ymin`/`xmax`/`ymax`
+ *   fields, the layout the warning text in this function calls spec-compliant;
+ * - exactly `DOUBLE[]`: warns, then reads list elements 1 through 4;
+ * - any other type: warns, then aggregates the extent of `geometry`;
+ * - no type available: aggregates the extent of `geometry` without warning.
+ *
+ * @returns `count` as a plain number and `bbox` as `[xmin, ymin, xmax, ymax]`.
+ * @throws Error when the geometry fallback produces no extent.
+ */
+async function fetchStacGeoparquetSummary({
+  href,
+  connection,
+  hivePartitioning,
+}: {
+  href: string;
+  connection: AsyncDuckDBConnection;
+  hivePartitioning: boolean;
+}): Promise<{ count: number; bbox: [number, number, number, number] }> {
+  const bboxType = await getBboxColumnType({
+    href,
+    connection,
+    hivePartitioning,
+  });
+
+  // Runs one aggregate query and returns its first row as a plain object.
+  const fetchRow = async (select: string) => {
+    const result = await executeDuckdbQuery({
+      connection,
+      href,
+      hivePartitioning,
+      select,
+    });
+    return result.toArray().map((row) => row.toJSON())[0];
+  };
+
+  let bboxSelect: string | undefined;
+  if (bboxType?.startsWith("STRUCT")) {
+    bboxSelect =
+      "COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax";
+  } else if (bboxType === "DOUBLE[]") {
+    toaster.create({
+      title: "Non-spec stac-geoparquet",
+      description:
+        "The 'bbox' column is a list of doubles, but the stac-geoparquet spec requires a struct of {xmin, ymin, xmax, ymax}. Reading bbox values from the list instead.",
+      type: "warning",
+    });
+    // NOTE(review): assumes a 4-element [xmin, ymin, xmax, ymax] list; a
+    // 6-element (3D) bbox would put zmin at index 3. TODO confirm.
+    bboxSelect =
+      "COUNT(*) as count, MIN(bbox[1]) as xmin, MIN(bbox[2]) as ymin, MAX(bbox[3]) as xmax, MAX(bbox[4]) as ymax";
+  }
+
+  if (bboxSelect !== undefined) {
+    const row = await fetchRow(bboxSelect);
+    return {
+      // COUNT(*) may arrive as a bigint; normalize so `count === 1` works.
+      count: Number(row.count),
+      bbox: [row.xmin, row.ymin, row.xmax, row.ymax],
+    };
+  }
+
+  if (bboxType) {
+    toaster.create({
+      title: "Non-spec stac-geoparquet",
+      description: `The 'bbox' column has type '${bboxType}', but the stac-geoparquet spec requires a struct of {xmin, ymin, xmax, ymax}. Computing the extent from the geometry column instead.`,
+      type: "warning",
+    });
+  }
+
+  const row = await fetchRow(
+    "COUNT(*) as count, ST_Extent_Agg(geometry) as extent",
+  );
+  const extent = row.extent;
+  if (extent == null) {
+    // Fail with context instead of a bare TypeError on `extent.min_x`.
+    throw new Error(
+      `Could not compute a bounding box for '${href}': the geometry extent is empty.`,
+    );
+  }
+  return {
+    count: Number(row.count),
+    bbox: [extent.min_x, extent.min_y, extent.max_x, extent.max_y],
+  };
+}
+
+/**
+ * Looks up the DuckDB type name of the `bbox` column via `typeof(bbox)`.
+ *
+ * @returns The type reported for the first row, or `null` when the query fails
+ *   or yields no row.
+ */
+async function getBboxColumnType({
+  href,
+  connection,
+  hivePartitioning,
+}: {
+  href: string;
+  connection: AsyncDuckDBConnection;
+  hivePartitioning: boolean;
+}): Promise<string | null> {
+  // Double single quotes so the href cannot end the SQL string literal early.
+  const quotedHref = href.replace(/'/g, "''");
+  const query = `SELECT typeof(bbox) as bbox_type FROM read_parquet('${quotedHref}', hive_partitioning = ${hivePartitioning}) LIMIT 1`;
+  try {
+    const result = (await connection.query(query)) as unknown as Table;
+    const row = result.toArray().map((row) => row.toJSON())[0];
+    return (row?.bbox_type as string | undefined) ?? null;
+  } catch {
+    // Best effort: a failed probe is reported as "no type" so the caller can
+    // fall back to the geometry column.
+    return null;
+  }
+}
+
 async function fetchStacGeoparquetDatetimeExtent({
   href,
   connection,