Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 97 additions & 7 deletions src/utils/stac-geoparquet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
vectorFromArray,
} from "apache-arrow";
import * as stacWasm from "stac-wasm";
import { toaster } from "../components/ui/toaster";
import type { StacItemCollection } from "../types/stac";

const SUPPORTED_GEOMETRY_TYPES = ["point", "polygon", "linestring"] as const;
Expand Down Expand Up @@ -43,14 +44,11 @@ export async function fetchStacGeoparquetValue({
connection: AsyncDuckDBConnection;
hivePartitioning: boolean;
}): Promise<StacItemCollection> {
const result = await executeDuckdbQuery({
connection,
const { count, bbox } = await fetchStacGeoparquetSummary({
href,
connection,
hivePartitioning,
select:
"COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax",
});
const row = result.toArray().map((row) => row.toJSON())[0];
const datetimeExtent = await fetchStacGeoparquetDatetimeExtent({
href,
connection,
Expand All @@ -59,8 +57,8 @@ export async function fetchStacGeoparquetValue({
return {
type: "FeatureCollection",
id: href.split("/").pop(),
description: `A stac-geoparquet file with ${row.count} item${row.count === 1 ? "" : "s"}`,
bbox: [row.xmin, row.ymin, row.xmax, row.ymax],
description: `A stac-geoparquet file with ${count} item${count === 1 ? "" : "s"}`,
bbox,
features: [],
datetimeExtent,
assets: {
Expand All @@ -72,6 +70,98 @@ export async function fetchStacGeoparquetValue({
};
}

/**
 * Computes the item count and overall bounding box of a stac-geoparquet file.
 *
 * The strategy depends on the DuckDB type reported for the `bbox` column:
 * - `STRUCT...`: aggregate the `xmin`/`ymin`/`xmax`/`ymax` struct fields.
 * - `DOUBLE[]`: warn via the toaster, then aggregate the list elements
 *   (DuckDB list indexing is 1-based).
 * - any other type: warn via the toaster, then fall back to the geometry column.
 * - unknown type (`null`): fall back to the geometry column without a warning.
 *
 * @param href - Location of the parquet file, as passed to the DuckDB query helper.
 * @param connection - Open DuckDB connection used to run the queries.
 * @param hivePartitioning - Whether the file is read with hive partitioning enabled.
 * @returns The row count as a plain `number` and the bbox as `[xmin, ymin, xmax, ymax]`.
 * @throws Error when the geometry fallback yields no extent (for example, no rows).
 */
async function fetchStacGeoparquetSummary({
  href,
  connection,
  hivePartitioning,
}: {
  href: string;
  connection: AsyncDuckDBConnection;
  hivePartitioning: boolean;
}): Promise<{ count: number; bbox: [number, number, number, number] }> {
  // Runs a single aggregate query and returns its first row as a plain object.
  const fetchSummaryRow = async (select: string) => {
    const result = await executeDuckdbQuery({
      connection,
      href,
      hivePartitioning,
      select,
    });
    return result.toArray().map((row) => row.toJSON())[0];
  };

  const bboxType = await getBboxColumnType({
    href,
    connection,
    hivePartitioning,
  });

  // Pick the bbox-based aggregate when the bbox column has a shape we can read.
  let bboxSelect: string | null = null;
  if (bboxType?.startsWith("STRUCT")) {
    bboxSelect =
      "COUNT(*) as count, MIN(bbox.xmin) as xmin, MIN(bbox.ymin) as ymin, MAX(bbox.xmax) as xmax, MAX(bbox.ymax) as ymax";
  } else if (bboxType === "DOUBLE[]") {
    toaster.create({
      title: "Non-spec stac-geoparquet",
      description:
        "The 'bbox' column is a list of doubles, but the stac-geoparquet spec requires a struct of {xmin, ymin, xmax, ymax}. Reading bbox values from the list instead.",
      type: "warning",
    });
    bboxSelect =
      "COUNT(*) as count, MIN(bbox[1]) as xmin, MIN(bbox[2]) as ymin, MAX(bbox[3]) as xmax, MAX(bbox[4]) as ymax";
  }

  if (bboxSelect !== null) {
    const row = await fetchSummaryRow(bboxSelect);
    return {
      // COUNT(*) is a 64-bit integer and may arrive as a bigint; normalise it
      // so the declared `number` type holds and `count === 1` checks work.
      count: Number(row.count),
      bbox: [row.xmin, row.ymin, row.xmax, row.ymax],
    };
  }

  if (bboxType) {
    toaster.create({
      title: "Non-spec stac-geoparquet",
      description: `The 'bbox' column has type '${bboxType}', but the stac-geoparquet spec requires a struct of {xmin, ymin, xmax, ymax}. Computing the extent from the geometry column instead.`,
      type: "warning",
    });
  }

  const row = await fetchSummaryRow(
    "COUNT(*) as count, ST_Extent_Agg(geometry) as extent",
  );
  const extent = row.extent;
  if (extent == null) {
    // Fail with an actionable message instead of an opaque
    // "cannot read properties of null" when there is nothing to aggregate.
    throw new Error(
      `Could not compute a spatial extent for '${href}': the geometry column produced no extent.`,
    );
  }
  return {
    count: Number(row.count),
    bbox: [extent.min_x, extent.min_y, extent.max_x, extent.max_y],
  };
}

/**
 * Looks up the DuckDB type name of the `bbox` column in a parquet file.
 *
 * This is a best-effort probe: it reads `typeof(bbox)` from the first row and
 * reports `null` whenever the type cannot be determined.
 *
 * @param href - Location of the parquet file; embedded in the query as a SQL string literal.
 * @param connection - Open DuckDB connection used to run the probe query.
 * @param hivePartitioning - Value passed to `read_parquet`'s `hive_partitioning` option.
 * @returns The type name reported by DuckDB, or `null` if the query fails or returns no rows.
 */
async function getBboxColumnType({
  href,
  connection,
  hivePartitioning,
}: {
  href: string;
  connection: AsyncDuckDBConnection;
  hivePartitioning: boolean;
}): Promise<string | null> {
  // Double single quotes so an href containing `'` cannot terminate the SQL
  // string literal early (which would break the query or inject SQL).
  const escapedHref = href.replace(/'/g, "''");
  const query = `SELECT typeof(bbox) as bbox_type FROM read_parquet('${escapedHref}', hive_partitioning = ${hivePartitioning}) LIMIT 1`;
  try {
    const result = (await connection.query(query)) as unknown as Table;
    const row = result.toArray().map((row) => row.toJSON())[0];
    return (row?.bbox_type as string | undefined) ?? null;
  } catch {
    // Deliberately swallowed: any query failure is reported as "type unknown"
    // so the caller can choose a different way to compute the extent.
    return null;
  }
}

async function fetchStacGeoparquetDatetimeExtent({
href,
connection,
Expand Down
Loading