From 7030697d3905a0185b92e3b4619d2142ed1bf75f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 29 Oct 2025 21:14:35 +0000 Subject: [PATCH] refactor: update geo "spec" and split geo ops in ibis compiler --- .../core/compile/ibis_compiler/__init__.py | 1 + .../ibis_compiler/operations/geo_ops.py | 159 ++++++++++++++++++ .../ibis_compiler/scalar_op_registry.py | 134 --------------- specs/2025-08-04-geoseries-scalars.md | 13 +- 4 files changed, 168 insertions(+), 139 deletions(-) create mode 100644 bigframes/core/compile/ibis_compiler/operations/geo_ops.py diff --git a/bigframes/core/compile/ibis_compiler/__init__.py b/bigframes/core/compile/ibis_compiler/__init__.py index aef0ed9267..6b9d284c53 100644 --- a/bigframes/core/compile/ibis_compiler/__init__.py +++ b/bigframes/core/compile/ibis_compiler/__init__.py @@ -21,4 +21,5 @@ from __future__ import annotations import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401 +import bigframes.core.compile.ibis_compiler.operations.geo_ops # noqa: F401 import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401 diff --git a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py new file mode 100644 index 0000000000..f9155fed5a --- /dev/null +++ b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py @@ -0,0 +1,159 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from typing import cast + +from bigframes_vendored.ibis.expr import types as ibis_types +import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes +import bigframes_vendored.ibis.expr.operations.udf as ibis_udf + +from bigframes.core.compile.ibis_compiler import scalar_op_compiler +from bigframes.operations import geo_ops as ops + +register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op +register_binary_op = scalar_op_compiler.scalar_op_compiler.register_binary_op + + +# Geo Ops +@register_unary_op(ops.geo_area_op) +def geo_area_op_impl(x: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).area() + + +@register_unary_op(ops.geo_st_astext_op) +def geo_st_astext_op_impl(x: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).as_text() + + +@register_unary_op(ops.geo_st_boundary_op, pass_op=False) +def geo_st_boundary_op_impl(x: ibis_types.Value): + return st_boundary(x) + + +@register_unary_op(ops.GeoStBufferOp, pass_op=True) +def geo_st_buffer_op_impl(x: ibis_types.Value, op: ops.GeoStBufferOp): + return st_buffer( + x, + op.buffer_radius, + op.num_seg_quarter_circle, + op.use_spheroid, + ) + + +@register_unary_op(ops.geo_st_centroid_op, pass_op=False) +def geo_st_centroid_op_impl(x: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).centroid() + + +@register_unary_op(ops.geo_st_convexhull_op, pass_op=False) +def geo_st_convexhull_op_impl(x: ibis_types.Value): + return st_convexhull(x) + + +@register_binary_op(ops.geo_st_difference_op, pass_op=False) +def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).difference( + cast(ibis_types.GeoSpatialValue, y) + ) + + +@register_binary_op(ops.GeoStDistanceOp, pass_op=True) +def geo_st_distance_op_impl( + x: ibis_types.Value, y: ibis_types.Value, op: ops.GeoStDistanceOp +): + return st_distance(x, y, op.use_spheroid) + + 
+@register_unary_op(ops.geo_st_geogfromtext_op) +def geo_st_geogfromtext_op_impl(x: ibis_types.Value): + # Ibis doesn't seem to provide a dedicated method to cast from string to geography, + # so we use a BigQuery scalar function, st_geogfromtext(), directly. + return st_geogfromtext(x) + + +@register_binary_op(ops.geo_st_geogpoint_op, pass_op=False) +def geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return cast(ibis_types.NumericValue, x).point(cast(ibis_types.NumericValue, y)) + + +@register_binary_op(ops.geo_st_intersection_op, pass_op=False) +def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).intersection( + cast(ibis_types.GeoSpatialValue, y) + ) + + +@register_unary_op(ops.geo_st_isclosed_op, pass_op=False) +def geo_st_isclosed_op_impl(x: ibis_types.Value): + return st_isclosed(x) + + +@register_unary_op(ops.geo_x_op) +def geo_x_op_impl(x: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).x() + + +@register_unary_op(ops.GeoStLengthOp, pass_op=True) +def geo_length_op_impl(x: ibis_types.Value, op: ops.GeoStLengthOp): + # Call the st_length builtin UDF declared later in this file. + return st_length(x, op.use_spheroid) + + +@register_unary_op(ops.geo_y_op) +def geo_y_op_impl(x: ibis_types.Value): + return cast(ibis_types.GeoSpatialValue, x).y() + + +@ibis_udf.scalar.builtin +def st_convexhull(x: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ignore + """ST_CONVEXHULL""" + ... 
+ + +@ibis_udf.scalar.builtin +def st_geogfromtext(a: str) -> ibis_dtypes.geography: # type: ignore + """Convert string to geography.""" + + +@ibis_udf.scalar.builtin +def st_boundary(a: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ignore + """Find the boundary of a geography.""" + + +@ibis_udf.scalar.builtin +def st_buffer( + geography: ibis_dtypes.geography, # type: ignore + buffer_radius: ibis_dtypes.Float64, + num_seg_quarter_circle: ibis_dtypes.Float64, + use_spheroid: ibis_dtypes.Boolean, +) -> ibis_dtypes.geography: # type: ignore + ... + + +@ibis_udf.scalar.builtin +def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore + """Find the shortest distance between two geographies.""" + + +@ibis_udf.scalar.builtin +def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore + """ST_LENGTH BQ builtin. This body is never executed.""" + pass + + +@ibis_udf.scalar.builtin +def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignore + """Checks if a geography is closed.""" diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index e983fc7e21..0876722990 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -837,98 +837,6 @@ def normalize_op_impl(x: ibis_types.Value): return result.cast(result_type) -# Geo Ops -@scalar_op_compiler.register_unary_op(ops.geo_area_op) -def geo_area_op_impl(x: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).area() - - -@scalar_op_compiler.register_unary_op(ops.geo_st_astext_op) -def geo_st_astext_op_impl(x: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).as_text() - - -@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False) -def geo_st_boundary_op_impl(x: ibis_types.Value): - return 
st_boundary(x) - - -@scalar_op_compiler.register_unary_op(ops.GeoStBufferOp, pass_op=True) -def geo_st_buffer_op_impl(x: ibis_types.Value, op: ops.GeoStBufferOp): - return st_buffer( - x, - op.buffer_radius, - op.num_seg_quarter_circle, - op.use_spheroid, - ) - - -@scalar_op_compiler.register_unary_op(ops.geo_st_centroid_op, pass_op=False) -def geo_st_centroid_op_impl(x: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).centroid() - - -@scalar_op_compiler.register_unary_op(ops.geo_st_convexhull_op, pass_op=False) -def geo_st_convexhull_op_impl(x: ibis_types.Value): - return st_convexhull(x) - - -@scalar_op_compiler.register_binary_op(ops.geo_st_difference_op, pass_op=False) -def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).difference( - typing.cast(ibis_types.GeoSpatialValue, y) - ) - - -@scalar_op_compiler.register_binary_op(ops.GeoStDistanceOp, pass_op=True) -def geo_st_distance_op_impl( - x: ibis_types.Value, y: ibis_types.Value, op: ops.GeoStDistanceOp -): - return st_distance(x, y, op.use_spheroid) - - -@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op) -def geo_st_geogfromtext_op_impl(x: ibis_types.Value): - # Ibis doesn't seem to provide a dedicated method to cast from string to geography, - # so we use a BigQuery scalar function, st_geogfromtext(), directly. 
- return st_geogfromtext(x) - - -@scalar_op_compiler.register_binary_op(ops.geo_st_geogpoint_op, pass_op=False) -def geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value): - return typing.cast(ibis_types.NumericValue, x).point( - typing.cast(ibis_types.NumericValue, y) - ) - - -@scalar_op_compiler.register_binary_op(ops.geo_st_intersection_op, pass_op=False) -def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).intersection( - typing.cast(ibis_types.GeoSpatialValue, y) - ) - - -@scalar_op_compiler.register_unary_op(ops.geo_st_isclosed_op, pass_op=False) -def geo_st_isclosed_op_impl(x: ibis_types.Value): - return st_isclosed(x) - - -@scalar_op_compiler.register_unary_op(ops.geo_x_op) -def geo_x_op_impl(x: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).x() - - -@scalar_op_compiler.register_unary_op(ops.GeoStLengthOp, pass_op=True) -def geo_length_op_impl(x: ibis_types.Value, op: ops.GeoStLengthOp): - # Call the st_length UDF defined in this file (or imported) - return st_length(x, op.use_spheroid) - - -@scalar_op_compiler.register_unary_op(ops.geo_y_op) -def geo_y_op_impl(x: ibis_types.Value): - return typing.cast(ibis_types.GeoSpatialValue, x).y() - - # Parameterized ops @scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True) def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp): @@ -2092,17 +2000,6 @@ def _ibis_num(number: float): return typing.cast(ibis_types.NumericValue, ibis_types.literal(number)) -@ibis_udf.scalar.builtin -def st_convexhull(x: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ignore - """ST_CONVEXHULL""" - ... 
- - -@ibis_udf.scalar.builtin -def st_geogfromtext(a: str) -> ibis_dtypes.geography: # type: ignore - """Convert string to geography.""" - - @ibis_udf.scalar.builtin def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore """Convert string to timestamp.""" @@ -2113,32 +2010,6 @@ def unix_millis(a: ibis_dtypes.timestamp) -> int: # type: ignore """Convert a timestamp to milliseconds""" -@ibis_udf.scalar.builtin -def st_boundary(a: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ignore - """Find the boundary of a geography.""" - - -@ibis_udf.scalar.builtin -def st_buffer( - geography: ibis_dtypes.geography, # type: ignore - buffer_radius: ibis_dtypes.Float64, - num_seg_quarter_circle: ibis_dtypes.Float64, - use_spheroid: ibis_dtypes.Boolean, -) -> ibis_dtypes.geography: # type: ignore - ... - - -@ibis_udf.scalar.builtin -def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore - """Convert string to geography.""" - - -@ibis_udf.scalar.builtin -def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore - """ST_LENGTH BQ builtin. 
This body is never executed.""" - pass - - @ibis_udf.scalar.builtin def unix_micros(a: ibis_dtypes.timestamp) -> int: # type: ignore """Convert a timestamp to microseconds""" @@ -2272,11 +2143,6 @@ def str_lstrip_op( # type: ignore[empty-body] """Remove leading and trailing characters.""" -@ibis_udf.scalar.builtin -def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignore - """Checks if a geography is closed.""" - - @ibis_udf.scalar.builtin(name="rtrim") def str_rstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/specs/2025-08-04-geoseries-scalars.md b/specs/2025-08-04-geoseries-scalars.md index 38dc77c4cf..66ed77d0dd 100644 --- a/specs/2025-08-04-geoseries-scalars.md +++ b/specs/2025-08-04-geoseries-scalars.md @@ -267,11 +267,14 @@ Raster functions: Functions for analyzing geospatial rasters using geographies. - [ ] **Export the new operation:** - [ ] In `bigframes/operations/__init__.py`, import your new operation dataclass and add it to the `__all__` list. - [ ] **Implement the compilation logic:** - - [ ] In `bigframes/core/compile/scalar_op_compiler.py`: - - [ ] If the BigQuery function has a direct equivalent in Ibis, you can often reuse an existing Ibis method. - - [ ] If not, define a new Ibis UDF using `@ibis_udf.scalar.builtin` to map to the specific BigQuery function signature. - - [ ] Create a new compiler implementation function (e.g., `geo_length_op_impl`). - - [ ] Register this function to your operation dataclass using `@scalar_op_compiler.register_unary_op` or `@scalar_op_compiler.register_binary_op`. + - [ ] In `bigframes/core/compile/ibis_compiler/operations/geo_ops.py`: + - [ ] If the BigQuery function has a direct equivalent in Ibis, you can often reuse an existing Ibis method. + - [ ] If not, define a new Ibis UDF using `@ibis_udf.scalar.builtin` to map to the specific BigQuery function signature. 
+ - [ ] Create a new compiler implementation function (e.g., `geo_length_op_impl`). + - [ ] Register this function to your operation dataclass using `@register_unary_op` or `@register_binary_op`. + - [ ] In `bigframes/core/compile/sqlglot/expressions/geo_ops.py`: + - [ ] Create a new compiler implementation function that generates the appropriate `sqlglot.exp` expression. + - [ ] Register this function to your operation dataclass using `@register_unary_op` or `@register_binary_op`. - [ ] **Implement the user-facing function or property:** - [ ] For a `bigframes.bigquery` function: - [ ] In `bigframes/bigquery/_operations/geo.py`, create the user-facing function (e.g., `st_length`).