googleapis · tswast · Nov 5, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025
@@ -40,6 +40,7 @@
     st_intersection,
     st_isclosed,
     st_length,
+    st_regionstats,
     st_simplify,
 )
 from bigframes.bigquery._operations.json import (
@@ -81,6 +82,7 @@
     st_intersection,
     st_isclosed,
     st_length,
+    st_regionstats,
     st_simplify,
     # json ops
     json_extract,

@@ -14,11 +14,13 @@
 
 from __future__ import annotations
 
-from typing import Union
+import json
+from typing import Mapping, Optional, Union
 
 import shapely  # type: ignore
 
 from bigframes import operations as ops
+import bigframes.dataframe
 import bigframes.geopandas
 import bigframes.series
 
@@ -677,6 +679,65 @@ def st_length(
     return series
 
 
+def st_regionstats(
+    geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
+    raster_id: str,
+    band: Optional[str] = None,
+    include: Optional[str] = None,
+    options: Optional[Mapping[str, Union[str, int, float]]] = None,
+) -> bigframes.series.Series:
+    """Returns statistics summarizing the pixel values of the raster image
+    referenced by raster_id that intersect with geography.
+
+    The statistics include the count, minimum, maximum, sum, standard
+    deviation, mean, and area of the valid pixels of the raster band named
+    band_name. Google Earth Engine computes the results of the function call.
+
+    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
+
+    Args:
+        geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
+            A series of geography objects to intersect with the raster image.
+        raster_id (str):
+            A string that identifies a raster image. The following formats are
+            supported. A URI from an image table provided by Google Earth Engine
+            in BigQuery sharing (formerly Analytics Hub). A URI for a readable
+            GeoTIFF raster file. A Google Earth Engine asset path that
+            references public catalog data or project-owned assets with read
+            access.
+        band (Optional[str]):
+            A string in one of the following formats:
+            A single band within the raster image specified by raster_id. A
+            formula to compute a value from the available bands in the raster
+            image. The formula uses the Google Earth Engine image expression
+            syntax. Bands can be referenced by their name, band_name, in
+            expressions. If you don't specify a band, the first band of the
+            image is used.
+        include (Optional[str]):
+            An optional string formula that uses the Google Earth Engine image
+            expression syntax to compute a pixel weight. The formula should
+            return values from 0 to 1. Values outside this range are set to the
+            nearest limit, either 0 or 1. A value of 0 means that the pixel is
+            invalid and it's excluded from analysis. A positive value means that
+            a pixel is valid. Values between 0 and 1 represent proportional
+            weights for calculations, such as weighted means.
+        options (Mapping[str, Union[str, int, float]], optional):
+            A dictionary of options to pass to the function. See the BigQuery
+            documentation for a list of available options.
+
+    Returns:
+        bigframes.pandas.Series:
+            A STRUCT Series containing the computed statistics.
+    """
+    op = ops.GeoStRegionStatsOp(
+        raster_id=raster_id,
+        band=band,
+        include=include,
+        options=json.dumps(options) if options else None,
+    )
+    return geography._apply_unary_op(op)
+
+
 def st_simplify(
     geography: "bigframes.series.Series",
     tolerance_meters: float,

@@ -16,8 +16,10 @@
 
 from typing import cast
 
+from bigframes_vendored import ibis
 from bigframes_vendored.ibis.expr import types as ibis_types
 import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
+import bigframes_vendored.ibis.expr.operations.geospatial as ibis_geo
 import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
 
 from bigframes.core.compile.ibis_compiler import scalar_op_compiler
@@ -101,6 +103,35 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value):
     return st_isclosed(x)
 
 
+@register_unary_op(ops.GeoStRegionStatsOp, pass_op=True)
+def geo_st_regionstats_op_impl(
+    geography: ibis_types.Value,
+    op: ops.GeoStRegionStatsOp,
+):
+    if op.band:
+        band = ibis.literal(op.band, type=ibis_dtypes.string())
+    else:
+        band = None
+
+    if op.include:
+        include = ibis.literal(op.include, type=ibis_dtypes.string())
+    else:
+        include = None
+
+    if op.options:
+        options = ibis.literal(op.options, type=ibis_dtypes.json())
+    else:
+        options = None
+
+    return ibis_geo.GeoRegionStats(
+        arg=geography,  # type: ignore
+        raster_id=ibis.literal(op.raster_id, type=ibis_dtypes.string()),  # type: ignore
+        band=band,  # type: ignore
+        include=include,  # type: ignore
+        options=options,  # type: ignore
+    ).to_expr()
+
+
 @register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
 def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp):
     x = cast(ibis_types.GeoSpatialValue, x)

@@ -74,6 +74,32 @@ def _(expr: TypedExpr, op: ops.GeoStLengthOp) -> sge.Expression:
     return sge.func("ST_LENGTH", expr.expr)
 
 
+@register_unary_op(ops.GeoStRegionStatsOp, pass_op=True)
+def _(
+    geography: TypedExpr,
+    op: ops.GeoStRegionStatsOp,
+):
+    args = [geography.expr, sge.convert(op.raster_id)]
+    if op.band:
+        args.append(sge.Kwarg(this="band", expression=sge.convert(op.band)))
+    if op.include:
+        args.append(sge.Kwarg(this="include", expression=sge.convert(op.include)))
+    if op.options:
+        args.append(
+            sge.Kwarg(this="options", expression=sge.JSON(this=sge.convert(op.options)))
+        )
+    return sge.func("ST_REGIONSTATS", *args)
+
+
+@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
+def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression:
+    return sge.func(
+        "ST_SIMPLIFY",
+        expr.expr,
+        sge.convert(op.tolerance_meters),
+    )
+
+
 @register_unary_op(ops.geo_x_op)
 def _(expr: TypedExpr) -> sge.Expression:
     return sge.func("SAFE.ST_X", expr.expr)

@@ -121,6 +121,7 @@
     GeoStBufferOp,
     GeoStDistanceOp,
     GeoStLengthOp,
+    GeoStRegionStatsOp,
     GeoStSimplifyOp,
 )
 from bigframes.operations.json_ops import (
@@ -415,12 +416,13 @@
     "geo_st_geogpoint_op",
     "geo_st_intersection_op",
     "geo_st_isclosed_op",
-    "GeoStBufferOp",
-    "GeoStLengthOp",
-    "GeoStSimplifyOp",
     "geo_x_op",
     "geo_y_op",
+    "GeoStBufferOp",
     "GeoStDistanceOp",
+    "GeoStLengthOp",
+    "GeoStRegionStatsOp",
+    "GeoStSimplifyOp",
     # AI ops
     "AIClassify",
     "AIGenerate",

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import dataclasses
+from typing import Optional
 
 from bigframes import dtypes
 from bigframes.operations import base_ops
@@ -135,6 +136,29 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
         return dtypes.FLOAT_DTYPE
 
 
+@dataclasses.dataclass(frozen=True)
+class GeoStRegionStatsOp(base_ops.UnaryOp):
+    """See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats"""
+
+    name = "geo_st_regionstats"
+    raster_id: str
+    band: Optional[str]
+    include: Optional[str]
+    options: Optional[str]
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        return dtypes.struct_type(
+            [
+                ("min", dtypes.FLOAT_DTYPE),
+                ("max", dtypes.FLOAT_DTYPE),
+                ("sum", dtypes.FLOAT_DTYPE),
+                ("count", dtypes.INT_DTYPE),
+                ("mean", dtypes.FLOAT_DTYPE),
+                ("area", dtypes.FLOAT_DTYPE),
+            ]
+        )
+
+
 @dataclasses.dataclass(frozen=True)
 class GeoStSimplifyOp(base_ops.UnaryOp):
     name = "st_simplify"

@@ -0,0 +1,80 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Code sample for https://docs.cloud.google.com/bigquery/docs/raster-data#analytics-hub-source"""
+
+
+def test_st_regionstats() -> None:
+    project_id = "bigframes-dev"
+
+    # [START bigquery_dataframes_st_regionstats]
+    import datetime
+    from typing import cast
+
+    import bigframes.bigquery as bbq
+    import bigframes.pandas as bpd
+
+    # TODO: Set the project_id to your Google Cloud project ID.
+    # project_id = "your-project-id"
+    bpd.options.bigquery.project = project_id
+
+    # TODO: Set the dataset_id to the ID of the dataset that contains the
+    # `climate` table. This is likely a linked dataset to Earth Engine.
+    # See: https://cloud.google.com/bigquery/docs/link-earth-engine
+    linked_dataset = "era5_land_daily_aggregated"
+
+    # For the best efficiency, use partial ordering mode.
+    bpd.options.bigquery.ordering_mode = "partial"
+
+    # Load the table of country boundaries.
+    countries = bpd.read_gbq("bigquery-public-data.overture_maps.division_area")
+
+    # Filter to just the countries.
+    countries = countries[countries["subtype"] == "country"].copy()
+    countries["name"] = countries["names"].struct.field("primary")
+    countries["simplified_geometry"] = bbq.st_simplify(
+        countries["geometry"],
+        tolerance_meters=10_000,
+    )
+
+    # Get the reference to the temperature data from a linked dataset.
+    # Note: This sample assumes you have a linked dataset to Earth Engine.
+    image_href = (
+        bpd.read_gbq(f"{project_id}.{linked_dataset}.climate")
+        .set_index("start_datetime")
+        .loc[[datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)], :]
+    )
+    raster_id = image_href["assets"].struct.field("image").struct.field("href")
+    raster_id = raster_id.item()
+    stats = bbq.st_regionstats(
+        countries["simplified_geometry"],
+        raster_id=cast(str, raster_id),
+        band="temperature_2m",
+    )
+
+    # Extract the mean and convert from Kelvin to Celsius.
+    countries["mean_temperature"] = stats.struct.field("mean") - 273.15
+
+    # Sort by the mean temperature to find the warmest countries.
+    result = countries[["name", "mean_temperature"]].sort_values(
+        "mean_temperature", ascending=False
+    )
+    print(result.head(10))
+    # [END bigquery_dataframes_st_regionstats]
+
+    assert len(result) > 0
+
+
+if __name__ == "__main__":
+    test_st_regionstats()
@@ -54,7 +54,8 @@
     "pydata-google-auth >=1.8.2",
     "requests >=2.27.1",
     "shapely >=1.8.5",
-    "sqlglot >=23.6.3",
+    # 25.20.0 introduces this fix https://github.com/TobikoData/sqlmesh/issues/3095 for rtrim/ltrim.
+    "sqlglot >=25.20.0",
     "tabulate >=0.9",
     "ipywidgets >=7.7.1",
     "humanize >=4.6.0",

@@ -21,7 +21,7 @@ pydata-google-auth==1.8.2
 requests==2.27.1
 scikit-learn==1.2.2
 shapely==1.8.5
-sqlglot==23.6.3
+sqlglot==25.20.0
 tabulate==0.9
 ipywidgets==7.7.1
 humanize==4.6.0

@@ -0,0 +1,36 @@
+WITH `bfcte_0` AS (
+  SELECT
+    *
+  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64>>[STRUCT('POINT(1 1)', 0)])
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ST_REGIONSTATS(
+      `bfcol_0`,
+      'ee://some/raster/uri',
+      band => 'band1',
+      include => 'some equation',
+      options => JSON '{"scale": 100}'
+    ) AS `bfcol_2`
+  FROM `bfcte_0`
+), `bfcte_2` AS (
+  SELECT
+    *,
+    `bfcol_2`.`min` AS `bfcol_5`,
+    `bfcol_2`.`max` AS `bfcol_6`,
+    `bfcol_2`.`sum` AS `bfcol_7`,
+    `bfcol_2`.`count` AS `bfcol_8`,
+    `bfcol_2`.`mean` AS `bfcol_9`,
+    `bfcol_2`.`area` AS `bfcol_10`
+  FROM `bfcte_1`
+)
+SELECT
+  `bfcol_5` AS `min`,
+  `bfcol_6` AS `max`,
+  `bfcol_7` AS `sum`,
+  `bfcol_8` AS `count`,
+  `bfcol_9` AS `mean`,
+  `bfcol_10` AS `area`
+FROM `bfcte_2`
+ORDER BY
+  `bfcol_1` ASC NULLS LAST