From a95a36a2040d2511510f9bee0eb42309c3a81a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 13 Apr 2026 21:41:53 +0000 Subject: [PATCH 1/3] feat: add support for `hparam_range` and `hparam_candidates` to `bigframes.bigquery.create_model` --- .../bigframes/bigframes/bigquery/__init__.py | 10 ++- .../bigquery/_operations/mathematical.py | 68 +++++++++++++++++++ .../bigframes/bigquery/_operations/ml.py | 2 +- .../create_model_expression_option.sql | 3 - .../create_model_hparam_tuning.sql | 3 + .../bigframes/tests/unit/core/sql/test_ml.py | 18 ++--- packages/bigframes/tests/unit/test_col.py | 2 +- 7 files changed, 88 insertions(+), 18 deletions(-) delete mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql create mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql diff --git a/packages/bigframes/bigframes/bigquery/__init__.py b/packages/bigframes/bigframes/bigquery/__init__.py index f08388704585..a31d7dd83f93 100644 --- a/packages/bigframes/bigframes/bigquery/__init__.py +++ b/packages/bigframes/bigframes/bigquery/__init__.py @@ -87,7 +87,11 @@ to_json, to_json_string, ) -from bigframes.bigquery._operations.mathematical import rand +from bigframes.bigquery._operations.mathematical import ( + hparam_candidates, + hparam_range, + rand, +) from bigframes.bigquery._operations.search import create_vector_index, vector_search from bigframes.bigquery._operations.sql import sql_scalar from bigframes.bigquery._operations.struct import struct @@ -130,6 +134,8 @@ to_json, to_json_string, # mathematical ops + hparam_candidates, + hparam_range, rand, # search ops create_vector_index, @@ -187,6 +193,8 @@ "to_json", "to_json_string", # mathematical ops + "hparam_candidates", + "hparam_range", "rand", # search ops "create_vector_index", diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 2e8351904775..bed9c307c3df 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -51,3 +51,71 @@ def rand() -> bigframes.core.col.Expression: is_deterministic=False, ) return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) + + +def hparam_range(min: float | int, max: float | int) -> bigframes.core.col.Expression: + """ + Defines the minimum and maximum bounds of the search space of continuous + values for a hyperparameter. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> # Specify a range of values for a hyperparameter. + >>> learn_rate = bbq.hparam_range(0.0001, 1.0) + + Args: + min (float or int): + The minimum bound of the search space. + max (float or int): + The maximum bound of the search space. + + Returns: + bigframes.pandas.api.typing.Expression: + An expression that can be used in model options. + """ + min_expr = bigframes.core.expression.const(min) + max_expr = bigframes.core.expression.const(max) + + op = ops.SqlScalarOp( + _output_type=dtypes.FLOAT_DTYPE, + sql_template="HPARAM_RANGE({0}, {1})", + is_deterministic=True, + ) + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(op, (min_expr, max_expr)) + ) + + +def hparam_candidates( + candidates: list[float | int | str], +) -> bigframes.core.col.Expression: + """ + Specifies the set of discrete values for the hyperparameter. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> # Specify a set of values for a hyperparameter. + >>> optimizer = bbq.hparam_candidates(['ADAGRAD', 'SGD', 'FTRL']) + + Args: + candidates (list): + The set of discrete values for the hyperparameter. + + Returns: + bigframes.pandas.api.typing.Expression: + An expression that can be used in model options. + """ + candidates_expr = bigframes.core.expression.const(candidates) + + op = ops.SqlScalarOp( + _output_type=dtypes.STRING_DTYPE, + sql_template="HPARAM_CANDIDATES({0})", + is_deterministic=True, + ) + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(op, (candidates_expr,)) + ) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 412b49b888f5..c6ef1f8bb7a7 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -20,12 +20,12 @@ import google.cloud.bigquery import pandas as pd +import bigframes.core.col as col import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.col as col from bigframes.bigquery._operations import utils diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql deleted file mode 100644 index e328aab5fbfd..000000000000 --- a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE MODEL `my_model` -OPTIONS(l2_reg = 0.1 * 10, booster_type = 'gbtree') -AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql new file mode 100644 index 000000000000..c7ed32e54fc8 --- /dev/null +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql @@ -0,0 +1,3 @@ +CREATE MODEL `my_model` +OPTIONS(model_type = 'LINEAR_REG', learn_rate = HPARAM_RANGE(0.0001, 1.0), optimizer = HPARAM_CANDIDATES(['ADAGRAD', 'SGD'])) +AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index 61296638eec2..d2f789fc6309 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,6 +14,7 @@ import pytest +import bigframes.bigquery as bbq import bigframes.core.col as col import bigframes.core.expression as ex import bigframes.core.sql.ml @@ -101,24 +102,17 @@ def test_create_model_list_option(snapshot): snapshot.assert_match(sql, "create_model_list_option.sql") -def test_create_model_expression_option(snapshot): - # An expression that calls a function on a literal value - # e.g. 0.1 * 10 - literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) - multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = col.Expression( - ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) - ) - +def test_create_model_hparam_tuning(snapshot): sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", options={ - "l2_reg": math_expr, - "booster_type": "gbtree", + "model_type": "LINEAR_REG", + "learn_rate": bbq.hparam_range(0.0001, 1.0), + "optimizer": bbq.hparam_candidates(["ADAGRAD", "SGD"]), }, training_data="SELECT * FROM t", ) - snapshot.assert_match(sql, "create_model_expression_option.sql") + snapshot.assert_match(sql, "create_model_hparam_tuning.sql") def test_evaluate_model_basic(snapshot): diff --git a/packages/bigframes/tests/unit/test_col.py b/packages/bigframes/tests/unit/test_col.py index cf9aa5c4b86a..9f5bbca5d9bc 100644 --- a/packages/bigframes/tests/unit/test_col.py +++ b/packages/bigframes/tests/unit/test_col.py @@ -16,13 +16,13 @@ import pathlib from typing import Generator +import numpy as np import pandas as pd import pytest import bigframes import bigframes.pandas as bpd from bigframes.testing.utils import assert_frame_equal, convert_pandas_dtypes -import numpy as np pytest.importorskip("polars") pytest.importorskip("pandas", minversion="3.0.0") From 1ee0f93baa49405695d246d16632ae1c75c651dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 13 Apr 2026 16:48:11 -0500 Subject: [PATCH 2/3] Update packages/bigframes/bigframes/bigquery/_operations/mathematical.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/mathematical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index bed9c307c3df..2c86ccc9991e 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -53,7 +53,7 @@ def rand() -> bigframes.core.col.Expression: return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) -def hparam_range(min: float | int, max: float | int) -> bigframes.core.col.Expression: +def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: """ Defines the minimum and maximum bounds of the search space of continuous values for a hyperparameter. From 30506fa1609cab52da09fe5cc9f8e08e010a5a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 13 Apr 2026 16:48:45 -0500 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/mathematical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 2c86ccc9991e..ed2ac120b175 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -89,7 +89,7 @@ def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: def hparam_candidates( - candidates: list[float | int | str], + candidates: list[float | str], ) -> bigframes.core.col.Expression: """ Specifies the set of discrete values for the hyperparameter.