Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/bigframes/bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@
to_json,
to_json_string,
)
from bigframes.bigquery._operations.mathematical import rand
from bigframes.bigquery._operations.mathematical import (
hparam_candidates,
hparam_range,
rand,
)
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
Expand Down Expand Up @@ -130,6 +134,8 @@
to_json,
to_json_string,
# mathematical ops
hparam_candidates,
hparam_range,
rand,
# search ops
create_vector_index,
Expand Down Expand Up @@ -187,6 +193,8 @@
"to_json",
"to_json_string",
# mathematical ops
"hparam_candidates",
"hparam_range",
"rand",
# search ops
"create_vector_index",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,71 @@ def rand() -> bigframes.core.col.Expression:
is_deterministic=False,
)
return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ()))


def hparam_range(min: float, max: float) -> bigframes.core.col.Expression:
    """
    Build an ``HPARAM_RANGE`` expression describing a continuous search space.

    The two bounds are wrapped as constant expressions and passed, in order,
    to a SQL scalar operator whose template is ``HPARAM_RANGE({0}, {1})``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> # Specify a range of values for a hyperparameter.
        >>> learn_rate = bbq.hparam_range(0.0001, 1.0)

    Args:
        min (float or int):
            The minimum bound of the search space.
        max (float or int):
            The maximum bound of the search space.

    Returns:
        bigframes.pandas.api.typing.Expression:
            An expression that can be used in model options.
    """
    # The operator is declared deterministic with a float output type.
    range_op = ops.SqlScalarOp(
        _output_type=dtypes.FLOAT_DTYPE,
        sql_template="HPARAM_RANGE({0}, {1})",
        is_deterministic=True,
    )

    # Positional order matters: {0} is the lower bound, {1} the upper bound.
    bounds = (
        bigframes.core.expression.const(min),
        bigframes.core.expression.const(max),
    )
    return bigframes.core.col.Expression(
        bigframes.core.expression.OpExpression(range_op, bounds)
    )


def hparam_candidates(
    candidates: list[float | str],
) -> bigframes.core.col.Expression:
    """
    Build an ``HPARAM_CANDIDATES`` expression listing discrete values.

    The whole list is wrapped as a single constant expression and passed to a
    SQL scalar operator whose template is ``HPARAM_CANDIDATES({0})``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> # Specify a set of values for a hyperparameter.
        >>> optimizer = bbq.hparam_candidates(['ADAGRAD', 'SGD', 'FTRL'])

    Args:
        candidates (list):
            The set of discrete values for the hyperparameter.

    Returns:
        bigframes.pandas.api.typing.Expression:
            An expression that can be used in model options.
    """
    # The operator's output type is declared as string for every input list,
    # whatever the element type of ``candidates`` is.
    candidates_op = ops.SqlScalarOp(
        _output_type=dtypes.STRING_DTYPE,
        sql_template="HPARAM_CANDIDATES({0})",
        is_deterministic=True,
    )

    # A one-element tuple: the list itself is the sole operand ({0}).
    operands = (bigframes.core.expression.const(candidates),)
    return bigframes.core.col.Expression(
        bigframes.core.expression.OpExpression(candidates_op, operands)
    )
2 changes: 1 addition & 1 deletion packages/bigframes/bigframes/bigquery/_operations/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import google.cloud.bigquery
import pandas as pd

import bigframes.core.col as col
import bigframes.core.logging.log_adapter as log_adapter
import bigframes.core.sql.ml
import bigframes.dataframe as dataframe
import bigframes.ml.base
import bigframes.session
import bigframes.core.col as col
from bigframes.bigquery._operations import utils


Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CREATE MODEL `my_model`
OPTIONS(model_type = 'LINEAR_REG', learn_rate = HPARAM_RANGE(0.0001, 1.0), optimizer = HPARAM_CANDIDATES(['ADAGRAD', 'SGD']))
AS SELECT * FROM t
18 changes: 6 additions & 12 deletions packages/bigframes/tests/unit/core/sql/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pytest

import bigframes.bigquery as bbq
import bigframes.core.col as col
import bigframes.core.expression as ex
import bigframes.core.sql.ml
Expand Down Expand Up @@ -101,24 +102,17 @@ def test_create_model_list_option(snapshot):
snapshot.assert_match(sql, "create_model_list_option.sql")


def test_create_model_expression_option(snapshot):
# An expression that calls a function on a literal value
# e.g. 0.1 * 10
literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE)
multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE)
math_expr = col.Expression(
ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr))
)

def test_create_model_hparam_tuning(snapshot):
sql = bigframes.core.sql.ml.create_model_ddl(
model_name="my_model",
options={
"l2_reg": math_expr,
"booster_type": "gbtree",
"model_type": "LINEAR_REG",
"learn_rate": bbq.hparam_range(0.0001, 1.0),
"optimizer": bbq.hparam_candidates(["ADAGRAD", "SGD"]),
},
training_data="SELECT * FROM t",
)
snapshot.assert_match(sql, "create_model_expression_option.sql")
snapshot.assert_match(sql, "create_model_hparam_tuning.sql")


def test_evaluate_model_basic(snapshot):
Expand Down
2 changes: 1 addition & 1 deletion packages/bigframes/tests/unit/test_col.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
import pathlib
from typing import Generator

import numpy as np
import pandas as pd
import pytest

import bigframes
import bigframes.pandas as bpd
from bigframes.testing.utils import assert_frame_equal, convert_pandas_dtypes
import numpy as np

pytest.importorskip("polars")
pytest.importorskip("pandas", minversion="3.0.0")
Expand Down