From 67206cb251ca1c0c7a324b5012cc6dfbf3315bf1 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 15 Sep 2025 21:17:17 +0000 Subject: [PATCH 01/10] feat: support pandas series in ai.generate_bool --- bigframes/bigquery/_operations/ai.py | 38 ++++++++++++++++++++------ tests/system/small/bigquery/test_ai.py | 17 ++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index d7ea29322d..f75267c609 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -21,14 +21,19 @@ import json from typing import Any, List, Literal, Mapping, Tuple -from bigframes import clients, dtypes, series -from bigframes.core import log_adapter +import pandas as pd + +from bigframes import clients, dtypes, series, session +from bigframes.core import convert, log_adapter from bigframes.operations import ai_ops @log_adapter.method_logger(custom_base_name="bigquery_ai") def generate_bool( - prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...], + prompt: series.Series + | pd.Series + | List[str | series.Series | pd.Series] + | Tuple[str | series.Series | pd.Series, ...], *, connection_id: str | None = None, endpoint: str | None = None, @@ -77,8 +82,9 @@ def generate_bool( Name: result, dtype: boolean Args: - prompt (series.Series | List[str|series.Series] | Tuple[str|series.Series, ...]): - A mixture of Series and string literals that specifies the prompt to send to the model. + prompt (Series | List[str|Series] | Tuple[str|Series, ...]): + A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series + or pandas Series. connection_id (str, optional): Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. If not provided, the connection from the current session will be used. 
@@ -142,16 +148,17 @@ def _separate_context_and_series( prompt_context: List[str | None] = [] series_list: List[series.Series] = [] + session = None for item in prompt: if isinstance(item, str): prompt_context.append(item) - elif isinstance(item, series.Series): + elif isinstance(item, (series.Series, pd.Series)): prompt_context.append(None) - if item.dtype == dtypes.OBJ_REF_DTYPE: - # Multi-model support - item = item.blob.read_url() + if isinstance(item, series.Series) and session is None: + # use the session from the first BigFrames session if possible + session = item._session series_list.append(item) else: @@ -160,9 +167,22 @@ def _separate_context_and_series( if not series_list: raise ValueError("Please provide at least one Series in the prompt") + series_list = [_convert_series(s, session) for s in series_list] + return prompt_context, series_list +def _convert_series( + s: series.Series | pd.Series, session: session.Session | None +) -> series.Series: + result = convert.to_bf_series(s, default_index=None, session=session) + + if result.dtype == dtypes.OBJ_REF_DTYPE: + # Support multimodel + return result.blob.read_url() + return result + + def _resolve_connection_id(series: series.Series, connection_id: str | None): return clients.get_canonical_bq_connection_id( connection_id or series._session._bq_connection, diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py index 01050ade04..bebb215641 100644 --- a/tests/system/small/bigquery/test_ai.py +++ b/tests/system/small/bigquery/test_ai.py @@ -39,6 +39,23 @@ def test_ai_generate_bool(session): ) +def test_ai_generate_bool_with_pandas(session): + s1 = pd.Series(["apple", "bear"]) + s2 = bpd.Series(["fruit", "tree"], session=session) + prompt = (s1, " is a ", s2) + + result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash").struct.field( + "result" + ) + + pandas.testing.assert_series_equal( + result.to_pandas(), + pd.Series([True, False], name="result"), + 
check_dtype=False, + check_index=False, + ) + + def test_ai_generate_bool_with_model_params(session): if sys.version_info < (3, 12): pytest.skip( From 87f37bed90a60db545f6c41f54afd7cf99110624 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 15 Sep 2025 21:24:42 +0000 Subject: [PATCH 02/10] fix mypy error --- bigframes/bigquery/_operations/ai.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index f75267c609..b33d3d1076 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -27,13 +27,17 @@ from bigframes.core import convert, log_adapter from bigframes.operations import ai_ops +PROMPT_TYPE = ( + series.Series + | pd.Series + | List[str | series.Series | pd.Series] + | Tuple[str | series.Series | pd.Series, ...] +) + @log_adapter.method_logger(custom_base_name="bigquery_ai") def generate_bool( - prompt: series.Series - | pd.Series - | List[str | series.Series | pd.Series] - | Tuple[str | series.Series | pd.Series, ...], + prompt: PROMPT_TYPE, *, connection_id: str | None = None, endpoint: str | None = None, @@ -127,7 +131,7 @@ def generate_bool( def _separate_context_and_series( - prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...], + prompt: PROMPT_TYPE, ) -> Tuple[List[str | None], List[series.Series]]: """ Returns the two values. The first value is the prompt with all series replaced by None. 
The second value is all the series From e1a6a5a895b99cdee2324792b068ef0975f80fb7 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 15 Sep 2025 21:29:26 +0000 Subject: [PATCH 03/10] define PROMPT_TYPE with Union --- bigframes/bigquery/_operations/ai.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index b33d3d1076..d7607a91ca 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -19,7 +19,7 @@ from __future__ import annotations import json -from typing import Any, List, Literal, Mapping, Tuple +from typing import Any, List, Literal, Mapping, Tuple, Union import pandas as pd @@ -27,12 +27,12 @@ from bigframes.core import convert, log_adapter from bigframes.operations import ai_ops -PROMPT_TYPE = ( - series.Series - | pd.Series - | List[str | series.Series | pd.Series] - | Tuple[str | series.Series | pd.Series, ...] -) +PROMPT_TYPE = Union[ + series.Series, + pd.Series, + List[Union[str, series.Series, pd.Series]], + Tuple[Union[str, series.Series, pd.Series], ...], +] @log_adapter.method_logger(custom_base_name="bigquery_ai") From f0b1d1a371817916a2c16d6832e6a85569122862 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 15 Sep 2025 21:30:06 +0000 Subject: [PATCH 04/10] fix type --- bigframes/bigquery/_operations/ai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index d7607a91ca..c006b543f2 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -150,7 +150,7 @@ def _separate_context_and_series( return [None], [prompt] prompt_context: List[str | None] = [] - series_list: List[series.Series] = [] + series_list: List[series.Series | pd.Series] = [] session = None for item in prompt: From 1edcfab2d551153d8dd30380f6cd787d39409cb1 Mon Sep 17 00:00:00 2001 From: Shenyang Cai 
Date: Tue, 16 Sep 2025 00:42:01 +0000 Subject: [PATCH 05/10] update test --- tests/system/small/bigquery/test_ai.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py index 3065b6f687..8531a8651e 100644 --- a/tests/system/small/bigquery/test_ai.py +++ b/tests/system/small/bigquery/test_ai.py @@ -47,15 +47,17 @@ def test_ai_generate_bool_with_pandas(session): s2 = bpd.Series(["fruit", "tree"], session=session) prompt = (s1, " is a ", s2) - result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash").struct.field( - "result" - ) + result = bbq.ai.generate_bool(prompt, endpoint="gemini-2.5-flash") - pandas.testing.assert_series_equal( - result.to_pandas(), - pd.Series([True, False], name="result"), - check_dtype=False, - check_index=False, + assert _contains_no_nulls(result) + assert result.dtype == pd.ArrowDtype( + pa.struct( + ( + pa.field("result", pa.bool_()), + pa.field("full_response", pa.string()), + pa.field("status", pa.string()), + ) + ) ) From 59ffbeeca0036f83a9de61cdc66a70e340e3fa30 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 16 Sep 2025 00:43:53 +0000 Subject: [PATCH 06/10] update comment --- bigframes/bigquery/_operations/ai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index 79b15963c8..56d22fb9fa 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -144,7 +144,7 @@ def _separate_context_and_series( prompt_context.append(None) if isinstance(item, series.Series) and session is None: - # use the session from the first BigFrames session if possible + # Use the first available BF session if there's any. 
session = item._session series_list.append(item) From 6f3a95c24b9618fa75bf1145e6fa7237e9c863f5 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 16 Sep 2025 00:56:44 +0000 Subject: [PATCH 07/10] fix mypy --- bigframes/bigquery/_operations/ai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index 56d22fb9fa..2d3a3431dd 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -154,9 +154,9 @@ def _separate_context_and_series( if not series_list: raise ValueError("Please provide at least one Series in the prompt") - series_list = [_convert_series(s, session) for s in series_list] + converted_list = [_convert_series(s, session) for s in series_list] - return prompt_context, series_list + return prompt_context, converted_list def _convert_series( From 3093c5eb733da9bd4d30dbb155b6e8bbedc593fe Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 16 Sep 2025 21:18:51 +0000 Subject: [PATCH 08/10] fix return type --- bigframes/operations/ai_ops.py | 2 +- tests/system/small/bigquery/test_ai.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bigframes/operations/ai_ops.py b/bigframes/operations/ai_ops.py index fe5eb1406f..680c1585fb 100644 --- a/bigframes/operations/ai_ops.py +++ b/bigframes/operations/ai_ops.py @@ -40,7 +40,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT pa.struct( ( pa.field("result", pa.bool_()), - pa.field("full_response", pa.string()), + pa.field("full_response", dtypes.JSON_ARROW_TYPE), pa.field("status", pa.string()), ) ) diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py index 8531a8651e..be67a0d580 100644 --- a/tests/system/small/bigquery/test_ai.py +++ b/tests/system/small/bigquery/test_ai.py @@ -18,7 +18,7 @@ import pyarrow as pa import pytest -from bigframes import series +from bigframes import 
dtypes, series import bigframes.bigquery as bbq import bigframes.pandas as bpd @@ -35,7 +35,7 @@ def test_ai_generate_bool(session): pa.struct( ( pa.field("result", pa.bool_()), - pa.field("full_response", pa.string()), + pa.field("full_response", dtypes.JSON_ARROW_TYPE), pa.field("status", pa.string()), ) ) @@ -54,7 +54,7 @@ def test_ai_generate_bool_with_pandas(session): pa.struct( ( pa.field("result", pa.bool_()), - pa.field("full_response", pa.string()), + pa.field("full_response", dtypes.JSON_ARROW_TYPE), pa.field("status", pa.string()), ) ) @@ -81,7 +81,7 @@ def test_ai_generate_bool_with_model_params(session): pa.struct( ( pa.field("result", pa.bool_()), - pa.field("full_response", pa.string()), + pa.field("full_response", dtypes.JSON_ARROW_TYPE), pa.field("status", pa.string()), ) ) @@ -100,7 +100,7 @@ def test_ai_generate_bool_multi_model(session): pa.struct( ( pa.field("result", pa.bool_()), - pa.field("full_response", pa.string()), + pa.field("full_response", dtypes.JSON_ARROW_TYPE), pa.field("status", pa.string()), ) ) From ef6794e0f83af36fe81ed1586cd9a1ab63f5d1e8 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 16 Sep 2025 22:05:24 +0000 Subject: [PATCH 09/10] update doc --- bigframes/bigquery/_operations/ai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index 2d3a3431dd..3a88e9e236 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -94,7 +94,7 @@ def generate_bool( Returns: bigframes.series.Series: A new struct Series with the result data. The struct contains these fields: * "result": a BOOL value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI. - * "full_response": a STRING value containing the JSON response from the projects.locations.endpoints.generateContent call to the model. 
+ * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model. The generated text is in the text element. * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful. """ From c3a658a69553fa5a6036d85f2fd7d3ae2ce8d06e Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 17 Sep 2025 21:18:32 +0000 Subject: [PATCH 10/10] fix doctest --- bigframes/bigquery/_operations/ai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index 3a88e9e236..3bafce6166 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -60,7 +60,7 @@ def generate_bool( 0 {'result': True, 'full_response': '{"candidate... 1 {'result': True, 'full_response': '{"candidate... 2 {'result': False, 'full_response': '{"candidat... - dtype: struct<result: bool, full_response: string, status: string>[pyarrow] + dtype: struct<result: bool, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow] >>> bbq.ai.generate_bool((df["col_1"], " is a ", df["col_2"])).struct.field("result") 0 True