From 9614f973b358890b5133b8818cadb87f6a5c1486 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 14:49:04 +0000 Subject: [PATCH 1/4] feat: embed str values directly in pyformat Updates `bigframes/core/pyformat.py` to directly embed string values in the formatted output. This allows for dynamic substitution of SQL identifiers, such as table or column names. Updated unit tests to reflect this new behavior by using valid SQL identifiers in the test cases. --- bigframes/core/pyformat.py | 3 +++ tests/unit/core/test_pyformat.py | 12 +++++------- tests/unit/session/test_read_gbq_colab.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bigframes/core/pyformat.py b/bigframes/core/pyformat.py index eab86dc629..8f49556ff4 100644 --- a/bigframes/core/pyformat.py +++ b/bigframes/core/pyformat.py @@ -104,6 +104,9 @@ def _field_to_template_value( if isinstance(value, bigframes.dataframe.DataFrame): return _table_to_sql(value._to_placeholder_table(dry_run=dry_run)) + if isinstance(value, str): + return value + return bigframes.core.sql.simple_literal(value) diff --git a/tests/unit/core/test_pyformat.py b/tests/unit/core/test_pyformat.py index 447ce37766..db7cedba8f 100644 --- a/tests/unit/core/test_pyformat.py +++ b/tests/unit/core/test_pyformat.py @@ -444,7 +444,7 @@ def test_pyformat_with_pandas_dataframe_not_dry_run_no_session_raises_valueerror def test_pyformat_with_query_string_replaces_variables(session): pyformat_args = { - "my_string": "some string value", + "my_string": "`my_table`", "max_value": 2.25, "year": 2025, "null_value": None, @@ -456,9 +456,8 @@ def test_pyformat_with_query_string_replaces_variables(session): SELECT {year} - year AS age, @myparam AS myparam, '{{my_string}}' AS escaped_string, - {my_string} AS my_string, - {null_value} AS null_value, - FROM my_dataset.my_table + * + FROM {my_string} WHERE height < {max_value} """.strip() @@ -466,9 
+465,8 @@ def test_pyformat_with_query_string_replaces_variables(session): SELECT 2025 - year AS age, @myparam AS myparam, '{my_string}' AS escaped_string, - 'some string value' AS my_string, - NULL AS null_value, - FROM my_dataset.my_table + * + FROM `my_table` WHERE height < 2.25 """.strip() diff --git a/tests/unit/session/test_read_gbq_colab.py b/tests/unit/session/test_read_gbq_colab.py index 52b091c045..b1dc1ec702 100644 --- a/tests/unit/session/test_read_gbq_colab.py +++ b/tests/unit/session/test_read_gbq_colab.py @@ -60,7 +60,7 @@ def test_read_gbq_colab_includes_formatted_values_in_dry_run(monkeypatch, dry_ru pyformat_args = { "some_integer": 123, - "some_string": "This could be dangerous, but we escape it", + "some_string": "some_column", "bf_df": bf_df, "pd_df": pd_df, # This is not a supported type, but ignored if not referenced. @@ -84,7 +84,7 @@ def test_read_gbq_colab_includes_formatted_values_in_dry_run(monkeypatch, dry_ru expected = textwrap.dedent( f""" SELECT 123 as some_integer, - 'This could be dangerous, but we escape it' as some_string, + some_column as some_string, '{{escaped}}' as escaped FROM `proj`.`dset`.`temp_{"table" if dry_run else "view"}` AS bf_df FULL OUTER JOIN `proj`.`dset`.`temp_{"table" if dry_run else "view"}` AS pd_df From e249c203e9211278cd1b80a13699fef95a4a6971 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:06:41 +0000 Subject: [PATCH 2/4] fix: correct failing test in test_api.py This commit fixes a unit test that was failing due to the changes in pyformat. The test is updated to pass a pre-quoted string, ensuring the resulting SQL is valid. 
--- tests/unit/pandas/io/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/pandas/io/test_api.py b/tests/unit/pandas/io/test_api.py index 14419236c9..dbdf427d91 100644 --- a/tests/unit/pandas/io/test_api.py +++ b/tests/unit/pandas/io/test_api.py @@ -108,7 +108,7 @@ def test_read_gbq_colab_calls_set_location( mock_with_default_session.return_value = mock_df query_or_table = "SELECT {param1} AS param1" - sample_pyformat_args = {"param1": "value1"} + sample_pyformat_args = {"param1": "'value1'"} result = bf_io_api._read_gbq_colab( query_or_table, pyformat_args=sample_pyformat_args, dry_run=False ) From f774fd83500155ebab170d1af4815c0df1b166d3 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:31:00 +0000 Subject: [PATCH 3/4] feat: embed str values directly in pyformat Updates `bigframes/core/pyformat.py` to directly embed string values in the formatted output. This allows for dynamic substitution of SQL identifiers, such as table or column names. Updated unit tests to reflect this new behavior by using valid SQL identifiers in the test cases. Note that string values are now inserted verbatim and are no longer escaped or quoted as SQL string literals, so callers must quote string literals themselves (either in the template or in the value) and must not pass untrusted input.
From 8697db9ef2c21e9183c9e5142f9e36dbc9385bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 10 Nov 2025 19:41:34 +0000 Subject: [PATCH 4/4] fix system test --- tests/system/small/session/test_read_gbq_colab.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/system/small/session/test_read_gbq_colab.py b/tests/system/small/session/test_read_gbq_colab.py index 6d3cf6fe88..65f47fe4e3 100644 --- a/tests/system/small/session/test_read_gbq_colab.py +++ b/tests/system/small/session/test_read_gbq_colab.py @@ -143,7 +143,7 @@ def test_read_gbq_colab_repr_avoids_requery(maybe_ordered_session): def test_read_gbq_colab_includes_formatted_scalars(session): pyformat_args = { "some_integer": 123, - "some_string": "This could be dangerous, but we escape it", + "some_string": "This could be dangerous.", # This is not a supported type, but ignored if not referenced. "some_object": object(), } @@ -153,7 +153,7 @@ def test_read_gbq_colab_includes_formatted_scalars(session): df = session._read_gbq_colab( """ SELECT {some_integer} as some_integer, - {some_string} as some_string, + '{some_string}' as some_string, '{{escaped}}' as escaped """, pyformat_args=pyformat_args, @@ -165,7 +165,7 @@ def test_read_gbq_colab_includes_formatted_scalars(session): { "some_integer": pandas.Series([123], dtype=pandas.Int64Dtype()), "some_string": pandas.Series( - ["This could be dangerous, but we escape it"], + ["This could be dangerous."], dtype="string[pyarrow]", ), "escaped": pandas.Series(["{escaped}"], dtype="string[pyarrow]"),