From 18c8b4ace9a9cae30e6aa239ace8adce19259489 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Wed, 18 Jun 2025 00:00:25 +0000
Subject: [PATCH 1/3] feat: add required param 'engine' to multimodal functions

---
 bigframes/operations/blob.py                  | 33 +++++++++++++++++
 .../multimodal/multimodal_dataframe.ipynb     |  9 ++---
 samples/snippets/multimodal_test.py           |  9 ++---
 tests/system/large/blob/test_function.py      | 36 +++++++++++++------
 4 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index e143cfc519..63875ded99 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -303,6 +303,7 @@ def get_runtime_json_str(
     def exif(
         self,
         *,
+        engine: Literal[None, "pillow"] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 8192,
         container_cpu: Union[float, int] = 0.33,
@@ -311,6 +312,7 @@ def exif(
         """Extract EXIF data. Now only support image types.
 
         Args:
+            engine ('pillow' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
             max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function.
             container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
@@ -319,6 +321,8 @@ def exif(
         Returns:
             bigframes.series.Series: JSON series of key-value pairs.
         """
+        if engine is None or engine.casefold() != "pillow":
+            raise ValueError("Must specify the engine, supported value is 'pillow'.")
 
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
@@ -344,6 +348,7 @@ def image_blur(
         self,
         ksize: tuple[int, int],
         *,
+        engine: Literal[None, "opencv"] = None,
         dst: Optional[Union[str, bigframes.series.Series]] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 8192,
@@ -354,6 +359,7 @@ def image_blur(
 
         Args:
             ksize (tuple(int, int)): Kernel size.
+            engine ('opencv' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
                 str: GCS folder str. The output filenames are the same as the input files.
                 blob Series: The output file paths are determined by the uris of the blob Series.
@@ -367,6 +373,9 @@ def image_blur(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         import bigframes.blob._functions as blob_func
 
         connection = self._resolve_connection(connection)
@@ -424,6 +433,7 @@ def image_resize(
         self,
         dsize: tuple[int, int] = (0, 0),
         *,
+        engine: Literal[None, "opencv"] = None,
         fx: float = 0.0,
         fy: float = 0.0,
         dst: Optional[Union[str, bigframes.series.Series]] = None,
@@ -436,6 +446,7 @@ def image_resize(
 
         Args:
             dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size.
+            engine ('opencv' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size.
             fy (float, defalut 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
             dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
@@ -451,6 +462,9 @@ def image_resize(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         dsize_set = dsize[0] > 0 and dsize[1] > 0
         fsize_set = fx > 0.0 and fy > 0.0
         if not dsize_set ^ fsize_set:
@@ -516,6 +530,7 @@ def image_resize(
     def image_normalize(
         self,
         *,
+        engine: Literal[None, "opencv"] = None,
         alpha: float = 1.0,
         beta: float = 0.0,
         norm_type: str = "l2",
@@ -528,6 +543,7 @@ def image_normalize(
         """Normalize images.
 
         Args:
+            engine ('opencv' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization.
             beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization.
             norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
@@ -544,6 +560,9 @@ def image_normalize(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         import bigframes.blob._functions as blob_func
 
         connection = self._resolve_connection(connection)
@@ -604,6 +623,7 @@ def image_normalize(
     def pdf_extract(
         self,
         *,
+        engine: Literal[None, "pypdf"] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 1,
         container_cpu: Union[float, int] = 2,
@@ -613,6 +633,7 @@ def pdf_extract(
         """Extracts text from PDF URLs and saves the text as string.
 
         Args:
+            engine ('pypdf' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -631,6 +652,9 @@ def pdf_extract(
                 Contains the extracted text from the PDF file.
                 Includes error messages if verbosity is enabled.
         """
+        if engine is None or engine.casefold() != "pypdf":
+            raise ValueError("Must specify the engine, supported value is 'pypdf'.")
+
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
         import bigframes.pandas as bpd
@@ -663,6 +687,7 @@ def pdf_extract(
     def pdf_chunk(
         self,
         *,
+        engine: Literal[None, "pypdf"] = None,
         connection: Optional[str] = None,
         chunk_size: int = 2000,
         overlap_size: int = 200,
@@ -675,6 +700,7 @@ def pdf_chunk(
            arrays of strings.
 
         Args:
+            engine ('pypdf' or None, default None): The engine (bigquery or third-party library) used for the function. Must be specified explicitly; leaving it as None raises a ValueError.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -698,6 +724,8 @@ def pdf_chunk(
                 where each string is a chunk of text extracted from PDF.
                 Includes error messages if verbosity is enabled.
         """
+        if engine is None or engine.casefold() != "pypdf":
+            raise ValueError("Must specify the engine, supported value is 'pypdf'.")
 
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
@@ -740,6 +768,7 @@ def pdf_chunk(
     def audio_transcribe(
         self,
         *,
+        engine: Literal["bigquery"] = "bigquery",
         connection: Optional[str] = None,
         model_name: Optional[
             Literal[
@@ -753,6 +782,7 @@ def audio_transcribe(
         Transcribe audio content using a Gemini multimodal model.
 
         Args:
+            engine ('bigquery', default 'bigquery'): The engine (bigquery or third-party library) used for the function. The only supported value is 'bigquery'.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -770,6 +800,9 @@ def audio_transcribe(
                 Contains the transcribed text from the audio file.
                 Includes error messages if verbosity is enabled.
         """
+        if engine.casefold() != "bigquery":
+            raise ValueError("Must specify the engine, supported value is 'bigquery'.")
+
         import bigframes.bigquery as bbq
         import bigframes.ml.llm as llm
         import bigframes.pandas as bpd
diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb
index 3f36c2908a..fbe074b0d0 100644
--- a/notebooks/multimodal/multimodal_dataframe.ipynb
+++ b/notebooks/multimodal/multimodal_dataframe.ipynb
@@ -254,16 +254,17 @@
       "outputs": [],
       "source": [
         "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n",
-        "    (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\"\n",
+        "    (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n",
         ")\n",
         "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n",
-        "    (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\"\n",
+        "    (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n",
         ")\n",
         "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n",
         "    alpha=50.0,\n",
         "    beta=150.0,\n",
         "    norm_type=\"minmax\",\n",
         "    dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n",
+        "    engine=\"opencv\",\n",
         ")"
       ]
     },
@@ -280,7 +281,7 @@
       "outputs": [],
       "source": [
         "# You can also chain functions together\n",
-        "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\")"
+        "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")"
       ]
     },
     {
@@ -419,7 +420,7 @@
       },
       "outputs": [],
       "source": [
-        "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk()"
+        "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk(engine=\"pypdf\")"
       ]
     },
     {
diff --git a/samples/snippets/multimodal_test.py b/samples/snippets/multimodal_test.py
index 7f8e13cd7b..087299aa0a 100644
--- a/samples/snippets/multimodal_test.py
+++ b/samples/snippets/multimodal_test.py
@@ -56,21 +56,22 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
 
     # [START bigquery_dataframes_multimodal_dataframe_image_transform]
     df_image["blurred"] = df_image["image"].blob.image_blur(
-        (20, 20), dst=f"{dst_bucket}/image_blur_transformed/"
+        (20, 20), dst=f"{dst_bucket}/image_blur_transformed/", engine="opencv"
     )
     df_image["resized"] = df_image["image"].blob.image_resize(
-        (300, 200), dst=f"{dst_bucket}/image_resize_transformed/"
+        (300, 200), dst=f"{dst_bucket}/image_resize_transformed/", engine="opencv"
     )
     df_image["normalized"] = df_image["image"].blob.image_normalize(
         alpha=50.0,
         beta=150.0,
         norm_type="minmax",
         dst=f"{dst_bucket}/image_normalize_transformed/",
+        engine="opencv",
     )
 
     # You can also chain functions together
     df_image["blur_resized"] = df_image["blurred"].blob.image_resize(
-        (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/"
+        (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/", engine="opencv"
     )
     df_image
     # [END bigquery_dataframes_multimodal_dataframe_image_transform]
@@ -113,7 +114,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
     df_pdf = bpd.from_glob_path(
         "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
     )
-    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk()
+    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk(engine="pypdf")
     chunked = df_pdf["chunked"].explode()
     chunked
     # [END bigquery_dataframes_multimodal_dataframe_pdf_chunk]
diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
index 4a95e4c6d1..e88b7e0f26 100644
--- a/tests/system/large/blob/test_function.py
+++ b/tests/system/large/blob/test_function.py
@@ -86,7 +86,7 @@ def test_blob_image_blur_to_series(
     )
 
     actual = images_mm_df["blob_col"].blob.image_blur(
-        (8, 8), dst=series, connection=bq_connection
+        (8, 8), dst=series, connection=bq_connection, engine="opencv"
     )
     expected_df = pd.DataFrame(
         {
@@ -114,7 +114,7 @@ def test_blob_image_blur_to_folder(
     images_output_uris: list[str],
 ):
     actual = images_mm_df["blob_col"].blob.image_blur(
-        (8, 8), dst=images_output_folder, connection=bq_connection
+        (8, 8), dst=images_output_folder, connection=bq_connection, engine="opencv"
     )
     expected_df = pd.DataFrame(
         {
@@ -136,7 +136,9 @@ def test_blob_image_blur_to_folder(
 
 
 def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
-    actual = images_mm_df["blob_col"].blob.image_blur((8, 8), connection=bq_connection)
+    actual = images_mm_df["blob_col"].blob.image_blur(
+        (8, 8), connection=bq_connection, engine="opencv"
+    )
 
     assert isinstance(actual, bpd.Series)
     assert len(actual) == 2
@@ -154,7 +156,7 @@ def test_blob_image_resize_to_series(
     )
 
     actual = images_mm_df["blob_col"].blob.image_resize(
-        (200, 300), dst=series, connection=bq_connection
+        (200, 300), dst=series, connection=bq_connection, engine="opencv"
     )
     expected_df = pd.DataFrame(
         {
@@ -182,7 +184,7 @@ def test_blob_image_resize_to_folder(
     images_output_uris: list[str],
 ):
     actual = images_mm_df["blob_col"].blob.image_resize(
-        (200, 300), dst=images_output_folder, connection=bq_connection
+        (200, 300), dst=images_output_folder, connection=bq_connection, engine="opencv"
     )
     expected_df = pd.DataFrame(
         {
@@ -205,7 +207,7 @@ def test_blob_image_resize_to_folder(
 
 def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
     actual = images_mm_df["blob_col"].blob.image_resize(
-        (200, 300), connection=bq_connection
+        (200, 300), connection=bq_connection, engine="opencv"
     )
 
     assert isinstance(actual, bpd.Series)
@@ -224,7 +226,12 @@ def test_blob_image_normalize_to_series(
     )
 
     actual = images_mm_df["blob_col"].blob.image_normalize(
-        alpha=50.0, beta=150.0, norm_type="minmax", dst=series, connection=bq_connection
+        alpha=50.0,
+        beta=150.0,
+        norm_type="minmax",
+        dst=series,
+        connection=bq_connection,
+        engine="opencv",
     )
     expected_df = pd.DataFrame(
         {
@@ -257,6 +264,7 @@ def test_blob_image_normalize_to_folder(
         norm_type="minmax",
         dst=images_output_folder,
         connection=bq_connection,
+        engine="opencv",
     )
     expected_df = pd.DataFrame(
         {
@@ -279,7 +287,11 @@ def test_blob_image_normalize_to_folder(
 
 def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str):
     actual = images_mm_df["blob_col"].blob.image_normalize(
-        alpha=50.0, beta=150.0, norm_type="minmax", connection=bq_connection
+        alpha=50.0,
+        beta=150.0,
+        norm_type="minmax",
+        connection=bq_connection,
+        engine="opencv",
     )
 
     assert isinstance(actual, bpd.Series)
@@ -322,7 +334,7 @@ def test_blob_pdf_extract(
 ):
     actual = (
         pdf_mm_df["pdf"]
-        .blob.pdf_extract(connection=bq_connection, verbose=verbose)
+        .blob.pdf_extract(connection=bq_connection, verbose=verbose, engine="pypdf")
         .explode()
         .to_pandas()
     )
@@ -373,7 +385,11 @@ def test_blob_pdf_chunk(
     actual = (
         pdf_mm_df["pdf"]
         .blob.pdf_chunk(
-            connection=bq_connection, chunk_size=50, overlap_size=10, verbose=verbose
+            connection=bq_connection,
+            chunk_size=50,
+            overlap_size=10,
+            verbose=verbose,
+            engine="pypdf",
         )
         .explode()
         .to_pandas()

From aeef2a8264f86c94c0cfd98969c60aad03da750f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <tswast@gmail.com>
Date: Wed, 18 Jun 2025 09:17:02 -0500
Subject: [PATCH 2/3] add missing engine to exif test

---
 tests/system/large/blob/test_function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
index e88b7e0f26..0f956dc668 100644
--- a/tests/system/large/blob/test_function.py
+++ b/tests/system/large/blob/test_function.py
@@ -61,7 +61,7 @@ def test_blob_exif(
         connection=bq_connection,
     )
 
-    actual = exif_image_df["blob_col"].blob.exif(connection=bq_connection)
+    actual = exif_image_df["blob_col"].blob.exif(engine="pillow", connection=bq_connection)
     expected = bpd.Series(
         ['{"ExifOffset": 47, "Make": "MyCamera"}'],
         session=session,

From a7aabe9fb829b8862a3a23fce25ee9187cf6a307 Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 18 Jun 2025 14:19:25 +0000
Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 tests/system/large/blob/test_function.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
index 0f956dc668..a594b144f5 100644
--- a/tests/system/large/blob/test_function.py
+++ b/tests/system/large/blob/test_function.py
@@ -61,7 +61,9 @@ def test_blob_exif(
         connection=bq_connection,
     )
 
-    actual = exif_image_df["blob_col"].blob.exif(engine="pillow", connection=bq_connection)
+    actual = exif_image_df["blob_col"].blob.exif(
+        engine="pillow", connection=bq_connection
+    )
     expected = bpd.Series(
         ['{"ExifOffset": 47, "Make": "MyCamera"}'],
         session=session,