From 131627217162fbd777e00a254c3eb3c0d882c040 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 7 Oct 2025 04:17:32 +0000 Subject: [PATCH 1/4] change to ai.generate --- bigframes/operations/blob.py | 26 ++++++++---------------- tests/system/large/blob/test_function.py | 1 + 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 63875ded99..df98b3a35c 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -804,7 +804,6 @@ def audio_transcribe( raise ValueError("Must specify the engine, supported value is 'bigquery'.") import bigframes.bigquery as bbq - import bigframes.ml.llm as llm import bigframes.pandas as bpd # col name doesn't matter here. Rename to avoid column name conflicts @@ -812,27 +811,20 @@ def audio_transcribe( prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." - llm_model = llm.GeminiTextGenerator( - model_name=model_name, - session=self._block.session, - connection_name=connection, + # Use bbq.ai.generate() to transcribe audio + transcribed_results = bbq.ai.generate( + prompt=(prompt_text, audio_series), + connection_id=connection, + endpoint=model_name, + request_type="unspecified", ) - # transcribe audio using ML.GENERATE_TEXT - transcribed_results = llm_model.predict( - X=audio_series, - prompt=[prompt_text, audio_series], - temperature=0.0, + transcribed_content_series = transcribed_results.struct.field("result").rename( + "transcribed_content" ) - transcribed_content_series = cast( - bpd.Series, transcribed_results["ml_generate_text_llm_result"] - ).rename("transcribed_content") - if verbose: - transcribed_status_series = cast( - bpd.Series, transcribed_results["ml_generate_text_status"] - ) + transcribed_status_series = transcribed_results.struct.field("status") results_df = bpd.DataFrame( { "status": transcribed_status_series, diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index 70c3084261..3cfb8f4e29 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -424,6 +424,7 @@ def test_blob_transcribe( ) .to_pandas() ) + print(actual) # check relative length expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" From ebea0376bb9c75490a8b92ab1561697a168fcaf8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 8 Oct 2025 09:19:49 +0000 Subject: [PATCH 2/4] convert the input data type --- bigframes/operations/blob.py | 7 +++++-- tests/system/large/blob/test_function.py | 1 - 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index df98b3a35c..d1071cc002 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -811,12 +811,15 @@ def audio_transcribe( prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." - # Use bbq.ai.generate() to transcribe audio + # Convert the audio series to the runtime representation required by the model. + audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True) + transcribed_results = bbq.ai.generate( - prompt=(prompt_text, audio_series), + prompt=(prompt_text, audio_runtime), connection_id=connection, endpoint=model_name, request_type="unspecified", + model_params={"generationConfig": {"temperature": 0.0}}, ) transcribed_content_series = transcribed_results.struct.field("result").rename( diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index 3cfb8f4e29..70c3084261 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -424,7 +424,6 @@ def test_blob_transcribe( ) .to_pandas() ) - print(actual) # check relative length expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" From 917910323c80081f84c95a3d9435e286d7187b1d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 9 Oct 2025 04:37:24 +0000 Subject: [PATCH 3/4] remove default value setting --- bigframes/operations/blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index d1071cc002..7f419bc5d8 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -818,7 +818,6 @@ def audio_transcribe( prompt=(prompt_text, audio_runtime), connection_id=connection, endpoint=model_name, - request_type="unspecified", model_params={"generationConfig": {"temperature": 0.0}}, ) From 084cfe9a4fe0d650352c46228e487151c80b6f79 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 9 Oct 2025 06:35:05 +0000 Subject: [PATCH 4/4] add audio_transcribe testcase to notebook --- .../multimodal/multimodal_dataframe.ipynb | 949 +++++++++++++++++- 1 file changed, 914 insertions(+), 35 deletions(-) diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index f6f80b0009..c04463fc4c 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -131,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -139,7 +139,20 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " _global_session = bigframes.session.connect(\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + } + ], "source": [ "# Create blob columns from wildcard path.\n", "df_image = bpd.from_glob_path(\n", @@ -155,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -164,7 +177,83 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " image\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto...\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", "df_image = df_image.head(5)\n", @@ -191,11 +280,143 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "YYYVn7NDH0Me" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "version. Use `json_query` instead.\n", + " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "version. Use `json_query` instead.\n", + " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "version. Use `json_query` instead.\n", + " warnings.warn(bfe.format_message(msg), category=UserWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" + ], + "text/plain": [ + " image author content_type \\\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "\n", + " size updated \n", + "0 1591240 2025-03-20 17:45:04+00:00 \n", + "1 1182951 2025-03-20 17:45:02+00:00 \n", + "2 1520884 2025-03-20 17:44:55+00:00 \n", + "3 1235401 2025-03-20 17:45:19+00:00 \n", + "4 1591923 2025-03-20 17:44:47+00:00 \n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Combine unstructured data with structured data\n", "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", @@ -216,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -225,7 +446,53 @@ "id": "UGuAk9PNDRF3", "outputId": "73feb33d-4a05-48fb-96e5-3c48c2a456f3" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:124: UserWarning: The `json_extract` is deprecated and will be removed in a future\n", + "version. Use `json_query` instead.\n", + " warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# filter images and display, you can also display audio and video types\n", "df_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()" @@ -243,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -251,7 +518,32 @@ "id": "VWsl5BBPJ6N7", "outputId": "45d2356e-322b-4982-cfa7-42d034dc4344" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(*args, **kwargs)\n" + ] + } + ], "source": [ "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n", " (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n", @@ -270,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -278,7 +570,20 @@ "id": "rWCAGC8w64vU", "outputId": "d7d456f0-8b56-492c-fe1b-967e9664d813" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(*args, **kwargs)\n" + ] + } + ], "source": [ "# You can also chain functions together\n", "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")" @@ -286,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -295,7 +600,182 @@ "id": "6NGK6GYSU44B", "outputId": "859101c1-2ee4-4f9a-e250-e8947127420a" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdatedblurredresizednormalizedblur_resized
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 9 columns

\n", + "
[5 rows x 9 columns in total]" + ], + "text/plain": [ + " image author content_type \\\n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "2 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "3 {'uri': 'gs://cloud-samples-data/bigquery/tuto... alice image/png \n", + "4 {'uri': 'gs://cloud-samples-data/bigquery/tuto... bob image/png \n", + "\n", + " size updated \\\n", + "0 1591240 2025-03-20 17:45:04+00:00 \n", + "1 1182951 2025-03-20 17:45:02+00:00 \n", + "2 1520884 2025-03-20 17:44:55+00:00 \n", + "3 1235401 2025-03-20 17:45:19+00:00 \n", + "4 1591923 2025-03-20 17:44:47+00:00 \n", + "\n", + " blurred \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_blur_t... \n", + "\n", + " resized \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_resize... \n", + "\n", + " normalized \\\n", + "0 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_normal... \n", + "\n", + " blur_resized \n", + "0 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "1 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "2 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "3 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "4 {'uri': 'gs://bigframes_blob_test/image_blur_r... \n", + "\n", + "[5 rows x 9 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_image" ] @@ -311,11 +791,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "id": "mRUGfcaFVW-3" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n" + ] + } + ], "source": [ "from bigframes.ml import llm\n", "gemini = llm.GeminiTextGenerator()" @@ -323,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -332,7 +823,87 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a tin of K9Guard Dog Paw Balm.
1The item is a bottle of K9 Guard Dog Hot Spot Spray.
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is a tin of K9Guard Dog Paw Balm. \n", + "1 The item is a bottle of K9 Guard Dog Hot Spot ... \n", + "\n", + " image \n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Ask the same question on the images\n", "df_image = df_image.head(2)\n", @@ -342,11 +913,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "id": "IG3J3HsKhyBY" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + } + ], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\"what item is it?\", \"what color is the picture?\"]" @@ -354,7 +936,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -363,7 +945,87 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is dog paw balm.
1The picture features a white bottle with a light blue spray nozzle and accents. The background is a neutral gray.\\n
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " ml_generate_text_llm_result \\\n", + "0 The item is dog paw balm. \n", + "1 The picture features a white bottle with a lig... \n", + "\n", + " image \n", + "0 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "1 {'uri': 'gs://cloud-samples-data/bigquery/tuto... \n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" @@ -371,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -380,7 +1042,104 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FutureWarning: Since upgrading the default model can cause unintended breakages, the\n", + "default model will be removed in BigFrames 3.0. Please supply an\n", + "explicit model to avoid this message.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638846 0.01666372 0.00451786 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-09T12:2...
1[ 0.0097399 0.0214815 0.00244266 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2025-10-09T12:2...
\n", + "

2 rows × 5 columns

\n", + "
[2 rows x 5 columns in total]" + ], + "text/plain": [ + " ml_generate_embedding_result \\\n", + "0 [ 0.00638846 0.01666372 0.00451786 ... -0.02... \n", + "1 [ 0.0097399 0.0214815 0.00244266 ... 0.00... \n", + "\n", + " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", + "0 \n", + "1 \n", + "\n", + " ml_generate_embedding_end_sec \\\n", + "0 \n", + "1 \n", + "\n", + " content \n", + "0 {\"access_urls\":{\"expiry_time\":\"2025-10-09T12:2... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2025-10-09T12:2... \n", + "\n", + "[2 rows x 5 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Generate embeddings.\n", "embed_model = llm.MultimodalEmbeddingGenerator()\n", @@ -399,18 +1158,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "id": "oDDuYtUm5Yiy" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + } + ], "source": [ "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -418,22 +1188,130 @@ "id": "7jLpMYaj7nj8", "outputId": "06d5456f-580f-4693-adff-2605104b056c" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/log_adapter.py:180: FunctionAxisOnePreviewWarning: Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.\n", + " return method(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/bigquery/_operations/json.py:244: UserWarning: The `json_extract_string_array` is deprecated and will be removed in a\n", + "future version. Use `json_value_array` instead.\n", + " warnings.warn(bfe.format_message(msg), category=UserWarning)\n" + ] + } + ], "source": [ "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk(engine=\"pypdf\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "id": "kaPvJATN7zlw" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "0 on a level, stable surface to prevent tipping....\n", + "0 included)\\nto maintain the schedule during pow...\n", + "0 digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n", + "0 paperclip) for 5\\nseconds. This will reset all...\n", + "0 unit with a damp cloth. Do not immerse the bas...\n", + "0 continues,\\ncontact customer support.\\nE2: Foo...\n", + "Name: chunked, dtype: string" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "chunked = df_pdf[\"chunked\"].explode()\n", "chunked" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Audio transcribe function" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + } + ], + "source": [ + "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", + "df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transcribed_series = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=False)\n", + "transcribed_series" + ] } ], "metadata": { @@ -441,7 +1319,8 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "venv", + "language": "python", "name": "python3" }, "language_info": { @@ -454,7 +1333,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.18" } }, "nbformat": 4,