In [1]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# BigFrames Multimodal DataFrame

<table align="left">

  <td>
    <a href="https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb">
      <img src="https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb">
      <img src="https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png" width="32" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb">
      <img src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s" alt="BQ logo" width="35">
      Open in BQ Studio
    </a>
  </td>
</table>


This notebook is introducing BigFrames Multimodal features:
1. Create Multimodal DataFrame
2. Combine unstructured data with structured data
3. Conduct image transformations
4. Use LLM models to ask questions and generate embeddings on images
5. PDF chunking function
6. Transcribe audio

### Setup

Install the latest bigframes package if bigframes version < 2.4.0

In [2]:
# !pip install bigframes --upgrade

In [3]:
PROJECT = "bigframes-dev" # replace with your project. 
# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions

OUTPUT_BUCKET = "bigframes_blob_test" # replace with your GCS bucket. 
# The connection (or bigframes-default-connection of the project) must have read/write permission to the bucket. 
# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.
# In this Notebook it uses bigframes-default-connection by default. You can also bring in your own connections in each method.

import bigframes
# Setup project
bigframes.options.bigquery.project = PROJECT

# Display options
bigframes.options.display.blob_display_width = 300
bigframes.options.display.progress_bar = None

import bigframes.pandas as bpd

### 1. Create Multimodal DataFrame
There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths.

In [4]:
# Create blob columns from wildcard path.
df_image = bpd.from_glob_path(
    "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*", name="image"
)
# Other ways are: from string uri column
# df = bpd.DataFrame({"uri": ["gs://<my_bucket>/<my_file_0>", "gs://<my_bucket>/<my_file_1>"]})
# df["blob_col"] = df["uri"].str.to_blob()

# From an existing object table
# df = bpd.read_gbq_object_table("<my_object_table>", name="blob_col")

  _global_session = bigframes.session.connect(
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


In [5]:
# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame
df_image = df_image.head(5)
df_image

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,image
0,
1,
2,
3,
4,


### 2. Combine unstructured data with structured data

Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself.

In [6]:
# Combine unstructured data with structured data
df_image["author"] = ["alice", "bob", "bob", "alice", "bob"]  # type: ignore
df_image["content_type"] = df_image["image"].blob.content_type()
df_image["size"] = df_image["image"].blob.size()
df_image["updated"] = df_image["image"].blob.updated()
df_image

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
version. Use `json_query` instead.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
version. Use `json_query` instead.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
version. Use `json_query` instead.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,image,author,content_type,size,updated
0,,alice,image/png,1591240,2025-03-20 17:45:04+00:00
1,,bob,image/png,1182951,2025-03-20 17:45:02+00:00
2,,bob,image/png,1520884,2025-03-20 17:44:55+00:00
3,,alice,image/png,1235401,2025-03-20 17:45:19+00:00
4,,bob,image/png,1591923,2025-03-20 17:44:47+00:00


Then you can filter the rows based on the structured data. And for different content types, you can display them respectively or together.

In [7]:
# filter images and display, you can also display audio and video types
df_image[df_image["author"] == "alice"]["image"].blob.display()

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
version. Use `json_query` instead.


### 3. Conduct image transformations
BigFrames Multimodal DataFrame provides image(and other) transformation functions. Such as image_blur, image_resize and image_normalize. The output can be saved to GCS folders or to BQ as bytes.

In [8]:
df_image["blurred"] = df_image["image"].blob.image_blur(
    (20, 20), dst=f"gs://{OUTPUT_BUCKET}/image_blur_transformed/", engine="opencv"
)
df_image["resized"] = df_image["image"].blob.image_resize(
    (300, 200), dst=f"gs://{OUTPUT_BUCKET}/image_resize_transformed/", engine="opencv"
)
df_image["normalized"] = df_image["image"].blob.image_normalize(
    alpha=50.0,
    beta=150.0,
    norm_type="minmax",
    dst=f"gs://{OUTPUT_BUCKET}/image_normalize_transformed/",
    engine="opencv",
)

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)


In [9]:
# You can also chain functions together
df_image["blur_resized"] = df_image["blurred"].blob.image_resize((300, 200), dst=f"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/", engine="opencv")

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)


### Using `verbose` mode for detailed output\n
\n
All multimodal functions support a `verbose` parameter, which defaults to `False`.\n
\n
*   When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n
*   When `verbose=True`, the function returns a `STRUCT` containing two fields:\n
    *   `content`: The main result of the operation.\n
    *   `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n
\n
Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function.

In [10]:
df_image["blurred_verbose"] = df_image["image"].blob.image_blur(
    (20, 20), dst=f"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose/", engine="opencv", verbose=True
)
df_image[["blurred_verbose"]]

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,blurred_verbose
0,"{'status': '', 'content': {'uri': 'gs://bigfra..."
1,"{'status': '', 'content': {'uri': 'gs://bigfra..."
2,"{'status': '', 'content': {'uri': 'gs://bigfra..."
3,"{'status': '', 'content': {'uri': 'gs://bigfra..."
4,"{'status': '', 'content': {'uri': 'gs://bigfra..."


In [11]:
df_image

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,image,author,content_type,size,updated,blurred,resized,normalized,blur_resized,blurred_verbose
0,,alice,image/png,1591240,2025-03-20 17:45:04+00:00,,,,,"{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-paw-balm.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}"
1,,bob,image/png,1182951,2025-03-20 17:45:02+00:00,,,,,"{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/k9-guard-dog-hot-spot-spray.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}"
2,,bob,image/png,1520884,2025-03-20 17:44:55+00:00,,,,,"{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/fluffy-buns-chinchilla-food-variety-pack.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}"
3,,alice,image/png,1235401,2025-03-20 17:45:19+00:00,,,,,"{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/purrfect-perch-cat-scratcher.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}"
4,,bob,image/png,1591923,2025-03-20 17:44:47+00:00,,,,,"{'status': '', 'content': {'uri': 'gs://bigframes_blob_test/image_blur_transformed_verbose/chirpy-seed-deluxe-bird-food.png', 'version': None, 'authorizer': 'bigframes-dev.us.bigframes-default-connection', 'details': None}}"


### 4. Use LLM models to ask questions and generate embeddings on images

In [12]:
from bigframes.ml import llm
gemini = llm.GeminiTextGenerator()

default model will be removed in BigFrames 3.0. Please supply an
explicit model to avoid this message.
  return method(*args, **kwargs)


In [13]:
# Ask the same question on the images
df_image = df_image.head(2)
answer = gemini.predict(df_image, prompt=["what item is it?", df_image["image"]])
answer[["ml_generate_text_llm_result", "image"]]

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,ml_generate_text_llm_result,image
0,The item is a tin of K9 Guard dog paw balm.,
1,The item is K9 Guard Dog Hot Spot Spray.,


In [14]:
# Ask different questions
df_image["question"] = ["what item is it?", "what color is the picture?"]

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


In [15]:
answer_alt = gemini.predict(df_image, prompt=[df_image["question"], df_image["image"]])
answer_alt[["ml_generate_text_llm_result", "image"]]

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,ml_generate_text_llm_result,image
0,The item is a tin of K9Guard Dog Paw Balm.,
1,"The bottle is mostly white, with a light blue accents. The background is a light gray. There are also black and green elements on the bottle's label.",


In [16]:
# Generate embeddings.
embed_model = llm.MultimodalEmbeddingGenerator()
embeddings = embed_model.predict(df_image["image"])
embeddings

default model will be removed in BigFrames 3.0. Please supply an
explicit model to avoid this message.
  return method(*args, **kwargs)
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


Unnamed: 0,ml_generate_embedding_result,ml_generate_embedding_status,ml_generate_embedding_start_sec,ml_generate_embedding_end_sec,content
0,[ 0.00638842 0.01666344 0.00451782 ... -0.02...,,,,"{""access_urls"":{""expiry_time"":""2025-10-25T00:2..."
1,[ 0.00973689 0.02148374 0.00244311 ... 0.00...,,,,"{""access_urls"":{""expiry_time"":""2025-10-25T00:2..."


### 5. PDF chunking function

In [17]:
df_pdf = bpd.from_glob_path("gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf")

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


In [18]:
df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk(engine="pypdf")

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)
future version. Use `json_value_array` instead.
future version. Use `json_value_array` instead.


In [19]:
df_pdf["chunked_verbose"] = df_pdf["pdf"].blob.pdf_chunk(engine="pypdf", verbose=True)
df_pdf[["chunked_verbose"]]

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  return method(*args, **kwargs)
future version. Use `json_value_array` instead.


Unnamed: 0,chunked_verbose
0,"{'status': '', 'content': array([""CritterCuisi..."


In [20]:
chunked = df_pdf["chunked"].explode()
chunked

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


0    CritterCuisine Pro 5000 - Automatic Pet Feeder...
0    on a level, stable surface to prevent tipping....
0    included)\nto maintain the schedule during pow...
0    digits for Meal 1 will flash.\n . Use the UP/D...
0    paperclip) for 5\nseconds. This will reset all...
0    unit with a damp cloth. Do not immerse the bas...
0    continues,\ncontact customer support.\nE2: Foo...
Name: chunked, dtype: string

### 6. Audio transcribe function

In [21]:
audio_gcs_path = "gs://bigframes_blob_test/audio/*"
df = bpd.from_glob_path(audio_gcs_path, name="audio")

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


In [22]:
transcribed_series = df['audio'].blob.audio_transcribe(model_name="gemini-2.0-flash-001", verbose=False)
transcribed_series

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


0    Now, as all books, not primarily intended as p...
Name: transcribed_content, dtype: string

In [23]:
transcribed_series_verbose = df['audio'].blob.audio_transcribe(model_name="gemini-2.0-flash-001", verbose=True)
transcribed_series_verbose

instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.


0    {'status': '', 'content': 'Now, as all books, ...
Name: transcription_results, dtype: struct<status: string, content: string>[pyarrow]