From 8c3451266c28ec0da6dd57c4f9929ae68a593574 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 30 Oct 2025 21:46:08 +0000 Subject: [PATCH 01/15] Correctly display DataFrames with JSON columns in anywidget --- bigframes/core/blocks.py | 48 +++++++-- bigframes/dataframe.py | 2 - bigframes/session/executor.py | 34 +++++++ mypy.ini | 3 + notebooks/dataframes/anywidget_mode.ipynb | 119 ++++++++++++++++++++-- 5 files changed, 189 insertions(+), 17 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 1900b7208a..2dc9d7d898 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -43,6 +43,7 @@ import warnings import bigframes_vendored.constants as constants +import db_dtypes import google.cloud.bigquery as bigquery import numpy import pandas as pd @@ -134,6 +135,21 @@ class MaterializationOptions: ordered: bool = True +def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType: + """Recursively replace JSONArrowType with string type.""" + if isinstance(pa_type, db_dtypes.JSONArrowType): + return pa.string() + if isinstance(pa_type, pa.ListType): + return pa.list_(_replace_json_arrow_with_string(pa_type.value_type)) + if isinstance(pa_type, pa.StructType): + new_fields = [ + field.with_type(_replace_json_arrow_with_string(field.type)) + for field in pa_type + ] + return pa.struct(new_fields) + return pa_type + + class Block: """A immutable 2D data structure.""" @@ -715,12 +731,32 @@ def to_pandas_batches( # To reduce the number of edge cases to consider when working with the # results of this, always return at least one DataFrame. See: # b/428918844. - empty_val = pd.DataFrame( - { - col: pd.Series([], dtype=self.expr.get_column_type(col)) - for col in itertools.chain(self.value_columns, self.index_columns) - } - ) + series_map = {} + for col in itertools.chain(self.value_columns, self.index_columns): + dtype = self.expr.get_column_type(col) + if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype): + # Due to a limitation in Apache Arrow (#45262), JSON columns are not + # natively supported by the to_pandas_batches() method, which is + # used by the anywidget backend. + # Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + # PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType, + # especially when nested. + # Create with string type and then cast. + + # MyPy doesn't automatically narrow the type of 'dtype' here, + # so we add an explicit check. + if isinstance(dtype, pd.ArrowDtype): + safe_pa_type = _replace_json_arrow_with_string(dtype.pyarrow_dtype) + safe_dtype = pd.ArrowDtype(safe_pa_type) + series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype) + else: + # This branch should ideally not be reached if + # contains_db_dtypes_json_dtype is accurate, + # but it's here for MyPy's sake. 
+ series_map[col] = pd.Series([], dtype=dtype) + else: + series_map[col] = pd.Series([], dtype=dtype) + empty_val = pd.DataFrame(series_map) dfs = map( lambda a: a[0], itertools.zip_longest( diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index f016fddd83..c954c8eebc 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -783,8 +783,6 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution if opts.repr_mode in ("deferred", "anywidget"): return formatter.repr_query_job(self._compute_dry_run()) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index d0cfe5f4f7..97ad7f5bb8 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -52,6 +52,8 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: result_rows = 0 for batch in self._arrow_batches: + # Convert JSON columns to strings before casting + batch = self._convert_json_to_string(batch) batch = pyarrow_utils.cast_batch(batch, self.schema.to_pyarrow()) result_rows += batch.num_rows @@ -67,6 +69,38 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: yield batch + def _convert_json_to_string( + self, batch: pyarrow.RecordBatch + ) -> pyarrow.RecordBatch: + """Convert JSON arrow extension types to string to avoid PyArrow compatibility issues.""" + import logging + + new_arrays = [] + new_fields = [] + + for i, field in enumerate(batch.schema): + array = batch.column(i) + + # Check if this column should be JSON based on our schema + schema_item = next( + (item for item in self.schema.items if item.column == field.name), None + ) + + if schema_item and schema_item.dtype == bigframes.dtypes.JSON_DTYPE: + logging.info(f"Converting JSON column: {field.name}") + # Convert JSONArrowType to string + if array.type == bigframes.dtypes.JSON_ARROW_TYPE: + array = array.cast(pyarrow.string()) + new_fields.append(pyarrow.field(field.name, pyarrow.string())) + else: + new_fields.append(field) + + new_arrays.append(array) + + return pyarrow.RecordBatch.from_arrays( + new_arrays, schema=pyarrow.schema(new_fields) + ) + def to_arrow_table(self) -> pyarrow.Table: # Need to provide schema if no result rows, as arrow can't infer # If ther are rows, it is safest to infer schema from batches. diff --git a/mypy.ini b/mypy.ini index 7709eb200a..1fbca2498a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -44,3 +44,6 @@ ignore_missing_imports = True [mypy-anywidget] ignore_missing_imports = True + +[mypy-db_dtypes] +ignore_missing_imports = True diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index c2af915721..347f57566a 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -35,7 +35,16 @@ "execution_count": 2, "id": "ca22f059", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). 
Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n", + " warnings.warn(message, FutureWarning)\n" + ] + } + ], "source": [ "import bigframes.pandas as bpd" ] @@ -142,9 +151,9 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aafd4f912b5f42e0896aa5f0c2c62620", + "model_id": "473b016aa6b24c86aafc6372352e822d", "version_major": 2, - "version_minor": 0 + "version_minor": 1 }, "text/plain": [ "TableWidget(page_size=10, row_count=5552452, table_html='" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a6d61e48cca642b7a57e6431359b4cc4", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "TableWidget(page_size=10, row_count=5, table_html='
(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", + " connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", + " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", + " *\n", + " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", + " LIMIT 5;\n", + "\"\"\")" + ] } ], "metadata": { From 05e9b6955125b051c2024bff274d5c2eaaf8e24b Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 30 Oct 2025 23:55:53 +0000 Subject: [PATCH 02/15] Improve JSON type handling for to_gbq and to_pandas_batches --- bigframes/core/blocks.py | 10 +++- bigframes/dtypes.py | 15 +++++ tests/system/small/test_dataframe_io.py | 77 +++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 1 deletion(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 2dc9d7d898..b21b122134 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -731,6 +731,12 @@ def to_pandas_batches( # To reduce the number of edge cases to consider when working with the # results of this, always return at least one DataFrame. See: # b/428918844. + empty_val = pd.DataFrame( + { + col: pd.Series([], dtype=self.expr.get_column_type(col)) + for col in itertools.chain(self.value_columns, self.index_columns) + } + ) series_map = {} for col in itertools.chain(self.value_columns, self.index_columns): dtype = self.expr.get_column_type(col) @@ -746,7 +752,9 @@ def to_pandas_batches( # MyPy doesn't automatically narrow the type of 'dtype' here, # so we add an explicit check. if isinstance(dtype, pd.ArrowDtype): - safe_pa_type = _replace_json_arrow_with_string(dtype.pyarrow_dtype) + safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string( + dtype.pyarrow_dtype + ) safe_dtype = pd.ArrowDtype(safe_pa_type) series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype) else: diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 6c05b6f4a3..2a7db7f86e 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -954,6 +954,21 @@ def contains_db_dtypes_json_dtype(dtype): return contains_db_dtypes_json_arrow_type(dtype.pyarrow_dtype) +def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType: + """Recursively replace JSONArrowType with string type.""" + if isinstance(pa_type, db_dtypes.JSONArrowType): + return pa.string() + if isinstance(pa_type, pa.ListType): + return pa.list_(_replace_json_arrow_with_string(pa_type.value_type)) + if isinstance(pa_type, pa.StructType): + new_fields = [ + field.with_type(_replace_json_arrow_with_string(field.type)) + for field in pa_type + ] + return pa.struct(new_fields) + return pa_type + + def warn_on_db_dtypes_json_dtype(dtypes): """Warn that the JSON dtype is changing. diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 96d7881d67..400af791e8 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -376,6 +376,83 @@ def test_to_pandas_batches_w_empty_dataframe(session): pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes) +def test_to_pandas_batches_w_empty_dataframe_json_in_list(session): + """Tests to_pandas_batches() with an empty DataFrame containing a list of JSON. 
+ + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + """ + import db_dtypes + + json_list_dtype = pd.ArrowDtype(pa.list_(db_dtypes.JSONArrowType())) + empty_df_with_json_list = bpd.DataFrame( + { + "idx": pd.Series([], dtype="Int64"), + "json_list_col": pd.Series([], dtype=json_list_dtype), + }, + session=session, + ).set_index("idx", drop=True) + + results = list(empty_df_with_json_list.to_pandas_batches()) + + assert len(results) == 1 + assert list(results[0].columns) == ["json_list_col"] + assert results[0].dtypes["json_list_col"] == json_list_dtype + assert len(results[0]) == 0 + + +# --- Behavior 2: JSON in Struct --- + + +def test_to_pandas_batches_w_empty_dataframe_json_in_struct(session): + """Tests to_pandas_batches() with an empty DataFrame containing a struct of JSON. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + """ + import db_dtypes + + json_struct_dtype = pd.ArrowDtype( + pa.struct([("json_field", db_dtypes.JSONArrowType())]) + ) + empty_df_with_json_struct = bpd.DataFrame( + { + "idx": pd.Series([], dtype="Int64"), + "json_struct_col": pd.Series([], dtype=json_struct_dtype), + }, + session=session, + ).set_index("idx", drop=True) + + results = list(empty_df_with_json_struct.to_pandas_batches()) + + assert len(results) == 1 + assert list(results[0].columns) == ["json_struct_col"] + assert results[0].dtypes["json_struct_col"] == json_struct_dtype + assert len(results[0]) == 0 + + +# --- Behavior 3: Simple JSON --- + + +def test_to_pandas_batches_w_empty_dataframe_simple_json(session): + """Tests to_pandas_batches() with an empty DataFrame containing a simple JSON column. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + """ + empty_df_with_json = bpd.DataFrame( + { + "idx": pd.Series([], dtype="Int64"), + "json_col": pd.Series([], dtype=dtypes.JSON_DTYPE), + }, + session=session, + ).set_index("idx", drop=True) + + results = list(empty_df_with_json.to_pandas_batches()) + + assert len(results) == 1 + assert list(results[0].columns) == ["json_col"] + assert results[0].dtypes["json_col"] == dtypes.JSON_DTYPE + assert len(results[0]) == 0 + + @pytest.mark.parametrize("allow_large_results", (True, False)) def test_to_pandas_batches_w_page_size_and_max_results(session, allow_large_results): """Verify to_pandas_batches() APIs returns the expected page size. From aa04bac44924009f5526067995c15c900c696dfa Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 00:08:53 +0000 Subject: [PATCH 03/15] Revert "Correctly display DataFrames with JSON columns in anywidget" This reverts commit 8c3451266c28ec0da6dd57c4f9929ae68a593574. 
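
The executor-level JSON-to-string conversion is no longer needed: the JSON
handling kept in bigframes/core/blocks.py (updated in the previous commit to
use bigframes.dtypes._replace_json_arrow_with_string) already covers empty
JSON columns in to_pandas_batches(). A minimal sketch of that retained
workaround, assuming a nested JSON dtype such as list<JSON> (this mirrors the
blocks.py code rather than quoting it):

    import db_dtypes
    import pandas as pd
    import pyarrow as pa

    # The JSON-bearing dtype pyarrow cannot build an empty array for directly.
    json_list = pd.ArrowDtype(pa.list_(db_dtypes.JSONArrowType()))
    # Swap JSONArrowType for its string storage type, build empty, cast back.
    safe = pd.ArrowDtype(pa.list_(pa.string()))
    empty = pd.Series([], dtype=safe).astype(json_list)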
--- bigframes/core/blocks.py | 16 --- bigframes/dataframe.py | 2 + bigframes/session/executor.py | 34 ------- mypy.ini | 3 - notebooks/dataframes/anywidget_mode.ipynb | 119 ++-------------------- 5 files changed, 11 insertions(+), 163 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index b21b122134..3c2b45d193 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -43,7 +43,6 @@ import warnings import bigframes_vendored.constants as constants -import db_dtypes import google.cloud.bigquery as bigquery import numpy import pandas as pd @@ -135,21 +134,6 @@ class MaterializationOptions: ordered: bool = True -def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType: - """Recursively replace JSONArrowType with string type.""" - if isinstance(pa_type, db_dtypes.JSONArrowType): - return pa.string() - if isinstance(pa_type, pa.ListType): - return pa.list_(_replace_json_arrow_with_string(pa_type.value_type)) - if isinstance(pa_type, pa.StructType): - new_fields = [ - field.with_type(_replace_json_arrow_with_string(field.type)) - for field in pa_type - ] - return pa.struct(new_fields) - return pa_type - - class Block: """A immutable 2D data structure.""" diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index c954c8eebc..f016fddd83 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -783,6 +783,8 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows + # anywdiget mode uses the same display logic as the "deferred" mode + # for faster execution if opts.repr_mode in ("deferred", "anywidget"): return formatter.repr_query_job(self._compute_dry_run()) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 97ad7f5bb8..d0cfe5f4f7 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -52,8 +52,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: result_rows = 0 for batch in self._arrow_batches: - # Convert JSON columns to strings before casting - batch = self._convert_json_to_string(batch) batch = pyarrow_utils.cast_batch(batch, self.schema.to_pyarrow()) result_rows += batch.num_rows @@ -69,38 +67,6 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: yield batch - def _convert_json_to_string( - self, batch: pyarrow.RecordBatch - ) -> pyarrow.RecordBatch: - """Convert JSON arrow extension types to string to avoid PyArrow compatibility issues.""" - import logging - - new_arrays = [] - new_fields = [] - - for i, field in enumerate(batch.schema): - array = batch.column(i) - - # Check if this column should be JSON based on our schema - schema_item = next( - (item for item in self.schema.items if item.column == field.name), None - ) - - if schema_item and schema_item.dtype == bigframes.dtypes.JSON_DTYPE: - logging.info(f"Converting JSON column: {field.name}") - # Convert JSONArrowType to string - if array.type == bigframes.dtypes.JSON_ARROW_TYPE: - array = array.cast(pyarrow.string()) - new_fields.append(pyarrow.field(field.name, pyarrow.string())) - else: - new_fields.append(field) - - new_arrays.append(array) - - return pyarrow.RecordBatch.from_arrays( - new_arrays, schema=pyarrow.schema(new_fields) - ) - def to_arrow_table(self) -> pyarrow.Table: # Need to provide schema if no result rows, as arrow can't infer # If ther are rows, it is safest to infer schema from batches. 
diff --git a/mypy.ini b/mypy.ini index 1fbca2498a..7709eb200a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -44,6 +44,3 @@ ignore_missing_imports = True [mypy-anywidget] ignore_missing_imports = True - -[mypy-db_dtypes] -ignore_missing_imports = True diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 347f57566a..c2af915721 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -35,16 +35,7 @@ "execution_count": 2, "id": "ca22f059", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n", - " warnings.warn(message, FutureWarning)\n" - ] - } - ], + "outputs": [], "source": [ "import bigframes.pandas as bpd" ] @@ -151,9 +142,9 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "473b016aa6b24c86aafc6372352e822d", + "model_id": "aafd4f912b5f42e0896aa5f0c2c62620", "version_major": 2, - "version_minor": 1 + "version_minor": 0 }, "text/plain": [ "TableWidget(page_size=10, row_count=5552452, table_html='
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a6d61e48cca642b7a57e6431359b4cc4", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "TableWidget(page_size=10, row_count=5, table_html='
(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", - " connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", - " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", - " *\n", - " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", - " LIMIT 5;\n", - "\"\"\")" - ] } ], "metadata": { From 592e43b128ffdf58c133e904afcde1172b69ef52 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 00:10:49 +0000 Subject: [PATCH 04/15] Remove unnecessary comment --- tests/system/small/test_dataframe_io.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 400af791e8..944fd27e6c 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -400,9 +400,6 @@ def test_to_pandas_batches_w_empty_dataframe_json_in_list(session): assert len(results[0]) == 0 -# --- Behavior 2: JSON in Struct --- - - def test_to_pandas_batches_w_empty_dataframe_json_in_struct(session): """Tests to_pandas_batches() with an empty DataFrame containing a struct of JSON. @@ -429,9 +426,6 @@ def test_to_pandas_batches_w_empty_dataframe_json_in_struct(session): assert len(results[0]) == 0 -# --- Behavior 3: Simple JSON --- - - def test_to_pandas_batches_w_empty_dataframe_simple_json(session): """Tests to_pandas_batches() with an empty DataFrame containing a simple JSON column. From 5955bfe0a6a435894a9eaa08331f59932d5aef08 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 19:55:11 +0000 Subject: [PATCH 05/15] code refactor --- bigframes/core/blocks.py | 17 +++++------------ bigframes/session/loader.py | 23 +++-------------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 40dff1c2a8..a5e5f270c1 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -720,17 +720,12 @@ def to_pandas_batches( series_map = {} for col in itertools.chain(self.value_columns, self.index_columns): dtype = self.expr.get_column_type(col) - if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype): - # Due to a limitation in Apache Arrow (#45262), JSON columns are not - # natively supported by the to_pandas_batches() method, which is - # used by the anywidget backend. - # Workaround for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 - # PyArrow doesn't support creating an empty array with db_dtypes.JSONArrowType, - # especially when nested. + try: + series_map[col] = pd.Series([], dtype=dtype) + except pa.ArrowNotImplementedError: + # PyArrow doesn't support creating an empty array with + # db_dtypes.JSONArrowType, especially when nested. # Create with string type and then cast. - - # MyPy doesn't automatically narrow the type of 'dtype' here, - # so we add an explicit check. if isinstance(dtype, pd.ArrowDtype): safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string( dtype.pyarrow_dtype @@ -742,8 +737,6 @@ def to_pandas_batches( # contains_db_dtypes_json_dtype is accurate, # but it's here for MyPy's sake. 
series_map[col] = pd.Series([], dtype=dtype) - else: - series_map[col] = pd.Series([], dtype=dtype) empty_val = pd.DataFrame(series_map) dfs = map( lambda a: a[0], diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 6b16fe6bfd..62be2666ef 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -45,7 +45,6 @@ import google.cloud.bigquery.table from google.cloud.bigquery_storage_v1 import types as bq_storage_types import pandas -import pyarrow as pa import bigframes._tools import bigframes._tools.strings @@ -1307,22 +1306,6 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict: return configuration -def _has_json_arrow_type(arrow_type: pa.DataType) -> bool: - """ - Searches recursively for JSON array type within a PyArrow DataType. - """ - if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE: - return True - if pa.types.is_list(arrow_type): - return _has_json_arrow_type(arrow_type.value_type) - if pa.types.is_struct(arrow_type): - for i in range(arrow_type.num_fields): - if _has_json_arrow_type(arrow_type.field(i).type): - return True - return False - return False - - def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype): """ Determines whether a datatype is supported by bq load jobs. @@ -1339,9 +1322,9 @@ def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype): if column_type == bigframes.dtypes.JSON_DTYPE: return - if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type( - column_type.pyarrow_dtype - ): + if isinstance( + column_type, pandas.ArrowDtype + ) and bigframes.dtypes.contains_db_dtypes_json_dtype(column_type): raise NotImplementedError( f"Nested JSON types, found in column `{name}`: `{column_type}`', " f"are currently unsupported for upload. {constants.FEEDBACK_LINK}" From d07ba7e68f867ab13bd70451a5b59965530b8000 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 20:31:09 +0000 Subject: [PATCH 06/15] testcase update --- bigframes/core/blocks.py | 12 +--- tests/system/small/test_dataframe_io.py | 93 ++++++++++--------------- 2 files changed, 37 insertions(+), 68 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index a5e5f270c1..45daebf078 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -711,12 +711,6 @@ def to_pandas_batches( # To reduce the number of edge cases to consider when working with the # results of this, always return at least one DataFrame. See: # b/428918844. - empty_val = pd.DataFrame( - { - col: pd.Series([], dtype=self.expr.get_column_type(col)) - for col in itertools.chain(self.value_columns, self.index_columns) - } - ) series_map = {} for col in itertools.chain(self.value_columns, self.index_columns): dtype = self.expr.get_column_type(col) @@ -733,10 +727,8 @@ def to_pandas_batches( safe_dtype = pd.ArrowDtype(safe_pa_type) series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype) else: - # This branch should ideally not be reached if - # contains_db_dtypes_json_dtype is accurate, - # but it's here for MyPy's sake. 
- series_map[col] = pd.Series([], dtype=dtype) + # Fallback for other types that might error + series_map[col] = pd.Series([], dtype="object").astype(dtype) empty_val = pd.DataFrame(series_map) dfs = map( lambda a: a[0], diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 944fd27e6c..bb9a001606 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -376,75 +376,52 @@ def test_to_pandas_batches_w_empty_dataframe(session): pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes) -def test_to_pandas_batches_w_empty_dataframe_json_in_list(session): - """Tests to_pandas_batches() with an empty DataFrame containing a list of JSON. - - Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 +def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session): + """Verifies to_pandas_batches() preserves dtypes for nested JSON.""" + # This SQL query only tests the POPULATED case. + sql = """ + SELECT + 0 AS id, + [JSON '{"a":1}', JSON '{"b":2}'] AS json_array, + STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct """ - import db_dtypes + df = session.read_gbq(sql, index_col="id") - json_list_dtype = pd.ArrowDtype(pa.list_(db_dtypes.JSONArrowType())) - empty_df_with_json_list = bpd.DataFrame( - { - "idx": pd.Series([], dtype="Int64"), - "json_list_col": pd.Series([], dtype=json_list_dtype), - }, - session=session, - ).set_index("idx", drop=True) + batches = list(df.to_pandas_batches()) - results = list(empty_df_with_json_list.to_pandas_batches()) + # Check that we processed the row + assert sum(len(b) for b in batches) == 1 - assert len(results) == 1 - assert list(results[0].columns) == ["json_list_col"] - assert results[0].dtypes["json_list_col"] == json_list_dtype - assert len(results[0]) == 0 + # Check dtypes on the resulting batch + assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) + assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType) + assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype) + assert isinstance(batches[0].dtypes["json_struct"].pyarrow_dtype, pa.StructType) -def test_to_pandas_batches_w_empty_dataframe_json_in_struct(session): - """Tests to_pandas_batches() with an empty DataFrame containing a struct of JSON. +def test_to_pandas_batches_should_not_error_on_empty_nested_json(session): + """Verify to_pandas_batches() works with empty nested JSON types. - Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + Regression test for PyArrow limitation with empty JSON arrays. """ - import db_dtypes - - json_struct_dtype = pd.ArrowDtype( - pa.struct([("json_field", db_dtypes.JSONArrowType())]) - ) - empty_df_with_json_struct = bpd.DataFrame( - { - "idx": pd.Series([], dtype="Int64"), - "json_struct_col": pd.Series([], dtype=json_struct_dtype), - }, - session=session, - ).set_index("idx", drop=True) - - results = list(empty_df_with_json_struct.to_pandas_batches()) - - assert len(results) == 1 - assert list(results[0].columns) == ["json_struct_col"] - assert results[0].dtypes["json_struct_col"] == json_struct_dtype - assert len(results[0]) == 0 - - -def test_to_pandas_batches_w_empty_dataframe_simple_json(session): - """Tests to_pandas_batches() with an empty DataFrame containing a simple JSON column. 
- - Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/1273 + # This SQL query is MINIMAL and tests only the EMPTY regression case. + sql = """ + SELECT + 1 AS id, + [] AS json_array, + STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct """ - empty_df_with_json = bpd.DataFrame( - { - "idx": pd.Series([], dtype="Int64"), - "json_col": pd.Series([], dtype=dtypes.JSON_DTYPE), - }, - session=session, - ).set_index("idx", drop=True) + df = session.read_gbq(sql, index_col="id") - results = list(empty_df_with_json.to_pandas_batches()) + # The main point of this test is that this line does not raise an error. + batches = list(df.to_pandas_batches()) - assert len(results) == 1 - assert list(results[0].columns) == ["json_col"] - assert results[0].dtypes["json_col"] == dtypes.JSON_DTYPE - assert len(results[0]) == 0 + # Verify the row was actually processed and not just skipped + assert sum(len(b) for b in batches) == 1 + + # Verify dtypes are still correct, even with empty data + assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) + assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype) @pytest.mark.parametrize("allow_large_results", (True, False)) From d7455a65ff016f507677c2a32df73c7941537890 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 21:27:20 +0000 Subject: [PATCH 07/15] Fix testcase --- bigframes/core/blocks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 45daebf078..ca6d7760c0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -724,8 +724,12 @@ def to_pandas_batches( safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string( dtype.pyarrow_dtype ) - safe_dtype = pd.ArrowDtype(safe_pa_type) - series_map[col] = pd.Series([], dtype=safe_dtype).astype(dtype) + # Create empty array with safe type, but preserve original dtype metadata + empty_array = pa.array([], type=safe_pa_type) + series_map[col] = pd.Series( + empty_array, + dtype=dtype, # Use original dtype directly + ) else: # Fallback for other types that might error series_map[col] = pd.Series([], dtype="object").astype(dtype) From 12e2a6387e9e6f32225656c8eff468886c721c1d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 23:26:07 +0000 Subject: [PATCH 08/15] function call updated in bigframes/core/blocks.py, unused function removed from bigframes/dtypes.py --- bigframes/core/blocks.py | 4 +--- bigframes/dtypes.py | 15 --------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index ca6d7760c0..817e60cce8 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -721,9 +721,7 @@ def to_pandas_batches( # db_dtypes.JSONArrowType, especially when nested. # Create with string type and then cast. 
                if isinstance(dtype, pd.ArrowDtype):
-                    safe_pa_type = bigframes.dtypes._replace_json_arrow_with_string(
-                        dtype.pyarrow_dtype
-                    )
+                    safe_pa_type = bigframes.dtypes.to_storage_type(dtype.pyarrow_dtype)
                     # Create empty array with safe type, but preserve original dtype metadata
                     empty_array = pa.array([], type=safe_pa_type)
                     series_map[col] = pd.Series(
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 37a7c150ca..29e1be1ace 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -972,21 +972,6 @@ def contains_db_dtypes_json_dtype(dtype):
     return contains_db_dtypes_json_arrow_type(dtype.pyarrow_dtype)
 
 
-def _replace_json_arrow_with_string(pa_type: pa.DataType) -> pa.DataType:
-    """Recursively replace JSONArrowType with string type."""
-    if isinstance(pa_type, db_dtypes.JSONArrowType):
-        return pa.string()
-    if isinstance(pa_type, pa.ListType):
-        return pa.list_(_replace_json_arrow_with_string(pa_type.value_type))
-    if isinstance(pa_type, pa.StructType):
-        new_fields = [
-            field.with_type(_replace_json_arrow_with_string(field.type))
-            for field in pa_type
-        ]
-        return pa.struct(new_fields)
-    return pa_type
-
-
 def warn_on_db_dtypes_json_dtype(dtypes):
     """Warn that the JSON dtype is changing.
 

From 393a2f9b64173e578302ffca4841152a6b0f1a30 Mon Sep 17 00:00:00 2001
From: Shuowei Li
Date: Fri, 31 Oct 2025 23:31:02 +0000
Subject: [PATCH 09/15] Revert the code refactor in loader.py; I will use a
 separate PR for this refactor

---
 bigframes/session/loader.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
index 62be2666ef..6b16fe6bfd 100644
--- a/bigframes/session/loader.py
+++ b/bigframes/session/loader.py
@@ -45,6 +45,7 @@
 import google.cloud.bigquery.table
 from google.cloud.bigquery_storage_v1 import types as bq_storage_types
 import pandas
+import pyarrow as pa
 
 import bigframes._tools
 import bigframes._tools.strings
@@ -1306,6 +1307,22 @@ def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict:
     return configuration
 
 
+def _has_json_arrow_type(arrow_type: pa.DataType) -> bool:
+    """
+    Searches recursively for JSON array type within a PyArrow DataType.
+    """
+    if arrow_type == bigframes.dtypes.JSON_ARROW_TYPE:
+        return True
+    if pa.types.is_list(arrow_type):
+        return _has_json_arrow_type(arrow_type.value_type)
+    if pa.types.is_struct(arrow_type):
+        for i in range(arrow_type.num_fields):
+            if _has_json_arrow_type(arrow_type.field(i).type):
+                return True
+        return False
+    return False
+
+
 def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     """
     Determines whether a datatype is supported by bq load jobs.
@@ -1322,9 +1339,9 @@ def _validate_dtype_can_load(name: str, column_type: bigframes.dtypes.Dtype):
     if column_type == bigframes.dtypes.JSON_DTYPE:
         return
 
-    if isinstance(
-        column_type, pandas.ArrowDtype
-    ) and bigframes.dtypes.contains_db_dtypes_json_dtype(column_type):
+    if isinstance(column_type, pandas.ArrowDtype) and _has_json_arrow_type(
+        column_type.pyarrow_dtype
+    ):
         raise NotImplementedError(
             f"Nested JSON types, found in column `{name}`: `{column_type}`', "
             f"are currently unsupported for upload. 
{constants.FEEDBACK_LINK}" From 2ff0108197e0e07953d8a1e4d13cf1fbfaa4afb7 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 31 Oct 2025 23:37:57 +0000 Subject: [PATCH 10/15] replace the manual construction of the empty DataFrame with the more robust try...except block that leverages to_pyarrow and empty_table --- bigframes/core/blocks.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 817e60cce8..1eac176c32 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -711,27 +711,14 @@ def to_pandas_batches( # To reduce the number of edge cases to consider when working with the # results of this, always return at least one DataFrame. See: # b/428918844. - series_map = {} - for col in itertools.chain(self.value_columns, self.index_columns): - dtype = self.expr.get_column_type(col) - try: - series_map[col] = pd.Series([], dtype=dtype) - except pa.ArrowNotImplementedError: - # PyArrow doesn't support creating an empty array with - # db_dtypes.JSONArrowType, especially when nested. - # Create with string type and then cast. - if isinstance(dtype, pd.ArrowDtype): - safe_pa_type = bigframes.dtypes.to_storage_type(dtype.pyarrow_dtype) - # Create empty array with safe type, but preserve original dtype metadata - empty_array = pa.array([], type=safe_pa_type) - series_map[col] = pd.Series( - empty_array, - dtype=dtype, # Use original dtype directly - ) - else: - # Fallback for other types that might error - series_map[col] = pd.Series([], dtype="object").astype(dtype) - empty_val = pd.DataFrame(series_map) + try: + empty_arrow_table = self.expr.schema.to_pyarrow().empty_table() + except pa.ArrowNotImplementedError: + # Bug with some pyarrow versions, empty_table only supports base storage types, not extension types. 
+ empty_arrow_table = self.expr.schema.to_pyarrow( + use_storage_types=True + ).empty_table() + empty_val = empty_arrow_table.to_pandas() dfs = map( lambda a: a[0], itertools.zip_longest( From 512e3a186f2491bdb393d3e5dacee7fee603b3b6 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 1 Nov 2025 00:20:00 +0000 Subject: [PATCH 11/15] fix testcase --- tests/system/small/test_dataframe_io.py | 32 +++++++++++++------------ 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index bb9a001606..c519636b0c 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -386,25 +386,25 @@ def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session): STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct """ df = session.read_gbq(sql, index_col="id") - batches = list(df.to_pandas_batches()) - # Check that we processed the row assert sum(len(b) for b in batches) == 1 - # Check dtypes on the resulting batch - assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) - assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType) + # Check dtypes based on pandas version + if bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: + assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) + assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType) + else: + # In pandas 1.x, list types become object dtype + assert batches[0].dtypes["json_array"] == "object" + + # Struct types work in both pandas versions assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype) assert isinstance(batches[0].dtypes["json_struct"].pyarrow_dtype, pa.StructType) def test_to_pandas_batches_should_not_error_on_empty_nested_json(session): - """Verify to_pandas_batches() works with empty nested JSON types. - - Regression test for PyArrow limitation with empty JSON arrays. - """ - # This SQL query is MINIMAL and tests only the EMPTY regression case. + """Verify to_pandas_batches() works with empty nested JSON types.""" sql = """ SELECT 1 AS id, @@ -413,14 +413,16 @@ def test_to_pandas_batches_should_not_error_on_empty_nested_json(session): """ df = session.read_gbq(sql, index_col="id") - # The main point of this test is that this line does not raise an error. 
+    # The main point: this should not raise an error
     batches = list(df.to_pandas_batches())
-
-    # Verify the row was actually processed and not just skipped
     assert sum(len(b) for b in batches) == 1
 
-    # Verify dtypes are still correct, even with empty data
-    assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
+    # Check dtypes based on pandas version
+    if bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
+        assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
+    else:
+        assert batches[0].dtypes["json_array"] == "object"
+
     assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
 

From 5f5881b167c70867c482e13b7e13834cd775c544 Mon Sep 17 00:00:00 2001
From: Shuowei Li
Date: Sat, 1 Nov 2025 00:51:04 +0000
Subject: [PATCH 12/15] Use existing arrow_to_pandas() helper that properly
 handles dtype conversion

---
 bigframes/core/blocks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 1eac176c32..a70ea63c4d 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -68,6 +68,7 @@
 import bigframes.operations.aggregations as agg_ops
 from bigframes.session import dry_runs, execution_spec
 from bigframes.session import executor as executors
+from bigframes.session._io import pandas as io_pandas
 
 # Type constraint for wherever column labels are used
 Label = typing.Hashable
@@ -718,7 +719,7 @@ def to_pandas_batches(
             empty_arrow_table = self.expr.schema.to_pyarrow(
                 use_storage_types=True
             ).empty_table()
-        empty_val = empty_arrow_table.to_pandas()
+        empty_val = io_pandas.arrow_to_pandas(empty_arrow_table, self.expr.schema)
         dfs = map(
             lambda a: a[0],
             itertools.zip_longest(

From 3119771622f7a946da6b067a3f5ab23ba8f2b143 Mon Sep 17 00:00:00 2001
From: Shuowei Li
Date: Sat, 1 Nov 2025 01:00:06 +0000
Subject: [PATCH 13/15] testcase update

---
 tests/system/small/test_dataframe_io.py | 40 +++++++++++--------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index c519636b0c..699fd2d056 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -378,7 +378,7 @@ def test_to_pandas_batches_w_empty_dataframe(session):
 
 def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session):
     """Verifies to_pandas_batches() preserves dtypes for nested JSON."""
-    # This SQL query only tests the POPULATED case.
+
     sql = """
         SELECT
             0 AS id,
             [JSON '{"a":1}', JSON '{"b":2}'] AS json_array,
             STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct
     """
     df = session.read_gbq(sql, index_col="id")
-    batches = list(df.to_pandas_batches())
-
-    assert sum(len(b) for b in batches) == 1
 
-    # Check dtypes based on pandas version
-    if bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
-        assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
-        assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType)
-    else:
-        # In pandas 1.x, list types become object dtype
-        assert batches[0].dtypes["json_array"] == "object"
+    batches = list(df.to_pandas_batches())
 
-    # Struct types work in both pandas versions
-    assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
-    assert isinstance(batches[0].dtypes["json_struct"].pyarrow_dtype, pa.StructType)
+    # Focuses only on the "preserves dtypes" behavior. 
+ # This implicitly checks that at least one batch was produced. + pd.testing.assert_series_equal( + batches[0].dtypes, + df.dtypes, + check_dtype=bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable, + ) def test_to_pandas_batches_should_not_error_on_empty_nested_json(session): """Verify to_pandas_batches() works with empty nested JSON types.""" + sql = """ SELECT 1 AS id, @@ -413,17 +409,15 @@ def test_to_pandas_batches_should_not_error_on_empty_nested_json(session): """ df = session.read_gbq(sql, index_col="id") - # The main point: this should not raise an error + # Verify that this line does not raise an error. batches = list(df.to_pandas_batches()) - assert sum(len(b) for b in batches) == 1 - - # Check dtypes based on pandas version - if bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: - assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) - else: - assert batches[0].dtypes["json_array"] == "object" - assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype) + # Verify the resulting dtypes are correct for the empty/null data + pd.testing.assert_series_equal( + batches[0].dtypes, + df.dtypes, + check_dtype=bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable, + ) @pytest.mark.parametrize("allow_large_results", (True, False)) From be1dea4ab8f414bdbe4b6ba8d6f0a8ba0f639db5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 1 Nov 2025 20:01:01 +0000 Subject: [PATCH 14/15] refactor testcase --- tests/system/small/test_dataframe_io.py | 78 +++++++++++++++++++------ 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 699fd2d056..4d4a144d0a 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -376,9 +376,31 @@ def test_to_pandas_batches_w_empty_dataframe(session): pandas.testing.assert_series_equal(results[0].dtypes, empty.dtypes) -def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session): - """Verifies to_pandas_batches() preserves dtypes for nested JSON.""" +@pytest.mark.skipif( + bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable, + reason="Test for pandas 1.x behavior only", +) +def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas1(session): + """Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 1.x.""" + sql = """ + SELECT + 0 AS id, + [JSON '{"a":1}', JSON '{"b":2}'] AS json_array, + STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct + """ + df = session.read_gbq(sql, index_col="id") + batches = list(df.to_pandas_batches()) + + assert batches[0].dtypes["json_array"] == "object" + assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype) + +@pytest.mark.skipif( + not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable, + reason="Test for pandas 2.x behavior only", +) +def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json_pandas2(session): + """Verifies to_pandas_batches() preserves dtypes for nested JSON in pandas 2.x.""" sql = """ SELECT 0 AS id, @@ -386,20 +408,42 @@ def test_to_pandas_batches_preserves_dtypes_for_populated_nested_json(session): STRUCT(JSON '{"x":1}' AS json_field, 'test' AS str_field) AS json_struct """ df = session.read_gbq(sql, index_col="id") + batches = list(df.to_pandas_batches()) + + assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype) + assert isinstance(batches[0].dtypes["json_array"].pyarrow_dtype, pa.ListType) + 
assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
+
+
+@pytest.mark.skipif(
+    bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
+    reason="Test for pandas 1.x behavior only",
+)
+def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas1(session):
+    """Verify to_pandas_batches() works with empty nested JSON types in pandas 1.x."""
+
+    sql = """
+        SELECT
+            1 AS id,
+            [] AS json_array,
+            STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct
+    """
+    df = session.read_gbq(sql, index_col="id")
+
+    # The main point: this should not raise an error
+    batches = list(df.to_pandas_batches())
+    assert sum(len(b) for b in batches) == 1
+
+    assert batches[0].dtypes["json_array"] == "object"
+    assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
+
+
+@pytest.mark.skipif(
+    not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
+    reason="Test for pandas 2.x behavior only",
+)
+def test_to_pandas_batches_should_not_error_on_empty_nested_json_pandas2(session):
+    """Verify to_pandas_batches() works with empty nested JSON types in pandas 2.x."""
 
     sql = """
         SELECT
             1 AS id,
             [] AS json_array,
             STRUCT(NULL AS json_field, 'test2' AS str_field) AS json_struct
     """
     df = session.read_gbq(sql, index_col="id")
 
-    # Verify that this line does not raise an error.
+    # The main point: this should not raise an error
     batches = list(df.to_pandas_batches())
+    assert sum(len(b) for b in batches) == 1
 
-    # Verify the resulting dtypes are correct for the empty/null data
-    pd.testing.assert_series_equal(
-        batches[0].dtypes,
-        df.dtypes,
-        check_dtype=bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable,
-    )
+    assert isinstance(batches[0].dtypes["json_array"], pd.ArrowDtype)
+    assert isinstance(batches[0].dtypes["json_struct"], pd.ArrowDtype)
+    assert isinstance(batches[0].dtypes["json_struct"].pyarrow_dtype, pa.StructType)
 
 
 @pytest.mark.parametrize("allow_large_results", (True, False))

From 5ed42933457e0368b24a3bfc2fd83588a4a7d123 Mon Sep 17 00:00:00 2001
From: Shuowei Li
Date: Mon, 3 Nov 2025 21:30:07 +0000
Subject: [PATCH 15/15] Add pyarrow issue link to comments

---
 bigframes/core/blocks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index a70ea63c4d..61aaab1120 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -715,7 +715,8 @@ def to_pandas_batches(
         try:
             empty_arrow_table = self.expr.schema.to_pyarrow().empty_table()
         except pa.ArrowNotImplementedError:
-            # Bug with some pyarrow versions, empty_table only supports base storage types, not extension types.
+            # Bug with some pyarrow versions (https://github.com/apache/arrow/issues/45262):
+            # empty_table only supports base storage types, not extension types.
             empty_arrow_table = self.expr.schema.to_pyarrow(
                 use_storage_types=True
             ).empty_table()