Skip to content

Commit 994a22d

Browse files
authored
fix: cast JSON and nested struct columns to string for anywidget rendering (#17189)
This pull request fixes crashes when rendering DataFrames or Series that contain raw JSON columns or nested struct columns with JSON fields. These columns are now pre-serialized to flat JSON strings at the database level (via TO_JSON_STRING) before the display layer renders them. Verified at: screen/424ojbuqyBPinTb Fixes #514763826 🦕
1 parent 9700dfc commit 994a22d

6 files changed

Lines changed: 420 additions & 357 deletions

File tree

packages/bigframes/bigframes/dataframe.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -819,9 +819,25 @@ def __repr__(self) -> str:
819819
column_count=len(self.columns),
820820
)
821821

822-
def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
823-
"""Process ObjectRef columns for display. (Deprecated)"""
824-
return self, []
822+
def _get_display_df(self) -> DataFrame:
    """Return a DataFrame whose JSON-bearing columns are rendered as strings.

    Every column whose dtype is, or contains, the JSON dtype is replaced by
    the result of ``TO_JSON_STRING`` on that column. All other columns are
    left untouched.

    Returns:
        DataFrame: ``self`` when no column needs conversion, otherwise a
        new DataFrame with the JSON-bearing columns converted to strings.
    """
    # Arrow/Pandas to_pandas_batches does not support raw JSON/nested JSON
    # columns. Pre-serialize them to string format to bypass this limit.
    # The dtypes are read once here instead of building a Series per column
    # just to inspect its dtype.
    json_cols = [
        col
        for col, dtype in zip(self.columns, self.dtypes)
        if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype)
    ]
    if not json_cols:
        return self

    # Using TO_JSON_STRING via SqlScalarOp handles complex nested STRUCT
    # types correctly.
    op = ops.SqlScalarOp(
        _output_type=bigframes.dtypes.STRING_DTYPE,
        sql_template="TO_JSON_STRING({0})",
    )

    # Assign by label instead of ``assign(**{col: ...})``: keyword expansion
    # only accepts ``str`` keys, so an integer or tuple column label would
    # raise TypeError before anything is displayed.
    # NOTE(review): relies on copy() returning an independent frame so that
    # item assignment below does not mutate ``self`` - confirm.
    df = self.copy()
    for col in json_cols:
        df[col] = self[col]._apply_unary_op(op)
    return df
825841

826842
def _repr_mimebundle_(self, include=None, exclude=None):
827843
"""

packages/bigframes/bigframes/display/html.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import bigframes.formatting_helpers as formatter
3131
from bigframes._config import display_options, options
3232
from bigframes.display import plaintext
33+
from bigframes.series import Series
3334

3435
if typing.TYPE_CHECKING:
3536
import bigframes.dataframe
@@ -191,8 +192,6 @@ def create_html_representation(
191192
total_columns: int,
192193
) -> str:
193194
"""Create an HTML representation of the DataFrame or Series."""
194-
from bigframes.series import Series
195-
196195
opts = options.display
197196
with display_options.pandas_repr(opts):
198197
if isinstance(obj, Series):
@@ -217,8 +216,6 @@ def create_html_representation(
217216
def _get_obj_metadata(
218217
obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
219218
) -> tuple[bool, bool]:
220-
from bigframes.series import Series
221-
222219
is_series = isinstance(obj, Series)
223220
if is_series:
224221
has_index = len(obj._block.index_columns) > 0
@@ -237,12 +234,8 @@ def get_anywidget_bundle(
237234
This function encapsulates the logic for anywidget display.
238235
"""
239236
from bigframes import display
240-
from bigframes.series import Series
241237

242-
if isinstance(obj, Series):
243-
df = obj.to_frame()
244-
else:
245-
df, _ = obj._get_display_df_and_blob_cols()
238+
df = obj._get_display_df()
246239

247240
widget = display.TableWidget(df)
248241
widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
@@ -290,18 +283,11 @@ def repr_mimebundle_deferred(
290283
def repr_mimebundle_head(
291284
obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
292285
) -> dict[str, str]:
293-
from bigframes.series import Series
294-
295286
opts = options.display
296-
if isinstance(obj, Series):
297-
pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results(
298-
opts.max_rows
299-
)
300-
else:
301-
df, _ = obj._get_display_df_and_blob_cols()
302-
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
303-
opts.max_rows
304-
)
287+
df = obj._get_display_df()
288+
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
289+
opts.max_rows
290+
)
305291

306292
obj._set_internal_query_job(query_job)
307293
column_count = len(pandas_df.columns)

packages/bigframes/bigframes/series.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ def reset_index(
572572
block = block.assign_label(self._value_column, name)
573573
return bigframes.dataframe.DataFrame(block)
574574

575+
def _get_display_df(self) -> bigframes.dataframe.DataFrame:
    """Return the display-ready DataFrame for this Series.

    The Series is first converted to a DataFrame with ``to_frame()``, and
    the result of that frame's own ``_get_display_df()`` is returned.
    """
    as_frame = self.to_frame()
    return as_frame._get_display_df()
577+
575578
def _repr_mimebundle_(self, include=None, exclude=None):
576579
"""
577580
Custom display method for IPython/Jupyter environments.

0 commit comments

Comments
 (0)