From c54f6c0ddf5e376db30c69b9d8e3dbfe89655ffd Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 11 Nov 2025 21:50:56 +0000 Subject: [PATCH 1/2] feat: pivot_table supports fill_value arg --- bigframes/core/reshape/pivot.py | 5 ++-- bigframes/dataframe.py | 8 ++--- tests/system/small/test_dataframe.py | 29 ++++++++++++++----- .../bigframes_vendored/pandas/core/frame.py | 4 +++ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/bigframes/core/reshape/pivot.py b/bigframes/core/reshape/pivot.py index 8b83cb0fc7..c69c7f11ab 100644 --- a/bigframes/core/reshape/pivot.py +++ b/bigframes/core/reshape/pivot.py @@ -71,12 +71,11 @@ def crosstab( columns=tmp_col_names, aggfunc=aggfunc or "count", sort=False, + fill_value=0 if (aggfunc is None) else None, ) + # Undo temporary unique level labels pivot_table.index.names = rownames or [i.name for i in index] pivot_table.columns.names = colnames or [c.name for c in columns] - if aggfunc is None: - # TODO: Push this into pivot_table itself - pivot_table = pivot_table.fillna(0) return pivot_table diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index da6da7a925..01501a1056 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -3486,10 +3486,6 @@ def pivot_table( observed: bool = False, sort: bool = True, ) -> DataFrame: - if fill_value is not None: - raise NotImplementedError( - "DataFrame.pivot_table fill_value arg not supported. {constants.FEEDBACK_LINK}" - ) if margins: raise NotImplementedError( "DataFrame.pivot_table margins arg not supported. {constants.FEEDBACK_LINK}" @@ -3549,6 +3545,8 @@ def pivot_table( index=index, values=values if len(values) > 1 else None, ) + if fill_value: + pivoted = pivoted.fillna(fill_value) if sort: pivoted = pivoted.sort_index() @@ -3556,7 +3554,7 @@ def pivot_table( # The pivot_table method results in multi-index columns that are always ordered. # However, the order of the pivoted result columns is not guaranteed to be sorted. # Sort and reorder. - return pivoted[pivoted.columns.sort_values()] + return pivoted.sort_index(axis=1) # type: ignore def stack(self, level: LevelsType = -1): if not isinstance(self.columns, pandas.MultiIndex): diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 475f98407b..49dad19398 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3784,12 +3784,18 @@ def test_df_pivot_hockey(hockey_df, hockey_pandas_df, values, index, columns): @pytest.mark.parametrize( - ("values", "index", "columns", "aggfunc"), + ("values", "index", "columns", "aggfunc", "fill_value"), [ - (("culmen_length_mm", "body_mass_g"), "species", "sex", "std"), - (["body_mass_g", "culmen_length_mm"], ("species", "island"), "sex", "sum"), - ("body_mass_g", "sex", ["island", "species"], "mean"), - ("culmen_depth_mm", "island", "species", "max"), + (("culmen_length_mm", "body_mass_g"), "species", "sex", "std", 1.0), + ( + ["body_mass_g", "culmen_length_mm"], + ("species", "island"), + "sex", + "sum", + None, + ), + ("body_mass_g", "sex", ["island", "species"], "mean", None), + ("culmen_depth_mm", "island", "species", "max", -1), ], ) def test_df_pivot_table( @@ -3799,12 +3805,21 @@ def test_df_pivot_table( index, columns, aggfunc, + fill_value, ): bf_result = penguins_df_default_index.pivot_table( - values=values, index=index, columns=columns, aggfunc=aggfunc + values=values, + index=index, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, ).to_pandas() pd_result = penguins_pandas_df_default_index.pivot_table( - values=values, index=index, columns=columns, aggfunc=aggfunc + values=values, + index=index, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, ) pd.testing.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_column_type=False diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 3381f53351..dc1bcca213 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -6414,6 +6414,10 @@ def pivot_table(self, values=None, index=None, columns=None, aggfunc="mean"): aggfunc (str, default "mean"): Aggregation function name to compute summary statistics (e.g., 'sum', 'mean'). + fill_value (scalar, default None): + Value to replace missing values with (in the resulting pivot table, after + aggregation). + Returns: bigframes.pandas.DataFrame: An Excel style pivot table. """ From 1c12d7c922127241db8b8c807a0e1a21dff9a028 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 12 Nov 2025 01:25:57 +0000 Subject: [PATCH 2/2] properly handle fill_value where bool(val)==False --- bigframes/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 01501a1056..1e60fe6a8d 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -3545,7 +3545,7 @@ def pivot_table( index=index, values=values if len(values) > 1 else None, ) - if fill_value: + if fill_value is not None: pivoted = pivoted.fillna(fill_value) if sort: pivoted = pivoted.sort_index()