From e3e8d6bcbcb6b4999113ba5bae5280fea4807582 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 11 Nov 2025 20:57:17 +0000 Subject: [PATCH 1/2] feat: Support builtins funcs for df.agg --- bigframes/core/groupby/dataframe_group_by.py | 10 ++++----- bigframes/operations/aggregations.py | 10 +++++++-- tests/system/small/test_dataframe.py | 22 ++++++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/bigframes/core/groupby/dataframe_group_by.py b/bigframes/core/groupby/dataframe_group_by.py index 3948d08a23..149971249f 100644 --- a/bigframes/core/groupby/dataframe_group_by.py +++ b/bigframes/core/groupby/dataframe_group_by.py @@ -593,6 +593,7 @@ def _agg_func(self, func) -> df.DataFrame: def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: aggregations: typing.List[agg_expressions.Aggregation] = [] column_labels = [] + function_labels = [] want_aggfunc_level = any(utils.is_list_like(aggs) for aggs in func.values()) @@ -602,8 +603,10 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: funcs_for_id if utils.is_list_like(funcs_for_id) else [funcs_for_id] ) for f in func_list: - aggregations.append(aggs.agg(col_id, agg_ops.lookup_agg_func(f)[0])) + f_op, f_label = agg_ops.lookup_agg_func(f) + aggregations.append(aggs.agg(col_id, f_op)) column_labels.append(label) + function_labels.append(f_label) agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, aggregations=aggregations, @@ -613,10 +616,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame: agg_block = agg_block.with_column_labels( utils.combine_indices( pd.Index(column_labels), - pd.Index( - typing.cast(agg_ops.AggregateOp, agg.op).name - for agg in aggregations - ), + pd.Index(function_labels), ) ) else: diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index f6e8600d42..1160ab2c8e 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -717,9 +717,15 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT np.all: all_op, np.any: any_op, np.unique: nunique_op, - # TODO(b/443252872): Solve - # list: ArrayAggOp(), np.size: size_op, + # TODO(b/443252872): Solve + list: ArrayAggOp(), + len: size_op, + sum: sum_op, + min: min_op, + max: max_op, + any: any_op, + all: all_op, } diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 475f98407b..5750f03f9c 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -6151,6 +6151,28 @@ def test_agg_with_dict_strs(scalars_dfs): ) +def test_df_agg_with_builtins(scalars_dfs): + bf_df, pd_df = scalars_dfs + + bf_result = ( + bf_df[["int64_col", "bool_col"]] + .dropna() + .groupby(bf_df.int64_too % 2) + .agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]}) + .to_pandas() + ) + pd_result = ( + pd_df[["int64_col", "bool_col"]] + .dropna() + .groupby(pd_df.int64_too % 2) + .agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]}) + ) + + pd.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs): bf_df, _ = scalars_dfs agg_funcs = { From 51ec179f53e8d2ed0c8451c55d3115f69820e092 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 12 Nov 2025 01:29:15 +0000 Subject: [PATCH 2/2] fix test_dataframe_groupby_agg_dict_with_list --- tests/system/small/test_groupby.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 4f187dcccc..2e09ffd1a6 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -282,8 +282,6 @@ def test_dataframe_groupby_agg_dict_with_list( ) bf_result_computed = bf_result.to_pandas() - # some inconsistency between versions, so normalize to bigframes behavior - pd_result = pd_result.rename({"amax": "max"}, axis="columns") pd.testing.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, check_index_type=False )