From 658e064db50e5e516631094d2b2aa74c28c3c506 Mon Sep 17 00:00:00 2001 From: jiangzhx Date: Mon, 11 Sep 2023 19:02:38 +0800 Subject: [PATCH 1/2] add bit_and,bit_or,bit_xor,bool_and,bool_or --- datafusion/tests/test_aggregation.py | 39 ++++++++++++++++++++++++++-- src/functions.rs | 10 +++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/datafusion/tests/test_aggregation.py b/datafusion/tests/test_aggregation.py index 2c8c064b1..1145dcaeb 100644 --- a/datafusion/tests/test_aggregation.py +++ b/datafusion/tests/test_aggregation.py @@ -33,8 +33,9 @@ def df(): pa.array([1, 2, 3]), pa.array([4, 4, 6]), pa.array([9, 8, 5]), + pa.array([True, True, False]), ], - names=["a", "b", "c"], + names=["a", "b", "c", "d"], ) return ctx.create_dataframe([[batch]]) @@ -73,7 +74,8 @@ def test_built_in_aggregation(df): ], ) result = agg_df.collect()[0] - values_a, values_b, values_c = df.collect()[0] + print(df.collect()[0]) + values_a, values_b, values_c, values_d = df.collect()[0] assert result.column(0) == pa.array([2], type=pa.uint64()) assert result.column(1) == pa.array([4]) @@ -125,3 +127,36 @@ def test_built_in_aggregation(df): np.testing.assert_array_almost_equal( result.column(21), np.var(values_c, ddof=1) ) + + +def test_bit_add_or_xor(df): + + df = df.aggregate( + [], + [ + f.bit_and(column("a")), + f.bit_or(column("b")), + f.bit_xor(column("c")), + ], + ) + + result = df.collect() + result = result[0] + assert result.column(0) == pa.array([0]) + assert result.column(1) == pa.array([6]) + assert result.column(2) == pa.array([4]) + + +def test_bool_and_or(df): + + df = df.aggregate( + [], + [ + f.bool_and(column("d")), + f.bool_or(column("d")), + ], + ) + result = df.collect() + result = result[0] + assert result.column(0) == pa.array([False]) + assert result.column(1) == pa.array([True]) diff --git a/src/functions.rs b/src/functions.rs index ef26240fe..eed28154e 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -362,6 +362,11 @@ 
aggregate_function!(stddev_samp, Stddev); aggregate_function!(var, Variance); aggregate_function!(var_pop, VariancePop); aggregate_function!(var_samp, Variance); +aggregate_function!(bit_and, BitAnd); +aggregate_function!(bit_or, BitOr); +aggregate_function!(bit_xor, BitXor); +aggregate_function!(bool_and, BoolAnd); +aggregate_function!(bool_or, BoolOr); pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(abs))?; @@ -489,6 +494,11 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; + m.add_wrapped(wrap_pyfunction!(bit_and))?; + m.add_wrapped(wrap_pyfunction!(bit_or))?; + m.add_wrapped(wrap_pyfunction!(bit_xor))?; + m.add_wrapped(wrap_pyfunction!(bool_and))?; + m.add_wrapped(wrap_pyfunction!(bool_or))?; //Binary String Functions m.add_wrapped(wrap_pyfunction!(encode))?; From d7ac9ed2b14a428be81e2e0710861f96543da71e Mon Sep 17 00:00:00 2001 From: zhenxing jiang Date: Tue, 10 Oct 2023 23:27:22 -0500 Subject: [PATCH 2/2] Update datafusion/tests/test_aggregation.py Co-authored-by: Liang-Chi Hsieh --- datafusion/tests/test_aggregation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/tests/test_aggregation.py b/datafusion/tests/test_aggregation.py index 1145dcaeb..0a6c90c32 100644 --- a/datafusion/tests/test_aggregation.py +++ b/datafusion/tests/test_aggregation.py @@ -74,7 +74,6 @@ def test_built_in_aggregation(df): ], ) result = agg_df.collect()[0] - print(df.collect()[0]) values_a, values_b, values_c, values_d = df.collect()[0] assert result.column(0) == pa.array([2], type=pa.uint64())