From f24d46459211f3e0783711331448de67c9071b4e Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Sat, 23 Mar 2019 04:43:07 -0400 Subject: [PATCH] [OmniSci/MapD] Add support for arbitrary aggregate This fixes #1680 by adding support for the `arbitrary` aggregate in MapD by mapping it to the `SAMPLE` function. Author: Saul Shanabrook Closes #1683 from saulshanabrook/omnisci-sample and squashes the following commits: f8d9ee1 [Saul Shanabrook] Add support for arbitrary aggregate in OmniSci/MapD (fixes #1680) --- ibis/bigquery/compiler.py | 2 +- ibis/clickhouse/operations.py | 4 +++- ibis/expr/api.py | 2 +- ibis/expr/operations.py | 2 +- ibis/mapd/operations.py | 17 +++++++++++++++-- ibis/mapd/tests/conftest.py | 5 +++++ ibis/mapd/tests/test_operations.py | 14 ++++++++++++++ ibis/pandas/execution/generic.py | 10 +++++++--- 8 files changed, 47 insertions(+), 9 deletions(-) diff --git a/ibis/bigquery/compiler.py b/ibis/bigquery/compiler.py index 80f2871668df..c4cc263d1ad2 100644 --- a/ibis/bigquery/compiler.py +++ b/ibis/bigquery/compiler.py @@ -277,7 +277,7 @@ def _arbitrary(translator, expr): if where is not None: arg = where.ifelse(arg, ibis.NA) - if how != 'first': + if how not in (None, 'first'): raise com.UnsupportedOperationError( '{!r} value not supported for arbitrary in BigQuery'.format(how) ) diff --git a/ibis/clickhouse/operations.py b/ibis/clickhouse/operations.py index 30cac222b6db..5f71747b60ab 100644 --- a/ibis/clickhouse/operations.py +++ b/ibis/clickhouse/operations.py @@ -129,7 +129,9 @@ def varargs_formatter(translator, expr): def _arbitrary(translator, expr): arg, how, where = expr.op().args - functions = {'first': 'any', + functions = { + None: 'any', + 'first': 'any', 'last': 'anyLast', 'heavy': 'anyHeavy'} return _aggregate(translator, functions[how], arg, where=where) diff --git a/ibis/expr/api.py b/ibis/expr/api.py index b6c7844055fb..1f48cb90a672 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -481,7 +481,7 @@ def group_concat(arg, sep=',', where=None): return ops.GroupConcat(arg, sep, where).to_expr() -def arbitrary(arg, where=None, how='first'): +def arbitrary(arg, where=None, how=None): """ Selects the first / last non-null value in a column diff --git a/ibis/expr/operations.py b/ibis/expr/operations.py index 8559d0509255..f343fcfb6c78 100644 --- a/ibis/expr/operations.py +++ b/ibis/expr/operations.py @@ -835,7 +835,7 @@ def output_type(self): class Arbitrary(Reduction): arg = Arg(rlz.column(rlz.any)) - how = Arg(rlz.isin({'first', 'last', 'heavy'}), default='first') + how = Arg(rlz.isin({'first', 'last', 'heavy'}), default=None) where = Arg(rlz.boolean, default=None) output_type = rlz.scalar_like('arg') diff --git a/ibis/mapd/operations.py b/ibis/mapd/operations.py index 1e321b392ac6..41a98145238f 100644 --- a/ibis/mapd/operations.py +++ b/ibis/mapd/operations.py @@ -573,6 +573,20 @@ def _table_column(translator, expr): count = _reduction('count') +def _arbitrary(translator, expr): + arg, how, where = expr.op().args + + if how not in (None, 'last'): + raise com.UnsupportedOperationError( + '{!r} value not supported for arbitrary in MapD'.format(how) + ) + + if where is not None: + arg = where.ifelse(arg, ibis.NA) + + return 'SAMPLE({})'.format(translator.translate(arg)) + + # MATH class NumericTruncate(ops.NumericBinaryOp): @@ -707,6 +721,7 @@ class ByteLength(ops.StringLength): _agg_ops = { ops.HLLCardinality: approx_count_distinct, ops.DistinctColumn: unary_prefix_op('distinct'), + ops.Arbitrary: _arbitrary } # GENERAL @@ -734,7 +749,6 @@ class ByteLength(ops.StringLength): ops.CumulativeAny, ops.CumulativeAll, ops.IdenticalTo, - ops.Arbitrary, ops.RowNumber, ops.DenseRank, ops.MinRank, @@ -746,7 +760,6 @@ class ByteLength(ops.StringLength): ops.Lead, ops.NTile, ops.GroupConcat, - ops.Arbitrary, ops.NullIf, ops.NullIfZero, ops.NullLiteral, diff --git a/ibis/mapd/tests/conftest.py b/ibis/mapd/tests/conftest.py index ae07a74cea4b..d8f7bb8ffc09 100644 --- a/ibis/mapd/tests/conftest.py +++ b/ibis/mapd/tests/conftest.py @@ -37,6 +37,11 @@ def batting(con): return con.table('batting') +@pytest.fixture(scope='module') +def df_alltypes(alltypes): + return alltypes.execute() + + @pytest.fixture def translate(): """ diff --git a/ibis/mapd/tests/test_operations.py b/ibis/mapd/tests/test_operations.py index e806928add08..b5e01ee43dc0 100644 --- a/ibis/mapd/tests/test_operations.py +++ b/ibis/mapd/tests/test_operations.py @@ -134,3 +134,17 @@ def test_literal_geospatial(): "SELECT 'MULTIPOLYGON(((0 0, 0 0), (0 0, 0 0)), " "((0 0, 0 0), (0 0, 0 0)))' AS tmp" ) + + +@pytest.mark.parametrize(('result_fn', 'expected_fn'), [ + param( + lambda t: t.double_col.arbitrary(), + lambda t: t.double_col.iloc[-1], + id='double_col_arbitrary_none' + ), +]) +def test_arbitrary_none(alltypes, df_alltypes, result_fn, expected_fn): + expr = result_fn(alltypes) + result = expr.execute() + expected = expected_fn(df_alltypes) + np.testing.assert_allclose(result, expected) diff --git a/ibis/pandas/execution/generic.py b/ibis/pandas/execution/generic.py index b77bfdfd1ae1..54fffc5e2ac8 100644 --- a/ibis/pandas/execution/generic.py +++ b/ibis/pandas/execution/generic.py @@ -461,10 +461,14 @@ def execute_count_distinct_series_groupby( @execute_node.register(ops.Arbitrary, SeriesGroupBy, type(None)) def execute_arbitrary_series_groupby(op, data, _, aggcontext=None, **kwargs): - if op.how not in {'first', 'last'}: + how = op.how + if how is None: + how = 'first' + + if how not in {'first', 'last'}: raise com.OperationNotDefinedError( - 'Arbitrary {!r} is not supported'.format(op.how)) - return aggcontext.agg(data, op.how) + 'Arbitrary {!r} is not supported'.format(how)) + return aggcontext.agg(data, how) def _filtered_reduction(mask, method, data):