Skip to content

Commit

Permalink
[OmniSci/MapD] Add support for arbitrary aggregate
Browse files Browse the repository at this point in the history
This fixes #1680 by adding support for the `arbitrary` aggregate in
MapD, which is implemented by mapping it to the `SAMPLE` function.
Author: Saul Shanabrook <s.shanabrook@gmail.com>

Closes #1683 from saulshanabrook/omnisci-sample and squashes the following commits:

f8d9ee1 [Saul Shanabrook] Add support for arbitrary aggregate in OmniSci/MapD (fixes #1680)
  • Loading branch information
saulshanabrook authored and cpcloud committed Mar 23, 2019
1 parent 7df7e0d commit f24d464
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 9 deletions.
2 changes: 1 addition & 1 deletion ibis/bigquery/compiler.py
Expand Up @@ -277,7 +277,7 @@ def _arbitrary(translator, expr):
if where is not None:
arg = where.ifelse(arg, ibis.NA)

if how != 'first':
if how not in (None, 'first'):
raise com.UnsupportedOperationError(
'{!r} value not supported for arbitrary in BigQuery'.format(how)
)
Expand Down
4 changes: 3 additions & 1 deletion ibis/clickhouse/operations.py
Expand Up @@ -129,7 +129,9 @@ def varargs_formatter(translator, expr):

def _arbitrary(translator, expr):
arg, how, where = expr.op().args
functions = {'first': 'any',
functions = {
None: 'any',
'first': 'any',
'last': 'anyLast',
'heavy': 'anyHeavy'}
return _aggregate(translator, functions[how], arg, where=where)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/api.py
Expand Up @@ -481,7 +481,7 @@ def group_concat(arg, sep=',', where=None):
return ops.GroupConcat(arg, sep, where).to_expr()


def arbitrary(arg, where=None, how='first'):
def arbitrary(arg, where=None, how=None):
"""
Selects the first / last non-null value in a column
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/operations.py
Expand Up @@ -835,7 +835,7 @@ def output_type(self):

class Arbitrary(Reduction):
arg = Arg(rlz.column(rlz.any))
how = Arg(rlz.isin({'first', 'last', 'heavy'}), default='first')
how = Arg(rlz.isin({'first', 'last', 'heavy'}), default=None)
where = Arg(rlz.boolean, default=None)
output_type = rlz.scalar_like('arg')

Expand Down
17 changes: 15 additions & 2 deletions ibis/mapd/operations.py
Expand Up @@ -573,6 +573,20 @@ def _table_column(translator, expr):
count = _reduction('count')


def _arbitrary(translator, expr):
arg, how, where = expr.op().args

if how not in (None, 'last'):
raise com.UnsupportedOperationError(
'{!r} value not supported for arbitrary in MapD'.format(how)
)

if where is not None:
arg = where.ifelse(arg, ibis.NA)

return 'SAMPLE({})'.format(translator.translate(arg))


# MATH

class NumericTruncate(ops.NumericBinaryOp):
Expand Down Expand Up @@ -707,6 +721,7 @@ class ByteLength(ops.StringLength):
# Aggregate operations and the callables used to translate them.
_agg_ops = {
    ops.HLLCardinality: approx_count_distinct,
    ops.DistinctColumn: unary_prefix_op('distinct'),
    ops.Arbitrary: _arbitrary,
}

# GENERAL
Expand Down Expand Up @@ -734,7 +749,6 @@ class ByteLength(ops.StringLength):
ops.CumulativeAny,
ops.CumulativeAll,
ops.IdenticalTo,
ops.Arbitrary,
ops.RowNumber,
ops.DenseRank,
ops.MinRank,
Expand All @@ -746,7 +760,6 @@ class ByteLength(ops.StringLength):
ops.Lead,
ops.NTile,
ops.GroupConcat,
ops.Arbitrary,
ops.NullIf,
ops.NullIfZero,
ops.NullLiteral,
Expand Down
5 changes: 5 additions & 0 deletions ibis/mapd/tests/conftest.py
Expand Up @@ -37,6 +37,11 @@ def batting(con):
return con.table('batting')


@pytest.fixture(scope='module')
def df_alltypes(alltypes):
    """Return the result of executing the ``alltypes`` fixture.

    Module-scoped, so the table is executed once per test module.
    """
    return alltypes.execute()


@pytest.fixture
def translate():
"""
Expand Down
14 changes: 14 additions & 0 deletions ibis/mapd/tests/test_operations.py
Expand Up @@ -134,3 +134,17 @@ def test_literal_geospatial():
"SELECT 'MULTIPOLYGON(((0 0, 0 0), (0 0, 0 0)), "
"((0 0, 0 0), (0 0, 0 0)))' AS tmp"
)


@pytest.mark.parametrize(
    ('result_fn', 'expected_fn'),
    [
        param(
            lambda t: t.double_col.arbitrary(),
            lambda t: t.double_col.iloc[-1],
            id='double_col_arbitrary_none',
        ),
    ],
)
def test_arbitrary_none(alltypes, df_alltypes, result_fn, expected_fn):
    """Check ``arbitrary()`` with no ``how`` against the executed frame.

    ``result_fn`` builds the backend expression from the ``alltypes``
    table; ``expected_fn`` derives the reference value from the
    ``df_alltypes`` fixture.
    """
    expr = result_fn(alltypes)
    result = expr.execute()
    expected = expected_fn(df_alltypes)
    np.testing.assert_allclose(result, expected)
10 changes: 7 additions & 3 deletions ibis/pandas/execution/generic.py
Expand Up @@ -461,10 +461,14 @@ def execute_count_distinct_series_groupby(

@execute_node.register(ops.Arbitrary, SeriesGroupBy, type(None))
def execute_arbitrary_series_groupby(op, data, _, aggcontext=None, **kwargs):
if op.how not in {'first', 'last'}:
how = op.how
if how is None:
how = 'first'

if how not in {'first', 'last'}:
raise com.OperationNotDefinedError(
'Arbitrary {!r} is not supported'.format(op.how))
return aggcontext.agg(data, op.how)
'Arbitrary {!r} is not supported'.format(how))
return aggcontext.agg(data, how)


def _filtered_reduction(mask, method, data):
Expand Down

0 comments on commit f24d464

Please sign in to comment.