From 514a8a823d1ed803d5f0c96fdf2a9dd9acbd0aac Mon Sep 17 00:00:00 2001
From: Phillip Cloud
Date: Thu, 2 Nov 2017 09:49:04 -0400
Subject: [PATCH] ENH: Support contains/not contains in the pandas backend

---
Reviewer notes (text in this region is not part of the commit):
- ibis/expr/types.py: ValueList.__init__ builds self.values with
  list(map(as_value_expr, args)) and delegates to the parent through
  super(ValueList, self).__init__ instead of calling ValueOp.__init__
  directly.
- ibis/pandas/execution/generic.py: registers execute_node rules for
  ir.ValueList (executes every element of op.values into a list) and for
  ops.Contains / ops.NotContains on (pd.Series, list), implemented with
  Series.isin and its negation.
- test_operations.py: adds test_isin / test_notin; list and tuple inputs
  are expected to pass, set and frozenset inputs are marked
  xfail(raises=TypeError, reason='Not yet implemented').

 ibis/expr/types.py                            |  4 +--
 ibis/pandas/execution/generic.py              | 15 ++++++++
 .../pandas/execution/tests/test_operations.py | 36 +++++++++++++++++++
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/ibis/expr/types.py b/ibis/expr/types.py
index cf3419eae7ad..175af118700e 100644
--- a/ibis/expr/types.py
+++ b/ibis/expr/types.py
@@ -1582,8 +1582,8 @@ class ValueList(ValueOp):
     """
 
     def __init__(self, args):
-        self.values = [as_value_expr(x) for x in args]
-        ValueOp.__init__(self, self.values)
+        self.values = list(map(as_value_expr, args))
+        super(ValueList, self).__init__(self.values)
 
     def root_tables(self):
         return distinct_roots(*self.values)
diff --git a/ibis/pandas/execution/generic.py b/ibis/pandas/execution/generic.py
index 97a89fc9131c..7dceb0525a9b 100644
--- a/ibis/pandas/execution/generic.py
+++ b/ibis/pandas/execution/generic.py
@@ -763,3 +763,18 @@ def execute_array_collect_group_by(op, data, **kwargs):
 @execute_node.register(ops.SelfReference, pd.DataFrame)
 def execute_node_self_reference_dataframe(op, data, **kwargs):
     return data
+
+
+@execute_node.register(ir.ValueList)
+def execute_node_value_list(op, **kwargs):
+    return [execute(arg, **kwargs) for arg in op.values]
+
+
+@execute_node.register(ops.Contains, pd.Series, list)
+def execute_node_contains_series_list(op, data, elements, **kwargs):
+    return data.isin(elements)
+
+
+@execute_node.register(ops.NotContains, pd.Series, list)
+def execute_node_not_contains_series_list(op, data, elements, **kwargs):
+    return ~data.isin(elements)
diff --git a/ibis/pandas/execution/tests/test_operations.py b/ibis/pandas/execution/tests/test_operations.py
index a756f3ca734e..3e109de64e9f 100644
--- a/ibis/pandas/execution/tests/test_operations.py
+++ b/ibis/pandas/execution/tests/test_operations.py
@@ -634,3 +634,39 @@ def test_scalar_parameter(t, df, raw_value):
     result = expr.execute(params={value: raw_value})
     expected = df.float64_with_zeros == raw_value
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    'elements',
+    [
+        [1],
+        (1,),
+        pytest.mark.xfail({1}, raises=TypeError, reason='Not yet implemented'),
+        pytest.mark.xfail(
+            frozenset({1}), raises=TypeError, reason='Not yet implemented'
+        ),
+    ]
+)
+def test_isin(t, df, elements):
+    expr = t.plain_float64.isin(elements)
+    expected = df.plain_float64.isin(elements)
+    result = expr.execute()
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    'elements',
+    [
+        [1],
+        (1,),
+        pytest.mark.xfail({1}, raises=TypeError, reason='Not yet implemented'),
+        pytest.mark.xfail(
+            frozenset({1}), raises=TypeError, reason='Not yet implemented'
+        ),
+    ]
+)
+def test_notin(t, df, elements):
+    expr = t.plain_float64.notin(elements)
+    expected = ~df.plain_float64.isin(elements)
+    result = expr.execute()
+    tm.assert_series_equal(result, expected)