ENH: Add missing date operations and struct field operation for the pandas backend #1790

Merged: 2 commits, May 19, 2019

Changes from all commits
3 changes: 2 additions & 1 deletion ibis/expr/operations.py
@@ -2608,7 +2608,8 @@ class Date(UnaryOp):

class TimestampFromUNIX(ValueOp):
arg = Arg(rlz.any)
-    unit = Arg(rlz.isin({'s', 'ms', 'us'}))
+    # Only pandas-based backends support 'ns'
+    unit = Arg(rlz.isin({'s', 'ms', 'us', 'ns'}))
output_type = rlz.shape_like('arg', dt.timestamp)


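The practical effect of accepting 'ns' is easiest to see on the pandas backend itself. Below is a minimal, hypothetical sketch: it assumes the in-memory client (ibis.pandas.connect) and the integer-column to_timestamp helper behave as in ibis 1.x, and that other backends continue to reject 'ns' through their own unit validation.

import pandas as pd

import ibis

# Hypothetical table with epoch values already expressed in nanoseconds.
df = pd.DataFrame({"epoch_ns": [1_546_300_800_000_000_000]})  # 2019-01-01 00:00:00 UTC
con = ibis.pandas.connect({"t": df})
t = con.table("t")

# With 'ns' in the allowed unit set, this no longer fails expression validation.
expr = t.epoch_ns.to_timestamp(unit="ns")
print(expr.execute())  # a Series of pandas Timestamps at 2019-01-01
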
1 change: 1 addition & 0 deletions ibis/pandas/execution/__init__.py
@@ -5,5 +5,6 @@
from ibis.pandas.execution.maps import * # noqa: F401,F403
from ibis.pandas.execution.selection import * # noqa: F401,F403
from ibis.pandas.execution.strings import * # noqa: F401,F403
from ibis.pandas.execution.structs import * # noqa: F401,F403
from ibis.pandas.execution.temporal import * # noqa: F401,F403
from ibis.pandas.execution.window import * # noqa: F401,F403
2 changes: 1 addition & 1 deletion ibis/pandas/execution/generic.py
@@ -1,4 +1,4 @@
-from __future__ import absolute_import
+"""Execution rules for generic ibis operations."""

import collections
import datetime
31 changes: 31 additions & 0 deletions ibis/pandas/execution/structs.py
@@ -0,0 +1,31 @@
"""Pandas backend execution of struct fields and literals."""

import collections
import operator

import pandas as pd
from pandas.core.groupby import SeriesGroupBy

import ibis.expr.operations as ops
from ibis.pandas.dispatch import execute_node


@execute_node.register(ops.StructField, collections.abc.Mapping)
def execute_node_struct_field_dict(op, data, **kwargs):
return data[op.field]


@execute_node.register(ops.StructField, pd.Series)
def execute_node_struct_field_series(op, data, **kwargs):
field = op.field
return data.map(operator.itemgetter(field)).rename(field)


@execute_node.register(ops.StructField, SeriesGroupBy)
def execute_node_struct_field_series_group_by(op, data, **kwargs):
field = op.field
return (
data.obj.map(operator.itemgetter(field))
.rename(field)
.groupby(data.grouper.groupings)
)
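
All three rules above rely on the same idiom: a struct value is represented as a mapping, so projecting out a field is operator.itemgetter applied element-wise, re-attached to the original groupings in the SeriesGroupBy case. A pure-pandas sketch of that idiom, independent of ibis:

import operator
from collections import OrderedDict

import pandas as pd

s = pd.Series(
    [
        OrderedDict([("fruit", "apple"), ("weight", None)]),
        OrderedDict([("fruit", "pear"), ("weight", 0)]),
    ]
)

# Element-wise field access, mirroring execute_node_struct_field_series.
fruit = s.map(operator.itemgetter("fruit")).rename("fruit")
print(fruit.tolist())  # ['apple', 'pear']
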
22 changes: 22 additions & 0 deletions ibis/pandas/execution/temporal.py
@@ -7,6 +7,7 @@
import ibis
import ibis.expr.operations as ops
from ibis.pandas.core import (
date_types,
integer_types,
numeric_types,
timedelta_types,
@@ -229,3 +230,24 @@ def execute_day_of_week_name_series(op, data, **kwargs):
@execute_node.register(ops.DayOfWeekName, SeriesGroupBy)
def execute_day_of_week_name_series_group_by(op, data, **kwargs):
return day_name(data.obj.dt).groupby(data.grouper.groupings)


@execute_node.register(ops.DateSub, date_types, timedelta_types)
@execute_node.register((ops.DateDiff, ops.DateSub), date_types, pd.Series)
@execute_node.register(ops.DateSub, pd.Series, timedelta_types)
@execute_node.register((ops.DateDiff, ops.DateSub), pd.Series, pd.Series)
@execute_node.register(ops.DateDiff, date_types, date_types)
@execute_node.register(ops.DateDiff, pd.Series, date_types)
def execute_date_sub_diff(op, left, right, **kwargs):
return left - right


@execute_node.register(ops.DateAdd, pd.Series, timedelta_types)
@execute_node.register(ops.DateAdd, timedelta_types, pd.Series)
@execute_node.register(ops.DateAdd, pd.Series, pd.Series)
@execute_node.register(ops.DateAdd, date_types, timedelta_types)
@execute_node.register(ops.DateAdd, timedelta_types, date_types)
@execute_node.register(ops.DateAdd, date_types, pd.Series)
@execute_node.register(ops.DateAdd, pd.Series, date_types)
def execute_date_add(op, left, right, **kwargs):
return left + right
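
Because these registrations simply delegate to Python's + and - operators, date arithmetic now runs end to end on the pandas backend. A short usage sketch follows; it assumes the ibis.date and ibis.interval literal constructors execute to datetime.date and Timedelta-like values, respectively:

import ibis

# ops.DateDiff: difference between two date literals.
diff = ibis.date("2019-05-19") - ibis.date("2019-05-01")
print(ibis.pandas.execute(diff))  # 18 days

# ops.DateAdd: shift a date forward by an interval.
shifted = ibis.date("2019-05-01") + ibis.interval(days=18)
print(ibis.pandas.execute(shifted))  # 2019-05-19
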
83 changes: 83 additions & 0 deletions ibis/pandas/execution/tests/test_structs.py
@@ -0,0 +1,83 @@
from collections import OrderedDict

import pandas as pd
import pandas.util.testing as tm
import pytest

import ibis
import ibis.expr.datatypes as dt


@pytest.fixture(scope="module")
def value():
return OrderedDict([("fruit", "pear"), ("weight", 0)])


@pytest.fixture(scope="module")
def struct_client(value):
df = pd.DataFrame(
{
"s": [
OrderedDict([("fruit", "apple"), ("weight", None)]),
value,
OrderedDict([("fruit", "pear"), ("weight", 1)]),
],
"key": list("aab"),
"value": [1, 2, 3],
}
)
return ibis.pandas.connect({"t": df})


@pytest.fixture
def struct_table(struct_client):
return struct_client.table(
"t",
schema={
"s": dt.Struct.from_tuples(
[("fruit", dt.string), ("weight", dt.int8)]
)
},
)


def test_struct_field_literal(value):
struct = ibis.literal(value)
assert struct.type() == dt.Struct.from_tuples(
[("fruit", dt.string), ("weight", dt.int8)]
)

expr = struct.fruit
result = ibis.pandas.execute(expr)
assert result == "pear"

expr = struct.weight
result = ibis.pandas.execute(expr)
assert result == 0


def test_struct_field_series(struct_table):
t = struct_table
expr = t.s.fruit
result = expr.execute()
expected = pd.Series(["apple", "pear", "pear"], name="fruit")
tm.assert_series_equal(result, expected)


def test_struct_field_series_group_by_key(struct_table):
t = struct_table
expr = t.groupby(t.s.fruit).aggregate(total=t.value.sum())
result = expr.execute()
expected = pd.DataFrame(
[("apple", 1), ("pear", 5)], columns=["fruit", "total"]
)
tm.assert_frame_equal(result, expected)


def test_struct_field_series_group_by_value(struct_table):
t = struct_table
expr = t.groupby(t.key).aggregate(total=t.s.weight.sum())
result = expr.execute()
# these are floats because we have a NULL value in the input data
expected = pd.DataFrame([("a", 0.0), ("b", 1.0)], columns=["key", "total"])
tm.assert_frame_equal(result, expected)
12 changes: 8 additions & 4 deletions ibis/tests/all/test_temporal.py
@@ -255,13 +255,17 @@ def test_strftime(backend, con, alltypes, df, ibis_pattern, pandas_pattern):
backend.assert_series_equal(result, expected)


-unit_factors = {'s': int(1e9), 'ms': int(1e6), 'us': int(1e3)}
+unit_factors = {
+    's': int(1e9),
+    'ms': int(1e6),
+    'us': int(1e3),
+    'ns': 1,
+}


@pytest.mark.parametrize(
'unit',
[
-        'D',
's',
'ms',
param(
@@ -270,11 +274,11 @@ def test_strftime(backend, con, alltypes, df, ibis_pattern, pandas_pattern):
(BigQuery, Csv, Impala, Pandas, Parquet)
),
),
-        param('ns', marks=pytest.mark.xfail),
+        param('ns', marks=pytest.mark.xpass_backends((Csv, Pandas, Parquet))),
],
)
@tu.skipif_unsupported
-def test_to_timestamp(backend, con, alltypes, df, unit):
+def test_to_timestamp(backend, con, unit):
if unit not in backend.supported_to_timestamp_units:
pytest.skip(
'Unit {!r} not supported by {} to_timestamp'.format(unit, backend)
3 changes: 3 additions & 0 deletions ibis/tests/backends.py
@@ -121,6 +121,9 @@ def assert_frame_equal(self, left, right, *args, **kwargs):
class Pandas(Backend, RoundHalfToEven):
check_names = False
additional_skipped_operations = frozenset({ops.StringSQLLike})
supported_to_timestamp_units = Backend.supported_to_timestamp_units | {
'ns'
}
supports_divide_by_zero = True
returned_timestamp_unit = 'ns'
