Skip to content

Commit

Permalink
TST/ENH: More thorough ALL tests for date and timestamp arithmetics
Browse files Browse the repository at this point in the history
Implementations: clickhouse, impala. Support for mysql blob type.
`Schema.apply_to` uses numpy to apply timedelta dtypes.

Author: Krisztián Szűcs <szucs.krisztian@gmail.com>

Closes #1352 from kszucs/date_arithmetics and squashes the following commits:

621f131 [Krisztián Szűcs] bigquery raises for timestamp - timestamp and date - date
a43e6c6 [Krisztián Szűcs] resolve review issues
bf1d8ec [Krisztián Szűcs] more thorough ALL tests for timestamp and date arithmetics
1b92114 [Krisztián Szűcs] clickhouse DateAdd, DateSubtract; support for mysql blob type
  • Loading branch information
kszucs authored and cpcloud committed Feb 13, 2018
1 parent 4d202c3 commit 0c63d4b
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 20 deletions.
6 changes: 6 additions & 0 deletions ibis/bigquery/compiler.py
Expand Up @@ -265,6 +265,12 @@ def _timestamp_op(func, units):
def _formatter(translator, expr):
op = expr.op()
arg, offset = op.args
if not isinstance(offset, ir.IntervalValue):
raise com.UnsupportedOperationError(
'Binary operations between two timestamps or dates '
'are not yet supported in bigquery backend'
)

if offset.unit not in units:
raise com.UnsupportedOperationError(
'BigQuery does not allow binary operation '
Expand Down
2 changes: 2 additions & 0 deletions ibis/clickhouse/operations.py
Expand Up @@ -629,6 +629,8 @@ def _string_like(translator, expr):
ops.TableColumn: _table_column,
ops.TableArrayView: _table_array_view,

ops.DateAdd: binary_infix_op('+'),
ops.DateSubtract: binary_infix_op('-'),
ops.TimestampAdd: binary_infix_op('+'),
ops.TimestampSubtract: binary_infix_op('-'),
ops.TimestampFromUNIX: _timestamp_from_unix,
Expand Down
53 changes: 43 additions & 10 deletions ibis/impala/compiler.py
Expand Up @@ -615,18 +615,49 @@ def _table_array_view(translator, expr):
def _timestamp_op(func):
def _formatter(translator, expr):
op = expr.op()
arg, offset = op.args
formatted_arg = translator.translate(arg)
formatted_offset = translator.translate(offset)
left, right = op.args
formatted_left = translator.translate(left)
formatted_right = translator.translate(right)

if isinstance(left, (ir.TimestampScalar, ir.DateValue)):
formatted_left = 'cast({} as timestamp)'.format(formatted_left)

if isinstance(arg, ir.TimestampScalar):
formatted_arg = 'cast({} as timestamp)'.format(formatted_arg)
if isinstance(right, (ir.TimestampScalar, ir.DateValue)):
formatted_right = 'cast({} as timestamp)'.format(formatted_right)

return '{}({}, {})'.format(func, formatted_arg, formatted_offset)
return '{}({}, {})'.format(func, formatted_left, formatted_right)

return _formatter


_date_add = _timestamp_op('date_add')
_timestamp_add = _timestamp_op('date_add')


def _date_subtract(translator, expr):
    """Compile a date subtraction into an Impala function call.

    When the right operand is an ``ir.DateValue`` the expression is rendered
    with ``datediff``; any other right operand is rendered with ``date_sub``.
    The actual formatting is delegated to the formatter built by
    ``_timestamp_op``.
    """
    _, rhs = expr.op().args
    impala_func = 'datediff' if isinstance(rhs, ir.DateValue) else 'date_sub'
    return _timestamp_op(impala_func)(translator, expr)


def _timestamp_subtract(translator, expr):
    """Compile a timestamp subtraction into Impala SQL.

    Subtracting an ``ir.TimestampValue`` is rendered as the difference of the
    two operands' ``unix_timestamp`` values; any other right operand is
    rendered through the ``date_sub`` formatter built by ``_timestamp_op``.
    """
    lhs, rhs = expr.op().args

    # Anything that is not a timestamp on the right goes through date_sub.
    if not isinstance(rhs, ir.TimestampValue):
        return _timestamp_op('date_sub')(translator, expr)

    lhs_sql = translator.translate(lhs)
    rhs_sql = translator.translate(rhs)
    return 'unix_timestamp({}) - unix_timestamp({})'.format(lhs_sql, rhs_sql)


# ---------------------------------------------------------------------
# Semi/anti-join supports

Expand Down Expand Up @@ -710,15 +741,15 @@ def _truncate(translator, expr):
op = expr.op()
arg, unit = op.args

arg = translator.translate(op.args[0])
arg_formatted = translator.translate(arg)
try:
unit = _impala_unit_names[unit]
except KeyError:
raise com.UnsupportedOperationError(
'{!r} unit is not supported in timestamp truncate'.format(unit)
)

return "trunc({}, '{}')".format(arg, unit)
return "trunc({}, '{}')".format(arg_formatted, unit)


def _timestamp_from_unix(translator, expr):
Expand Down Expand Up @@ -1083,8 +1114,10 @@ def _string_like(translator, expr):

ops.TableArrayView: _table_array_view,

ops.TimestampAdd: _timestamp_op('date_add'),
ops.TimestampSubtract: _timestamp_op('date_sub'),
ops.DateAdd: _date_add,
ops.DateSubtract: _date_subtract,
ops.TimestampAdd: _timestamp_add,
ops.TimestampSubtract: _timestamp_subtract,
ops.TimestampFromUNIX: _timestamp_from_unix,

transforms.ExistsSubquery: _exists_subquery,
Expand Down
18 changes: 14 additions & 4 deletions ibis/pandas/client.py
Expand Up @@ -30,6 +30,7 @@
dt.Date: 'datetime64[ns]',
dt.Time: 'datetime64[ns]',
dt.Timestamp: 'datetime64[ns]',
dt.Interval: 'timedelta64[ns]',
dt.Int8: 'int8',
dt.Int16: 'int16',
dt.Int32: 'int32',
Expand Down Expand Up @@ -154,12 +155,16 @@ def infer_pandas_schema(df, schema=None):

def ibis_dtype_to_pandas(ibis_dtype):
"""Convert ibis dtype to the pandas / numpy alternative"""
assert isinstance(ibis_dtype, dt.DataType)

if isinstance(ibis_dtype, dt.Timestamp) and ibis_dtype.timezone:
return DatetimeTZDtype('ns', ibis_dtype.timezone)
elif (isinstance(ibis_dtype, dt.DataType) and
type(ibis_dtype) not in _ibis_dtypes):
elif isinstance(ibis_dtype, dt.Interval):
return 'timedelta64[{}]'.format(ibis_dtype.unit)
elif type(ibis_dtype) in _ibis_dtypes:
return _ibis_dtypes[type(ibis_dtype)]
else:
return 'object'
return _ibis_dtypes[type(ibis_dtype)]


def ibis_schema_to_pandas(schema):
Expand All @@ -170,7 +175,12 @@ def ibis_schema_apply_to(schema, df):
"""Applies the Ibis schema on a pandas dataframe"""

for column, dtype in schema.items():
df[column] = df[column].astype(dtype.to_pandas(), errors='ignore')
pandas_dtype = dtype.to_pandas()
if isinstance(dtype, dt.Interval):
df[column] = df[column].values.astype(pandas_dtype)
else:
df[column] = df[column].astype(pandas_dtype, errors='ignore')

if PY2 and dtype == dt.string:
df[column] = df[column].str.decode('utf-8', errors='ignore')

Expand Down
5 changes: 5 additions & 0 deletions ibis/sql/mysql/client.py
Expand Up @@ -27,6 +27,11 @@ def mysql_tinyint(satype, nullable=True):
return dt.Int8(nullable=nullable)


@dt.dtype.register(mysql.BLOB)
def mysql_blob(satype, nullable=True):
    """Return the ibis ``Binary`` dtype registered for ``mysql.BLOB``.

    ``satype`` is accepted for the dispatch signature but is not inspected;
    only ``nullable`` is forwarded to the resulting dtype.
    """
    binary_dtype = dt.Binary(nullable=nullable)
    return binary_dtype


class MySQLTable(alch.AlchemyTable):
pass

Expand Down
30 changes: 24 additions & 6 deletions ibis/tests/all/test_temporal.py
Expand Up @@ -125,22 +125,40 @@ def test_integer_to_interval_date_failure(backend, con, alltypes, df, unit):
date_col + interval


date_value = pd.Timestamp('2017-12-31')
timestamp_value = pd.Timestamp('2018-01-01 18:18:18')


@pytest.mark.parametrize(('expr_fn', 'expected_fn'), [
param(lambda t: t.timestamp_col + ibis.interval(days=4),
lambda t: t.timestamp_col + pd.Timedelta(days=4),
id='timestamp-add-days'),
param(lambda t: t.timestamp_col - ibis.interval(days=4),
lambda t: t.timestamp_col - pd.Timedelta(days=4),
id='timestamp-sub-days'),
id='timestamp-add-interval'),
param(lambda t: t.timestamp_col - ibis.interval(days=17),
lambda t: t.timestamp_col - pd.Timedelta(days=17),
id='timestamp-subtract-interval'),
param(lambda t: t.timestamp_col.date() + ibis.interval(days=4),
lambda t: t.timestamp_col.dt.floor('d') + pd.Timedelta(days=4),
id='date-add-interval'),
param(lambda t: t.timestamp_col.date() - ibis.interval(days=14),
lambda t: t.timestamp_col.dt.floor('d') - pd.Timedelta(days=14),
id='date-subtract-interval'),
param(lambda t: t.timestamp_col - ibis.timestamp(timestamp_value),
lambda t: pd.Series((t.timestamp_col - timestamp_value)
.values.astype('timedelta64[s]')),
id='timestamp-subtract-timestamp'),
param(lambda t: t.timestamp_col.date() - ibis.date(date_value),
lambda t: t.timestamp_col.dt.floor('d') - date_value,
id='date-subtract-date'),
])
@tu.skipif_unsupported
def test_timestamp_binop(backend, con, alltypes, df,
expr_fn, expected_fn):
def test_temporal_binop(backend, con, alltypes, df,
expr_fn, expected_fn):
expr = expr_fn(alltypes)
expected = expected_fn(df)

result = con.execute(expr)
expected = backend.default_series_rename(expected)

backend.assert_series_equal(result, expected)


Expand Down

0 comments on commit 0c63d4b

Please sign in to comment.