Skip to content

Commit

Permalink
Added dateadd/timestampad mapd operations.
Browse files Browse the repository at this point in the history
  • Loading branch information
xmnlab committed Apr 19, 2018
1 parent 0387ef7 commit 650c57b
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 37 deletions.
82 changes: 82 additions & 0 deletions ibis/mapd/README.rst
Expand Up @@ -226,6 +226,88 @@ a set of reserved words from `MapD` language.
`quote_identifiers` is used to put quotes around the string sent if the string
match to specific criteria.

Timestamp/Date operations
-------------------------

**Interval:**

MapD's INTERVAL statement allows only the following date/time units: YEAR, DAY,
MONTH, HOUR, MINUTE, SECOND

To use the interval statement, it is necessary to use an
`integer literal/constant` and call the `to_interval` method:

.. code-block:: Python

    >>> t['arr_timestamp'] + ibis.literal(1).to_interval('Y')

.. code-block:: Sql

    SELECT "arr_timestamp" + INTERVAL '1' YEAR AS tmp
    FROM mapd.flights_2008_10k

**Extract date/time**

To extract a date part from a timestamp, use the `extract` method:

.. code-block:: Python

    >>> t['arr_timestamp'].extract('YEAR')

The `extract` method is available only on the `ibis.mapd` backend.

The operators allowed are: YEAR, QUARTER, MONTH, DAY, HOUR, MINUTE, SECOND,
DOW, ISODOW, DOY, EPOCH, QUARTERDAY, WEEK

**Direct functions to extract date/time**

There are also direct functions to extract date/time parts; the following
shows how to use them:

.. code-block:: Python

    >>> t['arr_timestamp'].year()
    >>> t['arr_timestamp'].month()
    >>> t['arr_timestamp'].day()
    >>> t['arr_timestamp'].hour()
    >>> t['arr_timestamp'].minute()
    >>> t['arr_timestamp'].second()

The result should be:

.. code-block:: Sql

    SELECT EXTRACT(YEAR FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

    SELECT EXTRACT(MONTH FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

    SELECT EXTRACT(DAY FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

    SELECT EXTRACT(HOUR FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

    SELECT EXTRACT(MINUTE FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

    SELECT EXTRACT(SECOND FROM "arr_timestamp") AS tmp
    FROM mapd.flights_2008_10k

**Timestamp/Date Truncate**

A function to truncate timestamp/date values is available as `truncate`:

.. code-block:: Python

    >>> t['arr_timestamp'].truncate(date_part)

The date part operators allowed are: YEAR or Y, QUARTER or Q, MONTH or M,
DAY or D, HOUR or h, MINUTE or m, SECOND or s, WEEK, MILLENNIUM, CENTURY,
DECADE, QUARTERDAY

Best practices
--------------

Expand Down
10 changes: 3 additions & 7 deletions ibis/mapd/compiler.py
@@ -1,14 +1,11 @@
from six import StringIO
from .operations import (
_operation_registry, _name_expr
)
from . import operations as mapd_ops

import ibis.common as com
import ibis.util as util
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.sql.compiler as compiles
import ibis.expr.types as ir


def build_ast(expr, context):
Expand Down Expand Up @@ -169,11 +166,11 @@ class MapDExprTranslator(compiles.ExprTranslator):
"""
"""
_registry = _operation_registry
_registry = mapd_ops._operation_registry
context_class = MapDQueryContext

def name(self, translated, name, force=True):
return _name_expr(translated, name)
return mapd_ops._name_expr(translated, name)


class MapDDialect(compiles.Dialect):
Expand All @@ -186,4 +183,3 @@ class MapDDialect(compiles.Dialect):
dialect = MapDDialect
compiles = MapDExprTranslator.compiles
rewrites = MapDExprTranslator.rewrites

108 changes: 78 additions & 30 deletions ibis/mapd/operations.py
Expand Up @@ -77,33 +77,36 @@ def unary(func_name):
return fixed_arity(func_name, 1)


def _reduction_format(translator, func_name, arg, args, where, distinct=False):
def _reduction_format(
translator,
func_name,
sql_func_name=None,
sql_signature='{}({})',
arg=None, args=None, where=None
):
if not sql_func_name:
sql_func_name = func_name

if where is not None:
arg = where.ifelse(arg, ibis.NA)

distinct_ = '' if not distinct else 'DISTINCT '
return '{}({}{})'.format(
func_name,
distinct_,
return sql_signature.format(
sql_func_name,
', '.join(map(translator.translate, [arg] + list(args)))
)


def _reduction(func_name, distinct=False):
def _reduction(func_name, sql_func_name=None, sql_signature='{}({})'):
def formatter(translator, expr):
op = expr.op()

# HACK: support trailing arguments
where = op.where
args = [arg for arg in op.args if arg is not where]
distinct_ = distinct

if hasattr(op, 'approx') and func_name == 'count' and distinct:
func_name == 'APPROX_COUNT_DISTINCT'
distinct_ = False

return _reduction_format(
translator, func_name, args[0], args[1:], where, distinct_
translator, func_name, sql_func_name, sql_signature,
args[0], args[1:], where
)

formatter.__name__ = func_name
Expand All @@ -122,7 +125,9 @@ def formatter(translator, expr):
func_type = '' if not _is_floating(arg) else '_FLOAT'

return _reduction_format(
translator, variants[how].upper() + func_type, arg, [], where
translator, variants[how].upper() + func_type,
None, '{}({})',
arg, [], where
)

formatter.__name__ = func
Expand Down Expand Up @@ -382,7 +387,22 @@ def _interval_from_integer(translator, expr):
"MapD doesn't support subsecond interval resolutions")

arg_ = translator.translate(arg)
return 'INTERVAL {} {}'.format(arg_, dtype.resolution.upper())
return '{}, (sign){}'.format(dtype.resolution.upper(), arg_)


def _timestamp_op(func, op_sign='+'):
def _formatter(translator, expr):
op = expr.op()
left, right = op.args
formatted_left = translator.translate(left)
formatted_right = translator.translate(right)

return '{}({}, {})'.format(
func, formatted_right.replace('(sign)', op_sign),
formatted_left
)

return _formatter


def _set_literal_format(translator, expr):
Expand Down Expand Up @@ -637,7 +657,7 @@ def _zero_if_null(translator, expr):

# AGGREGATION

class ApproxCountDistinct(ops.CountDistinct):
class CountDistinct(ops.CountDistinct):
"""
Returns the approximate count of distinct values of x with defined
expected error rate e
Expand Down Expand Up @@ -874,20 +894,50 @@ class TimestampAdd(ops.TimestampUnaryOp):

TimestampExtract: timestamp_binary_infix_op('EXTRACT', 'FROM'),

ops.DateAdd: binary_infix_op('+'),
ops.DateSub: binary_infix_op('-'),
ops.DateDiff: binary_infix_op('-'),
ops.TimestampAdd: binary_infix_op('+'),
ops.TimestampSub: binary_infix_op('-'),
ops.TimestampDiff: binary_infix_op('-'),
ops.IntervalAdd: _interval_from_integer,
ops.IntervalFromInteger: _interval_from_integer,

ops.DateAdd: _timestamp_op('DATEADD'),
ops.DateSub: _timestamp_op('DATEADD', '-'),
ops.DateDiff: _timestamp_op('DATEDIFF'),
ops.TimestampAdd: _timestamp_op('TIMESTAMPADD'),
ops.TimestampSub: _timestamp_op('TIMESTAMPADD', '-'),
ops.TimestampDiff: _timestamp_op('TIMESTAMPDIFF'),
ops.TimestampFromUNIX: _timestamp_from_unix,
TimestampAdd: lambda field, value, unit: 'TIMESTAMPADD({}, {}, {}) '.format(v, u)
TimestampAdd: (
lambda field, value, unit:
'TIMESTAMPADD({}, {}, {}) '.format(value, unit)
)
}


# Reduction operation node used for approximate distinct counting.
# NOTE(review): the meaning of `approx` is not visible in this file section
# (presumably an error-rate/precision knob) -- confirm before relying on it.
class ApproxCountDistinct(ops.Reduction):
"""Approximate number of unique values
"""
# Column argument; any element type is accepted.
arg = ops.Arg(rlz.column(rlz.any))
# Integer argument, defaults to 1.
approx = ops.Arg(rlz.integer, default=1)
# Boolean argument, defaults to None (no condition supplied).
where = ops.Arg(rlz.boolean, default=None)

def output_type(self):
# NOTE(review): the Impala remark and the commented-out return below look
# inherited from another backend -- confirm whether they still apply here.
# Impala 2.0 and higher returns a DOUBLE
# return ir.DoubleScalar
# The result is declared as an integer scalar with dtype int64.
return ops.partial(ir.IntegerScalar, dtype=ops.dt.int64)


approx_count_distinct = _reduction(
'approx_nunique',
sql_func_name='approx_count_distinct',
sql_signature='{}({})'
)

count_distinct = _reduction('count', sql_signature='{}(DISTINCT {})')
count = _reduction('count')

_agg_ops = {
ops.Count: _reduction('count'),
ops.CountDistinct: _reduction('count', distinct=True),
ApproxCountDistinct: _reduction('count', distinct=True),
ops.Count: count,
ops.CountDistinct: count_distinct,
ApproxCountDistinct: approx_count_distinct,
ops.Mean: _reduction('avg'),
ops.Max: _reduction('max'),
ops.Min: _reduction('min'),
Expand Down Expand Up @@ -966,20 +1016,18 @@ def f(_klass):
assign_functions_to_dtype(ir.NumericValue, _trigonometric_ops, forced=True)
assign_functions_to_dtype(ir.NumericValue, _math_ops, forced=True)
assign_functions_to_dtype(ir.NumericValue, _geometric_ops, forced=True)
assign_functions_to_dtype(ir.NumericValue, _stats_ops, forced=True)
assign_functions_to_dtype(ir.NumericValue, _agg_ops, forced=True)

assign_functions_to_dtype(ir.NumericValue, _stats_ops, forced=False)
assign_functions_to_dtype(ir.ColumnExpr, _agg_ops, forced=True)
# string operations
assign_functions_to_dtype(ir.StringValue, _string_ops, forced=True)

# date/time/timestamp operations
assign_functions_to_dtype(ir.TimestampColumn, _date_ops, forced=True)
assign_functions_to_dtype(ir.DateColumn, _date_ops, forced=True)
# assign_functions_to_dtype(ir.DateColumn, _date_ops, forced=True)

_add_method(ir.TimestampColumn, TimestampTruncate, 'truncate')
_add_method(ir.DateColumn, DateTruncate, 'truncate')
_add_method(ir.TimestampColumn, TimestampExtract, 'extract')
# _add_method(ir.DateColumn, TimestampExtract, 'extract')


year = ibis.api._timedelta('MY_YEAR', 'Y')

0 comments on commit 650c57b

Please sign in to comment.