Skip to content

Commit

Permalink
Added byte_length operation
Browse files Browse the repository at this point in the history
  • Loading branch information
xmnlab committed Apr 19, 2018
1 parent 650c57b commit 716494e
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 8 deletions.
37 changes: 37 additions & 0 deletions ibis/mapd/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,43 @@ The date part operators allowed are: YEAR or Y, QUARTER or Q, MONTH or M,
DAY or D, HOUR or h, MINUTE or m, SECOND or s, WEEK, MILLENNIUM, CENTURY,
DECADE, QUARTERDAY

String operations
-----------------

- `byte_length` is not part of the `ibis` `string` operations; it works only
with the `mapd` backend.

The `Not` operation can be done using the `~` operator:

.. code-block:: Python

    >>> ~t['dest_name'].like('L%')

The `regexp` and `regexp_like` operations can be done using the `re_search` operation:

.. code-block:: Python

    >>> t['dest_name'].re_search('L%')

Aggregate operations
====================

count column
t['taxiin'].count()

distinct count column
t['taxiin'].distinct().count()


distinct count/nunique
t['taxiin'].nunique().name('v')


approx distinct count
t['taxiin'].approx_nunique(10)


Best practices
--------------

Expand Down
37 changes: 29 additions & 8 deletions ibis/mapd/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@ def formatter(translator, expr):
return formatter


def unary_prefix_op(prefix_op):
    """Build a translation rule that puts an operator keyword before its operand.

    Parameters
    ----------
    prefix_op : str
        Operator keyword. It is upper-cased in the generated text and is
        also assigned as the ``__name__`` of the returned function.

    Returns
    -------
    callable
        ``formatter(translator, expr)`` returning ``'<PREFIX_OP> <operand>'``.
    """
    def formatter(translator, expr):
        # Only the first argument of the operation is rendered; it is passed
        # through the module's ``_parenthesize`` helper before being emitted.
        operand = expr.op().args[0]
        rendered = _parenthesize(translator, operand)
        return ' '.join([str(prefix_op.upper()), str(rendered)])

    formatter.__name__ = prefix_op
    return formatter


def binary_infix_op(infix_sym):
def formatter(translator, expr):
op = expr.op()
Expand Down Expand Up @@ -248,12 +259,15 @@ def compile_cov(translator, expr):
)


def compile_char_length(translator, expr):
    """Render a string-length expression as a ``CHAR_LENGTH`` call.

    The first argument of the expression's operation is compiled with
    ``translator.translate`` and the result is wrapped as
    ``CHAR_LENGTH(<compiled argument>)``.
    """
    operand = expr.op().args[0]
    return 'CHAR_LENGTH({})'.format(translator.translate(operand))
def compile_length(func_name='length', sql_func_name='CHAR_LENGTH'):
    """Create a translation rule for a single-argument length function.

    Parameters
    ----------
    func_name : str
        Value assigned to the ``__name__`` of the returned rule.
    sql_func_name : str
        Function name that wraps the compiled argument in the output.

    Returns
    -------
    callable
        ``rule(translator, expr)`` returning ``'<sql_func_name>(<argument>)'``.
    """
    def _compile_length(translator, expr):
        # Compile only the first argument of the operation, then wrap it.
        operand = expr.op().args[0]
        return '{}({})'.format(sql_func_name, translator.translate(operand))

    _compile_length.__name__ = func_name
    return _compile_length


def _xor(translator, expr):
Expand Down Expand Up @@ -839,8 +853,14 @@ class DateTruncate(ops.DateTruncate):
Conv_4326_900913_Y: unary('conv_4326_900913_y')
}


class ByteLength(ops.StringLength):
    """String-length operation that measures the string in bytes."""


_string_ops = {
ops.StringLength: unary('char_length'),
ops.StringLength: compile_length(),
ByteLength: compile_length('byte_length', 'LENGTH'),
ops.RegexSearch: binary_infix_op('REGEXP'),
ops.StringSQLLike: binary_infix_op('like'),
ops.StringSQLILike: binary_infix_op('ilike'),
Expand Down Expand Up @@ -931,13 +951,14 @@ def output_type(self):
sql_signature='{}({})'
)

count_distinct = _reduction('count', sql_signature='{}(DISTINCT {})')
count_distinct = _reduction('count')
count = _reduction('count')

_agg_ops = {
ops.Count: count,
ops.CountDistinct: count_distinct,
ApproxCountDistinct: approx_count_distinct,
ops.DistinctColumn: unary_prefix_op('distinct'),
ops.Mean: _reduction('avg'),
ops.Max: _reduction('max'),
ops.Min: _reduction('min'),
Expand Down

0 comments on commit 716494e

Please sign in to comment.