Skip to content

Commit

Permalink
More exhaustive bucket test cases, and move dimension creation to tra…
Browse files Browse the repository at this point in the history
…nslate_expr code path
  • Loading branch information
wesm committed May 27, 2015
1 parent cb90310 commit f0404e3
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 32 deletions.
32 changes: 0 additions & 32 deletions ibis/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,38 +250,6 @@ def _visit_select_expr(self, expr):
else:
return expr

def _visit_select_Bucket(self, expr):
    """Rewrite a Bucket expression as a named searched-CASE expression.

    Each branch of the CASE maps a range of ``op.arg`` to an integer id.
    Ids are handed out in branch order starting at 0: the optional
    "under" branch first, then one branch per consecutive pair of edges
    in ``op.buckets``, then the optional "over" branch.

    The finished CASE expression is given the same name as ``expr``.
    """
    import operator

    node = expr.op()
    builder = api.case()

    # closed == 'left'  -> lower <= arg <  upper
    # anything else     -> lower <  arg <= upper
    closed_left = node.closed == 'left'
    above_lower = operator.le if closed_left else operator.lt
    below_upper = operator.lt if closed_left else operator.le

    def branch_conditions():
        # Yield the predicates lazily so that each one is built right
        # before it is attached to the CASE builder.
        if node.include_under:
            yield below_upper(node.arg, node.buckets[0])

        for lo, hi in zip(node.buckets, node.buckets[1:]):
            yield above_lower(lo, node.arg) & below_upper(node.arg, hi)

        if node.include_over:
            yield above_lower(node.buckets[-1], node.arg)

    for label, condition in enumerate(branch_conditions()):
        builder = builder.when(condition, label)

    return builder.end().name(expr.get_name())

def _analyze_filter_exprs(self):
# What's semantically contained in the filter predicates may need to be
# rewritten. Not sure if this is the right place to do this, but a
Expand Down
47 changes: 47 additions & 0 deletions ibis/sql/exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

from io import BytesIO

import ibis.expr.analytics as analytics
import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.expr.temporal as tempo
Expand Down Expand Up @@ -282,6 +284,49 @@ def _searched_case(translator, expr):
return formatter.get_result()


def _bucket(translator, expr):
    """Translate a Bucket expression by lowering it to a searched CASE.

    A CASE expression is built with one branch per bucket and then
    handed to ``_searched_case`` for formatting. Bucket ids are assigned
    in branch order starting at 0:

    * an optional "under" branch (``op.include_under``) for values below
      the first edge,
    * one branch for every consecutive pair of edges in ``op.buckets``,
    * an optional "over" branch (``op.include_over``) for values above
      the last edge.

    ``op.closed`` selects which side of each interval is inclusive
    (``'left'`` gives ``lower <= arg < upper``; any other value gives
    ``lower < arg <= upper``). When ``op.close_extreme`` is set, the
    outermost bucket on the otherwise-open end is closed on both sides,
    and the adjacent under/over branch becomes strict so the two do not
    overlap.

    Parameters
    ----------
    translator : expression translator passed through to
        ``_searched_case``
    expr : expression whose ``op()`` exposes ``arg``, ``buckets``,
        ``closed``, ``close_extreme``, ``include_under`` and
        ``include_over``

    Returns
    -------
    Whatever ``_searched_case`` returns for the generated CASE
    expression.
    """
    import operator

    op = expr.op()
    arg = op.arg
    buckets = op.buckets

    if op.closed == 'left':
        l_cmp = operator.le
        r_cmp = operator.lt
    else:
        l_cmp = operator.lt
        r_cmp = operator.le

    user_num_buckets = len(buckets) - 1

    # close_extreme closes the outermost *user* bucket. With a single edge
    # there is no user bucket to close, and forcing both the under and the
    # over comparison to be strict would leave a value equal to the edge
    # matched by no branch at all. In that case fall back to the plain
    # ``closed`` semantics so the edge lands in exactly one branch.
    close_extreme = op.close_extreme and user_num_buckets > 0

    stmt = api.case()

    bucket_id = 0
    if op.include_under:
        under_cmp = operator.lt if close_extreme else r_cmp
        stmt = stmt.when(under_cmp(arg, buckets[0]), bucket_id)
        bucket_id += 1

    last_user_bucket = user_num_buckets - 1
    for j, (lower, upper) in enumerate(zip(buckets, buckets[1:])):
        # The bucket whose open side faces outward is the one that
        # close_extreme turns into a fully closed interval.
        closes_extreme = close_extreme and (
            (op.closed == 'right' and j == 0) or
            (op.closed == 'left' and j == last_user_bucket)
        )
        if closes_extreme:
            condition = (lower <= arg) & (arg <= upper)
        else:
            condition = l_cmp(lower, arg) & r_cmp(arg, upper)
        stmt = stmt.when(condition, bucket_id)
        bucket_id += 1

    if op.include_over:
        over_cmp = operator.lt if close_extreme else l_cmp
        stmt = stmt.when(over_cmp(buckets[-1], arg), bucket_id)
        bucket_id += 1

    case_expr = stmt.end().name(expr._name)
    return _searched_case(translator, case_expr)



def _table_array_view(translator, expr):
ctx = translator.context
table = expr.op().table
Expand Down Expand Up @@ -602,6 +647,8 @@ def _not_implemented(translator, expr):
ops.Contains: _contains,
ops.NotContains: _not_contains,

analytics.Bucket: _bucket,

ops.SimpleCase: _simple_case,
ops.SearchedCase: _searched_case,

Expand Down
107 changes: 107 additions & 0 deletions ibis/sql/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,113 @@ def test_search_case(self):
END"""
assert result == expected

def test_bucket_to_case(self):
    """Check the CASE text produced for ``bucket`` under several options.

    Every case buckets column ``f`` of the test table on the same four
    edges and varies ``closed``, ``close_extreme``, ``include_under`` and
    ``include_over``. The (expression, expected text) pairs are passed to
    ``_check_expr_cases``.
    """
    edges = [0, 10, 25, 50]

    def bucketed(**options):
        # Bucket column ``f`` on the shared edges with the given options.
        return self.table.f.bucket(edges, **options)

    default_sql = """\
CASE
WHEN (f >= 0) AND (f < 10) THEN 0
WHEN (f >= 10) AND (f < 25) THEN 1
WHEN (f >= 25) AND (f <= 50) THEN 2
ELSE NULL
END"""

    open_extreme_sql = """\
CASE
WHEN (f >= 0) AND (f < 10) THEN 0
WHEN (f >= 10) AND (f < 25) THEN 1
WHEN (f >= 25) AND (f < 50) THEN 2
ELSE NULL
END"""

    right_sql = """\
CASE
WHEN (f >= 0) AND (f <= 10) THEN 0
WHEN (f > 10) AND (f <= 25) THEN 1
WHEN (f > 25) AND (f <= 50) THEN 2
ELSE NULL
END"""

    right_open_extreme_sql = """\
CASE
WHEN (f > 0) AND (f <= 10) THEN 0
WHEN (f > 10) AND (f <= 25) THEN 1
WHEN (f > 25) AND (f <= 50) THEN 2
ELSE NULL
END"""

    under_sql = """\
CASE
WHEN f < 0 THEN 0
WHEN (f >= 0) AND (f < 10) THEN 1
WHEN (f >= 10) AND (f < 25) THEN 2
WHEN (f >= 25) AND (f <= 50) THEN 3
ELSE NULL
END"""

    under_over_sql = """\
CASE
WHEN f < 0 THEN 0
WHEN (f >= 0) AND (f < 10) THEN 1
WHEN (f >= 10) AND (f < 25) THEN 2
WHEN (f >= 25) AND (f <= 50) THEN 3
WHEN f > 50 THEN 4
ELSE NULL
END"""

    under_over_open_extreme_sql = """\
CASE
WHEN f < 0 THEN 0
WHEN (f >= 0) AND (f < 10) THEN 1
WHEN (f >= 10) AND (f < 25) THEN 2
WHEN (f >= 25) AND (f < 50) THEN 3
WHEN f >= 50 THEN 4
ELSE NULL
END"""

    right_under_open_extreme_sql = """\
CASE
WHEN f <= 0 THEN 0
WHEN (f > 0) AND (f <= 10) THEN 1
WHEN (f > 10) AND (f <= 25) THEN 2
WHEN (f > 25) AND (f <= 50) THEN 3
ELSE NULL
END"""

    self._check_expr_cases([
        (bucketed(), default_sql),
        (bucketed(close_extreme=False), open_extreme_sql),
        (bucketed(closed='right'), right_sql),
        (bucketed(closed='right', close_extreme=False),
         right_open_extreme_sql),
        (bucketed(include_under=True), under_sql),
        (bucketed(include_under=True, include_over=True),
         under_over_sql),
        (bucketed(close_extreme=False, include_under=True,
                  include_over=True),
         under_over_open_extreme_sql),
        (bucketed(closed='right', close_extreme=False,
                  include_under=True),
         right_under_open_extreme_sql),
    ])

def test_where_use_if(self):
expr = api.where(self.table.f > 0, self.table.e, self.table.a)
assert isinstance(expr, ir.FloatValue)
Expand Down
4 changes: 4 additions & 0 deletions ibis/tests/test_impala_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ def test_builtins_1(self):
d.bucket([0, 10, 25, 50], include_over=True, close_extreme=False),
d.bucket([10, 25, 50, 100], include_under=True),

# d.histogram(10),
# d.histogram(5, base=10),
# d.histogram(base=10, binwidth=5),

# coalesce-like cases
api.coalesce(table.int_col,
api.null(),
Expand Down

0 comments on commit f0404e3

Please sign in to comment.