Skip to content

Commit

Permalink
BUG: Fix sort_by codegen and repeated compilation issue
Browse files Browse the repository at this point in the history
Closes #1179 Closes #1180

Author: Phillip Cloud <cpcloud@gmail.com>

Closes #1181 from cpcloud/fix-order-by-count and squashes the following commits:

f1e2ebe [Phillip Cloud] BUG: Fix sort_by codegen and repeated compilation issue
  • Loading branch information
cpcloud committed Oct 25, 2017
1 parent 0b65281 commit a0581b2
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 41 deletions.
4 changes: 4 additions & 0 deletions docs/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ New Features
Bug Fixes
~~~~~~~~~

* Remove global expression caching to ensure repeatable code generation
(:issue:`1179`, :issue:`1181`)
* Fix ``ORDER BY`` generation without a ``GROUP BY`` (:issue:`1180`,
:issue:`1181`)
* Ensure that the pandas backend can deal with unary operations in groupby
* (:issue:`1182`)
* Incorrect impala code generated for NOT with complex argument (:issue:`1176`)
Expand Down
75 changes: 45 additions & 30 deletions ibis/expr/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@

def sub_for(expr, substitutions):
mapping = {repr(k.op()): v for k, v in substitutions}
return _subs(expr, mapping)
substitutor = Substitutor()
return substitutor.substitute(expr, mapping)


def _expr_key(expr):
Expand All @@ -46,36 +47,50 @@ def _expr_key(expr):
return repr(op), name


@toolz.memoize(key=lambda args, kwargs: _expr_key(args[0]))
def _subs(expr, mapping):
"""Substitute expressions with other expressions
"""
node = expr.op()
key = repr(node)
if key in mapping:
return mapping[key]
if node.blocks():
return expr

new_args = list(node.args)
unchanged = True
for i, arg in enumerate(new_args):
if isinstance(arg, ir.Expr):
new_arg = _subs(arg, mapping)
unchanged = unchanged and new_arg is arg
new_args[i] = new_arg
if unchanged:
return expr
try:
new_node = type(node)(*new_args)
except IbisTypeError:
return expr
class Substitutor(object):

try:
name = expr.get_name()
except ExpressionError:
name = None
return expr._factory(new_node, name=name)
def __init__(self):
cache = toolz.memoize(key=lambda args, kwargs: _expr_key(args[0]))
self.substitute = cache(self._substitute)

def _substitute(self, expr, mapping):
"""Substitute expressions with other expressions.
Parameters
----------
expr : ibis.expr.types.Expr
mapping : Dict, OrderedDict
Returns
-------
new_expr : ibis.expr.types.Expr
"""
node = expr.op()
key = repr(node)
if key in mapping:
return mapping[key]
if node.blocks():
return expr

new_args = list(node.args)
unchanged = True
for i, arg in enumerate(new_args):
if isinstance(arg, ir.Expr):
new_arg = self.substitute(arg, mapping)
unchanged = unchanged and new_arg is arg
new_args[i] = new_arg
if unchanged:
return expr
try:
new_node = type(node)(*new_args)
except IbisTypeError:
return expr

try:
name = expr.get_name()
except ExpressionError:
name = None
return expr._factory(new_node, name=name)


class ScalarAggregate(object):
Expand Down
14 changes: 4 additions & 10 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import warnings
import operator
import functools
Expand Down Expand Up @@ -2155,17 +2157,9 @@ def _table_info(self, buf=None):
types = ['Type', '----'] + [repr(x) for x in self.schema().types]
counts = ['Non-null #', '----------'] + [str(x) for x in metrics[1:]]
col_metrics = util.adjoin(2, names, types, counts)
result = 'Table rows: {}\n\n{}'.format(metrics[0], col_metrics)

result = ('Table rows: {0}\n\n'
'{1}'
.format(metrics[0], col_metrics))

if buf is None:
import sys
sys.stdout.write(result)
sys.stdout.write('\n')
else:
buf.write(result)
print(result, file=buf)


def _table_set_column(table, name, expr):
Expand Down
4 changes: 3 additions & 1 deletion ibis/expr/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1900,7 +1900,9 @@ def __init__(self, table, metrics, by=None, having=None,
self.having = [] if having is None else having

self.predicates = [] if predicates is None else predicates
sort_keys = [] if sort_keys is None else sort_keys

# order by only makes sense with group by in an aggregation
sort_keys = [] if not self.by or sort_keys is None else sort_keys
self.sort_keys = [to_sort_key(self.table, k)
for k in util.promote_list(sort_keys)]

Expand Down
33 changes: 33 additions & 0 deletions ibis/sql/sqlite/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,3 +781,36 @@ def test_scalar_parameter(db):
result = expr.execute(params={start: start_string, end: end_string})
expected = col.between(start_string, end_string).execute()
tm.assert_series_equal(result, expected)


def test_compile_twice(dbpath):
con1 = ibis.sqlite.connect(dbpath)
t1 = con1.table('batting')
sort_key1 = ibis.desc(t1.playerID)
sorted_table1 = t1.sort_by(sort_key1)
expr1 = sorted_table1.count()

con2 = ibis.sqlite.connect(dbpath)
t2 = con2.table('batting')
sort_key2 = ibis.desc(t2.playerID)
sorted_table2 = t2.sort_by(sort_key2)
expr2 = sorted_table2.count()

result1 = str(expr1.compile())
result2 = str(expr2.compile())

assert result1 == result2


def test_count_on_order_by(db):
t = db.batting
sort_key = ibis.desc(t.playerID)
sorted_table = t.sort_by(sort_key)
expr = sorted_table.count()
result = str(
expr.compile().compile(compile_kwargs={'literal_binds': True})
)
expected = """\
SELECT count('*') AS count
FROM "default".batting AS t0""" # noqa: W291
assert result == expected

0 comments on commit a0581b2

Please sign in to comment.