BUG: Fix sort_by codegen and repeated compilation issue

Closes #1179 Closes #1180 Author: Phillip Cloud <cpcloud@gmail.com> Closes #1181 from cpcloud/fix-order-by-count and squashes the following commits: f1e2ebe [Phillip Cloud] BUG: Fix sort_by codegen and repeated compilation issue
ibis-project · Oct 25, 2017 · a0581b2 · a0581b2
1 parent 0b65281
commit a0581b2
Show file tree

Hide file tree

Showing 5 changed files with 89 additions and 41 deletions.
diff --git a/docs/source/release.rst b/docs/source/release.rst
@@ -52,6 +52,10 @@ New Features
 Bug Fixes
 ~~~~~~~~~
 
+* Remove global expression caching to ensure repeatable code generation
+  (:issue:`1179`, :issue:`1181`)
+* Fix ``ORDER BY`` generation without a ``GROUP BY`` (:issue:`1180`,
+  :issue:`1181`)
 * Ensure that the pandas backend can deal with unary operations in groupby
 * (:issue:`1182`)
 * Incorrect impala code generated for NOT with complex argument (:issue:`1176`)

diff --git a/ibis/expr/analysis.py b/ibis/expr/analysis.py
@@ -29,7 +29,8 @@
 
 def sub_for(expr, substitutions):
     mapping = {repr(k.op()): v for k, v in substitutions}
-    return _subs(expr, mapping)
+    substitutor = Substitutor()
+    return substitutor.substitute(expr, mapping)
 
 
 def _expr_key(expr):
@@ -46,36 +47,50 @@ def _expr_key(expr):
         return repr(op), name
 
 
-@toolz.memoize(key=lambda args, kwargs: _expr_key(args[0]))
-def _subs(expr, mapping):
-    """Substitute expressions with other expressions
-    """
-    node = expr.op()
-    key = repr(node)
-    if key in mapping:
-        return mapping[key]
-    if node.blocks():
-        return expr
-
-    new_args = list(node.args)
-    unchanged = True
-    for i, arg in enumerate(new_args):
-        if isinstance(arg, ir.Expr):
-            new_arg = _subs(arg, mapping)
-            unchanged = unchanged and new_arg is arg
-            new_args[i] = new_arg
-    if unchanged:
-        return expr
-    try:
-        new_node = type(node)(*new_args)
-    except IbisTypeError:
-        return expr
+class Substitutor(object):
 
-    try:
-        name = expr.get_name()
-    except ExpressionError:
-        name = None
-    return expr._factory(new_node, name=name)
+    def __init__(self):
+        cache = toolz.memoize(key=lambda args, kwargs: _expr_key(args[0]))
+        self.substitute = cache(self._substitute)
+
+    def _substitute(self, expr, mapping):
+        """Substitute expressions with other expressions.
+
+        Parameters
+        ----------
+        expr : ibis.expr.types.Expr
+        mapping : Dict, OrderedDict
+
+        Returns
+        -------
+        new_expr : ibis.expr.types.Expr
+        """
+        node = expr.op()
+        key = repr(node)
+        if key in mapping:
+            return mapping[key]
+        if node.blocks():
+            return expr
+
+        new_args = list(node.args)
+        unchanged = True
+        for i, arg in enumerate(new_args):
+            if isinstance(arg, ir.Expr):
+                new_arg = self.substitute(arg, mapping)
+                unchanged = unchanged and new_arg is arg
+                new_args[i] = new_arg
+        if unchanged:
+            return expr
+        try:
+            new_node = type(node)(*new_args)
+        except IbisTypeError:
+            return expr
+
+        try:
+            name = expr.get_name()
+        except ExpressionError:
+            name = None
+        return expr._factory(new_node, name=name)
 
 
 class ScalarAggregate(object):

diff --git a/ibis/expr/api.py b/ibis/expr/api.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
+
 import warnings
 import operator
 import functools
@@ -2155,17 +2157,9 @@ def _table_info(self, buf=None):
     types = ['Type', '----'] + [repr(x) for x in self.schema().types]
     counts = ['Non-null #', '----------'] + [str(x) for x in metrics[1:]]
     col_metrics = util.adjoin(2, names, types, counts)
+    result = 'Table rows: {}\n\n{}'.format(metrics[0], col_metrics)
 
-    result = ('Table rows: {0}\n\n'
-              '{1}'
-              .format(metrics[0], col_metrics))
-
-    if buf is None:
-        import sys
-        sys.stdout.write(result)
-        sys.stdout.write('\n')
-    else:
-        buf.write(result)
+    print(result, file=buf)
 
 
 def _table_set_column(table, name, expr):

diff --git a/ibis/expr/operations.py b/ibis/expr/operations.py
@@ -1900,7 +1900,9 @@ def __init__(self, table, metrics, by=None, having=None,
         self.having = [] if having is None else having
 
         self.predicates = [] if predicates is None else predicates
-        sort_keys = [] if sort_keys is None else sort_keys
+
+        # order by only makes sense with group by in an aggregation
+        sort_keys = [] if not self.by or sort_keys is None else sort_keys
         self.sort_keys = [to_sort_key(self.table, k)
                           for k in util.promote_list(sort_keys)]
 

diff --git a/ibis/sql/sqlite/tests/test_functions.py b/ibis/sql/sqlite/tests/test_functions.py
@@ -781,3 +781,36 @@ def test_scalar_parameter(db):
     result = expr.execute(params={start: start_string, end: end_string})
     expected = col.between(start_string, end_string).execute()
     tm.assert_series_equal(result, expected)
+
+
+def test_compile_twice(dbpath):
+    con1 = ibis.sqlite.connect(dbpath)
+    t1 = con1.table('batting')
+    sort_key1 = ibis.desc(t1.playerID)
+    sorted_table1 = t1.sort_by(sort_key1)
+    expr1 = sorted_table1.count()
+
+    con2 = ibis.sqlite.connect(dbpath)
+    t2 = con2.table('batting')
+    sort_key2 = ibis.desc(t2.playerID)
+    sorted_table2 = t2.sort_by(sort_key2)
+    expr2 = sorted_table2.count()
+
+    result1 = str(expr1.compile())
+    result2 = str(expr2.compile())
+
+    assert result1 == result2
+
+
+def test_count_on_order_by(db):
+    t = db.batting
+    sort_key = ibis.desc(t.playerID)
+    sorted_table = t.sort_by(sort_key)
+    expr = sorted_table.count()
+    result = str(
+        expr.compile().compile(compile_kwargs={'literal_binds': True})
+    )
+    expected = """\
+SELECT count('*') AS count 
+FROM "default".batting AS t0"""  # noqa: W291
+    assert result == expected