Merge remote-tracking branch 'ibis-project/master'

ibis-project · May 29, 2018 · 89f17e9 · 89f17e9
2 parents 49dc84c + cf8c5cf
commit 89f17e9
Show file tree

Hide file tree

Showing 8 changed files with 110 additions and 56 deletions.
diff --git a/ci/datamgr.py b/ci/datamgr.py
@@ -73,7 +73,10 @@ def insert_tables(engine, names, data_directory):
         with engine.begin() as connection:
             df.to_sql(
                 table, connection, index=False, if_exists='append',
-                chunksize=1
+                chunksize=1 if os.name == 'nt' else None
+                # Pandas 0.23 uses multi-value inserts, which are very slow
+                # for a chunksize of 1. For some reason this only shows up on
+                # Appveyor Windows CI.
             )
 
 

diff --git a/ibis/bigquery/tests/test_compiler.py b/ibis/bigquery/tests/test_compiler.py
@@ -42,3 +42,16 @@ def test_ieee_divide(alltypes):
 SELECT IEEE_DIVIDE(`double_col`, 0) AS `tmp`
 FROM `ibis-gbq.testing.functional_alltypes`"""
     assert result == expected
+
+
+def test_identical_to(alltypes):
+    t = alltypes
+    pred = t.string_col.identical_to('a') & t.date_string_col.identical_to('b')
+    expr = t[pred]
+    result = expr.compile()
+    expected = """\
+SELECT *
+FROM `ibis-gbq.testing.functional_alltypes`
+WHERE (((`string_col` IS NULL) AND ('a' IS NULL)) OR (`string_col` = 'a')) AND
+      (((`date_string_col` IS NULL) AND ('b' IS NULL)) OR (`date_string_col` = 'b'))"""  # noqa: E501
+    assert result == expected
diff --git a/ibis/clickhouse/tests/test_select.py b/ibis/clickhouse/tests/test_select.py
@@ -322,8 +322,8 @@ def test_where_simple_comparisons(con, db, alltypes):
     result = ibis.clickhouse.compile(expr)
     expected = """SELECT *
 FROM {0}.`functional_alltypes`
-WHERE `float_col` > 0 AND
-      `int_col` < (`float_col` * 2)"""
+WHERE (`float_col` > 0) AND
+      (`int_col` < (`float_col` * 2))"""
     assert result == expected.format(db.name)
     assert len(con.execute(expr))
 
@@ -335,8 +335,8 @@ def test_where_with_between(con, db, alltypes):
     result = ibis.clickhouse.compile(expr)
     expected = """SELECT *
 FROM {0}.`functional_alltypes`
-WHERE `int_col` > 0 AND
-      `float_col` BETWEEN 0 AND 1"""
+WHERE (`int_col` > 0) AND
+      (`float_col` BETWEEN 0 AND 1)"""
     assert result == expected.format(db.name)
     con.execute(expr)
 

diff --git a/ibis/expr/rules.py b/ibis/expr/rules.py
@@ -224,6 +224,17 @@ def column(inner, arg):
     return instance_of(ir.ColumnExpr, inner(arg))
 
 
+@validator
+def array_of(inner, arg):
+    val = arg if isinstance(arg, ir.Expr) else ir.literal(arg)
+    argtype = val.type()
+    if not isinstance(argtype, dt.Array):
+        raise com.IbisTypeError(
+            'Argument must be an array, got expression {} which is of type '
+            '{}'.format(val, val.type()))
+    return value(dt.Array(inner(val[0]).type()), val)
+
+
 any = value(dt.any)
 double = value(dt.double)
 string = value(dt.string)
@@ -241,8 +252,8 @@ def column(inner, arg):
 soft_numeric = one_of([integer, floating, decimal, boolean])
 numeric = soft_numeric
 
-set_ = value(dt.Set(dt.any))
-array = value(dt.Array(dt.any))
+set_ = value(dt.Set)
+array = value(dt.Array)
 struct = value(dt.Struct)
 mapping = value(dt.Map(dt.any, dt.any))
 

diff --git a/ibis/expr/tests/test_rules.py b/ibis/expr/tests/test_rules.py
@@ -253,3 +253,30 @@ def test_shape_like_with_no_arguments():
     with pytest.raises(ValueError) as e:
         rlz.shape_like([])
     assert str(e.value) == 'Must pass at least one expression'
+
+
+@pytest.mark.parametrize(
+    ('rule', 'input'),
+    [
+        (rlz.array_of(rlz.integer), [1, 2, 3]),
+        (rlz.array_of(rlz.integer), []),
+        (rlz.array_of(rlz.double), [1, 2]),
+        (rlz.array_of(rlz.string), ['a', 'b']),
+        (rlz.array_of(rlz.array_of(rlz.string)), [['a'], [], [], ['a', 'b']])
+    ]
+)
+def test_array_of(rule, input):
+    assert isinstance(rule(input).type(), dt.Array)
+
+
+@pytest.mark.parametrize(
+    ('rule', 'input'),
+    [
+        (rlz.array_of(rlz.array_of(rlz.string)), [1, 2]),
+        (rlz.array_of(rlz.string), [1, 2.0]),
+        (rlz.array_of(rlz.array_of(rlz.integer)), [2, 2.0]),
+    ]
+)
+def test_array_of_invalid_input(rule, input):
+    with pytest.raises(IbisTypeError):
+        rule(input)
diff --git a/ibis/impala/tests/test_sql.py b/ibis/impala/tests/test_sql.py
@@ -85,8 +85,8 @@ def test_nested_joins_single_cte():
     GROUP BY 1
   ) t3
     LEFT OUTER JOIN t0
-      ON t3.`uuid` = t0.`uuid` AND
-         t3.`max_count` = t0.`count`
+      ON (t3.`uuid` = t0.`uuid`) AND
+         (t3.`max_count` = t0.`count`)
 ) t1
   LEFT OUTER JOIN (
     SELECT `uuid`, max(`ts`) AS `last_visit`
@@ -145,8 +145,8 @@ def test_nested_join_multiple_ctes():
 t2 AS (
   SELECT t1.*
   FROM t1
-  WHERE t1.`userid` = 118205 AND
-        extract(t1.`datetime`, 'year') > 2001
+  WHERE (t1.`userid` = 118205) AND
+        (extract(t1.`datetime`, 'year') > 2001)
 )
 SELECT t2.*
 FROM t2
@@ -155,10 +155,10 @@ def test_nested_join_multiple_ctes():
   FROM (
     SELECT t1.*
     FROM t1
-    WHERE t1.`userid` = 118205 AND
-          extract(t1.`datetime`, 'year') > 2001 AND
-          t1.`userid` = 118205 AND
-          extract(t1.`datetime`, 'year') < 2009
+    WHERE (t1.`userid` = 118205) AND
+          (extract(t1.`datetime`, 'year') > 2001) AND
+          (t1.`userid` = 118205) AND
+          (extract(t1.`datetime`, 'year') < 2009)
   ) t4
 )"""
     compiled_result = to_sql(result)
@@ -199,7 +199,7 @@ def test_join_with_nested_or_condition():
 SELECT t0.*
 FROM t t0
   INNER JOIN t t1
-    ON t0.`a` = t1.`a` AND
+    ON (t0.`a` = t1.`a`) AND
        ((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`))"""
     assert to_sql(expr) == expected
 
@@ -216,7 +216,7 @@ def test_join_with_nested_xor_condition():
 SELECT t0.*
 FROM t t0
   INNER JOIN t t1
-    ON t0.`a` = t1.`a` AND
+    ON (t0.`a` = t1.`a`) AND
        (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`)))"""  # noqa: E501
     assert to_sql(expr) == expected
 
@@ -350,10 +350,10 @@ def test_multiple_filters2():
   FROM t0
   WHERE `a` < 100
 ) t0
-WHERE `a` = (
+WHERE (`a` = (
   SELECT max(`a`) AS `max`
   FROM t0
   WHERE `a` < 100
-) AND
-      `b` = 'a'"""
+)) AND
+      (`b` = 'a')"""
     assert result == expected
diff --git a/ibis/sql/compiler.py b/ibis/sql/compiler.py
@@ -1649,10 +1649,10 @@ def format_where(self):
         buf = StringIO()
         buf.write('WHERE ')
         fmt_preds = []
+        npreds = len(self.where)
         for pred in self.where:
             new_pred = self._translate(pred, permit_subquery=True)
-            if isinstance(pred.op(), (ops.Or, ops.Xor)):
-                # parens for OR exprs because it binds looser than AND
+            if npreds > 1:
                 new_pred = '({})'.format(new_pred)
             fmt_preds.append(new_pred)
 
@@ -1827,10 +1827,10 @@ def get_result(self):
             buf.write(util.indent('{} {}'.format(jtype, table), self.indent))
 
             fmt_preds = []
+            npreds = len(preds)
             for pred in preds:
                 new_pred = self._translate(pred)
-                if isinstance(pred.op(), (ops.Or, ops.Xor)):
-                    # parens for OR exprs because it binds looser than AND
+                if npreds > 1:
                     new_pred = '({})'.format(new_pred)
                 fmt_preds.append(new_pred)
 

diff --git a/ibis/sql/tests/test_compiler.py b/ibis/sql/tests/test_compiler.py
@@ -849,8 +849,8 @@ def test_simple_joins(self):
              """SELECT t0.*
 FROM star1 t0
   INNER JOIN star2 t1
-    ON t0.`foo_id` = t1.`foo_id` AND
-       t0.`bar_id` = t1.`foo_id`"""),
+    ON (t0.`foo_id` = t1.`foo_id`) AND
+       (t0.`bar_id` = t1.`foo_id`)"""),
         ]
 
         for expr, expected_sql in cases:
@@ -978,8 +978,8 @@ def test_where_simple_comparisons(self):
         result = to_sql(what)
         expected = """SELECT *
 FROM star1
-WHERE `f` > 0 AND
-      `c` < (`f` * 2)"""
+WHERE (`f` > 0) AND
+      (`c` < (`f` * 2))"""
         assert result == expected
 
     def test_where_in_array_literal(self):
@@ -994,8 +994,8 @@ def test_where_with_join(self):
 FROM star1 t0
   INNER JOIN star2 t1
     ON t0.`foo_id` = t1.`foo_id`
-WHERE t0.`f` > 0 AND
-      t1.`value3` < 1000"""
+WHERE (t0.`f` > 0) AND
+      (t1.`value3` < 1000)"""
 
         result_sql = to_sql(e1)
         assert result_sql == expected_sql
@@ -1036,8 +1036,8 @@ def test_where_with_between(self):
         result = to_sql(what)
         expected = """SELECT *
 FROM alltypes
-WHERE `a` > 0 AND
-      `f` BETWEEN 0 AND 1"""
+WHERE (`a` > 0) AND
+      (`f` BETWEEN 0 AND 1)"""
         assert result == expected
 
     def test_where_analyze_scalar_op(self):
@@ -1055,8 +1055,8 @@ def test_where_analyze_scalar_op(self):
         expected = """\
 SELECT count(*) AS `count`
 FROM functional_alltypes
-WHERE `timestamp_col` < date_add(cast({} as timestamp), INTERVAL 3 MONTH) AND
-      `timestamp_col` < date_add(cast(now() as timestamp), INTERVAL 10 DAY)"""
+WHERE (`timestamp_col` < date_add(cast({} as timestamp), INTERVAL 3 MONTH)) AND
+      (`timestamp_col` < date_add(cast(now() as timestamp), INTERVAL 10 DAY))"""  # noqa: E501
         assert result == expected.format("'2010-01-01 00:00:00'")
 
     def test_bug_duplicated_where(self):
@@ -1340,8 +1340,8 @@ def agg(x):
         result = to_sql(filtered)
         expected = """SELECT *, `a` + `b` AS `foo`
 FROM alltypes
-WHERE `f` > 0 AND
-      `g` = 'bar'"""
+WHERE (`f` > 0) AND
+      (`g` = 'bar')"""
         assert result == expected
 
         agged = agg(filtered)
@@ -1350,8 +1350,8 @@ def agg(x):
 FROM (
   SELECT *, `a` + `b` AS `foo`
   FROM alltypes
-  WHERE `f` > 0 AND
-        `g` = 'bar'
+  WHERE (`f` > 0) AND
+        (`g` = 'bar')
 ) t0
 GROUP BY 1"""
         assert result == expected
@@ -1853,8 +1853,8 @@ def test_exists(self):
 WHERE EXISTS (
   SELECT 1
   FROM bar t1
-  WHERE t0.`key1` = t1.`key1` AND
-        t1.`key2` = 'foo'
+  WHERE (t0.`key1` = t1.`key1`) AND
+        (t1.`key2` = 'foo')
 )"""
         assert result == expected
 
@@ -2127,20 +2127,20 @@ def test_join_filtered_tables_no_pushdown(self):
 FROM (
   SELECT *
   FROM a
-  WHERE `year` = 2016 AND
-        `month` = 2 AND
-        `day` = 29
+  WHERE (`year` = 2016) AND
+        (`month` = 2) AND
+        (`day` = 29)
 ) t0
   LEFT OUTER JOIN (
     SELECT *
     FROM b
-    WHERE `year` = 2016 AND
-          `month` = 2 AND
-          `day` = 29
+    WHERE (`year` = 2016) AND
+          (`month` = 2) AND
+          (`day` = 29)
   ) t1
-    ON t0.`year` = t1.`year` AND
-       t0.`month` = t1.`month` AND
-       t0.`day` = t1.`day`"""
+    ON (t0.`year` = t1.`year`) AND
+       (t0.`month` = t1.`month`) AND
+       (t0.`day` = t1.`day`)"""
 
         assert result_sql == expected_sql
 
@@ -2172,8 +2172,8 @@ def test_loj_subquery_filter_handling(self):
     FROM bar
     WHERE `id` < 3
   ) t1
-    ON t0.`id` = t1.`id` AND
-       t0.`desc` = t1.`desc`"""
+    ON (t0.`id` = t1.`id`) AND
+       (t0.`desc` = t1.`desc`)"""
 
         assert result == expected
 
@@ -2299,8 +2299,8 @@ def test_pushdown_with_or():
     expected = """\
 SELECT *
 FROM functional_alltypes
-WHERE `double_col` > 3.14 AND
-      locate('foo', `string_col`) - 1 >= 0 AND
+WHERE (`double_col` > 3.14) AND
+      (locate('foo', `string_col`) - 1 >= 0) AND
       (((`int_col` - 1) = 0) OR (`float_col` <= 1.34))"""
     assert result == expected
 
@@ -2371,12 +2371,12 @@ def test_agg_and_non_agg_filter():
   FROM my_table
   WHERE `a` < 100
 ) t0
-WHERE `a` = (
+WHERE (`a` = (
   SELECT max(`a`) AS `max`
   FROM my_table
   WHERE `a` < 100
-) AND
-      `b` = 'a'"""
+)) AND
+      (`b` = 'a')"""
     assert result == expected