1,283 changes: 1,283 additions & 0 deletions ibis/backends/base/sqlglot/compiler.py

Large diffs are not rendered by default.

21 changes: 0 additions & 21 deletions ibis/backends/base/sqlglot/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,26 +444,5 @@ class OracleType(SqlglotType):
dialect = "oracle"


class SnowflakeType(SqlglotType):
    """Type mapper for the sqlglot ``snowflake`` dialect.

    Overrides how FLOAT, DECIMAL and ARRAY types coming from sqlglot are
    converted into ibis datatypes; everything else is inherited from
    ``SqlglotType``.
    """

    dialect = "snowflake"
    # Scale used for temporal types when none is given explicitly.
    default_temporal_scale = 9

    @classmethod
    def _from_sqlglot_FLOAT(cls) -> dt.Float64:
        """Map FLOAT to a 64-bit float with the class's default nullability."""
        return dt.Float64(nullable=cls.default_nullable)

    @classmethod
    def _from_sqlglot_DECIMAL(
        cls, precision=None, scale=None
    ) -> "dt.Int64 | dt.Decimal":
        """Map DECIMAL to ``int64`` when it has no fractional digits.

        A missing scale or a scale of zero yields ``dt.Int64``; any other
        scale is delegated to the parent implementation. The return
        annotation is quoted so it is never evaluated at definition time.
        """
        # `scale` is a sqlglot node; the literal value sits two `.this` deep.
        if scale is None or int(scale.this.this) == 0:
            return dt.Int64(nullable=cls.default_nullable)
        return super()._from_sqlglot_DECIMAL(precision, scale)

    @classmethod
    def _from_sqlglot_ARRAY(cls, value_type=None) -> dt.Array:
        """Map ARRAY to an array of JSON values.

        Only the untyped form is handled: ``value_type`` must be ``None``.
        """
        # NOTE(review): this check is stripped under `python -O`; consider an
        # explicit raise if typed arrays can actually reach this method.
        assert value_type is None
        return dt.Array(dt.json, nullable=cls.default_nullable)


class SQLiteType(SqlglotType):
dialect = "sqlite"
10 changes: 10 additions & 0 deletions ibis/backends/base/sqlglot/rewrites.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import os
from typing import Literal, Optional

from public import public
Expand Down Expand Up @@ -119,6 +120,9 @@ def merge_select_select(_):
)


def _env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean switch.

    Returns *default* when the variable is unset. An empty value, or one of
    ``0``, ``false``, ``no``, ``off`` (case-insensitive, surrounding
    whitespace ignored) is treated as disabled; any other value enables the
    flag.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in {"", "0", "false", "no", "off"}


# Set IBIS_SQL_DEBUG=1 to print the intermediate rewrite steps of `sqlize`.
# Parsed explicitly so that IBIS_SQL_DEBUG=0 does not turn debugging on
# (a bare `os.environ.get` returns the non-empty, hence truthy, string "0").
DEBUG = _env_flag("IBIS_SQL_DEBUG")


def sqlize(node):
"""Lower the ibis expression graph to a SQL-like relational algebra."""
step1 = node.replace(
Expand All @@ -127,5 +131,11 @@ def sqlize(node):
| filter_to_select
| sort_to_select
)
if DEBUG:
print("--------- STEP 1 ---------")
print(step1.to_expr())
step2 = step1.replace(merge_select_select)
if DEBUG:
print("--------- STEP 2 ---------")
print(step2.to_expr())
return step2
235 changes: 73 additions & 162 deletions ibis/backends/clickhouse/__init__.py

Large diffs are not rendered by default.

686 changes: 686 additions & 0 deletions ibis/backends/clickhouse/compiler.py

Large diffs are not rendered by default.

13 changes: 0 additions & 13 deletions ibis/backends/clickhouse/compiler/__init__.py

This file was deleted.

133 changes: 0 additions & 133 deletions ibis/backends/clickhouse/compiler/core.py

This file was deleted.

215 changes: 0 additions & 215 deletions ibis/backends/clickhouse/compiler/relations.py

This file was deleted.

1,059 changes: 0 additions & 1,059 deletions ibis/backends/clickhouse/compiler/values.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.double_col
t0.double_col AS double_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.bigint_col
t0.bigint_col AS bigint_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.bool_col
t0.bool_col AS bool_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.date_string_col
t0.date_string_col AS date_string_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.double_col
t0.double_col AS double_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.float_col
t0.float_col AS float_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.id
t0.id AS id
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.int_col
t0.int_col AS int_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.month
t0.month AS month
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.smallint_col
t0.smallint_col AS smallint_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.string_col
t0.string_col AS string_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.timestamp_col
t0.timestamp_col AS timestamp_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.tinyint_col
t0.tinyint_col AS tinyint_col
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
t0.year
t0.year AS year
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
SELECT
(
t0.string_col LIKE 'foo%'
) OR (
t0.string_col LIKE '%bar'
) AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))"
t0.string_col LIKE 'foo%' OR t0.string_col LIKE '%bar' AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))"
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CEIL(t0.double_col) AS "Ceil(double_col)"
CAST(CEIL(t0.double_col) AS Nullable(Int64)) AS "Ceil(double_col)"
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
SELECT
(
LN(t0.int_col)
) + t0.double_col AS "Add(Log(int_col), double_col)"
LN(t0.int_col) + t0.double_col AS "Add(Log(int_col), double_col)"
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
SELECT
t0.tinyint_col + (
-(
t0.int_col + t0.double_col
)
t0.tinyint_col + -(
t0.int_col + t0.double_col
) AS "Add(tinyint_col, Negate(Add(int_col, double_col)))"
FROM functional_alltypes AS t0
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
SELECT
t0.id IN (SELECT
arrayJoin(t1.ids) AS ids
FROM way_view AS t1) AS "InColumn(id, ids)"
t0.id IN ((
SELECT
arrayJoin(t1.ids) AS ids
FROM way_view AS t1
)) AS "InSubquery(id)"
FROM node_view AS t0
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ SELECT
CAST(t1.string_col AS Nullable(Float64)) AS "Cast(string_col, float64)"
FROM (
SELECT
t0.string_col,
t0.string_col AS string_col,
COUNT(*) AS count
FROM functional_alltypes AS t0
GROUP BY
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
SELECT
t0.a AS a,
t0.b AS b,
t2.c AS c,
t2.d AS d,
t2.c / (
t0.a - t0.b
) AS e
FROM s AS t0
INNER JOIN t AS t2
ON t0.a = t2.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
t0.a,
t0.a AS a,
COALESCE(countIf(NOT (
t0.b
)), 0) AS A,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
SELECT
*
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
t0.string_col IN ('foo', 'bar')
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
SELECT
*
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
NOT (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
SELECT
t0.*
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
INNER JOIN functional_alltypes AS t1
ON t0.id = t1.id
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
SELECT
t0.key,
t1.key AS key,
SUM((
(
t0.value + 1
t1.value + 1
) + 2
) + 3) AS abc
FROM t0 AS t0
WHERE
t0.value = 42
FROM (
SELECT
t0.key AS key,
t0.value AS value
FROM t0 AS t0
WHERE
t0.value = 42
) AS t1
GROUP BY
t0.key
t1.key
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
SELECT
t0.key,
t1.key AS key,
SUM((
(
t0.value + 1
t1.value + 1
) + 2
) + 3) AS foo
FROM t0 AS t0
WHERE
t0.value = 42
FROM (
SELECT
t0.key AS key,
t0.value AS value
FROM t0 AS t0
WHERE
t0.value = 42
) AS t1
GROUP BY
t0.key
t1.key
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
ANY JOIN awards_players AS t1
ON t0.playerID = t1.awardID
ANY JOIN awards_players AS t2
ON t0.playerID = t2.awardID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
LEFT ANY JOIN awards_players AS t1
ON t0.playerID = t1.awardID
LEFT ANY JOIN awards_players AS t2
ON t0.playerID = t2.awardID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
INNER JOIN awards_players AS t1
ON t0.playerID = t1.awardID
INNER JOIN awards_players AS t2
ON t0.playerID = t2.awardID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
LEFT OUTER JOIN awards_players AS t1
ON t0.playerID = t1.awardID
LEFT OUTER JOIN awards_players AS t2
ON t0.playerID = t2.awardID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
ANY JOIN awards_players AS t1
ON t0.playerID = t1.playerID
ANY JOIN awards_players AS t2
ON t0.playerID = t2.playerID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
LEFT ANY JOIN awards_players AS t1
ON t0.playerID = t1.playerID
LEFT ANY JOIN awards_players AS t2
ON t0.playerID = t2.playerID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
INNER JOIN awards_players AS t1
ON t0.playerID = t1.playerID
INNER JOIN awards_players AS t2
ON t0.playerID = t2.playerID
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
SELECT
t0.*
t0.playerID AS playerID,
t0.yearID AS yearID,
t0.stint AS stint,
t0.teamID AS teamID,
t0.lgID AS lgID,
t0.G AS G,
t0.AB AS AB,
t0.R AS R,
t0.H AS H,
t0.X2B AS X2B,
t0.X3B AS X3B,
t0.HR AS HR,
t0.RBI AS RBI,
t0.SB AS SB,
t0.CS AS CS,
t0.BB AS BB,
t0.SO AS SO,
t0.IBB AS IBB,
t0.HBP AS HBP,
t0.SH AS SH,
t0.SF AS SF,
t0.GIDP AS GIDP
FROM batting AS t0
LEFT OUTER JOIN awards_players AS t1
ON t0.playerID = t1.playerID
LEFT OUTER JOIN awards_players AS t2
ON t0.playerID = t2.playerID
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
SELECT
SUM(t0.float_col) AS "Sum(float_col)"
FROM functional_alltypes AS t0
WHERE
t0.int_col > 0
SUM(t1.float_col) AS "Sum(float_col)"
FROM (
SELECT
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
t0.int_col > 0
) AS t1
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
SELECT
t1.string_col
t2.string_col AS string_col
FROM (
SELECT
t0.string_col,
SUM(t0.float_col) AS total
FROM functional_alltypes AS t0
WHERE
t0.int_col > 0
t1.string_col AS string_col,
SUM(t1.float_col) AS total
FROM (
SELECT
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
t0.int_col > 0
) AS t1
GROUP BY
t0.string_col
) AS t1
t1.string_col
) AS t2
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
SELECT
*
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
t0.float_col > 0 AND t0.int_col < (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
SELECT
*
t0.id AS id,
t0.bool_col AS bool_col,
t0.tinyint_col AS tinyint_col,
t0.smallint_col AS smallint_col,
t0.int_col AS int_col,
t0.bigint_col AS bigint_col,
t0.float_col AS float_col,
t0.double_col AS double_col,
t0.date_string_col AS date_string_col,
t0.string_col AS string_col,
t0.timestamp_col AS timestamp_col,
t0.year AS year,
t0.month AS month
FROM functional_alltypes AS t0
WHERE
t0.int_col > 0 AND t0.float_col BETWEEN 0 AND 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
t0.uuid,
t0.uuid AS uuid,
minIf(t0.ts, t0.search_level = 1) AS min_date
FROM t AS t0
GROUP BY
Expand Down
9 changes: 9 additions & 0 deletions ibis/backends/clickhouse/tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,12 @@ def test_array_join_in_subquery(snapshot):

out = ibis.clickhouse.compile(expr)
snapshot.assert_match(out, "out.sql")


def test_complex_join(snapshot):
    """Snapshot the ClickHouse SQL for a join followed by a mutate.

    The derived column mixes columns from both sides of the join.
    """
    left = ibis.table({"a": "int", "b": "int"}, name="s")
    right = ibis.table({"c": "int", "d": "int"}, name="t")
    joined = left.join(right, left.a == right.c)
    expr = joined.mutate(e=joined.c / (joined.a - joined.b))
    snapshot.assert_match(ibis.clickhouse.compile(expr), "out.sql")
829 changes: 513 additions & 316 deletions ibis/backends/duckdb/__init__.py

Large diffs are not rendered by default.

426 changes: 375 additions & 51 deletions ibis/backends/duckdb/compiler.py

Large diffs are not rendered by default.

75 changes: 6 additions & 69 deletions ibis/backends/duckdb/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,11 @@
from __future__ import annotations

import duckdb_engine.datatypes as ducktypes
import sqlalchemy.dialects.postgresql as psql
import numpy as np

import ibis.expr.datatypes as dt
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType
from ibis.formats.pandas import PandasData

try:
from geoalchemy2 import Geometry

class Geometry_WKB(Geometry):
as_binary = "ST_AsWKB"

except ImportError:

class Geometry_WKB:
...


_from_duckdb_types = {
psql.BYTEA: dt.Binary,
psql.UUID: dt.UUID,
ducktypes.TinyInteger: dt.Int8,
ducktypes.SmallInteger: dt.Int16,
ducktypes.Integer: dt.Int32,
ducktypes.BigInteger: dt.Int64,
ducktypes.HugeInteger: dt.Decimal(38, 0),
ducktypes.UInt8: dt.UInt8,
ducktypes.UTinyInteger: dt.UInt8,
ducktypes.UInt16: dt.UInt16,
ducktypes.USmallInteger: dt.UInt16,
ducktypes.UInt32: dt.UInt32,
ducktypes.UInteger: dt.UInt32,
ducktypes.UInt64: dt.UInt64,
ducktypes.UBigInteger: dt.UInt64,
}

_to_duckdb_types = {
dt.UUID: psql.UUID,
dt.Int8: ducktypes.TinyInteger,
dt.Int16: ducktypes.SmallInteger,
dt.Int32: ducktypes.Integer,
dt.Int64: ducktypes.BigInteger,
dt.UInt8: ducktypes.UTinyInteger,
dt.UInt16: ducktypes.USmallInteger,
dt.UInt32: ducktypes.UInteger,
dt.UInt64: ducktypes.UBigInteger,
# Handle projections with geometry columns
dt.Geometry: Geometry_WKB,
}


class DuckDBType(AlchemyType):
dialect = "duckdb"

@classmethod
def to_ibis(cls, typ, nullable=True):
if dtype := _from_duckdb_types.get(type(typ)):
return dtype(nullable=nullable)
else:
return super().to_ibis(typ, nullable=nullable)

@classmethod
def from_ibis(cls, dtype):
if typ := _to_duckdb_types.get(type(dtype)):
return typ
else:
return super().from_ibis(dtype)

@classmethod
def from_string(cls, type_string, nullable=True):
return SqlglotDuckdbType.from_string(type_string, nullable=nullable)
class DuckDBPandasData(PandasData):
    """DuckDB-specific customisation of ``PandasData``."""

    @staticmethod
    def convert_Array(s, dtype, pandas_type):
        """Return *s* with NaN entries replaced by ``None``.

        ``dtype`` and ``pandas_type`` are accepted but not used here.
        """
        return s.replace(to_replace=np.nan, value=None)
11 changes: 8 additions & 3 deletions ibis/backends/duckdb/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

if TYPE_CHECKING:
from collections.abc import Iterator
from typing import Any

from ibis.backends.base import BaseBackend

Expand All @@ -36,7 +37,7 @@

class TestConf(BackendTest):
supports_map = True
deps = "duckdb", "duckdb_engine"
deps = ("duckdb",)
stateful = False
supports_tpch = True

Expand Down Expand Up @@ -87,8 +88,12 @@ def connect(*, tmpdir, worker_id, **kw) -> BaseBackend:
return ibis.duckdb.connect(extension_directory=extension_directory, **kw)

def load_tpch(self) -> None:
with self.connection.begin() as con:
con.exec_driver_sql("CALL dbgen(sf=0.1)")
self.connection.raw_sql("CALL dbgen(sf=0.1)")

def _load_data(self, **_: Any) -> None:
    """Run every statement of ``self.ddl_script`` through ``raw_sql``.

    Keyword arguments are accepted and ignored.
    """
    for statement in self.ddl_script:
        self.connection.raw_sql(statement)


@pytest.fixture(scope="session")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SELECT
*
FROM "functional_alltypes"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS tmp
FROM t AS t0
ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS "GeoDWithin(geom, geom, 3.0)"
FROM t AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
ST_ASTEXT(t0.geom) AS tmp
ST_ASTEXT(t0.geom) AS "GeoAsText(geom)"
FROM t AS t0
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
ST_NPOINTS(t0.geom) AS tmp
ST_NPOINTS(t0.geom) AS "GeoNPoints(geom)"
FROM t AS t0
68 changes: 53 additions & 15 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import duckdb
import pyarrow as pa
import pytest
import sqlalchemy as sa
from pytest import param

import ibis
Expand All @@ -25,7 +24,7 @@ def ext_directory(tmpdir_factory):
@pytest.mark.xfail(
LINUX and SANDBOXED,
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
raises=sa.exc.OperationalError,
raises=duckdb.IOException,
)
@pytest.mark.xdist_group(name="duckdb-extensions")
def test_connect_extensions(ext_directory):
Expand Down Expand Up @@ -76,11 +75,11 @@ def test_cross_db(tmpdir):

con2.attach(path1, name="test1", read_only=True)

t1_from_con2 = con2.table("t1", schema="test1.main")
t1_from_con2 = con2.table("t1", schema="main", database="test1")
assert t1_from_con2.schema() == t2.schema()
assert t1_from_con2.execute().equals(t2.execute())

foo_t1_from_con2 = con2.table("t1", schema="test1.foo")
foo_t1_from_con2 = con2.table("t1", schema="foo", database="test1")
assert foo_t1_from_con2.schema() == t2.schema()
assert foo_t1_from_con2.execute().equals(t2.execute())

Expand Down Expand Up @@ -115,24 +114,26 @@ def test_attach_detach(tmpdir):
con2.detach(name)
assert name not in con2.list_databases()

with pytest.raises(sa.exc.ProgrammingError):
with pytest.raises(duckdb.BinderException):
con2.detach(name)


@pytest.mark.parametrize(
"scale",
("scale", "expected_scale"),
[
None,
param(0, id="seconds"),
param(3, id="millis"),
param(6, id="micros"),
param(9, id="nanos"),
param(None, 6, id="default"),
param(0, 0, id="seconds"),
param(3, 3, id="millis"),
param(6, 6, id="micros"),
param(9, 9, id="nanos"),
],
)
def test_create_table_with_timestamp_scales(con, scale):
def test_create_table_with_timestamp_scales(con, scale, expected_scale):
schema = ibis.schema(dict(ts=dt.Timestamp(scale=scale)))
t = con.create_table(gen_name("duckdb_timestamp_scale"), schema=schema, temp=True)
assert t.schema() == schema
expected = ibis.schema(dict(ts=dt.Timestamp(scale=expected_scale)))
name = gen_name("duckdb_timestamp_scale")
t = con.create_table(name, schema=schema, temp=True)
assert t.schema() == expected


def test_config_options(con):
Expand All @@ -153,8 +154,45 @@ def test_config_options(con):


def test_config_options_bad_option(con):
with pytest.raises(sa.exc.ProgrammingError):
with pytest.raises(duckdb.CatalogException):
con.settings["not_a_valid_option"] = "oopsie"

with pytest.raises(KeyError):
con.settings["i_didnt_set_this"]


def test_insert(con):
    """Exercise ``con.insert`` with each supported input shape.

    Each step inserts two rows, either appending or overwriting, and then
    checks the resulting row count of the target table.
    """
    import pandas as pd

    name = ibis.util.guid()
    t = con.create_table(name, schema=ibis.schema({"a": "int64"}))

    # (description, insert action, expected row count afterwards); the
    # steps are order-dependent because the table accumulates rows.
    steps = [
        (
            "dataframe append",
            lambda: con.insert(name, obj=pd.DataFrame({"a": [1, 2]})),
            2,
        ),
        (
            "dataframe append again",
            lambda: con.insert(name, obj=pd.DataFrame({"a": [1, 2]})),
            4,
        ),
        (
            "dataframe overwrite",
            lambda: con.insert(
                name, obj=pd.DataFrame({"a": [1, 2]}), overwrite=True
            ),
            2,
        ),
        ("table expression append", lambda: con.insert(name, t), 4),
        (
            "list of dicts overwrite",
            lambda: con.insert(name, [{"a": 1}, {"a": 2}], overwrite=True),
            2,
        ),
        ("list of tuples append", lambda: con.insert(name, [(1,), (2,)]), 4),
        (
            "dict of lists overwrite",
            lambda: con.insert(name, {"a": [1, 2]}, overwrite=True),
            2,
        ),
    ]

    for description, do_insert, expected_rows in steps:
        do_insert()
        assert t.count().execute() == expected_rows, description


def test_to_other_sql(con, snapshot):
    """Snapshot the Snowflake-dialect SQL generated for a DuckDB table.

    Skipped when ``snowflake.connector`` cannot be imported.
    """
    pytest.importorskip("snowflake.connector")

    alltypes = con.table("functional_alltypes")
    snapshot.assert_match(ibis.to_sql(alltypes, dialect="snowflake"), "out.sql")
14 changes: 1 addition & 13 deletions ibis/backends/duckdb/tests/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
import duckdb_engine
import numpy as np
import pytest
import sqlalchemy as sa
from packaging.version import parse as vparse
from pytest import param

import ibis
import ibis.backends.base.sql.alchemy.datatypes as sat
import ibis.common.exceptions as exc
import ibis.expr.datatypes as dt
from ibis.backends.duckdb.datatypes import DuckDBType
from ibis.backends.base.sqlglot.datatypes import DuckDBType


@pytest.mark.parametrize(
Expand Down Expand Up @@ -93,15 +90,6 @@ def test_parse_quoted_struct_field():
)


def test_generate_quoted_struct():
typ = sat.StructType(
{"in come": sa.VARCHAR(), "my count": sa.BIGINT(), "thing": sa.INTEGER()}
)
result = typ.compile(dialect=duckdb_engine.Dialect())
expected = 'STRUCT("in come" VARCHAR, "my count" BIGINT, thing INTEGER)'
assert result == expected


@pytest.mark.xfail(
condition=vparse(duckdb_engine.__version__) < vparse("0.9.2"),
raises=AssertionError,
Expand Down
28 changes: 14 additions & 14 deletions ibis/backends/duckdb/tests/test_geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


def test_geospatial_point(zones, zones_gdf):
coord = zones.x_cent.point(zones.y_cent).name("coord")
coord = zones.x_cent.point(zones.y_cent)
# this returns GeometryArray
gp_coord = gpd.points_from_xy(zones_gdf.x_cent, zones_gdf.y_cent)

Expand All @@ -34,13 +34,13 @@ def test_geospatial_point(zones, zones_gdf):
)
def test_geospatial_unary_snapshot(operation, keywords, snapshot):
t = ibis.table([("geom", "geometry")], name="t")
expr = getattr(t.geom, operation)(**keywords).name("tmp")
expr = getattr(t.geom, operation)(**keywords)
snapshot.assert_match(ibis.to_sql(expr), "out.sql")


def test_geospatial_dwithin(snapshot):
t = ibis.table([("geom", "geometry")], name="t")
expr = t.geom.d_within(t.geom, 3.0).name("tmp")
expr = t.geom.d_within(t.geom, 3.0)

snapshot.assert_match(ibis.to_sql(expr), "out.sql")

Expand All @@ -62,7 +62,7 @@ def test_geospatial_dwithin(snapshot):
],
)
def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf):
expr = getattr(zones.geom, op)(**keywords).name("tmp")
expr = getattr(zones.geom, op)(**keywords)
gp_expr = getattr(zones_gdf.geometry, gp_op)

tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False)
Expand All @@ -76,10 +76,10 @@ def test_geospatial_unary_tm(op, keywords, gp_op, zones, zones_gdf):
],
)
def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf):
cen = zones.geom.centroid().name("centroid")
cen = zones.geom.centroid()
gp_cen = zones_gdf.geometry.centroid

expr = getattr(cen, op)(**keywords).name("tmp")
expr = getattr(cen, op)(**keywords)
gp_expr = getattr(gp_cen, gp_op)

tm.assert_series_equal(expr.to_pandas(), gp_expr, check_names=False)
Expand All @@ -88,7 +88,7 @@ def test_geospatial_xy(op, keywords, gp_op, zones, zones_gdf):
def test_geospatial_length(lines, lines_gdf):
# note: ST_LENGTH returns 0 for the case of polygon
# or multi polygon while pandas geopandas returns the perimeter.
length = lines.geom.length().name("length")
length = lines.geom.length()
gp_length = lines_gdf.geometry.length

tm.assert_series_equal(length.to_pandas(), gp_length, check_names=False)
Expand All @@ -113,7 +113,7 @@ def test_geospatial_length(lines, lines_gdf):
],
)
def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf):
expr = getattr(zones.geom, op)(zones.geom).name("tmp")
expr = getattr(zones.geom, op)(zones.geom)
gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry)

tm.assert_series_equal(expr.to_pandas(), gp_func, check_names=False)
Expand All @@ -129,7 +129,7 @@ def test_geospatial_binary_tm(op, gp_op, zones, zones_gdf):
],
)
def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf):
expr = getattr(zones.geom, op)().name("tmp")
expr = getattr(zones.geom, op)()
gp_expr = getattr(zones_gdf.geometry, gp_op)

gtm.assert_geoseries_equal(expr.to_pandas(), gp_expr, check_crs=False)
Expand All @@ -146,22 +146,22 @@ def test_geospatial_unary_gtm(op, gp_op, zones, zones_gdf):
],
)
def test_geospatial_binary_gtm(op, gp_op, zones, zones_gdf):
expr = getattr(zones.geom, op)(zones.geom).name("tmp")
expr = getattr(zones.geom, op)(zones.geom)
gp_func = getattr(zones_gdf.geometry, gp_op)(zones_gdf.geometry)

gtm.assert_geoseries_equal(expr.to_pandas(), gp_func, check_crs=False)


def test_geospatial_end_point(lines, lines_gdf):
epoint = lines.geom.end_point().name("end_point")
epoint = lines.geom.end_point()
# geopandas does not have end_point this is a work around to get it
gp_epoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(1, level=1)

gtm.assert_geoseries_equal(epoint.to_pandas(), gp_epoint, check_crs=False)


def test_geospatial_start_point(lines, lines_gdf):
spoint = lines.geom.start_point().name("start_point")
spoint = lines.geom.start_point()
# geopandas does not have start_point this is a work around to get it
gp_spoint = lines_gdf.geometry.boundary.explode(index_parts=True).xs(0, level=1)

Expand All @@ -170,7 +170,7 @@ def test_geospatial_start_point(lines, lines_gdf):

# this one takes a bit longer than the rest.
def test_geospatial_unary_union(zones, zones_gdf):
unary_union = zones.geom.unary_union().name("unary_union")
unary_union = zones.geom.unary_union()
# this returns a shapely geometry object
gp_unary_union = zones_gdf.geometry.unary_union

Expand All @@ -182,7 +182,7 @@ def test_geospatial_unary_union(zones, zones_gdf):


def test_geospatial_buffer_point(zones, zones_gdf):
cen = zones.geom.centroid().name("centroid")
cen = zones.geom.centroid()
gp_cen = zones_gdf.geometry.centroid

buffer = cen.buffer(100.0)
Expand Down
12 changes: 5 additions & 7 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,15 @@ def test_temp_directory(tmp_path):

# 1. in-memory + no temp_directory specified
con = ibis.duckdb.connect()
with con.begin() as c:
value = c.exec_driver_sql(query).scalar()
assert value # we don't care what the specific value is

value = con.raw_sql(query).fetchone()[0]
assert value # we don't care what the specific value is

temp_directory = Path(tempfile.gettempdir()) / "duckdb"

# 2. in-memory + temp_directory specified
con = ibis.duckdb.connect(temp_directory=temp_directory)
with con.begin() as c:
value = c.exec_driver_sql(query).scalar()
value = con.raw_sql(query).fetchone()[0]
assert value == str(temp_directory)

# 3. on-disk + no temp_directory specified
Expand All @@ -156,8 +155,7 @@ def test_temp_directory(tmp_path):

# 4. on-disk + temp_directory specified
con = ibis.duckdb.connect(tmp_path / "test2.ddb", temp_directory=temp_directory)
with con.begin() as c:
value = c.exec_driver_sql(query).scalar()
value = con.raw_sql(query).fetchone()[0]
assert value == str(temp_directory)


Expand Down
13 changes: 6 additions & 7 deletions ibis/backends/duckdb/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def test_builtin_scalar(con, func):
a, b = "duck", "luck"
expr = func(a, b)

with con.begin() as c:
expected = c.exec_driver_sql(f"SELECT {func.__name__}({a!r}, {b!r})").scalar()

expected = con.raw_sql(f"SELECT {func.__name__}({a!r}, {b!r})").df().squeeze()
assert con.execute(expr) == expected


Expand Down Expand Up @@ -93,10 +91,11 @@ def test_builtin_agg(con, func):
data = ibis.memtable({"a": raw_data})
expr = func(data.a)

with con.begin() as c:
expected = c.exec_driver_sql(
f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)"
).scalar()
expected = (
con.raw_sql(f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)")
.df()
.squeeze()
)

assert con.execute(expr) == expected

Expand Down
Original file line number Diff line number Diff line change
@@ -1,62 +1,55 @@
SELECT
t5.street,
t5.key
t5.street AS street,
t5.key AS key,
t5.key_right AS key_right
FROM (
SELECT
t4.street,
ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
t1.street AS street,
ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key,
t3.key AS key_right
FROM (
SELECT
t1.street,
t1.key
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key AS key
FROM (
SELECT
t0.*,
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key
FROM (
SELECT
t0.*,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
) AS t2
ON t1.key = t2.key
) AS t4
) AS t3
ON t1.key = t3.key
) AS t5
INNER JOIN (
SELECT
t5.key
t5.key AS key
FROM (
SELECT
t4.street,
ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
t1.street AS street,
ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key,
t3.key AS key_right
FROM (
SELECT
t1.street,
t1.key
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key AS key
FROM (
SELECT
t0.*,
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key
FROM (
SELECT
t0.*,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
FROM data AS t0
) AS t1
) AS t2
ON t1.key = t2.key
) AS t4
) AS t3
ON t1.key = t3.key
) AS t5
) AS t6
ON t5.key = t6.key
) AS t7
ON t5.key = t7.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
SELECT
"t5"."street" AS "street",
"t5"."key" AS "key",
"t5"."key_right" AS "key_right"
FROM (
SELECT
"t1"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
"t2"."key" AS "key_right"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
) AS "t2"
ON "t1"."key" = "t2"."key"
) AS "t5"
INNER JOIN (
SELECT
"t5"."key" AS "key"
FROM (
SELECT
"t1"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
"t2"."key" AS "key_right"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
) AS "t2"
ON "t1"."key" = "t2"."key"
) AS "t5"
) AS "t6"
ON "t5"."key" = "t6"."key"
Original file line number Diff line number Diff line change
@@ -1,32 +1,55 @@
WITH t0 AS (
SELECT
t5.street AS street,
ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key
FROM data AS t5
), t1 AS (
SELECT
t0.key AS key
FROM t0
), t2 AS (
SELECT
t0.street AS street,
t0.key AS key
FROM t0
JOIN t1
ON t0.key = t1.key
), t3 AS (
SELECT
t5.street AS street,
t5.key AS key,
t5.key_right AS key_right
FROM (
SELECT
t2.street AS street,
ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key
FROM t2
), t4 AS (
t1.street AS street,
ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key,
t3.key AS key_right
FROM (
SELECT
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key AS key
FROM (
SELECT
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
FROM data AS t0
) AS t1
) AS t3
ON t1.key = t3.key
) AS t5
INNER JOIN (
SELECT
t3.key AS key
FROM t3
)
SELECT
t3.street,
t3.key
FROM t3
JOIN t4
ON t3.key = t4.key
t5.key AS key
FROM (
SELECT
t1.street AS street,
ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key,
t3.key AS key_right
FROM (
SELECT
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
FROM data AS t0
) AS t1
INNER JOIN (
SELECT
t1.key AS key
FROM (
SELECT
t0.street AS street,
ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
FROM data AS t0
) AS t1
) AS t3
ON t1.key = t3.key
) AS t5
) AS t7
ON t5.key = t7.key
Original file line number Diff line number Diff line change
@@ -1,32 +1,55 @@
WITH t0 AS (
SELECT
t5."street" AS "street",
ROW_NUMBER() OVER (ORDER BY t5."street" ASC) - 1 AS "key"
FROM "data" AS t5
), t1 AS (
SELECT
t0."key" AS "key"
FROM t0
), t2 AS (
SELECT
t0."street" AS "street",
t0."key" AS "key"
FROM t0
JOIN t1
ON t0."key" = t1."key"
), t3 AS (
SELECT
"t5"."street" AS "street",
"t5"."key" AS "key",
"t5"."key_right" AS "key_right"
FROM (
SELECT
t2."street" AS "street",
ROW_NUMBER() OVER (ORDER BY t2."street" ASC) - 1 AS "key"
FROM t2
), t4 AS (
"t1"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
"t3"."key" AS "key_right"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
) AS "t3"
ON "t1"."key" = "t3"."key"
) AS "t5"
INNER JOIN (
SELECT
t3."key" AS "key"
FROM t3
)
SELECT
t3."street",
t3."key"
FROM t3
JOIN t4
ON t3."key" = t4."key"
"t5"."key" AS "key"
FROM (
SELECT
"t1"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
"t3"."key" AS "key_right"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
"t0"."street" AS "street",
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
FROM "data" AS "t0"
) AS "t1"
) AS "t3"
ON "t1"."key" = "t3"."key"
) AS "t5"
) AS "t7"
ON "t5"."key" = "t7"."key"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
SELECT
t3.key1 AS key1,
AVG(t3.value1 - t3.value2) AS avg_diff
FROM (
SELECT
t0.value1 AS value1,
t0.key1 AS key1,
t0.key2 AS key2,
t1.value2 AS value2,
t1.key1 AS key1_right,
t1.key4 AS key4
FROM table1 AS t0
LEFT OUTER JOIN table2 AS t1
ON t0.key1 = t1.key1
) AS t3
GROUP BY
1
Original file line number Diff line number Diff line change
@@ -1,49 +1,40 @@
SELECT
t4.key
t2.key AS key,
t3.key AS key_right,
t6.key_right AS key_right_right
FROM (
SELECT
t1.key
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t2
INNER JOIN (
SELECT
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t3
ON t2.key = t3.key
INNER JOIN (
SELECT
t2.key AS key,
t3.key AS key_right
FROM (
SELECT
*
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t1
INNER JOIN (
SELECT
t1.key
FROM (
SELECT
*
FROM leaf AS t0
WHERE
TRUE
) AS t1
) AS t2
ON t1.key = t2.key
) AS t4
INNER JOIN (
SELECT
t1.key
FROM (
INNER JOIN (
SELECT
*
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t1
INNER JOIN (
SELECT
t1.key
FROM (
SELECT
*
FROM leaf AS t0
WHERE
TRUE
) AS t1
) AS t2
ON t1.key = t2.key
) AS t5
ON t4.key = t5.key
) AS t3
ON t2.key = t3.key
) AS t6
ON t6.key = t6.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
SELECT
"t1"."key" AS "key",
"t2"."key" AS "key_right",
"t4"."key_right" AS "key_right_right"
FROM (
SELECT
*
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
*
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t1"
) AS "t2"
ON "t1"."key" = "t2"."key"
INNER JOIN (
SELECT
"t1"."key" AS "key",
"t2"."key" AS "key_right"
FROM (
SELECT
*
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t1"
INNER JOIN (
SELECT
"t1"."key" AS "key"
FROM (
SELECT
*
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t1"
) AS "t2"
ON "t1"."key" = "t2"."key"
) AS "t4"
ON "t1"."key" = "t1"."key"
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
WITH t0 AS (
SELECT
t1.key AS key,
t2.key AS key_right,
t5.key_right AS key_right_right
FROM (
SELECT
t4.key AS key
FROM leaf AS t4
t0.key AS key
FROM leaf AS t0
WHERE
CAST(TRUE AS BOOLEAN)
), t1 AS (
TRUE
) AS t1
INNER JOIN (
SELECT
t0.key AS key
FROM t0
), t2 AS (
FROM leaf AS t0
WHERE
TRUE
) AS t2
ON t1.key = t2.key
INNER JOIN (
SELECT
t0.key AS key
FROM t0
JOIN t1
ON t0.key = t1.key
)
SELECT
t2.key
FROM t2
JOIN t2 AS t3
ON t2.key = t3.key
t1.key AS key,
t2.key AS key_right
FROM (
SELECT
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t1
INNER JOIN (
SELECT
t0.key AS key
FROM leaf AS t0
WHERE
TRUE
) AS t2
ON t1.key = t2.key
) AS t5
ON t1.key = t5.key
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
WITH t0 AS (
SELECT
"t1"."key" AS "key",
"t2"."key" AS "key_right",
"t5"."key_right" AS "key_right_right"
FROM (
SELECT
t4."key" AS "key"
FROM "leaf" AS t4
"t0"."key" AS "key"
FROM "leaf" AS "t0"
WHERE
TRUE
), t1 AS (
) AS "t1"
INNER JOIN (
SELECT
t0."key" AS "key"
FROM t0
), t2 AS (
"t0"."key" AS "key"
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t2"
ON "t1"."key" = "t2"."key"
INNER JOIN (
SELECT
t0."key" AS "key"
FROM t0
JOIN t1
ON t0."key" = t1."key"
)
SELECT
t2."key"
FROM t2
JOIN t2 AS t3
ON t2."key" = t3."key"
"t1"."key" AS "key",
"t2"."key" AS "key_right"
FROM (
SELECT
"t0"."key" AS "key"
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t1"
INNER JOIN (
SELECT
"t0"."key" AS "key"
FROM "leaf" AS "t0"
WHERE
TRUE
) AS "t2"
ON "t1"."key" = "t2"."key"
) AS "t5"
ON "t1"."key" = "t5"."key"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
SELECT
CASE "t0"."continent"
WHEN 'NA'
THEN 'North America'
WHEN 'SA'
THEN 'South America'
WHEN 'EU'
THEN 'Europe'
WHEN 'AF'
THEN 'Africa'
WHEN 'AS'
THEN 'Asia'
WHEN 'OC'
THEN 'Oceania'
WHEN 'AN'
THEN 'Antarctica'
ELSE 'Unknown continent'
END AS "cont",
SUM("t0"."population") AS "total_pop"
FROM "countries" AS "t0"
GROUP BY
1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
CASE t0."continent"
CASE "t0"."continent"
WHEN 'NA'
THEN 'North America'
WHEN 'SA'
Expand All @@ -16,7 +16,7 @@ SELECT
THEN 'Antarctica'
ELSE 'Unknown continent'
END AS "cont",
SUM(t0."population") AS "total_pop"
FROM "countries" AS t0
SUM("t0"."population") AS "total_pop"
FROM "countries" AS "t0"
GROUP BY
1
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
SELECT
t0.x IN (
t0.x IN ((
SELECT
t1.x
FROM (
SELECT
*
FROM t AS t0
WHERE
t0.x > 2
) AS t1
) AS "InColumn(x, x)"
t0.x AS x
FROM t AS t0
WHERE
t0.x > 2
)) AS "InSubquery(x)"
FROM t AS t0
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
SELECT
"t0"."x" IN ((
SELECT
"t1"."x" AS "x"
FROM (
SELECT
*
FROM "t" AS "t0"
WHERE
(
"t0"."x" > 2
)
) AS "t1"
)) AS "InSubquery(x)"
FROM "t" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
SELECT
t0.x IN (
t0.x IN ((
SELECT
t1.x
FROM (
SELECT
t0.x AS x
FROM t AS t0
WHERE
t0.x > CAST(2 AS TINYINT)
) AS t1
) AS "InColumn(x, x)"
t0.x AS x
FROM t AS t0
WHERE
t0.x > CAST(2 AS TINYINT)
)) AS "InSubquery(x)"
FROM t AS t0
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
SELECT
t0."x" IN (
"t0"."x" IN ((
SELECT
t1."x"
FROM (
SELECT
t0."x" AS "x"
FROM "t" AS t0
WHERE
t0."x" > 2
) AS t1
) AS "InColumn(x, x)"
FROM "t" AS t0
"t0"."x" AS "x"
FROM "t" AS "t0"
WHERE
"t0"."x" > 2
)) AS "InSubquery(x)"
FROM "t" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,94 +1,96 @@
SELECT
t11.field_of_study,
t11.diff
t10.field_of_study AS field_of_study,
t10.diff AS diff
FROM (
SELECT
*
t5.field_of_study AS field_of_study,
t5.diff AS diff
FROM (
SELECT
*
t4.field_of_study AS field_of_study,
any(t4.diff) AS diff
FROM (
SELECT
t4.field_of_study,
any(t4.diff) AS diff
t3.field_of_study AS field_of_study,
t3.years AS years,
t3.degrees AS degrees,
t3.earliest_degrees AS earliest_degrees,
t3.latest_degrees AS latest_degrees,
t3.latest_degrees - t3.earliest_degrees AS diff
FROM (
SELECT
t3.*,
t3.latest_degrees - t3.earliest_degrees AS diff
t2.field_of_study AS field_of_study,
t2.years AS years,
t2.degrees AS degrees,
any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM (
SELECT
t2.*,
any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
t1.field_of_study AS field_of_study,
CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
FROM (
SELECT
t1.field_of_study,
CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
FROM (
SELECT
t0.field_of_study,
arrayJoin(
[CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
t4.field_of_study
) AS t5
ORDER BY
t5.diff DESC
) AS t6
t0.field_of_study AS field_of_study,
arrayJoin(
[CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
t4.field_of_study
) AS t5
ORDER BY
t5.diff DESC
LIMIT 10
UNION ALL
SELECT
*
t5.field_of_study AS field_of_study,
t5.diff AS diff
FROM (
SELECT
*
t4.field_of_study AS field_of_study,
any(t4.diff) AS diff
FROM (
SELECT
*
t3.field_of_study AS field_of_study,
t3.years AS years,
t3.degrees AS degrees,
t3.earliest_degrees AS earliest_degrees,
t3.latest_degrees AS latest_degrees,
t3.latest_degrees - t3.earliest_degrees AS diff
FROM (
SELECT
t4.field_of_study,
any(t4.diff) AS diff
t2.field_of_study AS field_of_study,
t2.years AS years,
t2.degrees AS degrees,
any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM (
SELECT
t3.*,
t3.latest_degrees - t3.earliest_degrees AS diff
t1.field_of_study AS field_of_study,
CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
FROM (
SELECT
t2.*,
any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM (
SELECT
t1.field_of_study,
CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
FROM (
SELECT
t0.field_of_study,
arrayJoin(
[CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
t4.field_of_study
) AS t5
WHERE
t5.diff < 0
) AS t7
ORDER BY
t7.diff ASC
) AS t9
t0.field_of_study AS field_of_study,
arrayJoin(
[CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
t4.field_of_study
) AS t5
WHERE
t5.diff < 0
ORDER BY
t5.diff ASC
LIMIT 10
) AS t11
) AS t10
Original file line number Diff line number Diff line change
@@ -1,75 +1,96 @@
WITH t0 AS (
SELECT
t7.field_of_study AS field_of_study,
UNNEST(
CAST([{'years': '1970-71', 'degrees': t7."1970-71"}, {'years': '1975-76', 'degrees': t7."1975-76"}, {'years': '1980-81', 'degrees': t7."1980-81"}, {'years': '1985-86', 'degrees': t7."1985-86"}, {'years': '1990-91', 'degrees': t7."1990-91"}, {'years': '1995-96', 'degrees': t7."1995-96"}, {'years': '2000-01', 'degrees': t7."2000-01"}, {'years': '2005-06', 'degrees': t7."2005-06"}, {'years': '2010-11', 'degrees': t7."2010-11"}, {'years': '2011-12', 'degrees': t7."2011-12"}, {'years': '2012-13', 'degrees': t7."2012-13"}, {'years': '2013-14', 'degrees': t7."2013-14"}, {'years': '2014-15', 'degrees': t7."2014-15"}, {'years': '2015-16', 'degrees': t7."2015-16"}, {'years': '2016-17', 'degrees': t7."2016-17"}, {'years': '2017-18', 'degrees': t7."2017-18"}, {'years': '2018-19', 'degrees': t7."2018-19"}, {'years': '2019-20', 'degrees': t7."2019-20"}] AS STRUCT(years TEXT, degrees BIGINT)[])
) AS __pivoted__
FROM humanities AS t7
), t1 AS (
SELECT
t0.field_of_study AS field_of_study,
STRUCT_EXTRACT(t0.__pivoted__, 'years') AS years,
STRUCT_EXTRACT(t0.__pivoted__, 'degrees') AS degrees
FROM t0
), t2 AS (
SELECT
t1.field_of_study AS field_of_study,
t1.years AS years,
t1.degrees AS degrees,
FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM t1
), t3 AS (
SELECT
t2.field_of_study AS field_of_study,
t2.years AS years,
t2.degrees AS degrees,
t2.earliest_degrees AS earliest_degrees,
t2.latest_degrees AS latest_degrees,
t2.latest_degrees - t2.earliest_degrees AS diff
FROM t2
), t4 AS (
SELECT
t3.field_of_study AS field_of_study,
FIRST(t3.diff) AS diff
FROM t3
GROUP BY
1
), anon_1 AS (
SELECT
t10.field_of_study AS field_of_study,
t10.diff AS diff
FROM (
SELECT
t4.field_of_study AS field_of_study,
t4.diff AS diff
FROM t4
t5.field_of_study AS field_of_study,
t5.diff AS diff
FROM (
SELECT
t4.field_of_study AS field_of_study,
FIRST(t4.diff) AS diff
FROM (
SELECT
t3.field_of_study AS field_of_study,
t3.years AS years,
t3.degrees AS degrees,
t3.earliest_degrees AS earliest_degrees,
t3.latest_degrees AS latest_degrees,
t3.latest_degrees - t3.earliest_degrees AS diff
FROM (
SELECT
t2.field_of_study AS field_of_study,
t2.years AS years,
t2.degrees AS degrees,
FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM (
SELECT
t1.field_of_study AS field_of_study,
t1.__pivoted__['years'] AS years,
t1.__pivoted__['degrees'] AS degrees
FROM (
SELECT
t0.field_of_study AS field_of_study,
UNNEST(
[{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
1
) AS t5
ORDER BY
t4.diff DESC
t5.diff DESC
LIMIT 10
), t5 AS (
SELECT
t4.field_of_study AS field_of_study,
t4.diff AS diff
FROM t4
WHERE
t4.diff < CAST(0 AS TINYINT)
), anon_2 AS (
UNION ALL
SELECT
t5.field_of_study AS field_of_study,
t5.diff AS diff
FROM t5
FROM (
SELECT
t4.field_of_study AS field_of_study,
FIRST(t4.diff) AS diff
FROM (
SELECT
t3.field_of_study AS field_of_study,
t3.years AS years,
t3.degrees AS degrees,
t3.earliest_degrees AS earliest_degrees,
t3.latest_degrees AS latest_degrees,
t3.latest_degrees - t3.earliest_degrees AS diff
FROM (
SELECT
t2.field_of_study AS field_of_study,
t2.years AS years,
t2.degrees AS degrees,
FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
FROM (
SELECT
t1.field_of_study AS field_of_study,
t1.__pivoted__['years'] AS years,
t1.__pivoted__['degrees'] AS degrees
FROM (
SELECT
t0.field_of_study AS field_of_study,
UNNEST(
[{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}]
) AS __pivoted__
FROM humanities AS t0
) AS t1
) AS t2
) AS t3
) AS t4
GROUP BY
1
) AS t5
WHERE
t5.diff < CAST(0 AS TINYINT)
ORDER BY
t5.diff ASC
LIMIT 10
)
SELECT
t6.field_of_study,
t6.diff
FROM (
SELECT
anon_1.field_of_study AS field_of_study,
anon_1.diff AS diff
FROM anon_1
UNION ALL
SELECT
anon_2.field_of_study AS field_of_study,
anon_2.diff AS diff
FROM anon_2
) AS t6
) AS t10
17 changes: 11 additions & 6 deletions ibis/backends/tests/sql/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

import pytest
import sqlglot as sg

import ibis

pytest.importorskip("duckdb")


from ibis.backends.duckdb import Backend as DuckDBBackend # noqa: E402
from ibis.tests.expr.mocks import MockBackend # noqa: E402


Expand Down Expand Up @@ -70,13 +71,17 @@ def bar_t(con):
return con.table("bar_t")


def get_query(expr):
ast = Compiler.to_ast(expr, QueryContext(compiler=Compiler))
return ast.queries[0]
def to_sql(expr, *args, **kwargs) -> str:
if args:
raise TypeError("Unexpected positional arguments")
if kwargs:
raise TypeError("Unexpected keyword arguments")

sql = DuckDBBackend.compiler.translate(expr.op(), params={})
if isinstance(sql, sg.exp.Table):
sql = sg.select("*").from_(sql)

def to_sql(expr, *args, **kwargs) -> str:
return get_query(expr).compile(*args, **kwargs)
return sql.sql(dialect="duckdb", pretty=True)


@pytest.fixture(scope="module")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
SELECT DISTINCT t0.`string_col`
FROM functional_alltypes t0
SELECT DISTINCT
*
FROM (
SELECT
t0.string_col AS string_col
FROM functional_alltypes AS t0
) AS t1
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
SELECT t0.`int_col` + 4 AS `Add(int_col, 4)`
FROM int_col_table t0
t0.int_col + CAST(4 AS TINYINT)
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
SELECT t0.`int_col` + 4 AS `foo`
FROM int_col_table t0
t0.int_col + CAST(4 AS TINYINT) AS foo
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
"month": "int32",
},
)
f = functional_alltypes.filter(functional_alltypes.bigint_col > 0)

result = (
functional_alltypes.filter(functional_alltypes.bigint_col > 0)
.group_by(functional_alltypes.string_col)
.aggregate(functional_alltypes.int_col.nunique().name("nunique"))
)
result = f.aggregate([f.int_col.nunique().name("nunique")], by=[f.string_col])
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `nunique`
FROM functional_alltypes t0
WHERE t0.`bigint_col` > 0
GROUP BY 1
SELECT
t1.string_col AS string_col,
COUNT(DISTINCT t1.int_col) AS nunique
FROM (
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.bigint_col > CAST(0 AS TINYINT)
)
) AS t1
GROUP BY
1
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@
},
)
lit = ibis.literal(0)
alias = functional_alltypes.string_col.name("key")
difference = (
functional_alltypes.select(
[alias, functional_alltypes.float_col.cast("float64").name("value")]
)
.filter(functional_alltypes.int_col > lit)
.difference(
functional_alltypes.select(
[alias, functional_alltypes.double_col.name("value")]
).filter(functional_alltypes.int_col <= lit),
distinct=True,
)
f = functional_alltypes.filter(functional_alltypes.int_col > lit)
f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit)
difference = f.select(
f.string_col.name("key"), f.float_col.cast("float64").name("value")
).difference(
f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True
)
proj = difference.select([difference.key, difference.value])

result = proj.select(proj.key)
result = difference.select(difference.key)
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
SELECT t0.`key`
SELECT
t5.key AS key
FROM (
SELECT t1.`key`, t1.`value`
SELECT
t1.string_col AS key,
CAST(t1.float_col AS DOUBLE) AS value
FROM (
WITH t2 AS (
SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value`
FROM functional_alltypes t4
WHERE t4.`int_col` <= 0
),
t3 AS (
SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value`
FROM functional_alltypes t4
WHERE t4.`int_col` > 0
)
SELECT *
FROM t3
EXCEPT
SELECT *
FROM t2
) t1
) t0
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.int_col > CAST(0 AS TINYINT)
)
) AS t1
EXCEPT
SELECT
t2.string_col AS key,
t2.double_col AS value
FROM (
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.int_col <= CAST(0 AS TINYINT)
)
) AS t2
) AS t5
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@


t = ibis.table(name="t", schema={"a": "int64", "b": "string"})
f = t.filter(t.b == "m")
agg = f.aggregate([f.a.sum().name("sum"), f.a.max()], by=[f.b])
f1 = agg.filter(agg.Max(a) == 2)

result = (
t.filter(t.b == "m")
.group_by(t.b)
.having(t.a.max() == 2)
.aggregate(t.a.sum().name("sum"))
)
result = f1.select(f1.b, f1.sum)
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
SELECT t0.`b`, sum(t0.`a`) AS `sum`
FROM t t0
WHERE t0.`b` = 'm'
GROUP BY 1
HAVING max(t0.`a`) = 2
SELECT
t3.b AS b,
t3.sum AS sum
FROM (
SELECT
*
FROM (
SELECT
t1.b AS b,
SUM(t1.a) AS sum,
MAX(t1.a) AS "Max(a)"
FROM (
SELECT
*
FROM t AS t0
WHERE
(
t0.b = 'm'
)
) AS t1
GROUP BY
1
) AS t2
WHERE
(
t2."Max(a)" = CAST(2 AS TINYINT)
)
) AS t3
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
SELECT t0.`string_col`, count(1) AS `CountStar(functional_alltypes)`
FROM functional_alltypes t0
GROUP BY 1
HAVING max(t0.`double_col`) = 1
SELECT
t2.string_col AS string_col,
t2."CountStar()" AS "CountStar()"
FROM (
SELECT
*
FROM (
SELECT
t0.string_col AS string_col,
COUNT(*) AS "CountStar()",
MAX(t0.double_col) AS "Max(double_col)"
FROM functional_alltypes AS t0
GROUP BY
1
) AS t1
WHERE
(
t1."Max(double_col)" = CAST(1 AS TINYINT)
)
) AS t2
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@
},
)
lit = ibis.literal(0)
alias = functional_alltypes.string_col.name("key")
intersection = (
functional_alltypes.select(
[alias, functional_alltypes.float_col.cast("float64").name("value")]
)
.filter(functional_alltypes.int_col > lit)
.intersect(
functional_alltypes.select(
[alias, functional_alltypes.double_col.name("value")]
).filter(functional_alltypes.int_col <= lit),
distinct=True,
)
f = functional_alltypes.filter(functional_alltypes.int_col > lit)
f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit)
intersection = f.select(
f.string_col.name("key"), f.float_col.cast("float64").name("value")
).intersect(
f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True
)
proj = intersection.select([intersection.key, intersection.value])

result = proj.select(proj.key)
result = intersection.select(intersection.key)
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
SELECT t0.`key`
SELECT
t5.key AS key
FROM (
SELECT t1.`key`, t1.`value`
SELECT
t1.string_col AS key,
CAST(t1.float_col AS DOUBLE) AS value
FROM (
WITH t2 AS (
SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value`
FROM functional_alltypes t4
WHERE t4.`int_col` <= 0
),
t3 AS (
SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value`
FROM functional_alltypes t4
WHERE t4.`int_col` > 0
)
SELECT *
FROM t3
INTERSECT
SELECT *
FROM t2
) t1
) t0
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.int_col > CAST(0 AS TINYINT)
)
) AS t1
INTERSECT
SELECT
t2.string_col AS key,
t2.double_col AS value
FROM (
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.int_col <= CAST(0 AS TINYINT)
)
) AS t2
) AS t5
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
},
)

result = functional_alltypes.group_by(functional_alltypes.string_col).aggregate(
result = functional_alltypes.aggregate(
[
functional_alltypes.int_col.nunique().name("int_card"),
functional_alltypes.smallint_col.nunique().name("smallint_card"),
]
],
by=[functional_alltypes.string_col],
)
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `int_card`,
count(DISTINCT t0.`smallint_col`) AS `smallint_card`
FROM functional_alltypes t0
GROUP BY 1
SELECT
t0.string_col AS string_col,
COUNT(DISTINCT t0.int_col) AS int_card,
COUNT(DISTINCT t0.smallint_col) AS smallint_card
FROM functional_alltypes AS t0
GROUP BY
1
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
SELECT t0.*
FROM functional_alltypes t0
WHERE (t0.`double_col` > 3.14) AND
(locate('foo', t0.`string_col`) - 1 >= 0) AND
(((t0.`int_col` - 1) = 0) OR (t0.`float_col` <= 1.34))
SELECT
*
FROM functional_alltypes AS t0
WHERE
(
t0.double_col > CAST(3.14 AS DOUBLE)
)
AND CONTAINS(t0.string_col, 'foo')
AND (
(
(
t0.int_col - CAST(1 AS TINYINT)
) = CAST(0 AS TINYINT)
)
OR (
t0.float_col <= CAST(1.34 AS DOUBLE)
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,11 @@
},
)
lit = ibis.literal(0)
alias = functional_alltypes.string_col.name("key")
difference = (
functional_alltypes.select(
[alias, functional_alltypes.float_col.cast("float64").name("value")]
)
.filter(functional_alltypes.int_col > lit)
.difference(
functional_alltypes.select(
[alias, functional_alltypes.double_col.name("value")]
).filter(functional_alltypes.int_col <= lit),
distinct=True,
)
)
f = functional_alltypes.filter(functional_alltypes.int_col > lit)
f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit)

result = difference.select([difference.key, difference.value])
result = f.select(
f.string_col.name("key"), f.float_col.cast("float64").name("value")
).difference(
f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True
)
Loading