@@ -1,2 +1,2 @@
-SELECT BIT_AND(`int_col`) AS `tmp`
-FROM functional_alltypes
+SELECT BIT_AND(t0.`int_col`) AS `tmp`
+FROM functional_alltypes t0
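These snapshot updates all make the same change: the BigQuery compiler now aliases every base table (t0, t1, ...) and qualifies column references with that alias. As a rough sketch of how a snapshot like the one above is produced (column types here are assumptions, not part of the diff):

import ibis

t = ibis.table(dict(int_col="int32"), name="functional_alltypes")
expr = t.int_col.bit_and().name("tmp")
# Compiles to the new, aliased form:
#   SELECT BIT_AND(t0.`int_col`) AS `tmp`
#   FROM functional_alltypes t0
print(ibis.bigquery.compile(expr))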
@@ -1,2 +1,2 @@
-SELECT BIT_OR(`int_col`) AS `tmp`
-FROM functional_alltypes
+SELECT BIT_OR(t0.`int_col`) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT BIT_XOR(`int_col`) AS `tmp`
-FROM functional_alltypes
+SELECT BIT_XOR(t0.`int_col`) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT avg(CAST(`bool_col` AS INT64)) AS `tmp`
-FROM functional_alltypes
+SELECT avg(CAST(t0.`bool_col` AS INT64)) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT sum(CAST(`bool_col` AS INT64)) AS `tmp`
-FROM functional_alltypes
+SELECT sum(CAST(t0.`bool_col` AS INT64)) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT sum(if((`month` > 6) AND (`month` < 10), CAST(`bool_col` AS INT64), NULL)) AS `tmp`
-FROM functional_alltypes
+SELECT sum(if((t0.`month` > 6) AND (t0.`month` < 10), CAST(t0.`bool_col` AS INT64), NULL)) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT avg(if(`month` > 6, CAST(`bool_col` AS INT64), NULL)) AS `tmp`
-FROM functional_alltypes
+SELECT avg(if(t0.`month` > 6, CAST(t0.`bool_col` AS INT64), NULL)) AS `tmp`
+FROM functional_alltypes t0
@@ -1,7 +1,7 @@
 SELECT
 CASE
-WHEN (0 <= `value`) AND (`value` < 1) THEN 0
-WHEN (1 <= `value`) AND (`value` <= 3) THEN 1
+WHEN (0 <= t0.`value`) AND (t0.`value` < 1) THEN 0
+WHEN (1 <= t0.`value`) AND (t0.`value` <= 3) THEN 1
 ELSE CAST(NULL AS INT64)
 END AS `tmp`
-FROM t
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT sum(`foo`) AS `tmp`
-FROM t0
+SELECT sum(t0.`foo`) AS `tmp`
+FROM t0 t0
@@ -1,2 +1,2 @@
-SELECT COVAR_POP(`double_col`, `double_col`) AS `tmp`
-FROM functional_alltypes
+SELECT COVAR_POP(t0.`double_col`, t0.`double_col`) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT COVAR_SAMP(`double_col`, `double_col`) AS `tmp`
-FROM functional_alltypes
+SELECT COVAR_SAMP(t0.`double_col`, t0.`double_col`) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT CAST(FLOOR(IEEE_DIVIDE(`double_col`, 0)) AS INT64) AS `tmp`
-FROM functional_alltypes
+SELECT CAST(FLOOR(IEEE_DIVIDE(t0.`double_col`, 0)) AS INT64) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT IEEE_DIVIDE(`double_col`, 0) AS `tmp`
-FROM functional_alltypes
+SELECT IEEE_DIVIDE(t0.`double_col`, 0) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT DATE(`ts`) AS `tmp`
-FROM t
+SELECT DATE(t0.`ts`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME(`ts`) AS `tmp`
-FROM t
+SELECT TIME(t0.`ts`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_AZIMUTH(`p0`, `p1`) AS `tmp`
-FROM t
+SELECT ST_AZIMUTH(t0.`p0`, t0.`p1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_CONTAINS(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_CONTAINS(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_COVEREDBY(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_COVEREDBY(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_COVERS(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_COVERS(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_DWITHIN(`geog0`, `geog1`, 5.2) AS `tmp`
-FROM t
+SELECT ST_DWITHIN(t0.`geog0`, t0.`geog1`, 5.2) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_DIFFERENCE(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_DIFFERENCE(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_DISJOINT(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_DISJOINT(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_DISTANCE(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_DISTANCE(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_EQUALS(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_EQUALS(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_INTERSECTION(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_INTERSECTION(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_INTERSECTS(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_INTERSECTS(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_MAXDISTANCE(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_MAXDISTANCE(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_TOUCHES(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_TOUCHES(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_UNION(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_UNION(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_WITHIN(`geog0`, `geog1`) AS `tmp`
-FROM t
+SELECT ST_WITHIN(t0.`geog0`, t0.`geog1`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_BOUNDINGBOX(`geog`).xmax AS `tmp`
-FROM t
+SELECT ST_BOUNDINGBOX(t0.`geog`).xmax AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_BOUNDINGBOX(`geog`).xmin AS `tmp`
-FROM t
+SELECT ST_BOUNDINGBOX(t0.`geog`).xmin AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_BOUNDINGBOX(`geog`).ymax AS `tmp`
-FROM t
+SELECT ST_BOUNDINGBOX(t0.`geog`).ymax AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_BOUNDINGBOX(`geog`).ymin AS `tmp`
-FROM t
+SELECT ST_BOUNDINGBOX(t0.`geog`).ymin AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_GEOGPOINT(`lon`, `lat`) AS `tmp`
-FROM t
+SELECT ST_GEOGPOINT(t0.`lon`, t0.`lat`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_SIMPLIFY(`geog`, 5.2) AS `tmp`
-FROM t
+SELECT ST_SIMPLIFY(t0.`geog`, 5.2) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_AREA(`geog`) AS `tmp`
-FROM t
+SELECT ST_AREA(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_ASBINARY(`geog`) AS `tmp`
-FROM t
+SELECT ST_ASBINARY(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_ASTEXT(`geog`) AS `tmp`
-FROM t
+SELECT ST_ASTEXT(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_BUFFER(`geog`, 5.2) AS `tmp`
-FROM t
+SELECT ST_BUFFER(t0.`geog`, 5.2) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_CENTROID(`geog`) AS `tmp`
-FROM t
+SELECT ST_CENTROID(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_ENDPOINT(`geog`) AS `tmp`
-FROM t
+SELECT ST_ENDPOINT(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_GEOMETRYTYPE(`geog`) AS `tmp`
-FROM t
+SELECT ST_GEOMETRYTYPE(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_LENGTH(`geog`) AS `tmp`
-FROM t
+SELECT ST_LENGTH(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_NUMPOINTS(`geog`) AS `tmp`
-FROM t
+SELECT ST_NUMPOINTS(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_PERIMETER(`geog`) AS `tmp`
-FROM t
+SELECT ST_PERIMETER(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_POINTN(`geog`, 3) AS `tmp`
-FROM t
+SELECT ST_POINTN(t0.`geog`, 3) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_STARTPOINT(`geog`) AS `tmp`
-FROM t
+SELECT ST_STARTPOINT(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_UNION_AGG(`geog`) AS `tmp`
-FROM t
+SELECT ST_UNION_AGG(t0.`geog`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_X(`pt`) AS `tmp`
-FROM t
+SELECT ST_X(t0.`pt`) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT ST_Y(`pt`) AS `tmp`
-FROM t
+SELECT ST_Y(t0.`pt`) AS `tmp`
+FROM t t0
@@ -1,4 +1,4 @@
-SELECT *
-FROM functional_alltypes
-WHERE (`string_col` IS NOT DISTINCT FROM 'a') AND
-(`date_string_col` IS NOT DISTINCT FROM 'b')
+SELECT t0.*
+FROM functional_alltypes t0
+WHERE (t0.`string_col` IS NOT DISTINCT FROM 'a') AND
+(t0.`date_string_col` IS NOT DISTINCT FROM 'b')
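The null-safe comparison in this snapshot comes from ibis's `identical_to`; a sketch (table schema assumed) of an expression that compiles to it:

import ibis

t = ibis.table(
    dict(string_col="string", date_string_col="string"),
    name="functional_alltypes",
)
# identical_to is null-safe equality; BigQuery renders it as IS NOT DISTINCT FROM
expr = t[t.string_col.identical_to("a") & t.date_string_col.identical_to("b")]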
@@ -1,15 +1,16 @@
 WITH t0 AS (
-SELECT *
-FROM unbound_table
-WHERE `PARTITIONTIME` < DATE '2017-01-01'
+SELECT t4.*
+FROM unbound_table t4
+WHERE t4.`PARTITIONTIME` < DATE '2017-01-01'
 ),
 t1 AS (
-SELECT CAST(`file_date` AS DATE) AS `file_date`, `PARTITIONTIME`, `val`
+SELECT CAST(t0.`file_date` AS DATE) AS `file_date`, t0.`PARTITIONTIME`,
+t0.`val`
 FROM t0
-WHERE `file_date` < DATE '2017-01-01'
+WHERE t0.`file_date` < DATE '2017-01-01'
 ),
 t2 AS (
-SELECT *, `val` * 2 AS `XYZ`
+SELECT t1.*, t1.`val` * 2 AS `XYZ`
 FROM t1
 )
 SELECT t2.*
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` ASC RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`month` ASC RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `two_month_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `two_month_avg`
+FROM functional_alltypes t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM t0
+SELECT t0.*
+FROM t0 t0
 EXCEPT DISTINCT
-SELECT *
-FROM t1
+SELECT t0.*
+FROM t1 t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM t0
+SELECT t0.*
+FROM t0 t0
 INTERSECT DISTINCT
-SELECT *
-FROM t1
+SELECT t0.*
+FROM t1 t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM t0
+SELECT t0.*
+FROM t0 t0
 UNION ALL
-SELECT *
-FROM t1
+SELECT t0.*
+FROM t1 t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM t0
+SELECT t0.*
+FROM t0 t0
 UNION DISTINCT
-SELECT *
-FROM t1
+SELECT t0.*
+FROM t1 t0
@@ -1,2 +1,2 @@
-SELECT substr(`value`, 3 + 1, 1) AS `tmp`
-FROM t
+SELECT substr(t0.`value`, 3 + 1, 1) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT DATE_TRUNC(`a`, DAY) AS `tmp`
-FROM t
+SELECT DATE_TRUNC(t0.`a`, DAY) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, DAY) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, DAY) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME_TRUNC(`a`, HOUR) AS `tmp`
-FROM t
+SELECT TIME_TRUNC(t0.`a`, HOUR) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, HOUR) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, HOUR) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME_TRUNC(`a`, MICROSECOND) AS `tmp`
-FROM t
+SELECT TIME_TRUNC(t0.`a`, MICROSECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, MICROSECOND) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, MICROSECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME_TRUNC(`a`, MILLISECOND) AS `tmp`
-FROM t
+SELECT TIME_TRUNC(t0.`a`, MILLISECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, MILLISECOND) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, MILLISECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME_TRUNC(`a`, MINUTE) AS `tmp`
-FROM t
+SELECT TIME_TRUNC(t0.`a`, MINUTE) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, MINUTE) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, MINUTE) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT DATE_TRUNC(`a`, MONTH) AS `tmp`
-FROM t
+SELECT DATE_TRUNC(t0.`a`, MONTH) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, MONTH) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, MONTH) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT DATE_TRUNC(`a`, QUARTER) AS `tmp`
-FROM t
+SELECT DATE_TRUNC(t0.`a`, QUARTER) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, QUARTER) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, QUARTER) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIME_TRUNC(`a`, SECOND) AS `tmp`
-FROM t
+SELECT TIME_TRUNC(t0.`a`, SECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, SECOND) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, SECOND) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT DATE_TRUNC(`a`, WEEK(MONDAY)) AS `tmp`
-FROM t
+SELECT DATE_TRUNC(t0.`a`, WEEK(MONDAY)) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, WEEK(MONDAY)) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, WEEK(MONDAY)) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT DATE_TRUNC(`a`, YEAR) AS `tmp`
-FROM t
+SELECT DATE_TRUNC(t0.`a`, YEAR) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT TIMESTAMP_TRUNC(`a`, YEAR) AS `tmp`
-FROM t
+SELECT TIMESTAMP_TRUNC(t0.`a`, YEAR) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT PARSE_TIMESTAMP('%F', `date_string_col`) AS `tmp`
-FROM functional_alltypes
+SELECT PARSE_TIMESTAMP('%F', t0.`date_string_col`) AS `tmp`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT PARSE_TIMESTAMP('%F %Z', CONCAT(`date_string_col`, ' America/New_York')) AS `tmp`
-FROM functional_alltypes
+SELECT PARSE_TIMESTAMP('%F %Z', CONCAT(t0.`date_string_col`, ' America/New_York')) AS `tmp`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 86400000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 86400000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
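The RANGE frames in this group of snapshots come from time-based trailing windows: the interval bound is converted to microseconds and ordered via UNIX_MICROS (86400000000 µs = 1 day). A sketch of an expression (schema assumed) matching the snapshot above:

import ibis

t = ibis.table(
    dict(float_col="float32", timestamp_col="timestamp"),
    name="functional_alltypes",
)
w = ibis.trailing_range_window(
    preceding=ibis.interval(days=1),  # rendered as 86400000000 microseconds
    order_by=t.timestamp_col,
)
expr = t.mutate(win_avg=t.float_col.mean().over(w))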
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 5 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 5 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 3600000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 3600000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 60000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 60000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 0.001 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 0.001 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 1000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 1000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 172800000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 172800000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 604800000000 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (ORDER BY UNIX_MICROS(t0.`timestamp_col`) RANGE BETWEEN 604800000000 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM functional_alltypes
+SELECT t0.*
+FROM functional_alltypes t0
 UNION ALL
-SELECT *
-FROM functional_alltypes
+SELECT t0.*
+FROM functional_alltypes t0
@@ -1,5 +1,5 @@
-SELECT *
-FROM functional_alltypes
+SELECT t0.*
+FROM functional_alltypes t0
 UNION DISTINCT
-SELECT *
-FROM functional_alltypes
+SELECT t0.*
+FROM functional_alltypes t0
@@ -1,15 +1,15 @@
 WITH t0 AS (
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 )
 SELECT *
 FROM t0
 UNION ALL
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 UNION ALL
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
@@ -1,15 +1,15 @@
 WITH t0 AS (
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 )
 SELECT *
 FROM t0
 UNION DISTINCT
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 UNION ALL
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
@@ -1,15 +1,15 @@
 WITH t0 AS (
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 )
 SELECT *
 FROM t0
 UNION ALL
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 UNION DISTINCT
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
@@ -1,15 +1,15 @@
 WITH t0 AS (
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 )
 SELECT *
 FROM t0
 UNION DISTINCT
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
 UNION DISTINCT
-SELECT `string_col`, sum(`double_col`) AS `metric`
-FROM functional_alltypes
+SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
+FROM functional_alltypes t1
 GROUP BY 1
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,3 +1,3 @@
-SELECT *,
-avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ASC ROWS BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
-FROM functional_alltypes
+SELECT t0.*,
+avg(t0.`float_col`) OVER (PARTITION BY t0.`year` ORDER BY t0.`timestamp_col` ASC ROWS BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
+FROM functional_alltypes t0
@@ -1,2 +1,2 @@
-SELECT sum(`a`) OVER (ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS `tmp`
-FROM t
+SELECT sum(t0.`a`) OVER (ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS `tmp`
+FROM t t0
@@ -1,2 +1,2 @@
-SELECT sum(`a`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS `tmp`
-FROM t
+SELECT sum(t0.`a`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS `tmp`
+FROM t t0
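The two ROWS frames above correspond to ibis windows with tuple bounds; roughly (API details here are assumptions, not taken from this diff):

import ibis

t = ibis.table(dict(a="int64"), name="t")
# (1, None) => ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
leading = t.a.sum().over(ibis.window(following=(1, None))).name("tmp")
# (None, 1) => ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
trailing = t.a.sum().over(ibis.window(preceding=(None, 1))).name("tmp")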
10 changes: 5 additions & 5 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
@@ -253,7 +253,7 @@ class MockBackend(ibis.backends.bigquery.Backend):
pass

names = [f"col_{i}" for i in range(num_columns)]
schema = ibis.Schema(names, ["string"] * num_columns)
schema = ibis.Schema(dict.fromkeys(names, "string"))
ibis_client = MockBackend()
table = ops.SQLQueryResult("select * from t", schema, ibis_client).to_expr()
for _ in range(num_joins): # noqa: F402
@@ -384,8 +384,8 @@ def test_timestamp_accepts_date_literals(alltypes):
params = {p: date_string}
result = to_sql(expr, params=params)
expected = """\
SELECT \\*, @param_\\d+ AS `param`
FROM functional_alltypes"""
SELECT t\\d+\\.\\*, @param_\\d+ AS `param`
FROM functional_alltypes t\\d+"""
assert re.match(expected, result) is not None


@@ -581,6 +581,6 @@ def test_scalar_param_scope(alltypes):
param = ibis.param("timestamp")
result = to_sql(t.mutate(param=param), params={param: "2017-01-01"})
expected = """\
SELECT \\*, @param_\\d+ AS `param`
FROM functional_alltypes"""
SELECT t\\d+\\.\\*, @param_\\d+ AS `param`
FROM functional_alltypes t\\d+"""
assert re.match(expected, result) is not None
28 changes: 15 additions & 13 deletions ibis/backends/bigquery/tests/unit/test_datatypes.py
@@ -4,9 +4,8 @@

import ibis.expr.datatypes as dt
from ibis.backends.bigquery.datatypes import (
TypeTranslationContext,
UDFContext,
ibis_type_to_bigquery_type,
spread_type,
)


@@ -54,8 +53,7 @@ def test_no_ambiguities():
],
)
def test_simple(datatype, expected):
context = TypeTranslationContext()
assert ibis_type_to_bigquery_type(datatype, context) == expected
assert ibis_type_to_bigquery_type(datatype) == expected


@pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)])
@@ -65,21 +63,25 @@ def test_simple_failure_mode(datatype):


@pytest.mark.parametrize(
("type", "expected"),
("type_", "expected"),
[
param(dt.int64, "INT64", marks=pytest.mark.xfail(raises=TypeError)),
param(
dt.int64,
[dt.int64],
),
param(
dt.Array(dt.int64),
"ARRAY<INT64>",
marks=pytest.mark.xfail(raises=TypeError),
[dt.int64, dt.Array(value_type=dt.int64)],
),
param(
dt.Struct.from_tuples([("a", dt.Array(dt.int64))]),
"STRUCT<a ARRAY<INT64>>",
marks=pytest.mark.xfail(raises=TypeError),
[
dt.int64,
dt.Array(value_type=dt.int64),
dt.Struct.from_tuples([('a', dt.Array(value_type=dt.int64))]),
],
),
],
)
def test_ibis_type_to_bigquery_type_udf(type, expected):
context = UDFContext()
assert ibis_type_to_bigquery_type(type, context) == expected
def test_spread_type(type_, expected):
assert list(spread_type(type_)) == expected
@@ -0,0 +1,21 @@
CREATE TEMPORARY FUNCTION my_len_0(s STRING)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_len(s) {
return s.length;
}
return my_len(s);
""";

CREATE TEMPORARY FUNCTION my_len_1(s STRING)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_len(s) {
return (s.length + 1);
}
return my_len(s);
""";

SELECT (my_len_0('abcd') + my_len_0('abcd')) + my_len_1('abcd') AS `tmp`
@@ -0,0 +1,12 @@
CREATE TEMPORARY FUNCTION my_len_0(s STRING)
RETURNS FLOAT64
NOT DETERMINISTIC
LANGUAGE js AS """
'use strict';
function my_len(s) {
return s.length;
}
return my_len(s);
""";

SELECT my_len_0('abcd') AS `tmp`
@@ -0,0 +1,11 @@
CREATE TEMPORARY FUNCTION my_len_0(s STRING)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_len(s) {
return s.length;
}
return my_len(s);
""";

SELECT my_len_0('abcd') AS `tmp`
@@ -0,0 +1,12 @@
CREATE TEMPORARY FUNCTION my_len_0(s STRING)
RETURNS FLOAT64
DETERMINISTIC
LANGUAGE js AS """
'use strict';
function my_len(s) {
return s.length;
}
return my_len(s);
""";

SELECT my_len_0('abcd') AS `tmp`
@@ -0,0 +1,5 @@
CREATE TEMPORARY FUNCTION format_t_0(input STRING)
RETURNS FLOAT64
AS (FORMAT('%T', input));

SELECT format_t_0('abcd') AS `tmp`
74 changes: 47 additions & 27 deletions ibis/backends/bigquery/tests/unit/udf/test_usage.py
@@ -4,46 +4,66 @@
import ibis
import ibis.expr.datatypes as dt
from ibis.backends.bigquery import udf
from ibis.backends.bigquery.udf import _udf_name_cache


def test_multiple_calls_redefinition():
@udf([dt.string], dt.double)
def test_multiple_calls_redefinition(snapshot):
_udf_name_cache.clear()

@udf.python([dt.string], dt.double)
def my_len(s):
return s.length

s = ibis.literal("abcd")
expr = my_len(s) + my_len(s)

@udf([dt.string], dt.double)
@udf.python([dt.string], dt.double)
def my_len(s):
return s.length + 1

expr = expr + my_len(s)

sql = ibis.bigquery.compile(expr)
expected = '''\
CREATE TEMPORARY FUNCTION my_len_0(s STRING)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_len(s) {
return s.length;
}
return my_len(s);
""";

CREATE TEMPORARY FUNCTION my_len_1(s STRING)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_len(s) {
return (s.length + 1);
}
return my_len(s);
""";

SELECT (my_len_0('abcd') + my_len_0('abcd')) + my_len_1('abcd') AS `tmp`'''
assert sql == expected
snapshot.assert_match(sql, "out.sql")


@pytest.mark.parametrize(
("determinism",),
[
param(True),
param(False),
param(None),
],
)
def test_udf_determinism(snapshot, determinism):
_udf_name_cache.clear()

@udf.python([dt.string], dt.double, determinism=determinism)
def my_len(s):
return s.length

s = ibis.literal("abcd")
expr = my_len(s)

sql = ibis.bigquery.compile(expr)
snapshot.assert_match(sql, "out.sql")


def test_udf_sql(snapshot):
_udf_name_cache.clear()

format_t = udf.sql(
"format_t",
params={'input': dt.string},
output_type=dt.double,
sql_expression="FORMAT('%T', input)",
)

s = ibis.literal("abcd")
expr = format_t(s)

sql = ibis.bigquery.compile(expr)
snapshot.assert_match(sql, "out.sql")


@pytest.mark.parametrize(
@@ -93,6 +113,6 @@ def my_len(s):
)
def test_udf_int64(argument_type, return_type):
# invalid argument type, valid return type
@udf([argument_type], return_type)
@udf.python([argument_type], return_type)
def my_int64_add(x):
return 1.0
488 changes: 334 additions & 154 deletions ibis/backends/bigquery/udf/__init__.py

Large diffs are not rendered by default.

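The udf/__init__.py rewrite is collapsed here, but the test changes below show its shape: the old `@udf(...)` decorator becomes `@udf.python(...)` (with an optional `determinism` flag), and a new `udf.sql` constructor defines functions from a SQL expression. A sketch of the apparent API, inferred from the tests rather than the collapsed diff:

import ibis
import ibis.expr.datatypes as dt
from ibis.backends.bigquery import udf

@udf.python([dt.string], dt.double)
def my_len(s):
    # translated to BigQuery JavaScript, as in the snapshots above
    return s.length

format_t = udf.sql(
    "format_t",
    params={"input": dt.string},
    output_type=dt.double,
    sql_expression="FORMAT('%T', input)",
)

expr = my_len(ibis.literal("abcd")) + format_t(ibis.literal("abcd"))
sql = ibis.bigquery.compile(expr)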
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/udf/core.py
@@ -602,4 +602,4 @@ def range(n):
nnn = len(values)
return [sum(values) - a + b * y**-x, z, foo.width, nnn]

print(my_func.js) # noqa: T201
print(my_func.sql) # noqa: T201
3 changes: 0 additions & 3 deletions ibis/backends/bigquery/version.py

This file was deleted.

26 changes: 12 additions & 14 deletions ibis/backends/clickhouse/__init__.py
@@ -81,9 +81,12 @@ class Options(ibis.config.Config):
----------
temp_db : str
Database to use for temporary objects.
bool_type : str
Type to use for boolean columns.
"""

temp_db: str = "__ibis_tmp"
bool_type: str = "Boolean"

def __init__(self, *args, external_tables=None, **kwargs):
super().__init__(*args, **kwargs)
@@ -291,7 +294,7 @@ def to_pyarrow_batches(
params
Mapping of scalar parameter expressions to value.
chunk_size
Number of rows in each returned record batch.
Maximum number of rows in each returned record batch.

Returns
-------
@@ -300,21 +303,16 @@
"""
pa = self._import_pyarrow()

from ibis.backends.pyarrow.datatypes import ibis_to_pyarrow_struct

schema = self._table_or_column_schema(expr)

def _batches():
schema = expr.as_table().schema()
array_type = schema.as_struct().to_pyarrow()
batches = (
pa.RecordBatch.from_struct_array(pa.array(batch, type=array_type))
for batch in self._cursor_batches(
expr, params=params, limit=limit, chunk_size=chunk_size
):
struct_array = pa.array(
map(tuple, batch),
type=ibis_to_pyarrow_struct(schema),
)
yield pa.RecordBatch.from_struct_array(struct_array)

return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), _batches())
)
)

return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batches)

def _cursor_batches(
self,
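For reference, a sketch of how the reworked method is consumed (connection and expression are placeholders, not part of the diff):

import pyarrow as pa

def count_rows(con, expr, chunk_size: int = 1_000_000) -> int:
    """Stream `expr` as Arrow record batches of at most `chunk_size` rows."""
    reader: pa.ipc.RecordBatchReader = con.to_pyarrow_batches(
        expr, chunk_size=chunk_size
    )
    return sum(batch.num_rows for batch in reader)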
21 changes: 15 additions & 6 deletions ibis/backends/clickhouse/compiler/values.py
@@ -277,7 +277,7 @@ def _string_find(op, **kw):

arg = translate_val(op.arg, **kw)
substr = translate_val(op.substr, **kw)
return f"position({substr} IN {arg}) - 1"
return f"locate({arg}, {substr}) - 1"


@translate_val.register(ops.RegexExtract)
@@ -548,11 +548,20 @@ def _truncate(op, **kw):
@translate_val.register(ops.ExistsSubquery)
@translate_val.register(ops.NotExistsSubquery)
def _exists_subquery(op, **kw):
foreign_table = translate_val(op.foreign_table, **kw)
# https://github.com/ClickHouse/ClickHouse/issues/6697
#
# this would work, if clickhouse supported correlated subqueries
from ibis.backends.clickhouse.compiler.relations import translate_rel

foreign_table = translate_rel(op.foreign_table, **kw)
predicates = translate_val(op.predicates, **kw)
subq = sg.subquery(foreign_table.where(predicates, dialect="clickhouse").select(1))
subq = (
sg.select(1)
.from_(foreign_table, dialect="clickhouse")
.where(sg.condition(predicates), dialect="clickhouse")
)
prefix = "NOT " * isinstance(op, ops.NotExistsSubquery)
return f"{prefix}EXISTS {subq}"
return f"{prefix}EXISTS ({subq})"


@translate_val.register(ops.StringSplit)
@@ -627,7 +636,7 @@ def _bit_agg(func):
def _translate(op, **kw):
arg = translate_val(op.arg, **kw)
if not isinstance((type := op.arg.output_dtype), dt.UnsignedInteger):
nbits = type._nbytes * 8
nbits = type.nbytes * 8
arg = f"reinterpretAsUInt{nbits}({arg})"

if (where := op.where) is not None:
@@ -738,7 +747,7 @@ def _scalar_param(op, params: Mapping[ops.Node, Any], **kw):
def _string_contains(op, **kw):
haystack = translate_val(op.haystack, **kw)
needle = translate_val(op.needle, **kw)
return f"position({needle} IN {haystack}) > 0"
return f"locate({haystack}, {needle}) > 0"


def contains(op_string: Literal["IN", "NOT IN"]) -> str:
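A note on the `position`/`locate` swap above: both functions are 1-based in ClickHouse, hence the trailing `- 1` for ibis's 0-based `find`; the relevant difference is argument order, with `locate` taking the haystack first as used here. A sketch (schema assumed):

import ibis

t = ibis.table(dict(string_col="string"), name="t")
expr = t.string_col.find("a").name("tmp")
# Now compiles to:      locate(string_col, 'a') - 1
# Previously compiled:  position('a' IN string_col) - 1
sql = ibis.clickhouse.compile(expr)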
60 changes: 43 additions & 17 deletions ibis/backends/clickhouse/datatypes.py
@@ -1,9 +1,11 @@
from __future__ import annotations

import functools
from functools import partial

import parsy

import ibis
import ibis.expr.datatypes as dt
from ibis.common.parsing import (
COMMA,
@@ -19,24 +21,45 @@
)


def _bool_type():
return getattr(getattr(ibis.options, "clickhouse", None), "bool_type", "Boolean")


def parse(text: str) -> dt.DataType:
@parsy.generate
def datetime():
yield spaceless_string("datetime64", "datetime")
timezone = yield parened_string.optional()
return dt.Timestamp(timezone=timezone, nullable=False)
parened_string = LPAREN.then(RAW_STRING).skip(RPAREN)

datetime64_args = LPAREN.then(
parsy.seq(
scale=parsy.decimal_digit.map(int).optional(),
timezone=COMMA.then(RAW_STRING).optional(),
)
).skip(RPAREN)

datetime64 = spaceless_string("datetime64").then(
datetime64_args.optional(default={}).combine_dict(
partial(dt.Timestamp, nullable=False)
)
)

datetime = spaceless_string("datetime").then(
parsy.seq(timezone=parened_string.optional()).combine_dict(
partial(dt.Timestamp, nullable=False)
)
)

primitive = (
datetime
datetime64
| datetime
| spaceless_string("null", "nothing").result(dt.null)
| spaceless_string("bigint", "int64").result(dt.Int64(nullable=False))
| spaceless_string("double", "float64").result(dt.Float64(nullable=False))
| spaceless_string("float32", "float").result(dt.Float32(nullable=False))
| spaceless_string("smallint", "int16", "int2").result(dt.Int16(nullable=False))
| spaceless_string("date32", "date").result(dt.Date(nullable=False))
| spaceless_string("time").result(dt.Time(nullable=False))
| spaceless_string("tinyint", "int8", "int1", "boolean", "bool").result(
dt.Int8(nullable=False)
| spaceless_string("tinyint", "int8", "int1").result(dt.Int8(nullable=False))
| spaceless_string("boolean", "bool").result(
getattr(dt, _bool_type())(nullable=False)
)
| spaceless_string("integer", "int32", "int4", "int").result(
dt.Int32(nullable=False)
@@ -61,13 +84,6 @@ def datetime():
).result(dt.String(nullable=False))
)

@parsy.generate
def parened_string():
yield LPAREN
s = yield RAW_STRING
yield RPAREN
return s

@parsy.generate
def nullable():
yield spaceless_string("nullable")
@@ -223,6 +239,11 @@ def _(ty: dt.DataType) -> str:
return type(ty).__name__.capitalize()


@serialize_raw.register(dt.Boolean)
def _(_: dt.Boolean) -> str:
return _bool_type()


@serialize_raw.register(dt.Array)
def _(ty: dt.Array) -> str:
return f"Array({serialize(ty.value_type)})"
@@ -239,11 +260,16 @@ def _(ty: dt.Map) -> str:
@serialize_raw.register(dt.Struct)
def _(ty: dt.Struct) -> str:
fields = ", ".join(
f"{name} {serialize(field_ty)}" for name, field_ty in ty.pairs.items()
f"{name} {serialize(field_ty)}" for name, field_ty in ty.fields.items()
)
return f"Tuple({fields})"


@serialize_raw.register(dt.Timestamp)
def _(ty: dt.Timestamp) -> str:
return "DateTime64(6)" if ty.timezone is None else f"DateTime64(6, {ty.timezone!r})"
if (scale := ty.scale) is None:
scale = 3

if (timezone := ty.timezone) is not None:
return f"DateTime64({scale:d}, {timezone})"
return f"DateTime64({scale:d})"
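With these changes the parser round-trips DateTime64 scale and timezone into the Timestamp type, matching the new test cases further down. For example:

import ibis.expr.datatypes as dt
from ibis.backends.clickhouse.datatypes import parse

assert parse("DateTime64(3, 'UTC')") == dt.Timestamp(
    scale=3, timezone="UTC", nullable=False
)
assert parse("DateTime64") == dt.Timestamp(nullable=False)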
11 changes: 5 additions & 6 deletions ibis/backends/clickhouse/tests/conftest.py
@@ -8,7 +8,6 @@

import ibis
import ibis.expr.types as ir
from ibis.backends.conftest import TEST_TABLES, read_tables
from ibis.backends.tests.base import BackendTest, RoundHalfToEven, UnorderedComparator

CLICKHOUSE_HOST = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST', 'localhost')
@@ -24,9 +23,13 @@ class TestConf(UnorderedComparator, BackendTest, RoundHalfToEven):
returned_timestamp_unit = 's'
supported_to_timestamp_units = {'s'}
supports_floating_modulus = False
bool_is_int = True
supports_json = False

@property
def native_bool(self) -> bool:
[(value,)] = self.connection._client.execute("SELECT true")
return isinstance(value, bool)

@staticmethod
def _load_data(
data_dir: Path,
@@ -67,10 +70,6 @@ def _load_data(
for stmt in filter(None, map(str.strip, schema.read().split(";"))):
client.execute(stmt)

for table, df in read_tables(TEST_TABLES, data_dir):
query = f"INSERT INTO {table} VALUES"
client.insert_dataframe(query, df.to_pandas(), settings={"use_numpy": True})

@staticmethod
def connect(data_directory: Path):
pytest.importorskip("clickhouse_driver")
@@ -1 +1 @@
-CAST(string_col AS Nullable(DateTime64(6)))
+CAST(string_col AS Nullable(DateTime64(3)))
@@ -1 +1 @@
-position('a' IN string_col) - 1
+locate(string_col, 'a') - 1
@@ -1 +1 @@
-position(string_col IN string_col) - 1
+locate(string_col, string_col) - 1
@@ -1 +1 @@
-CAST(timestamp_col AS DateTime64(6))
+CAST(timestamp_col AS DateTime64(3))
@@ -1 +1 @@
-CAST(int_col AS DateTime64(6))
+CAST(int_col AS DateTime64(3))
15 changes: 11 additions & 4 deletions ibis/backends/clickhouse/tests/test_operators.py
@@ -5,6 +5,7 @@
import pandas as pd
import pandas.testing as tm
import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
@@ -156,11 +157,17 @@ def test_field_in_literals(con, alltypes, translate, container):
assert len(con.execute(expr))


@pytest.mark.parametrize('column', ['int_col', 'float_col', 'bool_col'])
def test_negate(con, alltypes, translate, column):
# clickhouse represent boolean as UInt8
@pytest.mark.parametrize(
("column", "operator"),
[
param("int_col", "-", id="int_col"),
param("float_col", "-", id="float_col"),
param("bool_col", "NOT ", id="bool_col"),
],
)
def test_negate(con, alltypes, translate, column, operator):
expr = -alltypes[column]
assert translate(expr.op()) == f'-{column}'
assert translate(expr.op()) == f"{operator}{column}"
assert len(con.execute(expr))


145 changes: 109 additions & 36 deletions ibis/backends/clickhouse/tests/test_types.py
@@ -1,4 +1,5 @@
import pytest
from pytest import param

import ibis.expr.datatypes as dt
from ibis.backends.clickhouse.datatypes import parse
@@ -30,55 +31,104 @@ def test_columns_types_with_additional_argument(con):
@pytest.mark.parametrize(
('ch_type', 'ibis_type'),
[
(
param(
"Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4)",
dt.String(nullable=False),
id="enum",
),
('IPv4', dt.inet(nullable=False)),
('IPv6', dt.inet(nullable=False)),
('JSON', dt.json(nullable=False)),
("Object('json')", dt.json(nullable=False)),
('LowCardinality(String)', dt.String(nullable=False)),
('Array(Int8)', dt.Array(dt.Int8(nullable=False), nullable=False)),
('Array(Int16)', dt.Array(dt.Int16(nullable=False), nullable=False)),
('Array(Int32)', dt.Array(dt.Int32(nullable=False), nullable=False)),
('Array(Int64)', dt.Array(dt.Int64(nullable=False), nullable=False)),
('Array(UInt8)', dt.Array(dt.UInt8(nullable=False), nullable=False)),
('Array(UInt16)', dt.Array(dt.UInt16(nullable=False), nullable=False)),
('Array(UInt32)', dt.Array(dt.UInt32(nullable=False), nullable=False)),
('Array(UInt64)', dt.Array(dt.UInt64(nullable=False), nullable=False)),
(
param('IPv4', dt.inet(nullable=False), id="ipv4"),
param('IPv6', dt.inet(nullable=False), id="ipv6"),
param('JSON', dt.json(nullable=False), id="json"),
param("Object('json')", dt.json(nullable=False), id="object_json"),
param(
'LowCardinality(String)', dt.String(nullable=False), id="low_card_string"
),
param(
'Array(Int8)',
dt.Array(dt.Int8(nullable=False), nullable=False),
id="array_int8",
),
param(
'Array(Int16)',
dt.Array(dt.Int16(nullable=False), nullable=False),
id="array_int16",
),
param(
'Array(Int32)',
dt.Array(dt.Int32(nullable=False), nullable=False),
id="array_int32",
),
param(
'Array(Int64)',
dt.Array(dt.Int64(nullable=False), nullable=False),
id="array_int64",
),
param(
'Array(UInt8)',
dt.Array(dt.UInt8(nullable=False), nullable=False),
id="array_uint8",
),
param(
'Array(UInt16)',
dt.Array(dt.UInt16(nullable=False), nullable=False),
id="array_uint16",
),
param(
'Array(UInt32)',
dt.Array(dt.UInt32(nullable=False), nullable=False),
id="array_uint32",
),
param(
'Array(UInt64)',
dt.Array(dt.UInt64(nullable=False), nullable=False),
id="array_uint64",
),
param(
'Array(Float32)',
dt.Array(dt.Float32(nullable=False), nullable=False),
id="array_float32",
),
(
param(
'Array(Float64)',
dt.Array(dt.Float64(nullable=False), nullable=False),
id="array_float64",
),
param(
'Array(String)',
dt.Array(dt.String(nullable=False), nullable=False),
id="array_string",
),
('Array(String)', dt.Array(dt.String(nullable=False), nullable=False)),
(
param(
'Array(FixedString(32))',
dt.Array(dt.String(nullable=False), nullable=False),
id="array_fixed_string",
),
param(
'Array(Date)',
dt.Array(dt.Date(nullable=False), nullable=False),
id="array_date",
),
('Array(Date)', dt.Array(dt.Date(nullable=False), nullable=False)),
(
param(
'Array(DateTime)',
dt.Array(dt.Timestamp(nullable=False), nullable=False),
id="array_datetime",
),
(
param(
'Array(DateTime64)',
dt.Array(dt.Timestamp(nullable=False), nullable=False),
id="array_datetime64",
),
('Array(Nothing)', dt.Array(dt.null, nullable=False)),
('Array(Null)', dt.Array(dt.null, nullable=False)),
(
param('Array(Nothing)', dt.Array(dt.null, nullable=False), id="array_nothing"),
param('Array(Null)', dt.Array(dt.null, nullable=False), id="array_null"),
param(
'Array(Array(Int8))',
dt.Array(
dt.Array(dt.Int8(nullable=False), nullable=False),
nullable=False,
),
id="double_array",
),
(
param(
'Array(Array(Array(Int8)))',
dt.Array(
dt.Array(
@@ -87,8 +137,9 @@ def test_columns_types_with_additional_argument(con):
),
nullable=False,
),
id="triple_array",
),
(
param(
'Array(Array(Array(Array(Int8))))',
dt.Array(
dt.Array(
@@ -100,52 +151,74 @@ def test_columns_types_with_additional_argument(con):
),
nullable=False,
),
id="quad_array",
),
(
param(
"Map(Nullable(String), Nullable(UInt64))",
dt.Map(dt.string, dt.uint64, nullable=False),
id="map",
),
("Decimal(10, 3)", dt.Decimal(10, 3, nullable=False)),
(
param("Decimal(10, 3)", dt.Decimal(10, 3, nullable=False), id="decimal"),
param(
"Tuple(a String, b Array(Nullable(Float64)))",
dt.Struct.from_dict(
dt.Struct(
dict(
a=dt.String(nullable=False),
b=dt.Array(dt.float64, nullable=False),
),
nullable=False,
),
id="named_tuple",
),
(
param(
"Tuple(String, Array(Nullable(Float64)))",
dt.Struct.from_dict(
dt.Struct(
dict(
f0=dt.String(nullable=False),
f1=dt.Array(dt.float64, nullable=False),
),
nullable=False,
),
id="unnamed_tuple",
),
(
param(
"Tuple(a String, Array(Nullable(Float64)))",
dt.Struct.from_dict(
dt.Struct(
dict(
a=dt.String(nullable=False),
f1=dt.Array(dt.float64, nullable=False),
),
nullable=False,
),
id="partially_named",
),
(
param(
"Nested(a String, b Array(Nullable(Float64)))",
dt.Struct.from_dict(
dt.Struct(
dict(
a=dt.Array(dt.String(nullable=False), nullable=False),
b=dt.Array(dt.Array(dt.float64, nullable=False), nullable=False),
),
nullable=False,
),
id="nested",
),
param(
"DateTime64(0)", dt.Timestamp(scale=0, nullable=False), id="datetime64_zero"
),
param(
"DateTime64(1)", dt.Timestamp(scale=1, nullable=False), id="datetime64_one"
),
param("DateTime64", dt.Timestamp(nullable=False), id="datetime64"),
]
+ [
param(
f"DateTime64({scale}, '{tz}')",
dt.Timestamp(scale=scale, timezone=tz, nullable=False),
id=f"datetime64_{scale}_{tz}",
)
for scale in range(10)
for tz in ("UTC", "America/New_York", "America/Chicago", "America/Los_Angeles")
],
)
def test_parse_type(ch_type, ibis_type):
50 changes: 15 additions & 35 deletions ibis/backends/conftest.py
@@ -6,19 +6,15 @@
import platform
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, Iterable, Iterator, TextIO
from typing import Any, TextIO

import _pytest
import pandas as pd
import pytest
import sqlalchemy as sa
from packaging.requirements import Requirement
from packaging.version import parse as vparse

if TYPE_CHECKING:
import pyarrow as pa

import pytest

import ibis
from ibis import util
from ibis.backends.base import _get_backend_names
@@ -141,16 +137,17 @@ def recreate_database(
engine = sa.create_engine(url.set(database=""), **kwargs)

if url.database is not None:
with engine.connect() as conn:
conn.execute(f'DROP DATABASE IF EXISTS {database}')
conn.execute(f'CREATE DATABASE {database}')
with engine.begin() as con:
con.exec_driver_sql(f"DROP DATABASE IF EXISTS {database}")
con.exec_driver_sql(f"CREATE DATABASE {database}")


def init_database(
url: sa.engine.url.URL,
database: str,
schema: TextIO | None = None,
recreate: bool = True,
isolation_level: str | None = "AUTOCOMMIT",
**kwargs: Any,
) -> sa.engine.Engine:
"""Initialise `database` at `url` with `schema`.
@@ -167,11 +164,17 @@
File object containing schema to use
recreate : bool
If true, drop the database if it exists
isolation_level : str
Transaction isolation_level

Returns
-------
sa.engine.Engine for the database created
sa.engine.Engine
SQLAlchemy engine object
"""
if isolation_level is not None:
kwargs["isolation_level"] = isolation_level

if recreate:
recreate_database(url, database, **kwargs)

@@ -183,36 +186,13 @@
engine = sa.create_engine(url, **kwargs)

if schema:
with engine.connect() as conn:
with engine.begin() as conn:
for stmt in filter(None, map(str.strip, schema.read().split(';'))):
conn.execute(stmt)
conn.exec_driver_sql(stmt)

return engine


def read_tables(
names: Iterable[str],
data_dir: Path,
) -> Iterator[tuple[str, pa.Table]]:
"""For each csv {names} in {data_dir} return a pyarrow.Table."""

import pyarrow.csv as pac

import ibis.backends.pyarrow.datatypes as pa_dt

for name in names:
schema = TEST_TABLES[name]
convert_options = pac.ConvertOptions(
column_types={
name: pa_dt.to_pyarrow_type(type) for name, type in schema.items()
}
)
yield name, pac.read_csv(
data_dir / f'{name}.csv',
convert_options=convert_options,
)


def _random_identifier(suffix: str) -> str:
return f"__ibis_test_{suffix}_{util.guid()}"

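A sketch of how a backend test fixture would call the updated helper (the URL and database name are illustrative only):

import sqlalchemy as sa

from ibis.backends.conftest import init_database

engine = init_database(
    url=sa.engine.make_url("postgresql://user:pass@localhost:5432"),
    database="ibis_testing",
    recreate=True,
    # isolation_level defaults to "AUTOCOMMIT" so the DROP/CREATE DATABASE
    # statements in recreate_database() run outside a transaction
)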
7 changes: 5 additions & 2 deletions ibis/backends/dask/__init__.py
@@ -30,14 +30,14 @@ class Backend(BasePandasBackend):

def do_connect(
self,
dictionary: MutableMapping[str, dd.DataFrame],
dictionary: MutableMapping[str, dd.DataFrame] | None = None,
) -> None:
"""Construct a Dask backend client from a dictionary of data sources.

Parameters
----------
dictionary
Mapping from `str` table names to Dask DataFrames.
An optional mapping from `str` table names to Dask DataFrames.

Examples
--------
@@ -52,6 +52,9 @@ def do_connect(
# register dispatchers
from ibis.backends.dask import udf # noqa: F401

if dictionary is None:
dictionary = {}

for k, v in dictionary.items():
if not isinstance(v, (dd.DataFrame, pd.DataFrame)):
raise TypeError(
5 changes: 5 additions & 0 deletions ibis/backends/dask/tests/test_client.py
@@ -37,6 +37,11 @@ def table(client):
return client.table('df')


def test_connect_no_args():
con = ibis.dask.connect()
assert dict(con.tables) == {}


def test_client_table(table):
assert isinstance(table.op(), ibis.expr.operations.DatabaseTable)
assert isinstance(table.op(), DaskTable)
4 changes: 2 additions & 2 deletions ibis/backends/datafusion/__init__.py
@@ -213,14 +213,14 @@ def _get_frame(
return self.compile(expr, params, **kwargs)
elif isinstance(expr, ir.Column):
# expression must be named for the projection
expr = expr.name('tmp').to_projection()
expr = expr.name('tmp').as_table()
return self.compile(expr, params, **kwargs)
elif isinstance(expr, ir.Scalar):
if an.find_immediate_parent_tables(expr.op()):
# there are associated datafusion tables so convert the expr
# to a selection which we can directly convert to a datafusion
# plan
expr = expr.name('tmp').to_projection()
expr = expr.name('tmp').as_table()
frame = self.compile(expr, params, **kwargs)
else:
# doesn't have any tables associated so create a plan from a
2 changes: 1 addition & 1 deletion ibis/backends/datafusion/tests/conftest.py
@@ -16,7 +16,7 @@ class TestConf(BackendTest, RoundAwayFromZero):
# additional_skipped_operations = frozenset({ops.StringSQLLike})
# supports_divide_by_zero = True
# returned_timestamp_unit = 'ns'
bool_is_int = True
native_bool = False
supports_structs = False
supports_json = False

307 changes: 155 additions & 152 deletions ibis/backends/duckdb/__init__.py

Large diffs are not rendered by default.

24 changes: 19 additions & 5 deletions ibis/backends/duckdb/compiler.py
@@ -5,18 +5,19 @@
import ibis.backends.base.sql.alchemy.datatypes as sat
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator
from ibis.backends.base.sql.alchemy import (
AlchemyCompiler,
AlchemyExprTranslator,
to_sqla_type,
)
from ibis.backends.duckdb.registry import operation_registry


class DuckDBSQLExprTranslator(AlchemyExprTranslator):
_registry = operation_registry
_rewrites = AlchemyExprTranslator._rewrites.copy()
# The PostgreSQLExprTranslater maps to a `DOUBLE_PRECISION`
# type that duckdb doesn't understand, but we probably still want
# the updated `operation_registry` from postgres
_type_map = AlchemyExprTranslator._type_map.copy()
_has_reduction_filter_syntax = True
_dialect_name = "duckdb"


@compiles(sat.UInt64, "duckdb")
@@ -27,6 +28,11 @@ def compile_uint(element, compiler, **kw):
return element.__class__.__name__.upper()


@compiles(sat.ArrayType, "duckdb")
def compile_array(element, compiler, **kw):
return f"{compiler.process(element.value_type, **kw)}[]"


try:
import duckdb_engine
except ImportError:
@@ -40,6 +46,14 @@ def compile_uint(element, compiler, **kw):
def dtype_uint(_, satype, nullable=True):
return getattr(dt, satype.__class__.__name__)(nullable=nullable)

@dt.dtype.register(duckdb_engine.Dialect, sat.ArrayType)
def _(dialect, satype, nullable=True):
return dt.Array(dt.dtype(dialect, satype.value_type), nullable=nullable)

@to_sqla_type.register(duckdb_engine.Dialect, dt.Array)
def _(dialect, itype):
return sat.ArrayType(to_sqla_type(dialect, itype.value_type))


rewrites = DuckDBSQLExprTranslator.rewrites

3 changes: 1 addition & 2 deletions ibis/backends/duckdb/datatypes.py
@@ -155,8 +155,7 @@ def struct():


@util.deprecated(
instead=f"use {parse.__module__}.{parse.__name__}",
version="4.0",
instead=f"use {parse.__module__}.{parse.__name__}", as_of="4.0", removed_in="5.0"
)
def parse_type(*args, **kwargs):
return parse(*args, **kwargs)
32 changes: 32 additions & 0 deletions ibis/backends/duckdb/pyarrow.py
@@ -0,0 +1,32 @@
from __future__ import annotations

import pyarrow as pa


class IbisRecordBatchReader(pa.ipc.RecordBatchReader):
"""Hack to make sure the database cursor isn't garbage collected.

Without this hack batches are streamed out of the RecordBatchReader on a
closed cursor.
"""

def __init__(self, reader, cursor):
self.reader = reader
self.cursor = cursor

def close(self):
self.reader.close()
del self.cursor

def read_all(self):
return self.reader.read_all()

def read_next_batch(self):
return self.reader.read_next_batch()

def read_pandas(self):
return self.reader.read_pandas()

@property
def schema(self):
return self.reader.schema
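A sketch of the intended call site for this wrapper (the method names on the DuckDB cursor are assumptions here, not part of the diff):

import pyarrow as pa

from ibis.backends.duckdb.pyarrow import IbisRecordBatchReader

def to_record_batch_reader(cursor, chunk_size: int) -> pa.ipc.RecordBatchReader:
    # hypothetical: duckdb's cursor exposes a streaming Arrow reader
    raw_reader = cursor.cursor.fetch_record_batch(chunk_size)
    # keep `cursor` alive for the lifetime of the reader (see class docstring)
    return IbisRecordBatchReader(raw_reader, cursor)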
68 changes: 47 additions & 21 deletions ibis/backends/duckdb/registry.py
@@ -8,7 +8,7 @@
import sqlalchemy as sa

import ibis.expr.operations as ops
from ibis.backends.base.sql.alchemy import to_sqla_type, unary
from ibis.backends.base.sql.alchemy import unary
from ibis.backends.base.sql.alchemy.registry import (
_table_column,
geospatial_functions,
@@ -74,9 +74,9 @@ def _timestamp_from_unix(t, op):
raise ValueError(f"`{unit}` unit is not supported!")


def _literal(_, op):
def _literal(t, op):
dtype = op.output_dtype
sqla_type = to_sqla_type(dtype)
sqla_type = t.get_sqla_type(dtype)
value = op.value

if dtype.is_interval():
@@ -99,7 +99,7 @@ def _literal(_, op):
*(sa.bindparam(f"v{i:d}", val) for i, val in enumerate(value.values()))
)
name = op.name if isinstance(op, ops.Named) else "tmp"
params = {name: to_sqla_type(dtype)}
params = {name: t.get_sqla_type(dtype)}
return bound_text.columns(**params).scalar_subquery()
raise NotImplementedError(
f"Ibis dtype `{dtype}` with mapping type "
@@ -117,20 +117,16 @@ def _neg_idx_to_pos(array, idx):

def _regex_extract(string, pattern, index):
result = sa.case(
[
(
sa.func.regexp_matches(string, pattern),
sa.func.regexp_extract(
string,
pattern,
# DuckDB requires the index to be a constant so we compile
# the value and inline it using sa.text
sa.text(
str(index.compile(compile_kwargs=dict(literal_binds=True)))
),
),
)
],
(
sa.func.regexp_matches(string, pattern),
sa.func.regexp_extract(
string,
pattern,
# DuckDB requires the index to be a constant so we compile
# the value and inline it using sa.text
sa.text(str(index.compile(compile_kwargs=dict(literal_binds=True)))),
),
),
else_="",
)
return result
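
The constant-inlining trick described in the comment can be seen in isolation: compiling a bound value with `literal_binds=True` yields its literal SQL text, which `sa.text` then splices in verbatim where DuckDB insists on a constant. A standalone sketch, independent of the registry code:

import sqlalchemy as sa

index = sa.literal(2)
# Compile the bound value down to its literal representation ("2").
inlined = sa.text(str(index.compile(compile_kwargs=dict(literal_binds=True))))

expr = sa.func.regexp_extract(sa.column("s"), "(a)(b)", inlined)
print(expr)  # regexp_extract(s, :regexp_extract_1, 2)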
@@ -186,7 +182,7 @@ def _struct_column(t, op):
ops.ArrayColumn: (
lambda t, op: sa.cast(
sa.func.list_value(*map(t.translate, op.cols)),
to_sqla_type(op.output_dtype),
t.get_sqla_type(op.output_dtype),
)
),
ops.ArrayConcat: fixed_arity(sa.func.array_concat, 2),
@@ -202,8 +198,11 @@ def _struct_column(t, op):
ops.ArraySlice: _array_slice(
index_converter=_neg_idx_to_pos,
array_length=sa.func.array_length,
func=sa.func.list_slice,
),
ops.ArrayIndex: _array_index(
index_converter=_neg_idx_to_pos, func=sa.func.list_extract
),
ops.ArrayIndex: _array_index(index_converter=_neg_idx_to_pos),
ops.DayOfWeekName: unary(sa.func.dayname),
ops.Literal: _literal,
ops.Log2: unary(sa.func.log2),
Expand All @@ -217,7 +216,7 @@ def _struct_column(t, op):
lambda t, op: sa.func.struct_extract(
t.translate(op.arg),
sa.text(repr(op.field)),
type_=to_sqla_type(op.output_dtype),
type_=t.get_sqla_type(op.output_dtype),
)
),
ops.TableColumn: _table_column,
@@ -253,5 +252,32 @@ def _struct_column(t, op):
ops.StringToTimestamp: fixed_arity(sa.func.strptime, 2),
ops.Quantile: reduction(sa.func.quantile_cont),
ops.MultiQuantile: reduction(sa.func.quantile_cont),
ops.TypeOf: unary(sa.func.typeof),
}
)


_invalid_operations = {
# ibis.expr.operations.analytic
ops.CumulativeAll,
ops.CumulativeAny,
ops.CumulativeOp,
ops.NTile,
# ibis.expr.operations.strings
ops.Capitalize,
ops.Translate,
# ibis.expr.operations.temporal
ops.TimestampDiff,
# ibis.expr.operations.maps
ops.MapGet,
ops.MapContains,
ops.MapKeys,
ops.MapValues,
ops.MapMerge,
ops.MapLength,
ops.Map,
}

operation_registry = {
k: v for k, v in operation_registry.items() if k not in _invalid_operations
}
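
With the filter applied, unsupported operations simply have no registry entry, so compilation fails with a clear unsupported-operation error instead of emitting SQL that DuckDB would reject. A quick sanity check, assuming the module layout above:

import ibis.expr.operations as ops
from ibis.backends.duckdb.registry import operation_registry

assert ops.NTile not in operation_registry  # filtered out above
assert ops.Quantile in operation_registry  # still supported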
2 changes: 1 addition & 1 deletion ibis/backends/duckdb/tests/test_datatypes.py
@@ -49,7 +49,7 @@
P=dt.string,
Q=dt.Array(dt.int32),
R=dt.Map(dt.string, dt.int64),
S=dt.Struct.from_dict(
S=dt.Struct(
dict(
a=dt.int32,
b=dt.string,
30 changes: 25 additions & 5 deletions ibis/backends/duckdb/tests/test_register.py
@@ -3,6 +3,7 @@

import pandas as pd
import pytest
import sqlalchemy as sa

import ibis

@@ -17,19 +18,36 @@ def test_read_parquet(data_directory):
assert t.count().execute()


def test_read_json(data_directory, tmp_path):
pqt = ibis.read_parquet(data_directory / "functional_alltypes.parquet")

path = tmp_path.joinpath("ft.json")
path.write_text(pqt.execute().to_json(orient="records", lines=True))

jst = ibis.read_json(path)

nrows = pqt.count().execute()
assert nrows
assert nrows == jst.count().execute()


def test_temp_directory(tmp_path):
query = "SELECT value FROM duckdb_settings() WHERE name = 'temp_directory'"
query = sa.text("SELECT value FROM duckdb_settings() WHERE name = 'temp_directory'")

# 1. in-memory + no temp_directory specified
con = ibis.duckdb.connect()
[(value,)] = con.con.execute(query).fetchall()
assert value # we don't care what the specific value is
with con.begin() as c:
cur = c.execute(query)
value = cur.scalar()
assert value # we don't care what the specific value is

temp_directory = Path(tempfile.gettempdir()) / "duckdb"

# 2. in-memory + temp_directory specified
con = ibis.duckdb.connect(temp_directory=temp_directory)
[(value,)] = con.con.execute(query).fetchall()
with con.begin() as c:
cur = c.execute(query)
value = cur.scalar()
assert value == str(temp_directory)

# 3. on-disk + no temp_directory specified
Expand All @@ -38,7 +56,9 @@ def test_temp_directory(tmp_path):

# 4. on-disk + temp_directory specified
con = ibis.duckdb.connect(tmp_path / "test2.ddb", temp_directory=temp_directory)
[(value,)] = con.con.execute(query).fetchall()
with con.begin() as c:
cur = c.execute(query)
value = cur.scalar()
assert value == str(temp_directory)
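
The `begin()`/`scalar()` pattern the test migrates to, shown in isolation as a sketch against an in-memory connection:

import sqlalchemy as sa

import ibis

con = ibis.duckdb.connect()  # in-memory database
with con.begin() as c:
    # execute() returns a CursorResult; scalar() pulls out the single value.
    assert c.execute(sa.text("SELECT 42")).scalar() == 42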


14 changes: 7 additions & 7 deletions ibis/backends/impala/__init__.py
@@ -381,8 +381,7 @@ def _get_list(self, cur):
return list(map(operator.itemgetter(0), tuples))

@util.deprecated(
version='2.0',
instead='use a new connection to the database',
as_of="2.0", removed_in="5.0", instead="use a new connection to the database"
)
def set_database(self, name):
# XXX The parent `Client` has a generic method that calls this same
@@ -490,10 +489,11 @@ def get_schema(

# only pull out the first two columns which are names and types
pairs = [row[:2] for row in self.con.fetchall(query)]

names, types = zip(*pairs)

ibis_types = [udf.parse_type(type.lower()) for type in types]
return sch.Schema(names, ibis_types)
ibis_fields = dict(zip(names, ibis_types))
return sch.Schema(ibis_fields)

@property
def client_options(self):
@@ -1004,10 +1004,10 @@ def _get_schema_using_query(self, query):
cur = self.raw_sql(f"SELECT * FROM ({query}) t0 LIMIT 0")
# resets the state of the cursor and closes operation
cur.fetchall()
names, ibis_types = self._adapt_types(cur.description)
ibis_fields = self._adapt_types(cur.description)
cur.release()

return sch.Schema(names, ibis_types)
return sch.Schema(ibis_fields)

def create_function(self, func, name=None, database=None):
"""Create a function within Impala.
@@ -1335,7 +1335,7 @@ def _adapt_types(self, descr):
adapted_types.append(dt.Decimal(precision, scale))
else:
adapted_types.append(typename)
return names, adapted_types
return dict(zip(names, adapted_types))
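
These call sites migrate from the two-sequence `sch.Schema(names, types)` form to a single mapping of names to dtypes. A minimal sketch of the mapping-based construction:

import ibis.expr.datatypes as dt
import ibis.expr.schema as sch

schema = sch.Schema({"a": dt.int32, "b": dt.string})
assert "a" in schema.names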

def write_dataframe(
self,
7 changes: 3 additions & 4 deletions ibis/backends/impala/client.py
@@ -115,7 +115,7 @@ def _new_cursor(self):
wrapper.set_options()
return wrapper

@util.deprecated(instead="", version="4.0")
@util.deprecated(instead="", as_of="4.0", removed_in="5.0")
def ping(self): # pragma: no cover
self.pool.connect()._cursor.ping()

@@ -382,7 +382,7 @@ def rename(self, new_name, database=None):
statement = RenameTable(self._qualified_name, new_name, new_database=database)
self._client.raw_sql(statement)

op = self.op().change_name(statement.new_qualified_name)
op = self.op().copy(name=statement.new_qualified_name)
return type(self)(op)

@property
@@ -403,8 +403,7 @@ def partition_schema(self):
break
partition_fields.append((x, name_to_type[x]))

pnames, ptypes = zip(*partition_fields)
return sch.Schema(pnames, ptypes)
return sch.Schema(dict(partition_fields))

def add_partition(self, spec, location=None):
"""Add a new table partition.
18 changes: 0 additions & 18 deletions ibis/backends/impala/parquet.py

This file was deleted.

15 changes: 7 additions & 8 deletions ibis/backends/impala/tests/conftest.py
@@ -152,12 +152,7 @@ def connect(
)

def _get_original_column_names(self, tablename: str) -> list[str]:
import pyarrow.parquet as pq

pq_file = pq.ParquetFile(
self.data_directory / "parquet" / tablename / f"{tablename}.parquet"
)
return pq_file.schema.names
return list(TEST_TABLES[tablename].names)

def _get_renamed_table(self, tablename: str) -> ir.Table:
t = self.connection.table(tablename)
@@ -502,8 +497,12 @@ def impala_create_test_database(con, env):


PARQUET_SCHEMAS = {
'functional_alltypes': TEST_TABLES["functional_alltypes"].delete(
["index", "Unnamed: 0"]
"functional_alltypes": ibis.schema(
{
name: dtype
for name, dtype in TEST_TABLES["functional_alltypes"].items()
if name not in {"index", "Unnamed: 0"}
}
),
"tpch_region": ibis.schema(
[
@@ -1,20 +1,20 @@
SELECT
CASE `tier`
CASE t0.`tier`
WHEN 0 THEN 'Under 0'
WHEN 1 THEN '0 to 10'
WHEN 2 THEN '10 to 25'
WHEN 3 THEN '25 to 50'
ELSE 'error'
END AS `tier2`, `count`
END AS `tier2`, t0.`count`
FROM (
SELECT
CASE
WHEN `f` < 0 THEN 0
WHEN (0 <= `f`) AND (`f` < 10) THEN 1
WHEN (10 <= `f`) AND (`f` < 25) THEN 2
WHEN (25 <= `f`) AND (`f` <= 50) THEN 3
WHEN t1.`f` < 0 THEN 0
WHEN (0 <= t1.`f`) AND (t1.`f` < 10) THEN 1
WHEN (10 <= t1.`f`) AND (t1.`f` < 25) THEN 2
WHEN (25 <= t1.`f`) AND (t1.`f` <= 50) THEN 3
ELSE CAST(NULL AS tinyint)
END AS `tier`, count(1) AS `count`
FROM alltypes
FROM alltypes t1
GROUP BY 1
) t0
@@ -1,2 +1,2 @@
SELECT `tinyint_col` IS NOT DISTINCT FROM `double_col` AS `tmp`
FROM functional_alltypes
SELECT t0.`tinyint_col` IS NOT DISTINCT FROM t0.`double_col` AS `tmp`
FROM functional_alltypes t0
@@ -1,5 +1,5 @@
CREATE TABLE IF NOT EXISTS `tname`
STORED AS AVRO
AS
SELECT *
FROM functional_alltypes
SELECT t0.*
FROM functional_alltypes t0
@@ -2,5 +2,5 @@ CREATE EXTERNAL TABLE foo.`another_table`
STORED AS PARQUET
LOCATION '/path/to/table'
AS
SELECT *
FROM test1
SELECT t0.*
FROM test1 t0
@@ -1,6 +1,6 @@
CREATE TABLE bar.`some_table`
STORED AS PARQUET
AS
SELECT *
FROM functional_alltypes
WHERE `bigint_col` > 0
SELECT t0.*
FROM functional_alltypes t0
WHERE t0.`bigint_col` > 0
@@ -1,6 +1,6 @@
CREATE TABLE IF NOT EXISTS `tname`
STORED AS PARQUET
AS
SELECT *
FROM functional_alltypes
WHERE `bigint_col` > 0
SELECT t0.*
FROM functional_alltypes t0
WHERE t0.`bigint_col` > 0
@@ -1,4 +1,4 @@
INSERT INTO foo.`testing123456`
SELECT *
FROM functional_alltypes
SELECT t0.*
FROM functional_alltypes t0
LIMIT 10
@@ -1,4 +1,4 @@
INSERT OVERWRITE foo.`testing123456`
SELECT *
FROM functional_alltypes
SELECT t0.*
FROM functional_alltypes t0
LIMIT 10
@@ -1,15 +1,15 @@
SELECT `col`, `analytic`
SELECT t0.`col`, t0.`analytic`
FROM (
SELECT `col`, count(1) OVER () AS `analytic`
SELECT t1.`col`, count(1) OVER () AS `analytic`
FROM (
SELECT `col`, `filter`
SELECT t2.`col`, t2.`filter`
FROM (
SELECT *
SELECT t3.*
FROM (
SELECT `col`, NULL AS `filter`
FROM x
SELECT t4.`col`, NULL AS `filter`
FROM x t4
) t3
WHERE `filter` IS NULL
WHERE t3.`filter` IS NULL
) t2
) t1
) t0
@@ -1,4 +1,4 @@
SELECT `key`, sum(((`value` + 1) + 2) + 3) AS `abc`
FROM t0
WHERE `value` = 42
SELECT t0.`key`, sum(((t0.`value` + 1) + 2) + 3) AS `abc`
FROM t0 t0
WHERE t0.`value` = 42
GROUP BY 1
@@ -1,4 +1,4 @@
SELECT `key`, sum(((`value` + 1) + 2) + 3) AS `foo`
FROM t0
WHERE `value` = 42
SELECT t0.`key`, sum(((t0.`value` + 1) + 2) + 3) AS `foo`
FROM t0 t0
WHERE t0.`value` = 42
GROUP BY 1
@@ -1,2 +1,2 @@
SELECT count(DISTINCT if(`value` >= 1.0, `key`, NULL)) AS `CountDistinct(key, GreaterEqual(value, 1.0))`
FROM t0
SELECT count(DISTINCT if(t0.`value` >= 1.0, t0.`key`, NULL)) AS `CountDistinct(key, GreaterEqual(value, 1.0))`
FROM t0 t0
@@ -1,3 +1,3 @@
SELECT `uuid`, min(if(`search_level` = 1, `ts`, NULL)) AS `min_date`
FROM t
SELECT t0.`uuid`, min(if(t0.`search_level` = 1, t0.`ts`, NULL)) AS `min_date`
FROM t t0
GROUP BY 1
@@ -1,3 +1,3 @@
SELECT *
FROM alltypes
WHERE `g` IN ('foo', 'bar')
SELECT t0.*
FROM alltypes t0
WHERE t0.`g` IN ('foo', 'bar')
@@ -1,3 +1,3 @@
SELECT *
FROM alltypes
WHERE `g` NOT IN ('foo', 'bar')
SELECT t0.*
FROM alltypes t0
WHERE t0.`g` NOT IN ('foo', 'bar')
@@ -1,3 +1,3 @@
SELECT *,
sum(`two`) OVER (PARTITION BY `three` ORDER BY `one` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `four`
FROM my_data
SELECT t0.*,
sum(t0.`two`) OVER (PARTITION BY t0.`three` ORDER BY t0.`one` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `four`
FROM my_data t0
@@ -1,3 +1,3 @@
SELECT *
FROM `table`
WHERE (`a` IS NULL) = (`b` IS NULL)
SELECT t0.*
FROM `table` t0
WHERE (t0.`a` IS NULL) = (t0.`b` IS NULL)
@@ -1,3 +1,3 @@
SELECT *
FROM `table`
WHERE (`a` IS NOT NULL) = (`b` IS NOT NULL)
SELECT t0.*
FROM `table` t0
WHERE (t0.`a` IS NOT NULL) = (t0.`b` IS NOT NULL)
@@ -1,3 +1,3 @@
SELECT *
FROM `table`
WHERE (`a` IS NOT DISTINCT FROM NULL) = (`b` IS NOT DISTINCT FROM NULL)
SELECT t0.*
FROM `table` t0
WHERE (t0.`a` IS NOT DISTINCT FROM NULL) = (t0.`b` IS NOT DISTINCT FROM NULL)
@@ -1,33 +1,35 @@
WITH t0 AS (
SELECT `d`, `c`
SELECT t2.`d`, t2.`c`
FROM t2
),
t1 AS (
SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(1) AS `row_count`
SELECT t0.`d`, CAST(t0.`d` / 15 AS bigint) AS `idx`, t0.`c`,
count(1) AS `row_count`
FROM t0
GROUP BY 1, 2, 3
),
t2 AS (
SELECT *, `a` + 20 AS `d`
FROM test_table
SELECT t5.*, t5.`a` + 20 AS `d`
FROM test_table t5
)
SELECT t3.*, t4.`total`
FROM (
SELECT `d`, `b`, count(1) AS `count`, count(DISTINCT `c`) AS `unique`
SELECT t2.`d`, t2.`b`, count(1) AS `count`,
count(DISTINCT t2.`c`) AS `unique`
FROM t2
GROUP BY 1, 2
) t3
INNER JOIN (
SELECT t5.*
FROM (
SELECT t1.*, t8.`total`
SELECT t1.*, t7.`total`
FROM t1
INNER JOIN (
SELECT `d`, sum(`row_count`) AS `total`
SELECT t1.`d`, sum(t1.`row_count`) AS `total`
FROM t1
GROUP BY 1
) t8
ON t1.`d` = t8.`d`
) t7
ON t1.`d` = t7.`d`
) t5
WHERE t5.`row_count` < (t5.`total` / 2)
) t4
@@ -12,17 +12,17 @@ WITH t0 AS (
SELECT t1.`year`, t1.`count` AS `pre_count`, t2.`count` AS `post_count`,
t2.`count` / CAST(t1.`count` AS double) AS `fraction`
FROM (
SELECT extract(`odate`, 'year') AS `year`, count(1) AS `count`
SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `count`
FROM t0
GROUP BY 1
) t1
INNER JOIN (
SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `count`
FROM t0
WHERE t0.`o_totalprice` > (
SELECT avg(t7.`o_totalprice`) AS `mean`
FROM t0 t7
WHERE t7.`region` = t0.`region`
SELECT avg(t4.`o_totalprice`) AS `mean`
FROM t0 t4
WHERE t4.`region` = t0.`region`
)
GROUP BY 1
) t2
@@ -10,7 +10,7 @@ WITH t0 AS (
ON t6.`o_custkey` = t5.`c_custkey`
),
t1 AS (
SELECT extract(`odate`, 'year') AS `year`, count(1) AS `count`
SELECT extract(t0.`odate`, 'year') AS `year`, count(1) AS `count`
FROM t0
GROUP BY 1
)
@@ -1,6 +1,6 @@
WITH t0 AS (
SELECT *
FROM functional_alltypes
SELECT t2.*
FROM functional_alltypes t2
LIMIT 100
)
SELECT t0.*
@@ -1,2 +1,2 @@
SELECT NOT (`a` IN ('foo') AND (`c` IS NOT NULL)) AS `tmp`
FROM t
SELECT NOT (t0.`a` IN ('foo') AND (t0.`c` IS NOT NULL)) AS `tmp`
FROM t t0
@@ -1,11 +1,11 @@
SELECT *
SELECT t0.*
FROM (
SELECT *
FROM t0
WHERE `a` < 100
SELECT t1.*
FROM t0 t1
WHERE t1.`a` < 100
) t0
WHERE `a` = (
SELECT max(`a`) AS `Max(a)`
FROM t0
WHERE `a` < 100
WHERE t0.`a` = (
SELECT max(t1.`a`) AS `Max(a)`
FROM t0 t1
WHERE t1.`a` < 100
)
@@ -1,12 +1,12 @@
SELECT *
SELECT t0.*
FROM (
SELECT *
FROM t0
WHERE `a` < 100
SELECT t1.*
FROM t0 t1
WHERE t1.`a` < 100
) t0
WHERE (`a` = (
SELECT max(`a`) AS `Max(a)`
FROM t0
WHERE `a` < 100
WHERE (t0.`a` = (
SELECT max(t1.`a`) AS `Max(a)`
FROM t0 t1
WHERE t1.`a` < 100
)) AND
(`b` = 'a')
(t0.`b` = 'a')
@@ -1,11 +1,11 @@
WITH t0 AS (
SELECT `uuid`, count(1) AS `count`
FROM t
SELECT t2.`uuid`, count(1) AS `count`
FROM t t2
GROUP BY 1
)
SELECT t0.*
FROM (
SELECT `uuid`, max(`count`) AS `max_count`
SELECT t0.`uuid`, max(t0.`count`) AS `max_count`
FROM t0
GROUP BY 1
) t1
@@ -1,13 +1,13 @@
WITH t0 AS (
SELECT `userid`, `movieid`, `rating`,
CAST(`timestamp` AS timestamp) AS `datetime`
FROM ratings
SELECT t3.`userid`, t3.`movieid`, t3.`rating`,
CAST(t3.`timestamp` AS timestamp) AS `datetime`
FROM ratings t3
),
t1 AS (
SELECT t0.*, t5.`title`
SELECT t0.*, t4.`title`
FROM t0
INNER JOIN movies t5
ON t0.`movieid` = t5.`movieid`
INNER JOIN movies t4
ON t0.`movieid` = t4.`movieid`
)
SELECT t2.*
FROM (
@@ -17,16 +17,16 @@ FROM (
(extract(t1.`datetime`, 'year') > 2001)
) t2
WHERE t2.`movieid` IN (
SELECT `movieid`
SELECT t3.`movieid`
FROM (
SELECT `movieid`
SELECT t4.`movieid`
FROM (
SELECT t1.*
FROM t1
WHERE (t1.`userid` = 118205) AND
(extract(t1.`datetime`, 'year') > 2001) AND
(t1.`userid` = 118205) AND
(extract(t1.`datetime`, 'year') < 2009)
) t5
) t4
) t4
) t3
)
@@ -1,13 +1,13 @@
WITH t0 AS (
SELECT `uuid`, count(1) AS `count`
FROM t
SELECT t3.`uuid`, count(1) AS `count`
FROM t t3
GROUP BY 1
)
SELECT t1.*, t2.`last_visit`
FROM (
SELECT t0.*
FROM (
SELECT `uuid`, max(`count`) AS `max_count`
SELECT t0.`uuid`, max(t0.`count`) AS `max_count`
FROM t0
GROUP BY 1
) t3
@@ -16,8 +16,8 @@ FROM (
(t3.`max_count` = t0.`count`)
) t1
LEFT OUTER JOIN (
SELECT `uuid`, max(`ts`) AS `last_visit`
FROM t
SELECT t3.`uuid`, max(t3.`ts`) AS `last_visit`
FROM t t3
GROUP BY 1
) t2
ON t1.`uuid` = t2.`uuid`
@@ -1,2 +1,2 @@
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`
SELECT t0.`foo` AS `one`, t0.`bar`, t0.`baz` AS `three`
FROM `table` t0
@@ -1,3 +1,3 @@
SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`,
extract(`i`, 'day') AS `day`
FROM alltypes
SELECT extract(t0.`i`, 'year') AS `year`, extract(t0.`i`, 'month') AS `month`,
extract(t0.`i`, 'day') AS `day`
FROM alltypes t0
@@ -1,6 +1,6 @@
SELECT *, lag(`f`) OVER (PARTITION BY `g` ORDER BY `f`) AS `lag`,
lead(`f`) OVER (PARTITION BY `g` ORDER BY `f`) - `f` AS `fwd_diff`,
first_value(`f`) OVER (PARTITION BY `g` ORDER BY `f`) AS `first`,
last_value(`f`) OVER (PARTITION BY `g` ORDER BY `f`) AS `last`,
lag(`f`) OVER (PARTITION BY `g` ORDER BY `d` ASC) AS `lag2`
FROM alltypes
SELECT t0.*, lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f`) AS `lag`,
lead(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f`) - t0.`f` AS `fwd_diff`,
first_value(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f`) AS `first`,
last_value(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f`) AS `last`,
lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`d` ASC) AS `lag2`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT *, `f` / sum(`f`) OVER () AS `normed_f`
FROM alltypes
SELECT t0.*, t0.`f` / sum(t0.`f`) OVER () AS `normed_f`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT max(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT max(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT max(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT max(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT avg(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT avg(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT avg(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT avg(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT min(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT min(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT min(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT min(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`f`) OVER (ORDER BY `d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`f`) OVER (ORDER BY t0.`d` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,3 @@
SELECT `g`, sum(`f`) OVER (PARTITION BY `g`) - sum(`f`) OVER () AS `result`
FROM alltypes
SELECT t0.`g`,
sum(t0.`f`) OVER (PARTITION BY t0.`g`) - sum(t0.`f`) OVER () AS `result`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT lag(`f` - lag(`f`) OVER (ORDER BY `f` ASC)) OVER (ORDER BY `f` ASC) AS `foo`
FROM alltypes
SELECT lag(t0.`f` - lag(t0.`f`) OVER (ORDER BY t0.`f` ASC)) OVER (ORDER BY t0.`f` ASC) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM alltypes
SELECT t0.`f`, (row_number() OVER (ORDER BY t0.`f` DESC) - 1) AS `revrank`
FROM alltypes t0
@@ -1,3 +1,3 @@
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `Max(a)`
FROM alltypes
SELECT lag(t0.`d`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` DESC) AS `foo`,
max(t0.`a`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` DESC) AS `Max(a)`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT lag(`f` - lag(`f`) OVER (PARTITION BY `g` ORDER BY `f` ASC)) OVER (PARTITION BY `g` ORDER BY `f` ASC) AS `foo`
FROM alltypes
SELECT lag(t0.`f` - lag(t0.`f`) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC)) OVER (PARTITION BY t0.`g` ORDER BY t0.`f` ASC) AS `foo`
FROM alltypes t0
@@ -1,3 +1,3 @@
SELECT `g`, (rank() OVER (ORDER BY `f`) - 1) AS `minr`,
(dense_rank() OVER (ORDER BY `f`) - 1) AS `denser`
FROM alltypes
SELECT t0.`g`, (rank() OVER (ORDER BY t0.`f`) - 1) AS `minr`,
(dense_rank() OVER (ORDER BY t0.`f`) - 1) AS `denser`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT *, (row_number() OVER (PARTITION BY `g`) - 1) AS `foo`
FROM alltypes
SELECT t0.*, (row_number() OVER (PARTITION BY t0.`g`) - 1) AS `foo`
FROM alltypes t0
@@ -1,2 +1,3 @@
SELECT *, (row_number() OVER (PARTITION BY `g` ORDER BY `f`) - 1) AS `foo`
FROM alltypes
SELECT t0.*,
(row_number() OVER (PARTITION BY t0.`g` ORDER BY t0.`f`) - 1) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT *, (row_number() OVER (ORDER BY `f`) - 1) / 2 AS `new`
FROM alltypes
SELECT t0.*, (row_number() OVER (ORDER BY t0.`f`) - 1) / 2 AS `new`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 10 PRECEDING AND 5 PRECEDING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 10 PRECEDING AND 5 PRECEDING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 5 FOLLOWING AND 10 FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 FOLLOWING AND 10 FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 5 PRECEDING AND UNBOUNDED FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND UNBOUNDED FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) AS `foo`
FROM alltypes t0
@@ -1,2 +1,2 @@
SELECT sum(`d`) OVER (ORDER BY `f` ASC ROWS BETWEEN 10 PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes
SELECT sum(t0.`d`) OVER (ORDER BY t0.`f` ASC ROWS BETWEEN 10 PRECEDING AND CURRENT ROW) AS `foo`
FROM alltypes t0
48 changes: 16 additions & 32 deletions ibis/backends/mssql/__init__.py
@@ -2,16 +2,14 @@

from __future__ import annotations

import atexit
import contextlib
from typing import Literal

import sqlalchemy as sa

import ibis.expr.schema as sch
from ibis.backends.base.sql.alchemy import BaseAlchemyBackend
from ibis.backends.mssql.compiler import MsSqlCompiler
from ibis.backends.mssql.datatypes import _FieldDescription, _type_from_result_set_info
from ibis.backends.mssql.datatypes import _type_from_result_set_info


class Backend(BaseAlchemyBackend):
@@ -45,37 +43,23 @@ def do_connect(
@contextlib.contextmanager
def begin(self):
with super().begin() as bind:
previous_datefirst = bind.execute('SELECT @@DATEFIRST').scalar()
bind.execute('SET DATEFIRST 1')
try:
yield bind
finally:
bind.execute(f"SET DATEFIRST {previous_datefirst}")
prev = bind.exec_driver_sql("SELECT @@DATEFIRST").scalar()
bind.exec_driver_sql("SET DATEFIRST 1")
yield bind
bind.execute(sa.text("SET DATEFIRST :prev").bindparams(prev=prev))
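
The restore step now sends the previous `DATEFIRST` as a bound parameter instead of interpolating it into the SQL string. The pattern in isolation (`literal_binds` is used here only to show the SQL that would reach the server):

import sqlalchemy as sa

stmt = sa.text("SET DATEFIRST :prev").bindparams(prev=7)
print(stmt.compile(compile_kwargs=dict(literal_binds=True)))  # SET DATEFIRST 7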

def _get_schema_using_query(self, query):
def _metadata(self, query):
if query in self.list_tables():
query = f"SELECT * FROM [{query}]"

query = sa.text("EXEC sp_describe_first_result_set @tsql = :query").bindparams(
query=query
)
with self.begin() as bind:
result = bind.execute(
f"EXEC sp_describe_first_result_set @tsql = N'{query}';"
)
result_set_info: list[_FieldDescription] = result.mappings().fetchall()
fields = [
(column['name'], _type_from_result_set_info(column))
for column in result_set_info
]
return sch.Schema.from_tuples(fields)
for column in bind.execute(query).mappings():
yield column["name"], _type_from_result_set_info(column)

def _get_temp_view_definition(
self,
name: str,
definition: sa.sql.compiler.Compiled,
self, name: str, definition: sa.sql.compiler.Compiled
) -> str:
return f"CREATE OR ALTER VIEW {name} AS {definition}"

def _register_temp_view_cleanup(self, name: str, raw_name: str) -> None:
query = f"DROP VIEW IF EXISTS {name}"

def drop(self, raw_name: str, query: str):
self.con.execute(query)
self._temp_views.discard(raw_name)

atexit.register(drop, self, raw_name, query)
yield f"CREATE OR ALTER VIEW {name} AS {definition}"