29 changes: 15 additions & 14 deletions ci/schema/bigquery.sql
@@ -56,24 +56,25 @@ INSERT INTO {dataset}.numeric_table VALUES
('2nd value', 0.000000002);

CREATE OR REPLACE TABLE {dataset}.json_t (
rowid INT64,
js JSON
);

INSERT INTO {dataset}.json_t VALUES
(JSON '{{"a": [1,2,3,4], "b": 1}}'),
(JSON '{{"a":null,"b":2}}'),
(JSON '{{"a":"foo", "c":null}}'),
(JSON 'null'),
(JSON '[42,47,55]'),
(JSON '[]'),
(JSON '"a"'),
(JSON '""'),
(JSON '"b"'),
(NULL),
(JSON 'true'),
(JSON 'false'),
(JSON '42'),
(JSON '37.37');
(1, JSON '{{"a": [1,2,3,4], "b": 1}}'),
(2, JSON '{{"a":null,"b":2}}'),
(3, JSON '{{"a":"foo", "c":null}}'),
(4, JSON 'null'),
(5, JSON '[42,47,55]'),
(6, JSON '[]'),
(7, JSON '"a"'),
(8, JSON '""'),
(9, JSON '"b"'),
(10, NULL),
(11, JSON 'true'),
(12, JSON 'false'),
(13, JSON '42'),
(14, JSON '37.37');


LOAD DATA OVERWRITE {dataset}.functional_alltypes (
30 changes: 15 additions & 15 deletions ci/schema/duckdb.sql
@@ -29,23 +29,23 @@ INSERT INTO struct VALUES
(NULL),
({'a': 3.0, 'b': 'orange', 'c': NULL});

CREATE OR REPLACE TABLE json_t (js JSON);
CREATE OR REPLACE TABLE json_t (rowid BIGINT, js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

CREATE OR REPLACE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
INSERT INTO win VALUES
30 changes: 29 additions & 1 deletion ci/schema/exasol.sql
@@ -48,7 +48,7 @@ CREATE OR REPLACE TABLE EXASOL."awards_players"
"yearID" BIGINT,
"lgID" VARCHAR(256),
"tie" VARCHAR(256),
"notes" VARCHAR(256)
"notes" VARCHAR(256)
);

CREATE OR REPLACE TABLE EXASOL."functional_alltypes"
@@ -68,11 +68,39 @@ CREATE OR REPLACE TABLE EXASOL."functional_alltypes"
"month" INTEGER
);

CREATE OR REPLACE TABLE EXASOL."astronauts"
(
"id" INTEGER,
"number" INTEGER,
"nationwide_number" INTEGER,
"name" VARCHAR(256),
"original_name" VARCHAR(256),
"sex" VARCHAR(256),
"year_of_birth" INTEGER,
"nationality" VARCHAR(256),
"military_civilian" VARCHAR(256),
"selection" VARCHAR(256),
"year_of_selection" INTEGER,
"mission_number" INTEGER,
"total_number_of_missions" INTEGER,
"occupation" VARCHAR(256),
"year_of_mission" INTEGER,
"mission_title" VARCHAR(256),
"ascend_shuttle" VARCHAR(256),
"in_orbit" VARCHAR(256),
"descend_shuttle" VARCHAR(256),
"hours_mission" DOUBLE PRECISION,
"total_hrs_sum" DOUBLE PRECISION,
"field21" INTEGER,
"eva_hrs_mission" DOUBLE PRECISION,
"total_eva_hrs" DOUBLE PRECISION
);

IMPORT INTO EXASOL."diamonds" FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."batting" FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."awards_players" FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."functional_alltypes" FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."astronauts" FROM LOCAL CSV FILE '/data/astronauts.csv' COLUMN SEPARATOR = ',' SKIP = 1;

CREATE OR REPLACE TABLE EXASOL."win"
(
30 changes: 15 additions & 15 deletions ci/schema/mysql.sql
@@ -100,23 +100,23 @@ CREATE TABLE functional_alltypes (

DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);
CREATE TABLE IF NOT EXISTS json_t (rowid BIGINT, js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS win CASCADE;

50 changes: 35 additions & 15 deletions ci/schema/postgres.sql
@@ -250,23 +250,43 @@ CREATE INDEX IF NOT EXISTS idx_geo_geo_polygon ON geo USING GIST (geo_polygon);

DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);
CREATE TABLE IF NOT EXISTS json_t (rowid BIGINT, js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS jsonb_t CASCADE;

CREATE TABLE IF NOT EXISTS jsonb_t (rowid BIGINT, js JSONB);

INSERT INTO jsonb_t VALUES
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS win CASCADE;
CREATE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
30 changes: 15 additions & 15 deletions ci/schema/risingwave.sql
@@ -157,23 +157,23 @@ INSERT INTO "array_types" VALUES

DROP TABLE IF EXISTS "json_t" CASCADE;

CREATE TABLE IF NOT EXISTS "json_t" ("js" JSONB);
CREATE TABLE IF NOT EXISTS "json_t" (rowid BIGINT, "js" JSONB);

INSERT INTO "json_t" VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS "win" CASCADE;
CREATE TABLE "win" ("g" TEXT, "x" BIGINT, "y" BIGINT);
32 changes: 16 additions & 16 deletions ci/schema/snowflake.sql
@@ -123,23 +123,23 @@ INSERT INTO "struct" ("abc")
SELECT NULL UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': NULL};

CREATE OR REPLACE TABLE "json_t" ("js" VARIANT);
CREATE OR REPLACE TABLE "json_t" ("rowid" BIGINT, "js" VARIANT);

INSERT INTO "json_t" ("js")
SELECT parse_json('{"a": [1,2,3,4], "b": 1}') UNION
SELECT parse_json('{"a":null,"b":2}') UNION
SELECT parse_json('{"a":"foo", "c":null}') UNION
SELECT parse_json('null') UNION
SELECT parse_json('[42,47,55]') UNION
SELECT parse_json('[]') UNION
SELECT parse_json('"a"') UNION
SELECT parse_json('""') UNION
SELECT parse_json('"b"') UNION
SELECT NULL UNION
SELECT parse_json('true') UNION
SELECT parse_json('false') UNION
SELECT parse_json('42') UNION
SELECT parse_json('37.37');
INSERT INTO "json_t" ("rowid", "js")
SELECT 1, parse_json('{"a": [1,2,3,4], "b": 1}') UNION
SELECT 2, parse_json('{"a":null,"b":2}') UNION
SELECT 3, parse_json('{"a":"foo", "c":null}') UNION
SELECT 4, parse_json('null') UNION
SELECT 5, parse_json('[42,47,55]') UNION
SELECT 6, parse_json('[]') UNION
SELECT 7, parse_json('"a"') UNION
SELECT 8, parse_json('""') UNION
SELECT 9, parse_json('"b"') UNION
SELECT 10, NULL UNION
SELECT 11, parse_json('true') UNION
SELECT 12, parse_json('false') UNION
SELECT 13, parse_json('42') UNION
SELECT 14, parse_json('37.37');

CREATE OR REPLACE TABLE "win" ("g" TEXT, "x" BIGINT NOT NULL, "y" BIGINT);
INSERT INTO "win" VALUES
30 changes: 15 additions & 15 deletions ci/schema/sqlite.sql
@@ -101,23 +101,23 @@ CREATE TABLE diamonds (

DROP TABLE IF EXISTS json_t;

CREATE TABLE json_t (js JSON);
CREATE TABLE json_t (rowid BIGINT, js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS win;
CREATE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
50 changes: 23 additions & 27 deletions ci/schema/trino.sql
@@ -1,5 +1,4 @@
DROP TABLE IF EXISTS hive.default.diamonds;
CREATE TABLE hive.default.diamonds (
CREATE TABLE IF NOT EXISTS hive.default.diamonds (
"carat" DOUBLE,
"cut" VARCHAR,
"color" VARCHAR,
@@ -18,8 +17,7 @@ CREATE TABLE hive.default.diamonds (
CREATE OR REPLACE VIEW memory.default.diamonds AS
SELECT * FROM hive.default.diamonds;

DROP TABLE IF EXISTS hive.default.astronauts;
CREATE TABLE hive.default.astronauts (
CREATE TABLE IF NOT EXISTS hive.default.astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
@@ -52,8 +50,7 @@ CREATE TABLE hive.default.astronauts (
CREATE OR REPLACE VIEW memory.default.astronauts AS
SELECT * FROM hive.default.astronauts;

DROP TABLE IF EXISTS hive.default.batting;
CREATE TABLE hive.default.batting (
CREATE TABLE IF NOT EXISTS hive.default.batting (
"playerID" VARCHAR,
"yearID" BIGINT,
"stint" BIGINT,
@@ -84,24 +81,22 @@ CREATE TABLE hive.default.batting (
CREATE OR REPLACE VIEW memory.default.batting AS
SELECT * FROM hive.default.batting;

DROP TABLE IF EXISTS hive.default.awards_players;
CREATE TABLE hive.default.awards_players (
CREATE TABLE IF NOT EXISTS hive.default.awards_players (
"playerID" VARCHAR,
"awardID" VARCHAR,
"yearID" BIGINT,
"lgID" VARCHAR,
"tie" VARCHAR,
"notes" VARCHAR
) WITH (
external_location = 's3a://trino/awards-players',
external_location = 's3a://trino/awards_players',
format = 'PARQUET'
);

CREATE OR REPLACE VIEW memory.default.awards_players AS
SELECT * FROM hive.default.awards_players;

DROP TABLE IF EXISTS hive.default.functional_alltypes;
CREATE TABLE hive.default.functional_alltypes (
CREATE TABLE IF NOT EXISTS hive.default.functional_alltypes (
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" TINYINT,
@@ -116,9 +111,10 @@ CREATE TABLE hive.default.functional_alltypes (
"year" INTEGER,
"month" INTEGER
) WITH (
external_location = 's3a://trino/functional-alltypes',
external_location = 's3a://trino/functional_alltypes',
format = 'PARQUET'
);

CREATE OR REPLACE VIEW memory.default.functional_alltypes AS
SELECT * FROM hive.default.functional_alltypes;

@@ -160,23 +156,23 @@ INSERT INTO struct

DROP TABLE IF EXISTS memory.default.json_t;

CREATE TABLE IF NOT EXISTS memory.default.json_t (js JSON);
CREATE TABLE IF NOT EXISTS memory.default.json_t (rowid BIGINT, js JSON);

INSERT INTO memory.default.json_t VALUES
(JSON '{"a": [1,2,3,4], "b": 1}'),
(JSON '{"a":null,"b":2}'),
(JSON '{"a":"foo", "c":null}'),
(JSON 'null'),
(JSON '[42,47,55]'),
(JSON '[]'),
(JSON '"a"'),
(JSON '""'),
(JSON '"b"'),
(NULL),
(JSON 'true'),
(JSON 'false'),
(JSON '42'),
(JSON '37.37');
(1, JSON '{"a": [1,2,3,4], "b": 1}'),
(2, JSON '{"a":null,"b":2}'),
(3, JSON '{"a":"foo", "c":null}'),
(4, JSON 'null'),
(5, JSON '[42,47,55]'),
(6, JSON '[]'),
(7, JSON '"a"'),
(8, JSON '""'),
(9, JSON '"b"'),
(10, NULL),
(11, JSON 'true'),
(12, JSON 'false'),
(13, JSON '42'),
(14, JSON '37.37');

DROP TABLE IF EXISTS win;
CREATE TABLE win (g VARCHAR, x BIGINT, y BIGINT);
14 changes: 7 additions & 7 deletions compose.yaml
@@ -1,6 +1,6 @@
services:
clickhouse:
image: clickhouse/clickhouse-server:24.5.1.1763-alpine
image: clickhouse/clickhouse-server:24.6.2.17-alpine
ports:
- 8123:8123 # http port
- 9000:9000 # native protocol port
@@ -94,7 +94,7 @@ services:
- trino

minio:
image: bitnami/minio:2024.6.11
image: bitnami/minio:2024.7.16
environment:
MINIO_ROOT_USER: accesskey
MINIO_ROOT_PASSWORD: secretkey
@@ -199,7 +199,7 @@ services:
- druid

druid-coordinator:
image: apache/druid:29.0.1
image: apache/druid:30.0.0
hostname: coordinator
container_name: coordinator
volumes:
@@ -224,7 +224,7 @@
- druid

druid-broker:
image: apache/druid:29.0.1
image: apache/druid:30.0.0
hostname: broker
container_name: broker
volumes:
@@ -252,7 +252,7 @@
- druid

druid-historical:
image: apache/druid:29.0.1
image: apache/druid:30.0.0
hostname: historical
container_name: historical
volumes:
@@ -279,7 +279,7 @@
- druid

druid-middlemanager:
image: apache/druid:29.0.1
image: apache/druid:30.0.0
hostname: middlemanager
container_name: middlemanager
volumes:
@@ -307,7 +307,7 @@
- druid

druid:
image: apache/druid:29.0.1
image: apache/druid:30.0.0
hostname: router
container_name: router
volumes:
10 changes: 4 additions & 6 deletions conda/environment-arm64-flink.yml
@@ -3,9 +3,8 @@ channels:
- conda-forge
dependencies:
# runtime dependencies
- python =3.10
- python >=3.10,<3.12
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
@@ -28,7 +27,7 @@ dependencies:
- pins >=0.8.2
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- polars >=1,<2
- psycopg2 >=2.8.4
- pyarrow =11.0.0
- pyarrow-tests
@@ -57,7 +56,7 @@ dependencies:
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest >=8.2.0,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
@@ -88,7 +87,6 @@ dependencies:
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
@@ -98,4 +96,4 @@ dependencies:
- py4j =0.10.9.7
- pip
- pip:
- apache-flink >=1.19.0
- apache-flink >=1.19.1
6 changes: 2 additions & 4 deletions conda/environment-arm64.yml
@@ -5,7 +5,6 @@ dependencies:
# runtime dependencies
- python >=3.10
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
@@ -28,7 +27,7 @@ dependencies:
- pins >=0.8.2
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- polars >=1,<2
- psycopg2 >=2.8.4
- pyarrow >=10.0.1
- pyarrow-tests
@@ -56,7 +55,7 @@ dependencies:
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest >=8.2.0,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
@@ -87,7 +86,6 @@ dependencies:
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
6 changes: 2 additions & 4 deletions conda/environment.yml
@@ -5,7 +5,6 @@ dependencies:
# runtime dependencies
- apache-flink
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
@@ -29,7 +28,7 @@ dependencies:
- pip
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- polars >=1,<2
- psycopg2 >=2.8.4
- pyarrow >=10.0.1
- pyarrow-hotfix >=0.4
@@ -59,7 +58,7 @@ dependencies:
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest >=8.2.0,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
@@ -91,7 +90,6 @@ dependencies:
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
4 changes: 2 additions & 2 deletions docker/flink/Dockerfile
@@ -1,8 +1,8 @@
ARG FLINK_VERSION=1.19.0
ARG FLINK_VERSION=1.19.1
FROM flink:${FLINK_VERSION}

# ibis-flink requires PyFlink dependency
ARG FLINK_VERSION=1.19.0
ARG FLINK_VERSION=1.19.1
RUN wget -nv -P $FLINK_HOME/lib/ https://repo1.maven.org/maven2/org/apache/flink/flink-python/${FLINK_VERSION}/flink-python-${FLINK_VERSION}.jar

# install python3 and pip3
1 change: 1 addition & 0 deletions docs/.gitignore
@@ -18,6 +18,7 @@ objects.json
# generated notebooks and files
*.ipynb
*_files
backends/*_methods.json

# inventories
_inv
16 changes: 16 additions & 0 deletions docs/_freeze/posts/1tbc/index/execute-results/html.json

104 changes: 104 additions & 0 deletions docs/_freeze/posts/1tbc/index/figure-html/cell-10-output-1.svg
242 changes: 242 additions & 0 deletions docs/_freeze/posts/1tbc/index/figure-html/cell-13-output-1.svg

16 changes: 16 additions & 0 deletions docs/_freeze/posts/ibis-bench/index/execute-results/html.json

40 changes: 34 additions & 6 deletions docs/_quarto.yml
@@ -22,8 +22,6 @@ interlinks:
sources:
python:
url: https://docs.python.org/3/
sqlalchemy:
url: https://docs.sqlalchemy.org/
arrow:
url: https://arrow.apache.org/docs/
pandas:
@@ -286,12 +284,19 @@ quartodoc:
name: Generic expressions
desc: Scalars and columns of any element type.
contents:
# types
- name: Value
package: ibis.expr.types.generic
- name: Column
package: ibis.expr.types.generic
- name: Deferred
package: ibis.common.deferred
- name: Scalar
package: ibis.expr.types.generic

# constants
- name: deferred
package: ibis.expr.api
- name: literal
dynamic: true
signature_name: full
Expand All @@ -301,27 +306,37 @@ quartodoc:
- name: "null"
dynamic: true
signature_name: full
- name: range
dynamic: true
signature_name: full
- name: coalesce
dynamic: true
signature_name: full

# comparisons
- name: least
dynamic: true
signature_name: full
- name: greatest
dynamic: true
signature_name: full
- name: asc
- name: ifelse
dynamic: true
signature_name: full
- name: desc
- name: case
dynamic: true
signature_name: full
- name: ifelse

# sorting
- name: asc
dynamic: true
signature_name: full
- name: case
- name: desc
dynamic: true
signature_name: full

# conversions
# TODO: add decompile here once the API is not experimental
- name: to_sql
dynamic: true
signature_name: full
@@ -356,6 +371,12 @@ quartodoc:
dynamic: true
signature_name: full

# constants
- name: e
package: ibis.expr.api
- name: pi
package: ibis.expr.api

- kind: page
path: expression-strings
package: ibis.expr.types.strings
@@ -642,6 +663,13 @@ quartodoc:
desc: "Scalar user-defined function APIs"
contents:
- scalar
- kind: page
path: aggregate-udfs
summary:
name: Aggregate UDFs (experimental)
desc: "Aggregate user-defined function APIs"
contents:
- agg

- title: Configuration
desc: "Ibis configuration"
4 changes: 3 additions & 1 deletion docs/backends/_templates/api.qmd
@@ -2,7 +2,7 @@
#| echo: false
#| output: asis
from _utils import get_backend, render_methods
from _utils import get_backend, render_methods, dump_methods_to_json_for_algolia
# defined in the backend qmd, e.g., ../bigquery.qmd
module = BACKEND.lower()
@@ -17,5 +17,7 @@ methods = sorted(
if value.name != "do_connect"
)
dump_methods_to_json_for_algolia(backend, methods)
render_methods(backend, *methods, level=3)
```
22 changes: 22 additions & 0 deletions docs/backends/_utils.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
from functools import cache, partial
from typing import TYPE_CHECKING

@@ -78,3 +79,24 @@ def render_methods(obj, *methods: str, level: int) -> None:

def render_do_connect(backend, level: int = 4) -> None:
render_methods(get_backend(backend), "do_connect", level=level)


def dump_methods_to_json_for_algolia(backend, methods):
backend_algolia_methods = list()
backend_name = backend.canonical_path.split(".")[2]
base_url_template = "backends/{backend}#ibis.backends.{backend}.Backend.{method}"

for method in methods:
base_url = base_url_template.format(backend=backend_name, method=method)
record = {
"objectID": base_url,
"href": base_url,
"title": f"{backend_name}.Backend.{method}",
"text": getattr(backend.all_members[method].docstring, "value", ""),
"crumbs": ["Backend API", "API", f"{backend_name} methods"],
}

backend_algolia_methods.append(record)

with open(f"{backend_name}_methods.json", "w") as f:
json.dump(backend_algolia_methods, f)
6 changes: 3 additions & 3 deletions docs/backends/risingwave.qmd
@@ -30,7 +30,7 @@ con = ibis.risingwave.connect() # <1>

## `conda`

Install for Risingwave:
Install for RisingWave:

```{.bash}
conda install -c conda-forge ibis-risingwave
@@ -48,7 +48,7 @@

## `mamba`

Install for Risingwave:
Install for RisingWave:

```{.bash}
mamba install -c conda-forge ibis-risingwave
@@ -98,7 +98,7 @@ render_do_connect("risingwave")

```{python}
#| echo: false
BACKEND = "Risingwave"
BACKEND = "RisingWave"
```

{{< include ./_templates/api.qmd >}}
2 changes: 1 addition & 1 deletion docs/backends/support/matrix.qmd
@@ -72,7 +72,7 @@ import ibis.expr.operations as ops
def make_support_matrix():
"""Construct the backend operation support matrix data."""
from ibis.backends.sql.compiler import ALL_OPERATIONS
from ibis.backends.sql.compilers.base import ALL_OPERATIONS
support_matrix_ignored_operations = (ops.ScalarParameter,)
2 changes: 1 addition & 1 deletion docs/concepts/who.qmd
@@ -44,7 +44,7 @@ GitHub](https://github.com/ibis-project/ibis/graphs/contributors).
## History

Ibis was originally created by [Wes McKinney](https://wesmckinney.com/). Wes
created pandas, co-created Apache Arrrow, and co-founded Voltron Data (among
created pandas, co-created Apache Arrow, and co-founded Voltron Data (among
other things). Ibis was initially a pandas-like dataframe library for Apache
Impala, but has since grown to support many other backends and mature under the
stewardship of [Phillip Cloud](https://github.com/cpcloud) and others on the
2 changes: 1 addition & 1 deletion docs/how-to/analytics/basics.qmd
@@ -49,7 +49,7 @@ Use the `.mutate()` method to create new columns:

```{python}
t.mutate(bill_length_cm=t["bill_length_mm"] / 10).relocate(
t.columns[0:2], "bill_length_cm"
*t.columns[:2], "bill_length_cm"
)
```

2 changes: 1 addition & 1 deletion docs/how-to/extending/builtin.qmd
@@ -138,7 +138,7 @@ a defined API for a built-in database function.
See [the reference documentation](../../reference/index.qmd) for existing APIs.
:::

Let's the use the DuckDB backend to demonstrate how to access an aggregate
Let's use the DuckDB backend to demonstrate how to access an aggregate
function that isn't exposed in ibis:
[`kurtosis`](https://en.wikipedia.org/wiki/Kurtosis).
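
The rest of this page walks through the full example; as a rough sketch of
where it ends up (assuming DuckDB's built-in `kurtosis` aggregate and the
`penguins` example dataset), the function is declared with an empty body and
the backend supplies the implementation:

```python
import ibis
from ibis import udf


@udf.agg.builtin
def kurtosis(x: float) -> float:
    """Excess kurtosis of a column of numbers."""


t = ibis.examples.penguins.fetch()
expr = kurtosis(t.bill_length_mm)  # executes DuckDB's own `kurtosis` function
```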

97 changes: 97 additions & 0 deletions docs/how-to/extending/streaming.qmd
@@ -0,0 +1,97 @@
---
title: Ibis for streaming
---

Ibis has support for streaming operations, which can be executed on Flink,
Spark Structured Streaming, and RisingWave.

## Setup

We demonstrate the streaming operations with a real-time fraud detection example.
If you have Kafka set up in your infrastructure, you can connect to your existing Kafka
topics as well.

You can find our code setup [here](https://github.com/ibis-project/realtime-fraud-detection).
Feel free to clone the repository if you want to follow along.
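
The snippets below assume a Flink backend connection named `con`, with the
Kafka topics from the example repository already registered as tables (the
repository's setup code handles that part). A minimal sketch of the connection
itself:

```python
import ibis
from pyflink.table import EnvironmentSettings, TableEnvironment

# create a streaming TableEnvironment and hand it to Ibis
env_settings = EnvironmentSettings.in_streaming_mode()
table_env = TableEnvironment.create(env_settings)

con = ibis.flink.connect(table_env)
```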

## Window aggregation
Computes aggregations over windows.

The output schema consists of `window_start`, `window_end`, the group
by column (if any), and the aggregation results.

Tumble and hop windows are supported. Tumbling windows have a fixed size and do not overlap.
Hopping windows (aka sliding windows) are configured by both window size and window slide. The
additional window slide parameter controls how frequently a sliding window is started.

For more, see [Flink's documentation on Windowing TVFs](https://nightlies.apache.org/flink/flink-docs-release-1.19/docs/dev/table/sql/queries/window-tvf/)
and [Spark's documentation on time windows](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#types-of-time-windows).

```python
t = con.table("payment") # table corresponding to the `payment` topic

# tumble window
expr = (
t.window_by(time_col=t.createTime)
.tumble(size=ibis.interval(seconds=30))
.agg(by=["provinceId"], avgPayAmount=_.payAmount.mean())
)

# hop window
expr = (
t.window_by(time_col=t.createTime)
.hop(size=ibis.interval(seconds=30), slide=ibis.interval(seconds=15))
.agg(by=["provinceId"], avgPayAmount=_.payAmount.mean())
)
```

## Over aggregation
Computes aggregate values for every input row, over either a row range or a time range.

::: {.callout-note}
Spark Structured Streaming does not support aggregation using the `OVER` syntax. You need to use
window aggregation to aggregate over time windows.
:::

```python
expr = (
t.select(
province_id=t.provinceId,
pay_amount=t.payAmount.sum().over(
range=(-ibis.interval(seconds=10), 0),
group_by=t.provinceId,
order_by=t.createTime,
),
)
)
```


## Stream-table join
Joining a stream with a static table.

```python
provinces = (
"Beijing",
"Shanghai",
"Hangzhou",
"Shenzhen",
"Jiangxi",
"Chongqing",
"Xizang",
)
province_id_to_name_df = pd.DataFrame(
enumerate(provinces), columns=["provinceId", "province"]
)
expr = t.join(province_id_to_name_df, ["provinceId"])
```

## Stream-stream join
Joining two streams.

```python
order = con.table("order") # table corresponding to the `order` topic
expr = t.join(
order, [t.orderId == order.orderId, t.createTime == order.createTime]
)
```
@@ -13,7 +13,8 @@
@st.cache_data
def get_emoji():
resp = requests.get(
"https://raw.githubusercontent.com/omnidan/node-emoji/master/lib/emoji.json"
"https://raw.githubusercontent.com/omnidan/node-emoji/master/lib/emoji.json",
timeout=60,
)
resp.raise_for_status()
emojis = resp.json()
@@ -26,10 +27,10 @@ def get_emoji():
@st.cache_data
def query():
return (
con.tables.recipes.relabel("snake_case")
con.tables.recipes.rename("snake_case")
.mutate(ner=_.ner.map(lambda n: n.lower()).unnest())
.ner.topk(max(options))
.relabel(dict(ner="ingredient"))
.rename(ingredient="ner")
.to_pandas()
.assign(
emoji=lambda df: df.ingredient.map(
@@ -42,7 +43,7 @@ def query():

emojis = get_emoji()

con = st.experimental_connection("ch", type=IbisConnection)
con = st.connection("ch", type=IbisConnection)

if n := st.radio("Ingredients", options, index=1, horizontal=True):
table, whole = st.columns((2, 1))
2 changes: 1 addition & 1 deletion docs/how-to/visualization/streamlit.qmd
@@ -1,6 +1,6 @@
# Streamlit + Ibis

Ibis supports the [streamlit `experimental_connection` interface](https://blog.streamlit.io/introducing-st-experimental_connection/), making it easier than ever to combine the powers of both tools!
Ibis supports the [streamlit `connection` interface](https://docs.streamlit.io/develop/concepts/connections/connecting-to-data), making it easier than ever to combine the powers of both tools!

Check out the example application below that shows the top N ingredients from a corpus of recipes using [the ClickHouse backend](../../backends/clickhouse.qmd)!
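
At its core the integration is a couple of lines. A minimal sketch, assuming a
`[connections.ch]` entry in `.streamlit/secrets.toml` pointing at ClickHouse
and that `IbisConnection` is importable from `ibis.streamlit`:

```python
import streamlit as st
from ibis.streamlit import IbisConnection

con = st.connection("ch", type=IbisConnection)  # "ch" comes from secrets.toml

# tables are exposed on `con.tables`; any Ibis expression works from here
st.dataframe(con.tables.recipes.limit(10).to_pandas())
```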

4 changes: 4 additions & 0 deletions docs/posts/1tbc/.gitignore
@@ -0,0 +1,4 @@
ibis-bench
tpch_data
results_data
bench_logs_v*
Binary file added docs/posts/1tbc/ibis-datafusion-sort.gif
Binary file added docs/posts/1tbc/ibis-duckdb-sort.gif
762 changes: 762 additions & 0 deletions docs/posts/1tbc/index.qmd

Binary file added docs/posts/1tbc/polars-lazy-sort.gif
Binary file added docs/posts/1tbc/polars-lazy-streaming-sort.gif
20 changes: 10 additions & 10 deletions docs/posts/flink-announcement/index.qmd
@@ -66,15 +66,15 @@ pixi add ibis-flink

## Spinning up the services using Docker Compose

The [ibis-project/ibis-flink-example GitHub
repository](https://github.com/ibis-project/ibis-flink-example) includes the
The [ibis-project/realtime-fraud-detection GitHub
repository](https://github.com/ibis-project/realtime-fraud-detection) includes the
relevant Docker Compose configuration for this tutorial. Clone the repository,
and run `docker compose up` from the cloned directory to create Kafka topics,
generate sample data, and launch a Flink cluster:

```bash
git clone https://github.com/claypotai/ibis-flink-example.git
cd ibis-flink-example
git clone https://github.com/claypotai/realtime-fraud-detection.git
cd realtime-fraud-detection
docker compose up
```

@@ -88,12 +88,12 @@ After a few seconds, you should see messages indicating your Kafka environment
is ready:

```bash
ibis-flink-example-init-kafka-1 | Successfully created the following topics:
ibis-flink-example-init-kafka-1 | payment_msg
ibis-flink-example-init-kafka-1 | sink
ibis-flink-example-init-kafka-1 exited with code 0
ibis-flink-example-data-generator-1 | Connected to Kafka
ibis-flink-example-data-generator-1 | Producing 20000 records to Kafka topic payment_msg
realtime-fraud-detection-init-kafka-1 | Successfully created the following topics:
realtime-fraud-detection-init-kafka-1 | payment_msg
realtime-fraud-detection-init-kafka-1 | sink
realtime-fraud-detection-init-kafka-1 exited with code 0
realtime-fraud-detection-data-generator-1 | Connected to Kafka
realtime-fraud-detection-data-generator-1 | Producing 20000 records to Kafka topic payment_msg
```

This example uses mock payments data. The `payment_msg` Kafka topic contains
3 changes: 3 additions & 0 deletions docs/posts/ibis-bench/.gitignore
@@ -0,0 +1,3 @@
tpch_data
results_data
bench_logs_v*
Binary file added docs/posts/ibis-bench/figure1.png
1,314 changes: 1,314 additions & 0 deletions docs/posts/ibis-bench/index.qmd

146 changes: 146 additions & 0 deletions docs/posts/run-on-snowflake/index.qmd
@@ -0,0 +1,146 @@
---
title: "Ibis - Now flying on Snowflake"
author:
- Phillip Cloud
- Tyler White
error: false
date: "2024-06-19"
categories:
- blog
- new feature
- snowflake
---

Ibis allows you to push compute down to where your data lives, with
performance as powerful as the backend you're connected to. But
what happens if Ibis is running _inside_ the backend you're connected to?

In this post, we will discuss how we got Ibis running on a Snowflake virtual
warehouse.

## Why would we want to do this?

Snowflake has released several features to enable users to execute native
Python code on the platform. These features include a new notebook development
interface, Streamlit in Snowflake, the Native App framework, and Python within
functions and stored procedures.

If users could use Ibis directly within the platform, developers could more
easily switch between a local execution engine during development and
efficiently deploy and operationalize that same code on Snowflake.

But this isn't without its challenges; there were a few things we needed to
figure out, and these are the questions we will answer throughout the post.

- How can we get an Ibis connection to Snowflake - from within Snowflake?
- How can we use third-party packages in Snowflake?
- How are we going to test this to ensure it works?

## Getting the Ibis connection

The release of Ibis 9.0 includes the introduction of a new method,
[`from_snowpark`](../../backends/snowflake.qmd#ibis.backends.snowflake.Backend.from_snowpark)
to provide users with a convenient mechanism to take an existing Snowpark
session and create an Ibis Snowflake backend instance with it.

Here's what this looks like:

```python
import ibis
import snowflake.snowpark as sp

session = sp.Session.builder.create()
con = ibis.snowflake.from_snowpark(session)
```

This connection uses the same session within Snowflake, so temporary objects
can be accessed using Snowpark or Ibis in the same process! The contexts of
stored procedures already have a session available, meaning we can use this
new method and start writing Ibis expressions.
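
For example (a sketch with an illustrative table name, not code from the
actual integration), an object created through Snowpark is immediately
queryable through Ibis because both sides share the session:

```python
# create a temporary table with Snowpark ...
session.create_dataframe(
    [(1, "a"), (2, "b")], schema=["id", "letter"]
).write.save_as_table("tmp_letters", table_type="temporary")

# ... and read it back with Ibis on the same session
t = con.table("TMP_LETTERS")  # unquoted Snowflake identifiers are uppercased
print(t.count().execute())
```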

The way this works is that Ibis plucks out an attribute on the Snowpark
session, which gives us the [`snowflake-connector-python`](https://github.com/snowflakedb/snowflake-connector-python) [`SnowflakeConnection`](https://github.com/snowflakedb/snowflake-connector-python/blob/42fa6ebe9404e0e17afdacfcaceb311dda5cde3e/src/snowflake/connector/connection.py#L313) instance used
by Snowpark.

Since Ibis uses `snowflake-connector-python` for all Snowflake-related
connections, we just reuse that existing instance.

## Uploading third-party packages

Snowflake has many packages already made available out of the box through the
Snowflake Anaconda channel, but unfortunately, Ibis and a few of its
dependencies aren't available. Packages containing pure Python code can be
uploaded to stages for use within the platform, so we devised a clever solution
to upload and reference these to get them working.

```python
import os
import shutil
import tempfile


def add_packages(d, session):
import parsy
import pyarrow_hotfix
import rich
import sqlglot
import ibis

for module in (ibis, parsy, pyarrow_hotfix, rich, sqlglot):
pkgname = module.__name__
pkgpath = os.path.join(d, pkgname)
shutil.copytree(os.path.dirname(module.__file__), pkgpath)
session.add_import(pkgname, import_path=pkgname)


d = tempfile.TemporaryDirectory()
os.chdir(d.name)
add_packages(d.name, session)
```

We can now register a stored procedure that imports these modules and is able
to reference some of the additional dependencies that are already available.

```python
session.sproc.register(
ibis_sproc,
return_type=sp.types.StructType(),
name="THE_IBIS_SPROC",
imports=["ibis", "parsy", "pyarrow_hotfix", "sqlglot", "rich"],
packages=[
"snowflake-snowpark-python",
"toolz",
"atpublic",
"pyarrow",
"pandas",
"numpy",
],
)
```

::: {.callout-note}
## More permanent solutions to packaging

It's possible that a more permanent solution could be achieved with a `put` or
`put_stream` method rather than using the `add_import` method. This would allow
for the packages to be referenced across multiple stored procedures or other
places within the Snowflake platform.
:::
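
As a rough sketch of what that could look like (the helper and stage name here
are hypothetical, not part of the actual implementation):

```python
import io
import os
import zipfile


def upload_package(module, session, stage="@ibis_packages"):
    # zip the installed package in memory and push it to a named stage so it
    # can be reused by multiple stored procedures via their `imports` list
    pkgdir = os.path.dirname(module.__file__)
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        for root, _, files in os.walk(pkgdir):
            for name in files:
                path = os.path.join(root, name)
                zf.write(path, os.path.relpath(path, os.path.dirname(pkgdir)))
    buf.seek(0)
    session.file.put_stream(
        buf, f"{stage}/{module.__name__}.zip", auto_compress=False, overwrite=True
    )
```

Stored procedures could then reference `@ibis_packages/<package>.zip` in their
`imports` instead of re-uploading the packages in every session.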

## Testing!

While this is a clever solution, we must ensure it works consistently. A
special unit test has been written for this exact case! The test creates a
stored procedure by adding the necessary imports to the Snowpark session.
Within the stored procedure, we define an Ibis expression, and we use the Ibis
`to_sql` method to extract the generated SQL to pass to Snowpark to return a
Snowpark DataFrame!
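
In rough outline (names here are illustrative, not the actual test code), the
body of such a stored procedure looks something like this:

```python
import ibis


def ibis_sproc(session):
    # wrap the session that Snowflake hands to the stored procedure
    con = ibis.snowflake.from_snowpark(session)

    t = con.tables.FUNCTIONAL_ALLTYPES  # hypothetical test table
    expr = t.group_by("string_col").agg(n=t.count())

    # hand the Ibis-generated SQL back to Snowpark as a DataFrame
    return session.sql(ibis.to_sql(expr))
```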

## Conclusion

While it's usually pretty easy to add new backends with Ibis, this was the
first instance of supporting an additional interface to an existing backend.

We hope you take this for a spin! If you run into any challenges or want
additional support, open an [issue](https://github.com/ibis-project/ibis/issues)
or join us on [Zulip](https://ibis-project.zulipchat.com/) and let us know!
6 changes: 3 additions & 3 deletions docs/posts/unified-stream-batch/index.qmd
@@ -172,7 +172,7 @@ website](https://docs.docker.com/compose/install/).
* Follow [the setup
tutorial](../../tutorials/open-source-software/apache-flink/0_setup.qmd) to
install the Flink backend for Ibis.
* Clone the [example repository](https://github.com/ibis-project/ibis-flink-example).
* Clone the [example repository](https://github.com/ibis-project/realtime-fraud-detection).
:::

::: {.callout-note}
@@ -199,8 +199,8 @@ would extract a chunk of the data and load it in batch:
# | code-fold: true
# | include: false
!pip install apache-flink kafka-python
!git clone https://github.com/ibis-project/ibis-flink-example
!cd ibis-flink-example && docker compose up kafka init-kafka data-generator -d && sleep 10 && cd ..
!git clone https://github.com/ibis-project/realtime-fraud-detection
!cd realtime-fraud-detection && docker compose up kafka init-kafka data-generator -d && sleep 10 && cd ..
```

```{python}
109 changes: 109 additions & 0 deletions docs/release_notes_generated.qmd

File renamed without changes.
1 change: 1 addition & 0 deletions docs/why.qmd
@@ -340,6 +340,7 @@ Ibis already works well with dashboarding libraries like:
- [Streamlit](https://github.com/streamlit/streamlit)
- [Dash](https://github.com/plotly/dash)
- [Quarto dashboards](https://github.com/quarto-dev/quarto-cli)
- [Shiny](https://shiny.posit.co/py/)

Ibis already works well with machine learning libraries like:

18 changes: 9 additions & 9 deletions flake.lock
9 changes: 6 additions & 3 deletions gen_redirects.py
@@ -10,7 +10,8 @@
"/backends/{version}/BigQuery/": "/backends/bigquery/",
"/backends/{version}/Clickhouse/": "/backends/clickhouse/",
"/backends/{version}/Dask/": "/backends/dask/",
"/backends/{version}/Datafusion/": "/backends/datafusion/",
"/backends/{version}/DataFusion/": "/backends/datafusion/",
"/backends/{version}/Datafusion/": "/backends/datafusion/", # For backwards compatibility
"/backends/{version}/Druid/": "/backends/druid/",
"/backends/{version}/DuckDB/": "/backends/duckdb/",
"/backends/{version}/Impala/": "/backends/impala/",
@@ -30,7 +31,8 @@
"/docs/{version}/backends/BigQuery/": "/backends/bigquery/",
"/docs/{version}/backends/Clickhouse/": "/backends/clickhouse/",
"/docs/{version}/backends/Dask/": "/backends/dask/",
"/docs/{version}/backends/Datafusion/": "/backends/datafusion/",
"/docs/{version}/backends/DataFusion/": "/backends/datafusion/",
"/docs/{version}/backends/Datafusion/": "/backends/datafusion/", # For backwards compatibility
"/docs/{version}/backends/Druid/": "/backends/druid/",
"/docs/{version}/backends/DuckDB/": "/backends/duckdb/",
"/docs/{version}/backends/Impala/": "/backends/impala/",
@@ -73,7 +75,8 @@
"/backends/BigQuery/": "/backends/bigquery/",
"/backends/Clickhouse/": "/backends/clickhouse/",
"/backends/Dask/": "/backends/dask/",
"/backends/Datafusion/": "/backends/datafusion/",
"/backends/DataFusion/": "/backends/datafusion/",
"/backends/Datafusion/": "/backends/datafusion/", # For backwards compatibility
"/backends/Druid/": "/backends/druid/",
"/backends/DuckDB/": "/backends/duckdb/",
"/backends/Impala/": "/backends/impala/",
6 changes: 3 additions & 3 deletions ibis/__init__.py
@@ -2,7 +2,7 @@

from __future__ import annotations

__version__ = "9.1.0"
__version__ = "9.2.0"

import warnings
from typing import Any
@@ -123,8 +123,8 @@ def connect(*args, **kwargs):
proxy._from_url = backend._from_url
proxy._to_sqlglot = backend._to_sqlglot
# Add any additional methods that should be exposed at the top level
for name in getattr(backend, "_top_level_methods", ()):
setattr(proxy, name, getattr(backend, name))
for attr in getattr(backend, "_top_level_methods", ()):
setattr(proxy, attr, getattr(backend, attr))

return proxy

67 changes: 27 additions & 40 deletions ibis/backends/__init__.py
@@ -9,7 +9,6 @@
import urllib.parse
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar
from urllib.parse import parse_qs, urlparse

import ibis
import ibis.common.exceptions as exc
@@ -21,6 +20,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, MutableMapping
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
@@ -767,13 +767,12 @@ class BaseBackend(abc.ABC, _FileIOHandler):
def __init__(self, *args, **kwargs):
self._con_args: tuple[Any] = args
self._con_kwargs: dict[str, Any] = kwargs
self._can_reconnect: bool = True
# expression cache
self._query_cache = RefCountedCache(
populate=self._load_into_cache,
lookup=lambda name: self.table(name).op(),
finalize=self._clean_up_cached_table,
generate_name=functools.partial(util.gen_name, "cache"),
key=lambda expr: expr.op(),
)

@property
@@ -858,7 +857,10 @@ def _convert_kwargs(kwargs: MutableMapping) -> None:
# TODO(kszucs): should call self.connect(*self._con_args, **self._con_kwargs)
def reconnect(self) -> None:
"""Reconnect to the database already configured with connect."""
self.do_connect(*self._con_args, **self._con_kwargs)
if self._can_reconnect:
self.do_connect(*self._con_args, **self._con_kwargs)
else:
raise exc.IbisError(f"Cannot reconnect to unconfigured {self.name} backend")

def do_connect(self, *args, **kwargs) -> None:
"""Connect to database specified by `args` and `kwargs`."""
@@ -1018,9 +1020,15 @@ def _register_udfs(self, expr: ir.Expr) -> None:
if self.supports_python_udfs:
raise NotImplementedError(self.name)

def _register_in_memory_tables(self, expr: ir.Expr):
def _register_in_memory_tables(self, expr: ir.Expr) -> None:
for memtable in expr.op().find(ops.InMemoryTable):
self._register_in_memory_table(memtable)

def _register_in_memory_table(self, op: ops.InMemoryTable):
if self.supports_in_memory_tables:
raise NotImplementedError(self.name)
raise NotImplementedError(
f"{self.name} must implement `_register_in_memory_table` to support in-memory tables"
)

def _run_pre_execute_hooks(self, expr: ir.Expr) -> None:
"""Backend-specific hooks to run before an expression is executed."""
@@ -1222,8 +1230,7 @@ def _cached(self, expr: ir.Table):
"""
op = expr.op()
if (result := self._query_cache.get(op)) is None:
self._query_cache.store(expr)
result = self._query_cache[op]
result = self._query_cache.store(expr)
return ir.CachedTable(result)

def _release_cached(self, expr: ir.CachedTable) -> None:
@@ -1235,12 +1242,12 @@ def _release_cached(self, expr: ir.CachedTable) -> None:
Cached expression to release
"""
del self._query_cache[expr.op()]
self._query_cache.release(expr.op().name)

def _load_into_cache(self, name, expr):
raise NotImplementedError(self.name)

def _clean_up_cached_table(self, op):
def _clean_up_cached_table(self, name):
raise NotImplementedError(self.name)

def _transpile_sql(self, query: str, *, dialect: str | None = None) -> str:
@@ -1346,6 +1353,11 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
orig_kwargs = kwargs.copy()
kwargs = dict(urllib.parse.parse_qsl(parsed.query))

# convert single-element parameter lists to single values
for name, value in kwargs.items():
if len(value) == 1:
kwargs[name] = value[0]

if scheme == "file":
path = parsed.netloc + parsed.path
# Merge explicit kwargs with query string, explicit kwargs
Expand All @@ -1363,35 +1375,21 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
else:
raise ValueError(f"Don't know how to connect to {resource!r}")

if kwargs:
# If there are kwargs (either explicit or from the query string),
# re-add them to the parsed URL
query = urllib.parse.urlencode(kwargs)
parsed = parsed._replace(query=query)

if scheme in ("postgres", "postgresql"):
# Treat `postgres://` and `postgresql://` the same
scheme = "postgres"

# Convert all arguments back to a single URL string
url = parsed.geturl()
if "://" not in url:
# urllib may roundtrip `duckdb://` to `duckdb:`. Here we re-add the
# missing `//`.
url = url.replace(":", "://", 1)
# Treat `postgres://` and `postgresql://` the same
scheme = scheme.replace("postgresql", "postgres")

try:
backend = getattr(ibis, scheme)
except AttributeError:
raise ValueError(f"Don't know how to connect to {resource!r}") from None

return backend._from_url(url, **orig_kwargs)
return backend._from_url(parsed, **kwargs)


class UrlFromPath:
__slots__ = ()

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs: Any) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Parameters
@@ -1407,7 +1405,6 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
A backend instance
"""
url = urlparse(url)
netloc = url.netloc
parts = list(filter(None, (netloc, url.path[bool(netloc) :])))
database = Path(*parts) if parts and parts != [":memory:"] else ":memory:"
@@ -1418,16 +1415,6 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
elif isinstance(database, Path):
database = database.absolute()

query_params = parse_qs(url.query)

for name, value in query_params.items():
if len(value) > 1:
kwargs[name] = value
elif len(value) == 1:
kwargs[name] = value[0]
else:
raise exc.IbisError(f"Invalid URL parameter: {name}")

self._convert_kwargs(kwargs)
return self.connect(database=database, **kwargs)

@@ -1437,7 +1424,7 @@ class NoUrl:

name: str

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend:
"""Connect to the backend with empty url.
Parameters
153 changes: 77 additions & 76 deletions ibis/backends/bigquery/__init__.py
@@ -8,8 +8,8 @@
import os
import re
from typing import TYPE_CHECKING, Any, Optional
from urllib.parse import parse_qs, urlparse

import google.api_core.exceptions
import google.auth.credentials
import google.cloud.bigquery as bq
import google.cloud.bigquery_storage_v1 as bqstorage
@@ -31,20 +31,20 @@
rename_partitioned_column,
schema_from_bigquery_table,
)
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.backends.bigquery.datatypes import BigQuerySchema
from ibis.backends.bigquery.udf.core import PythonToJavaScriptTranslator
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import BigQueryCompiler
from ibis.backends.sql.datatypes import BigQueryType

if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Mapping
from collections.abc import Iterable, Mapping
from pathlib import Path
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
import pyarrow as pa
from google.cloud.bigquery.table import RowIterator


SCOPES = ["https://www.googleapis.com/auth/bigquery"]
@@ -54,7 +54,7 @@
"https://www.googleapis.com/auth/drive",
]
CLIENT_ID = "546535678771-gvffde27nd83kfl6qbrnletqvkdmsese.apps.googleusercontent.com"
CLIENT_SECRET = "iU5ohAF2qcqrujegE3hQ1cPt"
CLIENT_SECRET = "iU5ohAF2qcqrujegE3hQ1cPt" # noqa: S105


def _create_user_agent(application_name: str) -> str:
@@ -171,14 +171,17 @@ def _session_dataset(self):
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
raw_name = op.name

project = self._session_dataset.project
dataset = self._session_dataset.dataset_id
session_dataset = self._session_dataset
project = session_dataset.project
dataset = session_dataset.dataset_id

if raw_name not in self.list_tables(database=(project, dataset)):
table_ref = bq.TableReference(session_dataset, raw_name)
try:
self.client.get_table(table_ref)
except google.api_core.exceptions.NotFound:
table_id = sg.table(
raw_name, db=dataset, catalog=project, quoted=False
).sql(dialect=self.name)

bq_schema = BigQuerySchema.from_ibis(op.schema)
load_job = self.client.load_table_from_dataframe(
op.data.to_frame(),
@@ -329,12 +332,10 @@ def read_json(
)
return self._read_file(path, table_name=table_name, job_config=job_config)

def _from_url(self, url: str, **kwargs):
result = urlparse(url)
params = parse_qs(result.query)
def _from_url(self, url: ParseResult, **kwargs):
return self.connect(
project_id=result.netloc or params.get("project_id", [""])[0],
dataset_id=result.path[1:] or params.get("dataset_id", [""])[0],
project_id=url.netloc or kwargs.get("project_id", [""])[0],
dataset_id=url.path[1:] or kwargs.get("dataset_id", [""])[0],
**kwargs,
)

@@ -477,6 +478,37 @@ def do_connect(

self.partition_column = partition_column

@util.experimental
@classmethod
def from_connection(
cls,
client: bq.Client,
partition_column: str | None = "PARTITIONTIME",
storage_client: bqstorage.BigQueryReadClient | None = None,
dataset_id: str = "",
) -> Backend:
"""Create a BigQuery `Backend` from an existing ``Client``.
Parameters
----------
client
A `Client` from the `google.cloud.bigquery` package.
partition_column
Identifier to use instead of default `_PARTITIONTIME` partition
column. Defaults to `'PARTITIONTIME'`.
storage_client
A `BigQueryReadClient` from the `google.cloud.bigquery_storage_v1`
package.
dataset_id
A dataset id that lives inside of the project attached to `client`.
"""
return ibis.bigquery.connect(
client=client,
partition_column=partition_column,
storage_client=storage_client,
dataset_id=dataset_id,
)

def disconnect(self) -> None:
self.client.close()

@@ -547,10 +579,9 @@ def table(
self, name: str, database: str | None = None, schema: str | None = None
) -> ir.Table:
table_loc = self._warn_and_create_table_loc(database, schema)
table = sg.parse_one(f"`{name}`", into=sge.Table, read=self.name)

table = sg.parse_one(name, into=sge.Table, read=self.name)

# Bigquery, unlike other bcakends, had existing support for specifying
# Bigquery, unlike other backends, had existing support for specifying
# table hierarchy in the table name, e.g. con.table("dataset.table_name")
# so here we have an extra layer of disambiguation to handle.

@@ -582,8 +613,6 @@ def table(

project, dataset = self._parse_project_and_dataset(database)

table = sg.parse_one(name, into=sge.Table, read=self.name)

bq_table = self.client.get_table(
bq.TableReference(
bq.DatasetReference(project=project, dataset_id=dataset),
@@ -623,14 +652,6 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
)
return BigQuerySchema.to_ibis(job.schema)

def _execute(self, stmt, query_parameters=None):
job_config = bq.job.QueryJobConfig(query_parameters=query_parameters or [])
query = self.client.query(
stmt, job_config=job_config, project=self.billing_project
)
query.result() # blocks until finished
return query

def _to_sqlglot(
self,
expr: ir.Expr,
@@ -669,7 +690,7 @@ def _to_sqlglot(
).transform(_remove_null_ordering_from_unsupported_window)
return query

def raw_sql(self, query: str, params=None):
def raw_sql(self, query: str, params=None, page_size: int | None = None):
query_parameters = [
bigquery_param(
param.type(),
@@ -684,7 +705,14 @@ def raw_sql(self, query: str, params=None):
]
with contextlib.suppress(AttributeError):
query = query.sql(self.dialect)
return self._execute(query, query_parameters=query_parameters)

job_config = bq.job.QueryJobConfig(query_parameters=query_parameters or [])
return self.client.query_and_wait(
query,
job_config=job_config,
project=self.billing_project,
page_size=page_size,
)

@property
def current_catalog(self) -> str:
@@ -735,14 +763,23 @@ def execute(self, expr, params=None, limit="default", **kwargs):
Output from execution
"""
from ibis.backends.bigquery.converter import BigQueryPandasData

self._run_pre_execute_hooks(expr)

schema = expr.as_table().schema() - ibis.schema({"_TABLE_SUFFIX": "string"})

sql = self.compile(expr, limit=limit, params=params, **kwargs)
self._log(sql)
query = self.raw_sql(sql, params=params, **kwargs)

schema = expr.as_table().schema() - ibis.schema({"_TABLE_SUFFIX": "string"})
result = self.fetch_from_query(query, schema)
arrow_t = query.to_arrow(
progress_bar_type=None, bqstorage_client=self.storage_client
)

result = BigQueryPandasData.convert_table(
arrow_t.to_pandas(timestamp_as_object=True), schema
)

return expr.__pandas_result__(result, schema=schema)

@@ -784,40 +821,6 @@ def insert(
overwrite=overwrite,
)

def fetch_from_query(self, query, schema):
from ibis.backends.bigquery.converter import BigQueryPandasData

arrow_t = self._query_to_arrow(query)
df = arrow_t.to_pandas(timestamp_as_object=True)
return BigQueryPandasData.convert_table(
df, schema - ibis.schema({"_TABLE_SUFFIX": "string"})
)

def _query_to_arrow(
self,
query,
*,
method: (
Callable[[RowIterator], pa.Table | Iterable[pa.RecordBatch]] | None
) = None,
chunk_size: int | None = None,
):
if method is None:
method = lambda result: result.to_arrow(
progress_bar_type=None,
bqstorage_client=self.storage_client,
)
query_result = query.result(page_size=chunk_size)
# workaround potentially not having the ability to create read sessions
# in the dataset project
orig_project = query_result._project
query_result._project = self.billing_project
try:
arrow_obj = method(query_result)
finally:
query_result._project = orig_project
return arrow_obj

def to_pyarrow(
self,
expr: ir.Expr,
@@ -831,7 +834,10 @@ def to_pyarrow(
sql = self.compile(expr, limit=limit, params=params, **kwargs)
self._log(sql)
query = self.raw_sql(sql, params=params, **kwargs)
table = self._query_to_arrow(query)
table = query.to_arrow(
progress_bar_type=None, bqstorage_client=self.storage_client
)
table = table.rename_columns(list(expr.as_table().schema().names))
return expr.__pyarrow_result__(table)

def to_pyarrow_batches(
@@ -850,14 +856,8 @@ def to_pyarrow_batches(
self._register_in_memory_tables(expr)
sql = self.compile(expr, limit=limit, params=params, **kwargs)
self._log(sql)
query = self.raw_sql(sql, params=params, **kwargs)
batch_iter = self._query_to_arrow(
query,
method=lambda result: result.to_arrow_iterable(
bqstorage_client=self.storage_client
),
chunk_size=chunk_size,
)
query = self.raw_sql(sql, params=params, page_size=chunk_size, **kwargs)
batch_iter = query.to_arrow_iterable(bqstorage_client=self.storage_client)
return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter)

def _gen_udf_name(self, name: str, schema: Optional[str]) -> str:
@@ -1171,10 +1171,11 @@ def drop_view(
def _load_into_cache(self, name, expr):
self.create_table(name, expr, schema=expr.schema(), temp=True)

def _clean_up_cached_table(self, op):
def _clean_up_cached_table(self, name):
self.drop_table(
op.name,
name,
database=(self._session_dataset.project, self._session_dataset.dataset_id),
force=True,
)

def _get_udf_source(self, udf_node: ops.ScalarUDF):
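A minimal sketch of how the reworked `raw_sql` is meant to be consumed now that it returns the iterator produced by `client.query_and_wait` rather than an unstarted job (this mirrors `test_raw_sql` further down; the project and dataset names are placeholders):

import ibis

con = ibis.bigquery.connect(project_id="my-project", dataset_id="my_dataset")

# query_and_wait has already executed the query, so the rows can be pulled
# straight into Arrow without a separate .result() call
rows = con.raw_sql("SELECT 1 AS a", page_size=1000)
table = rows.to_arrow()
print(table.column("a").to_pylist())  # [1]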
49 changes: 31 additions & 18 deletions ibis/backends/bigquery/tests/system/test_client.py
Original file line number Diff line number Diff line change
@@ -3,9 +3,11 @@
import collections
import datetime
import decimal
from urllib.parse import urlparse

import pandas as pd
import pandas.testing as tm
import pyarrow as pa
import pytest
import pytz

@@ -114,22 +116,6 @@ def test_different_partition_col_name(monkeypatch, con):
assert col in parted_alltypes.columns


def test_subquery_scalar_params(alltypes):
t = alltypes
p = ibis.param("timestamp").name("my_param")
expr = (
t[["float_col", "timestamp_col", "int_col", "string_col"]][
lambda t: t.timestamp_col < p
]
.group_by("string_col")
.aggregate(foo=lambda t: t.float_col.sum())
.foo.count()
.name("count")
)
result = expr.compile(params={p: "20140101"})
assert "datetime('2014-01-01T00:00:00')" in result


def test_repr_struct_of_array_of_struct():
name = "foo"
p = ibis.param("struct<x: array<struct<y: array<double>>>>").name(name)
Expand Down Expand Up @@ -186,8 +172,9 @@ def test_repr_struct_of_array_of_struct():


def test_raw_sql(con):
result = con.raw_sql("SELECT 1").result()
assert [row.values() for row in result] == [(1,)]
result = con.raw_sql("SELECT 1 as a").to_arrow()
expected = pa.Table.from_pydict({"a": [1]})
assert result.equals(expected)


def test_parted_column_rename(parted_alltypes):
Expand Down Expand Up @@ -436,9 +423,35 @@ def test_create_temp_table_from_scratch(project_id, dataset_id):
assert len(t.execute()) == 1


def test_create_table_from_scratch_with_spaces(project_id, dataset_id):
con = ibis.bigquery.connect(project_id=project_id, dataset_id=dataset_id)
name = f"{gen_name('bigquery_temp_table')} with spaces"
df = con.tables.functional_alltypes.limit(1)
t = con.create_table(name, obj=df)
try:
assert len(t.execute()) == 1
finally:
con.drop_table(name)


def test_table_suffix():
con = ibis.connect("bigquery://ibis-gbq")
t = con.table("gsod*", database="bigquery-public-data.noaa_gsod")
expr = t.filter(t._TABLE_SUFFIX == "1929", t.max != 9999.9).head(1)
result = expr.execute()
assert not result.empty


def test_parameters_in_url_connect(mocker):
spy = mocker.spy(ibis.bigquery, "_from_url")
parsed = urlparse("bigquery://ibis-gbq?location=us-east1")
ibis.connect("bigquery://ibis-gbq?location=us-east1")
spy.assert_called_once_with(parsed, location="us-east1")


def test_complex_column_name(con):
expr = ibis.literal(1).name(
"StringToTimestamp_StringConcat_date_string_col_' America_New_York'_'%F %Z'"
)
result = con.to_pandas(expr)
assert result == 1
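
The `_TABLE_SUFFIX` pseudo-column that `execute` strips from result schemas is what `test_table_suffix` relies on; a short sketch of the wildcard-table pattern, assuming access to the public `bigquery-public-data.noaa_gsod` dataset and the `ibis-gbq` billing project used in the test:

import ibis

con = ibis.connect("bigquery://ibis-gbq")
t = con.table("gsod*", database="bigquery-public-data.noaa_gsod")
# _TABLE_SUFFIX selects which shard a row came from; it is removed from the
# schema of the returned result
expr = t.filter(t._TABLE_SUFFIX == "1929", t.max != 9999.9).head(1)
df = expr.execute()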
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
MOD(EXTRACT(dayofweek FROM DATETIME('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
INITCAP(CAST(DATETIME('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
MOD(EXTRACT(dayofweek FROM DATETIME('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
INITCAP(CAST(DATETIME('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
MOD(EXTRACT(dayofweek FROM DATETIME('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
INITCAP(CAST(DATETIME('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
time(`t0`.`ts`) AS `tmp`
TIME(`t0`.`ts`) AS `tmp`
FROM `t` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(hour FROM datetime('2017-01-01T04:55:59')) AS `tmp`
EXTRACT(hour FROM DATETIME('2017-01-01T04:55:59')) AS `tmp`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(hour FROM time(4, 55, 59)) AS `tmp`
EXTRACT(hour FROM TIME(4, 55, 59)) AS `tmp`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(hour FROM datetime('2017-01-01T04:55:59')) AS `tmp`
EXTRACT(hour FROM DATETIME('2017-01-01T04:55:59')) AS `tmp`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(hour FROM time(4, 55, 59)) AS `tmp`
EXTRACT(hour FROM TIME(4, 55, 59)) AS `tmp`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(hour FROM datetime('2017-01-01T04:55:59')) AS `tmp`
EXTRACT(hour FROM DATETIME('2017-01-01T04:55:59')) AS `tmp`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
EXTRACT(year FROM DATETIME('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
EXTRACT(year FROM DATETIME('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
EXTRACT(year FROM DATETIME('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `t0` AS `t0`
EXCEPT DISTINCT
SELECT
*
FROM `t1` AS `t1`
) AS `t2`
FROM `t0` AS `t0`
EXCEPT DISTINCT
SELECT
*
FROM `t1` AS `t1`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `t0` AS `t0`
INTERSECT DISTINCT
SELECT
*
FROM `t1` AS `t1`
) AS `t2`
FROM `t0` AS `t0`
INTERSECT DISTINCT
SELECT
*
FROM `t1` AS `t1`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `t0` AS `t0`
UNION ALL
SELECT
*
FROM `t1` AS `t1`
) AS `t2`
FROM `t0` AS `t0`
UNION ALL
SELECT
*
FROM `t1` AS `t1`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `t0` AS `t0`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t1`
) AS `t2`
FROM `t0` AS `t0`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t1`
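
The four set-operation snapshots above (EXCEPT DISTINCT, INTERSECT DISTINCT, UNION ALL, UNION DISTINCT) lose their redundant outer SELECT wrapper; for reference, they come from expressions along these lines (a sketch using the generic ibis API, with `t0`/`t1` as single-column stand-in tables):

import ibis

t0 = ibis.table({"a": "int64"}, name="t0")
t1 = ibis.table({"a": "int64"}, name="t1")

print(ibis.to_sql(t0.union(t1), dialect="bigquery"))                  # UNION ALL
print(ibis.to_sql(t0.union(t1, distinct=True), dialect="bigquery"))   # UNION DISTINCT
print(ibis.to_sql(t0.intersect(t1), dialect="bigquery"))              # INTERSECT DISTINCT
print(ibis.to_sql(t0.difference(t1), dialect="bigquery"))             # EXCEPT DISTINCT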
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
SELECT
COUNT(`t2`.`foo`) AS `count`
FROM (
SELECT
`t1`.`string_col`,
SUM(`t1`.`float_col`) AS `foo`
FROM (
SELECT
*
FROM `alltypes` AS `t0`
WHERE
`t0`.`timestamp_col` < DATETIME('2014-01-01T00:00:00')
) AS `t1`
GROUP BY
1
) AS `t2`
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT
TIME_ADD(TIME(12, 34, 56), INTERVAL 789101 MICROSECOND) AS `datetime_time_12_34_56_789101`
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT
TIME(12, 34, 56) AS `datetime_time_12_34_56`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_'%F %Z'`
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_date_string_col_' America_New_York'_'%F %Z'`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `functional_alltypes` AS `t0`
UNION ALL
SELECT
*
FROM `functional_alltypes` AS `t0`
) AS `t1`
FROM `functional_alltypes` AS `t0`
UNION ALL
SELECT
*
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
SELECT
*
FROM (
SELECT
*
FROM `functional_alltypes` AS `t0`
UNION DISTINCT
SELECT
*
FROM `functional_alltypes` AS `t0`
) AS `t1`
FROM `functional_alltypes` AS `t0`
UNION DISTINCT
SELECT
*
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -11,21 +11,13 @@ SELECT
FROM (
SELECT
*
FROM (
SELECT
*
FROM (
SELECT
*
FROM `t1` AS `t2`
UNION ALL
SELECT
*
FROM `t1` AS `t4`
) AS `t5`
) AS `t6`
FROM `t1` AS `t2`
UNION ALL
SELECT
*
FROM `t1` AS `t3`
) AS `t7`
FROM `t1` AS `t4`
) AS `t5`
UNION ALL
SELECT
*
FROM `t1` AS `t3`
Original file line number Diff line number Diff line change
@@ -11,21 +11,13 @@ SELECT
FROM (
SELECT
*
FROM (
SELECT
*
FROM (
SELECT
*
FROM `t1` AS `t2`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t4`
) AS `t5`
) AS `t6`
UNION ALL
FROM `t1` AS `t2`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t3`
) AS `t7`
FROM `t1` AS `t4`
) AS `t5`
UNION ALL
SELECT
*
FROM `t1` AS `t3`
Original file line number Diff line number Diff line change
@@ -11,21 +11,13 @@ SELECT
FROM (
SELECT
*
FROM (
SELECT
*
FROM (
SELECT
*
FROM `t1` AS `t2`
UNION ALL
SELECT
*
FROM `t1` AS `t4`
) AS `t5`
) AS `t6`
UNION DISTINCT
FROM `t1` AS `t2`
UNION ALL
SELECT
*
FROM `t1` AS `t3`
) AS `t7`
FROM `t1` AS `t4`
) AS `t5`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t3`
Original file line number Diff line number Diff line change
@@ -11,21 +11,13 @@ SELECT
FROM (
SELECT
*
FROM (
SELECT
*
FROM (
SELECT
*
FROM `t1` AS `t2`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t4`
) AS `t5`
) AS `t6`
FROM `t1` AS `t2`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t3`
) AS `t7`
FROM `t1` AS `t4`
) AS `t5`
UNION DISTINCT
SELECT
*
FROM `t1` AS `t3`
33 changes: 32 additions & 1 deletion ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis import _
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.backends.sql.compilers import BigQueryCompiler
from ibis.common.annotations import ValidationError

to_sql = ibis.bigquery.compile
@@ -646,3 +646,34 @@ def test_unnest(snapshot):
)
def test_field_names_strip_whitespace(fieldname, expected):
assert BigQueryCompiler._gen_valid_name(fieldname) == expected


def test_subquery_scalar_params(snapshot):
t = ibis.table(
schema={
"float_col": "float64",
"timestamp_col": "timestamp",
"string_col": "string",
},
name="alltypes",
)
p = ibis.param("timestamp").name("my_param")
expr = (
t.filter(lambda t: t.timestamp_col < p)
.group_by("string_col")
.aggregate(foo=lambda t: t.float_col.sum())
.foo.count()
.name("count")
)
result = ibis.to_sql(expr, params={p: "20140101"}, dialect="bigquery")
snapshot.assert_match(result, "out.sql")


def test_time_from_hms_with_micros(snapshot):
literal = ibis.literal(datetime.time(12, 34, 56, 789101))
result = ibis.to_sql(literal, dialect="bigquery")
snapshot.assert_match(result, "micros.sql")

literal = ibis.literal(datetime.time(12, 34, 56))
result = ibis.to_sql(literal, dialect="bigquery")
snapshot.assert_match(result, "no_micros.sql")
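
The `micros.sql`/`no_micros.sql` snapshots asserted here correspond to output like the following (a sketch; the column alias is taken from the snapshot shown earlier):

import datetime

import ibis

lit = ibis.literal(datetime.time(12, 34, 56, 789101))
print(ibis.to_sql(lit, dialect="bigquery"))
# SELECT
#   TIME_ADD(TIME(12, 34, 56), INTERVAL 789101 MICROSECOND) AS `datetime_time_12_34_56_789101`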
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_core.py
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ def f(a):
)
f.seek(0)
code = builtins.compile(f.read(), f.name, "exec")
exec(code, d)
exec(code, d) # noqa: S102
f = d["f"]
js = compile(f)
snapshot.assert_match(js, "out.js")
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_find.py
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ def parse_stmt(stmt):


def eq(left, right):
if type(left) != type(right):
if type(left) is not type(right):
return False

if is_iterable(left) and is_iterable(right):
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/udf/rewrite.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
def matches(value: ast.AST, pattern: ast.AST) -> bool:
"""Check whether `value` matches `pattern`."""
# types must match exactly
if type(value) != type(pattern):
if type(value) is not type(pattern):
return False

# primitive value, such as None, True, False etc
40 changes: 24 additions & 16 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
from contextlib import closing
from functools import partial
from typing import TYPE_CHECKING, Any, Literal
from urllib.parse import parse_qs, urlparse
from urllib.parse import unquote_plus

import clickhouse_connect as cc
import pyarrow as pa
@@ -24,14 +24,15 @@
import ibis.expr.types as ir
from ibis import util
from ibis.backends import BaseBackend, CanCreateDatabase
from ibis.backends.clickhouse.compiler import ClickHouseCompiler
from ibis.backends.clickhouse.converter import ClickHousePandasData
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compiler import C
from ibis.backends.sql.compilers import ClickHouseCompiler
from ibis.backends.sql.compilers.base import C

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping
from pathlib import Path
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
@@ -60,7 +61,7 @@ class Options(ibis.config.Config):

bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool"

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend:
"""Connect to a backend using a URL `url`.

Parameters
@@ -76,25 +77,17 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
A backend instance

"""
url = urlparse(url)
database = url.path[1:]
query_params = parse_qs(url.query)

connect_args = {
"user": url.username,
"password": url.password or "",
"password": unquote_plus(url.password or ""),
"host": url.hostname,
"database": database or "",
"port": url.port,
**kwargs,
}

for name, value in query_params.items():
if len(value) > 1:
connect_args[name] = value
elif len(value) == 1:
connect_args[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")

kwargs.update(connect_args)
self._convert_kwargs(kwargs)

@@ -170,6 +163,21 @@ def do_connect(
**kwargs,
)

@util.experimental
@classmethod
def from_connection(cls, con: cc.driver.Client) -> Backend:
"""Create an Ibis client from an existing ClickHouse Connect Client instance.

Parameters
----------
con
An existing ClickHouse Connect Client instance.
"""
new_backend = cls()
new_backend._can_reconnect = False
new_backend.con = con
return new_backend

@property
def version(self) -> str:
return self.con.server_version
@@ -423,7 +431,7 @@ def insert(
elif not isinstance(obj, ir.Table):
obj = ibis.memtable(obj)

query = self._build_insert_query(target=name, source=obj)
query = self._build_insert_from_table(target=name, source=obj)
external_tables = self._collect_in_memory_tables(obj, {})
external_data = self._normalize_external_tables(external_tables)
return self.con.command(query.sql(self.name), external_data=external_data)
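A brief sketch of the two connection paths touched above: reusing an existing clickhouse-connect client through the new `from_connection` classmethod, and passing a percent-encoded password through a URL (the host, credentials, port, and database below are placeholders):

import clickhouse_connect as cc

import ibis

# reuse an already-configured clickhouse-connect client
client = cc.get_client(host="localhost", username="default", password="p@ss word")
con = ibis.clickhouse.from_connection(client)

# or connect by URL; the password is percent-decoded with unquote_plus
con = ibis.connect("clickhouse://default:p%40ss+word@localhost:8123/default")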
84 changes: 82 additions & 2 deletions ibis/backends/clickhouse/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from __future__ import annotations

import concurrent.futures
import contextlib
import os
import subprocess
from typing import TYPE_CHECKING, Any

import pytest
import sqlglot as sg

import ibis
import ibis.expr.types as ir
from ibis import util
from ibis.backends.tests.base import ServiceBackendTest

if TYPE_CHECKING:
from collections.abc import Callable, Iterable
from collections.abc import Callable, Iterable, Mapping
from pathlib import Path

CLICKHOUSE_HOST = os.environ.get("IBIS_TEST_CLICKHOUSE_HOST", "localhost")
@@ -31,6 +34,10 @@ class TestConf(ServiceBackendTest):
data_volume = "/var/lib/clickhouse/user_files/ibis"
service_name = "clickhouse"
deps = ("clickhouse_connect",)
supports_tpch = True
supports_tpcds = True
# Query 14 needs a slightly larger absolute tolerance than the default
tpc_absolute_tolerance = 0.0001

@property
def native_bool(self) -> bool:
@@ -71,12 +78,21 @@ def postload(self, **kw: Any):
self.connection = self.connect(database=IBIS_TEST_CLICKHOUSE_DB, **kw)

@staticmethod
def connect(*, tmpdir, worker_id, **kw: Any):
def connect(
*, tmpdir, worker_id, settings: Mapping[str, Any] | None = None, **kw: Any
):
if settings is None:
settings = {}

# without this setting, TPC-DS queries 19 and 24 will fail
settings.setdefault("allow_experimental_join_condition", 1)

return ibis.clickhouse.connect(
host=CLICKHOUSE_HOST,
port=CLICKHOUSE_PORT,
password=CLICKHOUSE_PASS,
user=CLICKHOUSE_USER,
settings=settings,
**kw,
)

@@ -96,6 +112,70 @@ def least(f: Callable[..., ir.Value], *args: ir.Value) -> ir.Value:
)
return f(*args)

def preload(self):
super().preload()

suites = ("tpch", "tpcds")

service_name = self.service_name
data_volume = self.data_volume

for suite in suites:
subprocess.run(
[
"docker",
"compose",
"exec",
service_name,
"mkdir",
"-p",
f"{data_volume}/{suite}",
],
check=True,
)

with concurrent.futures.ThreadPoolExecutor() as executor:
for fut in concurrent.futures.as_completed(
executor.submit(
subprocess.run,
[
"docker",
"compose",
"cp",
str(path),
f"{service_name}:{data_volume}/{suite}/{path.name}",
],
check=True,
)
for suite in suites
for path in self.data_dir.joinpath(suite).rglob("*.parquet")
):
fut.result()

def _load_tpc(self, *, suite, scale_factor):
con = self.connection
schema = f"tpc{suite}"
con.con.command(f"CREATE DATABASE IF NOT EXISTS {schema}")
parquet_dir = self.data_dir.joinpath(schema, f"sf={scale_factor}", "parquet")
assert parquet_dir.exists(), parquet_dir
for path in parquet_dir.glob("*.parquet"):
table_name = path.with_suffix("").name
con.con.command(
f"CREATE VIEW IF NOT EXISTS {schema}.{table_name} AS "
f"SELECT * FROM file('ibis/{schema}/{path.name}', 'Parquet')"
)

def _transform_tpc_sql(self, parsed, *, suite, leaves):
def add_catalog_and_schema(node):
if isinstance(node, sg.exp.Table) and node.name in leaves:
return node.__class__(
catalog=f"tpc{suite}",
**{k: v for k, v in node.args.items() if k != "catalog"},
)
return node

return parsed.transform(add_catalog_and_schema)


@pytest.fixture(scope="session")
def con(tmp_path_factory, data_dir, worker_id):
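The `_transform_tpc_sql` hook above qualifies bare table references with the TPC database before the benchmark queries run; a standalone sketch of the same sqlglot rewrite (the table and database names here are illustrative):

import sqlglot as sg

leaves = {"lineitem"}
parsed = sg.parse_one("SELECT COUNT(*) FROM lineitem", read="clickhouse")

def add_catalog(node):
    # mirror the conftest transform: attach a catalog to leaf tables only
    if isinstance(node, sg.exp.Table) and node.name in leaves:
        return node.__class__(
            catalog="tpch",
            **{k: v for k, v in node.args.items() if k != "catalog"},
        )
    return node

# the lineitem reference is now qualified with the tpch database
print(parsed.transform(add_catalog).sql(dialect="clickhouse"))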
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."double_col"
"t0"."double_col" AS "double_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
GREATEST("t0"."int_col", 10) AS "Greatest()"
GREATEST("t0"."int_col", 10) AS "Greatest((int_col, 10))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
GREATEST("t0"."int_col", "t0"."bigint_col") AS "Greatest()"
GREATEST("t0"."int_col", "t0"."bigint_col") AS "Greatest((int_col, bigint_col))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
LEAST("t0"."int_col", 10) AS "Least()"
LEAST("t0"."int_col", 10) AS "Least((int_col, 10))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
LEAST("t0"."int_col", "t0"."bigint_col") AS "Least()"
LEAST("t0"."int_col", "t0"."bigint_col") AS "Least((int_col, bigint_col))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."bigint_col"
"t0"."bigint_col" AS "bigint_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."bool_col"
"t0"."bool_col" AS "bool_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."date_string_col"
"t0"."date_string_col" AS "date_string_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."double_col"
"t0"."double_col" AS "double_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."float_col"
"t0"."float_col" AS "float_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."id"
"t0"."id" AS "id"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."int_col"
"t0"."int_col" AS "int_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."month"
"t0"."month" AS "month"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."smallint_col"
"t0"."smallint_col" AS "smallint_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."string_col"
"t0"."string_col" AS "string_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."timestamp_col"
"t0"."timestamp_col" AS "timestamp_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."tinyint_col"
"t0"."tinyint_col" AS "tinyint_col"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
"t0"."year"
"t0"."year" AS "year"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
indexOf(['a','b','c'], "t0"."string_col") - 1 AS "FindInSet(string_col)"
indexOf(['a','b','c'], "t0"."string_col") - 1 AS "FindInSet(string_col, ('a', 'b', 'c'))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -5,5 +5,5 @@ SELECT
WHEN "t0"."float_col" < 0
THEN "t0"."int_col"
ELSE 0
END AS "SearchedCase(0)"
END AS "SearchedCase((Greater(float_col, 0), Less(float_col, 0)), (Multiply(int_col, 2), int_col), 0)"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CASE "t0"."string_col" WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "SimpleCase(string_col, 'default')"
CASE "t0"."string_col" WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS "SimpleCase(string_col, ('foo', 'baz'), ('bar', 'qux'), 'default')"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@ SELECT
CAST("t1"."string_col" AS Nullable(Float64)) AS "Cast(string_col, float64)"
FROM (
SELECT
"t0"."string_col",
"t0"."string_col" AS "string_col",
COUNT(*) AS "count"
FROM "functional_alltypes" AS "t0"
GROUP BY
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
SELECT
"t4"."a",
"t4"."b",
"t4"."c",
"t4"."d",
"t4"."a" AS "a",
"t4"."b" AS "b",
"t4"."c" AS "c",
"t4"."d" AS "d",
"t4"."c" / (
"t4"."a" - "t4"."b"
) AS "e"
FROM (
SELECT
"t2"."a",
"t2"."b",
"t3"."c",
"t3"."d"
"t2"."a" AS "a",
"t2"."b" AS "b",
"t3"."c" AS "c",
"t3"."d" AS "d"
FROM "s" AS "t2"
INNER JOIN "t" AS "t3"
ON "t2"."a" = "t3"."c"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
"t0"."a",
"t0"."a" AS "a",
COALESCE(countIf(NOT (
"t0"."b"
)), 0) AS "A",
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
SELECT
"t1"."id",
"t1"."bool_col",
"t1"."tinyint_col",
"t1"."smallint_col",
"t1"."int_col",
"t1"."bigint_col",
"t1"."float_col",
"t1"."double_col",
"t1"."date_string_col",
"t1"."string_col",
"t1"."timestamp_col",
"t1"."year",
"t1"."month"
"t1"."id" AS "id",
"t1"."bool_col" AS "bool_col",
"t1"."tinyint_col" AS "tinyint_col",
"t1"."smallint_col" AS "smallint_col",
"t1"."int_col" AS "int_col",
"t1"."bigint_col" AS "bigint_col",
"t1"."float_col" AS "float_col",
"t1"."double_col" AS "double_col",
"t1"."date_string_col" AS "date_string_col",
"t1"."string_col" AS "string_col",
"t1"."timestamp_col" AS "timestamp_col",
"t1"."year" AS "year",
"t1"."month" AS "month"
FROM "functional_alltypes" AS "t1"
INNER JOIN "functional_alltypes" AS "t2"
ON "t1"."id" = "t2"."id"
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
"t1"."key",
"t1"."key" AS "key",
SUM((
(
"t1"."value" + 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
"t1"."key",
"t1"."key" AS "key",
SUM((
(
"t1"."value" + 1
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
SELECT
"t2"."playerID",
"t2"."yearID",
"t2"."stint",
"t2"."teamID",
"t2"."lgID",
"t2"."G",
"t2"."AB",
"t2"."R",
"t2"."H",
"t2"."X2B",
"t2"."X3B",
"t2"."HR",
"t2"."RBI",
"t2"."SB",
"t2"."CS",
"t2"."BB",
"t2"."SO",
"t2"."IBB",
"t2"."HBP",
"t2"."SH",
"t2"."SF",
"t2"."GIDP"
"t2"."playerID" AS "playerID",
"t2"."yearID" AS "yearID",
"t2"."stint" AS "stint",
"t2"."teamID" AS "teamID",
"t2"."lgID" AS "lgID",
"t2"."G" AS "G",
"t2"."AB" AS "AB",
"t2"."R" AS "R",
"t2"."H" AS "H",
"t2"."X2B" AS "X2B",
"t2"."X3B" AS "X3B",
"t2"."HR" AS "HR",
"t2"."RBI" AS "RBI",
"t2"."SB" AS "SB",
"t2"."CS" AS "CS",
"t2"."BB" AS "BB",
"t2"."SO" AS "SO",
"t2"."IBB" AS "IBB",
"t2"."HBP" AS "HBP",
"t2"."SH" AS "SH",
"t2"."SF" AS "SF",
"t2"."GIDP" AS "GIDP"
FROM "batting" AS "t2"
ANY JOIN "awards_players" AS "t3"
ON "t2"."playerID" = "t3"."awardID"
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
SELECT
"t2"."playerID",
"t2"."yearID",
"t2"."stint",
"t2"."teamID",
"t2"."lgID",
"t2"."G",
"t2"."AB",
"t2"."R",
"t2"."H",
"t2"."X2B",
"t2"."X3B",
"t2"."HR",
"t2"."RBI",
"t2"."SB",
"t2"."CS",
"t2"."BB",
"t2"."SO",
"t2"."IBB",
"t2"."HBP",
"t2"."SH",
"t2"."SF",
"t2"."GIDP"
"t2"."playerID" AS "playerID",
"t2"."yearID" AS "yearID",
"t2"."stint" AS "stint",
"t2"."teamID" AS "teamID",
"t2"."lgID" AS "lgID",
"t2"."G" AS "G",
"t2"."AB" AS "AB",
"t2"."R" AS "R",
"t2"."H" AS "H",
"t2"."X2B" AS "X2B",
"t2"."X3B" AS "X3B",
"t2"."HR" AS "HR",
"t2"."RBI" AS "RBI",
"t2"."SB" AS "SB",
"t2"."CS" AS "CS",
"t2"."BB" AS "BB",
"t2"."SO" AS "SO",
"t2"."IBB" AS "IBB",
"t2"."HBP" AS "HBP",
"t2"."SH" AS "SH",
"t2"."SF" AS "SF",
"t2"."GIDP" AS "GIDP"
FROM "batting" AS "t2"
LEFT ANY JOIN "awards_players" AS "t3"
ON "t2"."playerID" = "t3"."awardID"
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
SELECT
"t2"."playerID",
"t2"."yearID",
"t2"."stint",
"t2"."teamID",
"t2"."lgID",
"t2"."G",
"t2"."AB",
"t2"."R",
"t2"."H",
"t2"."X2B",
"t2"."X3B",
"t2"."HR",
"t2"."RBI",
"t2"."SB",
"t2"."CS",
"t2"."BB",
"t2"."SO",
"t2"."IBB",
"t2"."HBP",
"t2"."SH",
"t2"."SF",
"t2"."GIDP"
"t2"."playerID" AS "playerID",
"t2"."yearID" AS "yearID",
"t2"."stint" AS "stint",
"t2"."teamID" AS "teamID",
"t2"."lgID" AS "lgID",
"t2"."G" AS "G",
"t2"."AB" AS "AB",
"t2"."R" AS "R",
"t2"."H" AS "H",
"t2"."X2B" AS "X2B",
"t2"."X3B" AS "X3B",
"t2"."HR" AS "HR",
"t2"."RBI" AS "RBI",
"t2"."SB" AS "SB",
"t2"."CS" AS "CS",
"t2"."BB" AS "BB",
"t2"."SO" AS "SO",
"t2"."IBB" AS "IBB",
"t2"."HBP" AS "HBP",
"t2"."SH" AS "SH",
"t2"."SF" AS "SF",
"t2"."GIDP" AS "GIDP"
FROM "batting" AS "t2"
INNER JOIN "awards_players" AS "t3"
ON "t2"."playerID" = "t3"."awardID"