26 changes: 21 additions & 5 deletions .pre-commit-config.yaml
@@ -4,22 +4,24 @@ ci:
autoupdate_commit_msg: "chore(deps): pre-commit.ci autoupdate"
skip:
- actionlint
- deadnix
- just
- nixpkgs-fmt
- prettier
- ruff
- shellcheck
- shfmt
- statix
- taplo
default_stages:
- commit
repos:
- repo: https://github.com/rhysd/actionlint
rev: v1.6.23
rev: v1.6.24
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 23.1.0
rev: 23.3.0
hooks:
- id: black
- repo: local
@@ -36,7 +38,7 @@ repos:
require_serial: true
minimum_pre_commit_version: "2.9.2"
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
rev: v1.30.0
hooks:
- id: yamllint
- repo: https://github.com/pre-commit/pre-commit-hooks
@@ -71,10 +73,17 @@ repos:
entry: prettier
args: ["--write"]
types_or:
- javascript
- json
- toml
- yaml
- markdown
- yaml
- id: taplo
name: taplo
language: system
entry: taplo
args: ["fmt"]
types:
- toml
- id: shellcheck
name: shellcheck
language: system
@@ -121,3 +130,10 @@ repos:
pass_filenames: false
types:
- nix
- id: deadnix
name: deadnix
language: system
entry: deadnix
args: ["--edit", "--fail"]
types:
- nix
16 changes: 9 additions & 7 deletions .prettierignore
@@ -1,14 +1,16 @@
**/*.toml
.benchmarks
.direnv
.mypy_cache
.pytest_cache
.ruff_cache
result*
docs/release_notes.md
docs/overrides/*.html
docs/api/expressions/top_level.md
docs/SUMMARY.md
site
ci/udf/CMakeFiles
poetry.lock
docs/javascripts/*.js
docs/SUMMARY.md
docs/api/expressions/top_level.md
docs/overrides/*.html
docs/release_notes.md
ibis
poetry.lock
result*
site
3 changes: 2 additions & 1 deletion .releaserc.js
@@ -63,7 +63,8 @@ module.exports = {
[
"@semantic-release/exec",
{
verifyConditionsCmd: "ci/release/verify_conditions.sh ${options.dryRun}",
verifyConditionsCmd:
"ci/release/verify_conditions.sh ${options.dryRun}",
verifyReleaseCmd: "ci/release/verify_release.sh ${nextRelease.version}",
prepareCmd: "ci/release/prepare.sh ${nextRelease.version}",
publishCmd: "ci/release/publish.sh",
1 change: 1 addition & 0 deletions .taplo.toml
@@ -0,0 +1 @@
include = ["*.toml", "ibis/**/*.toml"]
4 changes: 4 additions & 0 deletions ci/conda-lock/condarc
@@ -1,4 +1,8 @@
# vim: ft=yaml
channels:
- conda-forge
channel_priority: strict

always_yes: true

# remote_connect_timeout_secs (float)
13 changes: 10 additions & 3 deletions ci/conda-lock/generate.sh
@@ -29,6 +29,13 @@ extras=(
-e decompiler
)
template="conda-lock/{platform}-${python_version}.lock"

linux_osx_extras=()
if [ "${python_version}" != "3.11" ]; then
# clickhouse cityhash doesn't exist for python 3.11
linux_osx_extras+=(-e clickhouse)
fi

conda lock \
--file pyproject.toml \
--file "${python_version_file}" \
@@ -37,9 +44,9 @@ conda lock \
--platform osx-64 \
--filename-template "${template}" \
--filter-extras \
--mamba \
--conda="$(which conda)" \
--category dev --category test --category docs \
"${extras[@]}" -e clickhouse -e datafusion
"${extras[@]}" "${linux_osx_extras[@]}" -e datafusion

conda lock \
--file pyproject.toml \
@@ -49,6 +56,6 @@ conda lock \
--platform win-64 \
--filename-template "${template}" \
--filter-extras \
--mamba \
--conda="$(which conda)" \
--category dev --category test --category docs \
"${extras[@]}"
5 changes: 5 additions & 0 deletions ci/schema/duckdb.sql
@@ -111,3 +111,8 @@ INSERT INTO win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

CREATE OR REPLACE TABLE map (kv MAP(STRING, BIGINT));
INSERT INTO map VALUES
(MAP(['a', 'b', 'c'], [1, 2, 3])),
(MAP(['d', 'e', 'f'], [4, 5, 6]));
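
The new `map` fixture backs the DuckDB map operations added in this release. A minimal sketch of querying it through Ibis, assuming the schema above has been loaded into a (hypothetical) `ibis_testing.ddb` database file:

```python
import ibis

con = ibis.duckdb.connect("ibis_testing.ddb")  # hypothetical database file
t = con.table("map")
expr = t.kv["a"]       # map lookup; NULL for rows where the key is absent
print(expr.execute())
```
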
13 changes: 6 additions & 7 deletions ci/schema/postgresql.sql
@@ -1,6 +1,7 @@
CREATE EXTENSION IF NOT EXISTS hstore;
CREATE EXTENSION IF NOT EXISTS postgis;
CREATE EXTENSION IF NOT EXISTS plpython3u;
CREATE EXTENSION IF NOT EXISTS vector;

DROP TABLE IF EXISTS diamonds CASCADE;

@@ -52,7 +53,11 @@ CREATE TABLE awards_players (
"yearID" BIGINT,
"lgID" TEXT,
tie TEXT,
notes TEXT
notes TEXT,
search TSVECTOR GENERATED ALWAYS AS (
setweight(to_tsvector('simple', notes), 'A')::TSVECTOR
) STORED,
simvec VECTOR GENERATED always AS ('[1,2,3]'::VECTOR) STORED
);

DROP TABLE IF EXISTS functional_alltypes CASCADE;
@@ -208,9 +213,3 @@ CREATE TABLE map (kv HSTORE);
INSERT INTO map VALUES
('a=>1,b=>2,c=>3'),
('d=>4,e=>5,c=>6');

ALTER TABLE awards_players
ADD search tsvector
GENERATED always AS (
setweight(to_tsvector('simple', notes), 'A') :: tsvector
) stored;
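
Folding the `search` column into `CREATE TABLE` (rather than the old trailing `ALTER TABLE`) means full-text queries work as soon as the schema loads. A rough smoke test, assuming the docker-compose postgres service from this PR with its default credentials:

```python
import sqlalchemy as sa

# user/password/database match the compose postgres service in this PR
engine = sa.create_engine("postgresql://postgres:postgres@localhost:5432/ibis_testing")
with engine.connect() as con:
    n = con.execute(
        sa.text(
            "SELECT count(*) FROM awards_players "
            "WHERE search @@ to_tsquery('simple', 'win')"
        )
    ).scalar()
print(n)  # number of rows whose notes mention 'win'
```
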
2 changes: 1 addition & 1 deletion ci/schema/sqlite.sql
@@ -13,7 +13,7 @@ CREATE TABLE functional_alltypes (
double_col REAL,
date_string_col TEXT,
string_col TEXT,
timestamp_col TEXT,
timestamp_col TIMESTAMP,
year BIGINT,
month BIGINT,
CHECK (bool_col IN (0, 1))
364 changes: 181 additions & 183 deletions conda-lock/linux-64-3.10.lock
440 changes: 440 additions & 0 deletions conda-lock/linux-64-3.11.lock
364 changes: 181 additions & 183 deletions conda-lock/linux-64-3.8.lock
364 changes: 181 additions & 183 deletions conda-lock/linux-64-3.9.lock
342 changes: 178 additions & 164 deletions conda-lock/osx-64-3.10.lock
420 changes: 420 additions & 0 deletions conda-lock/osx-64-3.11.lock
360 changes: 178 additions & 182 deletions conda-lock/osx-64-3.8.lock
343 changes: 178 additions & 165 deletions conda-lock/osx-64-3.9.lock
342 changes: 178 additions & 164 deletions conda-lock/osx-arm64-3.10.lock
420 changes: 420 additions & 0 deletions conda-lock/osx-arm64-3.11.lock
342 changes: 178 additions & 164 deletions conda-lock/osx-arm64-3.8.lock
342 changes: 178 additions & 164 deletions conda-lock/osx-arm64-3.9.lock
307 changes: 161 additions & 146 deletions conda-lock/win-64-3.10.lock
421 changes: 421 additions & 0 deletions conda-lock/win-64-3.11.lock
307 changes: 161 additions & 146 deletions conda-lock/win-64-3.8.lock
307 changes: 161 additions & 146 deletions conda-lock/win-64-3.9.lock

Large diffs are not rendered by default.

54 changes: 37 additions & 17 deletions docker-compose.yml
@@ -1,10 +1,16 @@
version: "3.4"
services:
clickhouse:
image: clickhouse/clickhouse-server:23.2.4.12-alpine
image: clickhouse/clickhouse-server:23.3.1.2823-alpine
ports:
- 8123:8123
- 9000:9000
healthcheck:
interval: 1s
retries: 10
test:
- CMD-SHELL
- nc -z 127.0.0.1 9000
timeout: 10s
volumes:
- clickhouse:/var/lib/clickhouse/user_files/ibis
networks:
@@ -16,7 +22,7 @@ services:
environment:
PGPASSWORD: postgres
healthcheck:
interval: 30s
interval: 5s
retries: 20
test:
- CMD-SHELL
@@ -44,8 +50,8 @@ services:
environment:
POSTGRES_PASSWORD: postgres
healthcheck:
interval: 10s
retries: 3
interval: 1s
retries: 30
test:
- CMD
- pg_isready
@@ -66,6 +72,13 @@ services:
- 8051:8051
networks:
- impala
healthcheck:
interval: 5s
retries: 20
test:
- CMD-SHELL
- kudu cluster ksck kudu:7051
timeout: 10s
kudu-tserver:
cap_add:
- SYS_TIME
@@ -77,15 +90,22 @@ services:
- 8050:8050
networks:
- impala
healthcheck:
interval: 5s
retries: 20
test:
- CMD-SHELL
- kudu cluster ksck kudu:7051
timeout: 10s
mysql:
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
MYSQL_DATABASE: ibis_testing
MYSQL_PASSWORD: ibis
MYSQL_USER: ibis
healthcheck:
interval: 10s
retries: 3
interval: 1s
retries: 30
test:
- CMD
- mysqladmin
@@ -105,8 +125,8 @@ services:
build: ./docker/postgres
image: ibis-postgres
healthcheck:
interval: 10s
retries: 3
interval: 1s
retries: 30
test:
- CMD
- pg_isready
@@ -121,8 +141,8 @@ services:
MSSQL_SA_PASSWORD: 1bis_Testing!
ACCEPT_EULA: "Y"
healthcheck:
interval: 10s
retries: 3
interval: 1s
retries: 30
test:
- CMD-SHELL
- /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "IF DB_ID('ibis_testing') IS NULL BEGIN CREATE DATABASE [ibis_testing] END"
@@ -140,8 +160,8 @@ services:
POSTGRES_DB: ibis_testing
POSTGRES_USER: postgres
healthcheck:
interval: 10s
retries: 3
interval: 1s
retries: 30
test:
- CMD
- pg_isready
@@ -157,12 +177,12 @@ services:
- trino-postgres
healthcheck:
interval: 5s
retries: 6
retries: 10
test:
- CMD-SHELL
- trino --execute 'SELECT 1 AS one'
timeout: 30s
image: trinodb/trino:410
image: trinodb/trino:412
ports:
- 8080:8080
networks:
@@ -182,8 +202,8 @@ services:
- POSTGRES_USER=druid
- POSTGRES_DB=druid
healthcheck:
interval: 10s
retries: 9
interval: 1s
retries: 30
timeout: 90s
test:
- CMD-SHELL
7 changes: 6 additions & 1 deletion docker/postgres/Dockerfile
@@ -1,2 +1,7 @@
FROM postgis/postgis:15-3.3-alpine
RUN apk add postgresql15-plpython3
RUN apk add --no-cache build-base clang15 llvm15 postgresql15-plpython3 python3 py3-pip && \
python3 -m pip install pgxnclient && \
pgxn install vector && \
python3 -m pip uninstall -y pgxnclient && \
rm -rf ~/.cache/pip && \
apk del build-base clang15 llvm15 python3 py3-pip
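
The added build steps install `pgvector` via `pgxn` and then strip the toolchain again to keep the image small. A quick sketch for verifying the extension in a running container (compose default credentials assumed):

```python
import sqlalchemy as sa

engine = sa.create_engine("postgresql://postgres:postgres@localhost:5432/ibis_testing")
with engine.connect() as con:
    # `<->` is pgvector's Euclidean-distance operator
    d = con.execute(sa.text("SELECT '[1,2,3]'::vector <-> '[4,5,6]'::vector")).scalar()
print(d)  # ~5.196
```
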
3 changes: 3 additions & 0 deletions docs/SUMMARY.md
@@ -1,6 +1,7 @@
* [Home](index.md)
* [Install](install.md)
* [Docs](docs/index.md)
* [Getting Started](getting_started.md)
* [How To Guide](how_to/)
* [Execution Backends](backends/)
* [User Guide](user_guide/)
@@ -22,8 +23,10 @@
* [Ibis for SQL Programmers](ibis-for-sql-programmers.ipynb)
* [Ibis for pandas Users](ibis-for-pandas-users.ipynb)
* [Backend Operations Matrix](backends/support_matrix.md)
* [Why Ibis?](why_ibis.md)
* [Releases](release_notes.md)
* Blog
* [Campaign Finance Analysis with Ibis](blog/rendered/campaign-finance.ipynb)
* [Ibis Sneak Peek: Writing to Files](blog/ibis-to-file.md)
* [Ibis Sneak Peek: Examples](blog/ibis-examples.md)
* [Maximizing Productivity with Selectors](blog/selectors.md)
2 changes: 1 addition & 1 deletion docs/api/selectors.md
@@ -1,5 +1,5 @@
# Column Selectors

<!-- prettier-ignore-start -->
::: ibis.expr.selectors
::: ibis.selectors
<!-- prettier-ignore-end -->
2 changes: 1 addition & 1 deletion docs/blog/ibis-examples.md
@@ -26,7 +26,7 @@ This module offers in-Ibis access to multiple small tables (the largest is aroun
│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │
│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │
│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ ∅ │ ∅ │ ∅ │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ NULL │ NULL │ NULL │ 2007 │
│ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```
1,814 changes: 1,814 additions & 0 deletions docs/blog/rendered/campaign-finance.ipynb

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions docs/blog/selectors.md
@@ -26,13 +26,13 @@
│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │
│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │
│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ ∅ │ ∅ │ ∅ │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ NULL │ NULL │ NULL │ 2007 │
│ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │
│ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │
│ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ NULL │ 2007 │
│ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ NULL │ 2007 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```
@@ -57,13 +57,13 @@ Out[13]:
│ Adelie │ Torgersen │ -0.883205 │ 0.784300 │ -1.416272 │ -0.563317 │ male │ -1.257484 │
│ Adelie │ Torgersen │ -0.809939 │ 0.126003 │ -1.060696 │ -0.500969 │ female │ -1.257484 │
│ Adelie │ Torgersen │ -0.663408 │ 0.429833 │ -0.420660 │ -1.186793 │ female │ -1.257484 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ ∅ │ -1.257484 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ NULL │ -1.257484 │
│ Adelie │ Torgersen │ -1.322799 │ 1.088129 │ -0.562890 │ -0.937403 │ female │ -1.257484 │
│ Adelie │ Torgersen │ -0.846572 │ 1.746426 │ -0.776236 │ -0.688012 │ male │ -1.257484 │
│ Adelie │ Torgersen │ -0.919837 │ 0.328556 │ -1.416272 │ -0.719186 │ female │ -1.257484 │
│ Adelie │ Torgersen │ -0.864888 │ 1.240044 │ -0.420660 │ 0.590115 │ male │ -1.257484 │
│ Adelie │ Torgersen │ -1.799025 │ 0.480471 │ -0.562890 │ -0.906229 │ ∅ │ -1.257484 │
│ Adelie │ Torgersen │ -0.352029 │ 1.543873 │ -0.776236 │ 0.060160 │ ∅ │ -1.257484 │
│ Adelie │ Torgersen │ -1.799025 │ 0.480471 │ -0.562890 │ -0.906229 │ NULL │ -1.257484 │
│ Adelie │ Torgersen │ -0.352029 │ 1.543873 │ -0.776236 │ 0.060160 │ NULL │ -1.257484 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────────┘
```
@@ -87,13 +87,13 @@ Out[14]:
│ Adelie │ Torgersen │ -0.883205 │ 0.784300 │ -1.416272 │ -0.563317 │ male │ 2007 │
│ Adelie │ Torgersen │ -0.809939 │ 0.126003 │ -1.060696 │ -0.500969 │ female │ 2007 │
│ Adelie │ Torgersen │ -0.663408 │ 0.429833 │ -0.420660 │ -1.186793 │ female │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ ∅ │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ NULL │ 2007 │
│ Adelie │ Torgersen │ -1.322799 │ 1.088129 │ -0.562890 │ -0.937403 │ female │ 2007 │
│ Adelie │ Torgersen │ -0.846572 │ 1.746426 │ -0.776236 │ -0.688012 │ male │ 2007 │
│ Adelie │ Torgersen │ -0.919837 │ 0.328556 │ -1.416272 │ -0.719186 │ female │ 2007 │
│ Adelie │ Torgersen │ -0.864888 │ 1.240044 │ -0.420660 │ 0.590115 │ male │ 2007 │
│ Adelie │ Torgersen │ -1.799025 │ 0.480471 │ -0.562890 │ -0.906229 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ -0.352029 │ 1.543873 │ -0.776236 │ 0.060160 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ -1.799025 │ 0.480471 │ -0.562890 │ -0.906229 │ NULL │ 2007 │
│ Adelie │ Torgersen │ -0.352029 │ 1.543873 │ -0.776236 │ 0.060160 │ NULL │ 2007 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```
@@ -183,13 +183,13 @@ Out[23]:
│ Adelie │ Torgersen │ 39.099998 │ 18.700001 │ 181.0 │ 3750.0 │ male │ 2007 │
│ Adelie │ Torgersen │ 39.500000 │ 17.400000 │ 186.0 │ 3800.0 │ female │ 2007 │
│ Adelie │ Torgersen │ 40.299999 │ 18.000000 │ 195.0 │ 3250.0 │ female │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ ∅ │ 2007 │
│ Adelie │ Torgersen │ nan │ nan │ nan │ nan │ NULL │ 2007 │
│ Adelie │ Torgersen │ 36.700001 │ 19.299999 │ 193.0 │ 3450.0 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.299999 │ 20.600000 │ 190.0 │ 3650.0 │ male │ 2007 │
│ Adelie │ Torgersen │ 38.900002 │ 17.799999 │ 181.0 │ 3625.0 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.200001 │ 19.600000 │ 195.0 │ 4675.0 │ male │ 2007 │
│ Adelie │ Torgersen │ 34.099998 │ 18.100000 │ 193.0 │ 3475.0 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ 42.000000 │ 20.200001 │ 190.0 │ 4250.0 │ ∅ │ 2007 │
│ Adelie │ Torgersen │ 34.099998 │ 18.100000 │ 193.0 │ 3475.0 │ NULL │ 2007 │
│ Adelie │ Torgersen │ 42.000000 │ 20.200001 │ 190.0 │ 4250.0 │ NULL │ 2007 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```
@@ -207,13 +207,13 @@ Out[35]:
│ adelie │ torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │
│ adelie │ torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │
│ adelie │ torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │
│ adelie │ torgersen │ nan │ nan │ ∅ │ ∅ │ ∅ │ 2007 │
│ adelie │ torgersen │ nan │ nan │ NULL │ NULL │ NULL │ 2007 │
│ adelie │ torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │
│ adelie │ torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │
│ adelie │ torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │
│ adelie │ torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │
│ adelie │ torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ ∅ │ 2007 │
│ adelie │ torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ ∅ │ 2007 │
│ adelie │ torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ NULL │ 2007 │
│ adelie │ torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ NULL │ 2007 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```
124 changes: 63 additions & 61 deletions docs/community/contribute/01_environment.md
@@ -9,53 +9,6 @@ hide:

- [`git`](https://git-scm.com/)

=== "Nix"

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :----------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.unsupported.icon }}[^3] | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} |

1. [Install `nix`](https://nixos.org/download.html)
1. Install `gh`:

=== "`nix-shell`"

```sh
nix-shell -p gh
```

=== "`nix-env`"

```sh
nix-env -iA gh
```

1. Fork and clone the ibis repository:

```sh
gh repo fork --clone --remote ibis-project/ibis
```

1. Set up the public `ibis` Cachix cache to pull pre-built dependencies:

```sh
nix-shell -p cachix --run 'cachix use ibis'
```

1. Run `nix-shell` in the checkout directory:

```sh
cd ibis
nix-shell
```

This may take a while due to artifact download from the cache.

=== "Conda"

!!! info "Some optional dependencies for Windows are not available through `conda`/`mamba`"
@@ -64,12 +17,13 @@

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: |
| **Operating System** :material-arrow-down: | | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |

{% set managers = {"conda": {"name": "Miniconda", "url": "https://docs.conda.io/en/latest/miniconda.html"}, "mamba": {"name": "Mamba", "url": "https://github.com/mamba-org/mamba"}} %}
{% for manager, params in managers.items() %}
@@ -92,13 +46,13 @@ hide:

1. Create a Conda environment from a lock file in the repo:

{% set platforms = {"Linux": "linux", "MacOS": "osx", "Windows": "win"} %}
{% set platforms = {"Linux": "linux-64", "macOS (x86_64)": "osx-64", "macOS (aarch64)": "osx-arm64", "Windows": "win-64"} %}
{% for os, platform in platforms.items() %}
=== "{{ os }}"

```sh
cd ibis
{{ manager }} create -n ibis-dev --file=conda-lock/{{ platform }}-64-3.9.lock
{{ manager }} create -n ibis-dev --file=conda-lock/{{ platform }}-3.10.lock
```
{% endfor %}

@@ -150,7 +104,7 @@ hide:
1. Install development dependencies

```sh
pip install 'poetry>=1.2'
pip install 'poetry>=1.3,<1.4'
pip install -r requirements.txt
```

@@ -160,16 +114,64 @@ hide:
pip install -e .
```

## Building the Docs
=== "Nix"

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :----------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: |
| **Operating System** :material-arrow-down: | | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.unknown.icon }}[^2] | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} |
| **Windows** | {{ config.extra.support_levels.unsupported.icon }}[^3] | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} |

1. [Install `nix`](https://nixos.org/download.html)
1. Install `gh`:

=== "`nix-shell`"

```sh
nix-shell -p gh
```

=== "`nix-env`"

```sh
nix-env -iA gh
```

1. Fork and clone the ibis repository:

!!! warning "You **must** set up an environment with Nix as above to build the website and docs."
```sh
gh repo fork --clone --remote ibis-project/ibis
```

Then, run:
1. Set up the public `ibis` Cachix cache to pull pre-built dependencies:

```sh
mkdocs serve
```sh
nix-shell -p cachix --run 'cachix use ibis'
```

1. Run `nix-shell` in the checkout directory:

```sh
cd ibis
nix-shell
```

This may take a while due to artifact download from the cache.

## Building the Docs

Run

```bash
mkdocs serve --strict
```

to build and serve the documentation.

{% for data in config.extra.support_levels.values() %}
[^{{ loop.index }}]: {{ data.description }}
{% endfor %}
655 changes: 655 additions & 0 deletions docs/getting_started.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/ibis-for-sql-programmers.ipynb

Some generated files are not rendered by default.

7 changes: 3 additions & 4 deletions docs/index.md
@@ -12,16 +12,15 @@ hide:
---

<div class="install-tutorial-button" markdown>
[Install](./install.md){ .md-button .md-button--primary }
[Tutorial](https://github.com/ibis-project/ibis-examples){ .md-button }
[Getting Started](./getting_started.md){ .md-button .md-button--primary }
[Install](./install.md){ .md-button }
</div>

---

```python title="Write high-level Python code"
>>> import ibis
>>> con = ibis.connect('movielens.sqlite')
>>> movies = con.tables.movies
>>> movies = ibis.examples.ml_latest_small_movies.fetch()
>>> rating_by_year = movies.group_by('year').avg_rating.mean()
>>> q = rating_by_year.order_by(rating_by_year.year.desc())
```
104 changes: 104 additions & 0 deletions docs/release_notes.md
@@ -1,6 +1,110 @@
Release Notes
---

## [5.1.0](https://github.com/ibis-project/ibis/compare/5.0.0...5.1.0) (2023-04-11)


### Features

* **api:** expand `distinct` API for dropping duplicates based on column subsets ([3720ea5](https://github.com/ibis-project/ibis/commit/3720ea5d86b45b5455870ade991d2de755254760))
* **api:** implement pyarrow memtables ([9d4fbbd](https://github.com/ibis-project/ibis/commit/9d4fbbd0e9da85146a15f698be34cd7bb63bfe05))
* **api:** support passing a format string to `Table.relabel` ([0583959](https://github.com/ibis-project/ibis/commit/05839595ca203715e36d528dae83cd68a804a05d))
* **api:** thread kwargs around properly to support more complex connection arguments ([7e0e15b](https://github.com/ibis-project/ibis/commit/7e0e15b0f226b523968384a59942dda840f60911))
* **backends:** add more array functions ([5208801](https://github.com/ibis-project/ibis/commit/5208801cc38ff2ccb02fc9f7e83ad40c44222af1))
* **bigquery:** make `to_pyarrow_batches()` smarter ([42f5987](https://github.com/ibis-project/ibis/commit/42f5987762da25a63a598b5fdf0abc3eea5ae8e0))
* **bigquery:** support bignumeric type ([d7c0f49](https://github.com/ibis-project/ibis/commit/d7c0f49eebae9649b968a9554f62c7175742a3c2))
* default repr to showing all columns in Jupyter notebooks ([91a0811](https://github.com/ibis-project/ibis/commit/91a08113a9913971abba49683953b7e342bbb8aa))
* **druid:** add re_search support ([946202b](https://github.com/ibis-project/ibis/commit/946202b8058ee8df83a8cb4ac326d4c9c94ff1a5))
* **duckdb:** add map operations ([a4c4e77](https://github.com/ibis-project/ibis/commit/a4c4e77be24e3692608d6f1773bf563bf9a39085))
* **duckdb:** support sqlalchemy 2 ([679bb52](https://github.com/ibis-project/ibis/commit/679bb52daa72793ab5ea598e1f0399780c2a0801))
* **mssql:** implement ops.StandardDev, ops.Variance ([e322f1d](https://github.com/ibis-project/ibis/commit/e322f1d5c36d725c9e96b764f86242482e4f577a))
* **pandas:** support memtable in pandas backend ([6e4d621](https://github.com/ibis-project/ibis/commit/6e4d621754557c08e60fd46212ee9eac39594aed)), closes [#5467](https://github.com/ibis-project/ibis/issues/5467)
* **polars:** implement count distinct ([aea4ccd](https://github.com/ibis-project/ibis/commit/aea4ccd7bc85b020d23506c610daf3fe5b204c2f))
* **postgres:** implement `ops.Arbitrary` ([ee8dbab](https://github.com/ibis-project/ibis/commit/ee8dbabfa210257102010d76f7ae7f0cd4d01e05))
* **pyspark:** `pivot_longer` ([f600c90](https://github.com/ibis-project/ibis/commit/f600c90f5abc06f167a1110bcecf66c6b7ed35f2))
* **pyspark:** add ArrayFilter operation ([2b1301e](https://github.com/ibis-project/ibis/commit/2b1301e14fe6abf80ce0d942fbaa5ba5427b9b7a))
* **pyspark:** add ArrayMap operation ([e2c159c](https://github.com/ibis-project/ibis/commit/e2c159cb497eb5268dc253dc50348b86020bd21c))
* **pyspark:** add DateDiff operation ([bfd6109](https://github.com/ibis-project/ibis/commit/bfd61093feb7da46edf56ba9d4b9dbc151700629))
* **pyspark:** add partial support for interval types ([067120d](https://github.com/ibis-project/ibis/commit/067120df1f3c0d2b45af60fc1b28cb4b0463d5fb))
* **pyspark:** add read_csv, read_parquet, and register ([7bd22af](https://github.com/ibis-project/ibis/commit/7bd22af6158ff60d2aa70d41a7d5156a67e14bfa))
* **pyspark:** implement count distinct ([db29e10](https://github.com/ibis-project/ibis/commit/db29e10fe7fd26ef8e9024686fef38fc137546b0))
* **pyspark:** support basic caching ([ab0df7a](https://github.com/ibis-project/ibis/commit/ab0df7aac6f06727d857c7c23a3279f341bdee64))
* **snowflake:** add optional 'connect_args' param ([8bf2043](https://github.com/ibis-project/ibis/commit/8bf2043d1a1a5517e51f0638686596d0428a3f3c))
* **snowflake:** native pyarrow support ([ce3d6a4](https://github.com/ibis-project/ibis/commit/ce3d6a450e780e5efcc65ca056ec3e2ec41a2001))
* **sqlalchemy:** support unknown types ([fde79fa](https://github.com/ibis-project/ibis/commit/fde79fa0ee48b0b87bc7e82023e2b81d621c8cef))
* **sqlite:** implement `ops.Arbitrary` ([9bcdf77](https://github.com/ibis-project/ibis/commit/9bcdf77ddafce75f0e5d8714d01dde81ed0b90f2))
* **sql:** use temp views where possible ([5b9d8c0](https://github.com/ibis-project/ibis/commit/5b9d8c0db244e2742f2eddeca0787661aa516642))
* **table:** implement `pivot_wider` API ([60e7731](https://github.com/ibis-project/ibis/commit/60e7731f5e58c1236c6bf50a29cebe807fa08c77))
* **ux:** move `ibis.expr.selectors` to `ibis.selectors` and deprecate for removal in 6.0 ([0ae639d](https://github.com/ibis-project/ibis/commit/0ae639d6ca8f91cd098f069b4bb118b9c6d05059))


### Bug Fixes

* **api:** disambiguate attribute errors from a missing `resolve` method ([e12c4df](https://github.com/ibis-project/ibis/commit/e12c4df0436a7e8f13f8e63d0af1e7a780012ca6))
* **api:** support filter on literal followed by aggregate ([68d65c8](https://github.com/ibis-project/ibis/commit/68d65c89098563e7a48073837ad8d73ec0f2bdba))
* **clickhouse:** do not render aliases when compiling aggregate expression components ([46caf3b](https://github.com/ibis-project/ibis/commit/46caf3b877a4c7e0457dd78a1085bfcff082a2d9))
* **clickhouse:** ensure that clickhouse depends on sqlalchemy for `make_url` usage ([ea10a27](https://github.com/ibis-project/ibis/commit/ea10a2752e02ab8e30396f7e5244b6a20c80ddd3))
* **clickhouse:** ensure that truncate works ([1639914](https://github.com/ibis-project/ibis/commit/163991453e233cb1a9c92bb571327bf3840d10a1))
* **clickhouse:** fix `create_table` implementation ([5a54489](https://github.com/ibis-project/ibis/commit/5a544898ab221731a6f6a847c24afda64c39e44c))
* **clickhouse:** workaround sqlglot issue with calling `match` ([762f4d6](https://github.com/ibis-project/ibis/commit/762f4d64a61c8e961682bdb7f8ac5832973625fb))
* **deps:** support pandas 2.0 ([4f1d9fe](https://github.com/ibis-project/ibis/commit/4f1d9fefb991733d14d03677bb63fdee93a7fc60))
* **duckdb:** branch to avoid unnecessary dataframe construction ([9d5d943](https://github.com/ibis-project/ibis/commit/9d5d94311889dd99f29affc4524b02f362c55f0f))
* **duckdb:** disable the progress bar by default ([1a1892c](https://github.com/ibis-project/ibis/commit/1a1892c1f567672a16140943d9406c6a0796b4a5))
* **duckdb:** drop use of experimental parallel csv reader ([47d8b92](https://github.com/ibis-project/ibis/commit/47d8b92cc021b8f5fd1b284bab8c1ac8e1e71a07))
* **duckdb:** generate `SIMILAR TO` instead of tilde to workaround sqlglot issue ([434da27](https://github.com/ibis-project/ibis/commit/434da2755a2ed41884b035bcec3810143c6ccd05))
* improve typing signature of .dropna() ([e11de3f](https://github.com/ibis-project/ibis/commit/e11de3fe0aab878fe88ea5a2cf65ebbf6374cc99))
* **mssql:** improve aggregation on expressions ([58aa78d](https://github.com/ibis-project/ibis/commit/58aa78d7ac78c3b4dc7159e184740c6daa8cdb54))
* **mssql:** remove invalid aggregations ([1ce3ef9](https://github.com/ibis-project/ibis/commit/1ce3ef963ec3caf22a2a985873f93666fc17dcb0))
* **polars:** backwards compatibility for the `time_zone` and `time_unit` properties ([3a2c4df](https://github.com/ibis-project/ibis/commit/3a2c4df9c73b2ce99be2b7730b529df53d617be1))
* **postgres:** allow inference of unknown types ([343fb37](https://github.com/ibis-project/ibis/commit/343fb375d11e612807061818c612d387903ca469))
* **pyspark:** fail when aggregation contains a `having` filter ([bd81a9f](https://github.com/ibis-project/ibis/commit/bd81a9ff5f8fafb674a238eb6a0abb31849eb39e))
* **pyspark:** raise proper error when trying to generate sql ([51afc13](https://github.com/ibis-project/ibis/commit/51afc134ecb99fcd81d496e9c2660147320a6c2f))
* **snowflake:** fix new array operations; remove `ArrayRemove` operation ([772668b](https://github.com/ibis-project/ibis/commit/772668bec9d535a21f1c5435e80f47753429182f))
* **snowflake:** make sure ephemeral tables following backend quoting rules ([9a845df](https://github.com/ibis-project/ibis/commit/9a845df3e0c473c129c94c45b3683aaf68863410))
* **snowflake:** make sure pyarrow is used when possible ([01f5154](https://github.com/ibis-project/ibis/commit/01f5154e76c1f24a1e2bf5b71133eecc43ab7c59))
* **sql:** ensure that set operations resolve to a single relation ([3a02965](https://github.com/ibis-project/ibis/commit/3a029653e01ea853ba16200210d7e462618d151b))
* **sql:** generate consistent `pivot_longer` semantics in the presence of multiple `unnest`s ([6bc301a](https://github.com/ibis-project/ibis/commit/6bc301ab07c33b2afb3d66750367e086ddc96400))
* **sqlglot:** work with newer versions ([6f7302d](https://github.com/ibis-project/ibis/commit/6f7302d52cfdb37c4d0b73fbfa0fa9defbf034cb))
* **trino,duckdb,postgres:** make cumulative `notany`/`notall` aggregations work ([c2e985f](https://github.com/ibis-project/ibis/commit/c2e985f493fcb3306cc74793ac457661ee942967))
* **trino:** only support `how='first'` with `arbitrary` reduction ([315b5e7](https://github.com/ibis-project/ibis/commit/315b5e73b264c0f47a4c2be229386b790763de58))
* **ux:** use guaranteed length-1 characters for `NULL` values ([8618789](https://github.com/ibis-project/ibis/commit/86187899803a34921f53063ec114caf693cfdff4))


### Refactors

* **api:** remove explicit use of `.projection` in favor of the shorter `.select` ([73df8df](https://github.com/ibis-project/ibis/commit/73df8df279127021fa42f996225d1f38696e5c14))
* **cache:** factor out ref counted cache ([c816f00](https://github.com/ibis-project/ibis/commit/c816f006779cbeb04bd835e9c14b206ec9bb1f06))
* **duckdb:** simplify `to_pyarrow_batches` implementation ([d6235ee](https://github.com/ibis-project/ibis/commit/d6235ee0d12a0c3136ef09e19832d82169b7bb40))
* **duckdb:** source loaded and installed extensions from duckdb ([fb06262](https://github.com/ibis-project/ibis/commit/fb0626281b03aa0c4239b9b0bd813f2fe0bf1ce2))
* **duckdb:** use native duckdb parquet reader unless auth required ([e9f57eb](https://github.com/ibis-project/ibis/commit/e9f57eb19934c3e84a4a72246a6f2874ad88dcd8))
* generate uuid-based names for temp tables ([a1164df](https://github.com/ibis-project/ibis/commit/a1164df5d1bc4fa454371626a0527d30ca8dd296))
* **memtable:** clean up dispatch code ([9a19302](https://github.com/ibis-project/ibis/commit/9a1930226f22b5707f03e64309e31dd8c59e37ff))
* **memtable:** dedup table proxy code ([3bccec0](https://github.com/ibis-project/ibis/commit/3bccec06ac736674ed72865b1275b6cae6b190fd))
* **sqlalchemy:** remove unused `_meta` instance attributes ([523e198](https://github.com/ibis-project/ibis/commit/523e1981732ca4c36c2c4c6be0a7e871c566dcb7))


### Deprecations

* **api:** deprecate `Table.set_column` in favor of `Table.mutate` ([954a6b7](https://github.com/ibis-project/ibis/commit/954a6b7a88e2727d598d4d2e5c3ac4d7cccd5f7e))


### Documentation

* add a getting started guide ([8fd03ce](https://github.com/ibis-project/ibis/commit/8fd03cefc49eab144e94344711e805bf6b5be40e))
* add warning about comparisons to `None` ([5cf186a](https://github.com/ibis-project/ibis/commit/5cf186acad6578d84ad5b8d63ff9180d3c4a8df6))
* **blog:** add campaign finance blog post ([383c708](https://github.com/ibis-project/ibis/commit/383c708ed0aa4dab61482157c371d32a28ac733d))
* **blog:** add campaign finance to `SUMMARY.md` ([0bdd093](https://github.com/ibis-project/ibis/commit/0bdd093f9ca3728d4c40a8f7ce8abe5301c2f0d2))
* clean up agg argument descriptions and add join examples ([93d3059](https://github.com/ibis-project/ibis/commit/93d3059f33280f625326866d291d7d206fe2077e))
* **comparison:** add a "why ibis" page ([011cc19](https://github.com/ibis-project/ibis/commit/011cc1939eef94e45d688e1ae3dc45a8c3649863))
* move conda before nix in dev setup instructions ([6b2cbaa](https://github.com/ibis-project/ibis/commit/6b2cbaaa33f0c9cc7b0174e082d5155f045cc2fe))
* **nth:** improve docstring for nth() ([fb7b34b](https://github.com/ibis-project/ibis/commit/fb7b34b424cb333cb956d0c20f145e5d2f52f5bc))
* patch docs build to fix anchor links ([51be459](https://github.com/ibis-project/ibis/commit/51be4592473e99b20dcdb56d67c4fece40a0a1b0))
* **penguins:** add citation for palmer penguins data ([679848d](https://github.com/ibis-project/ibis/commit/679848d0c28d1c18156a929344330902e585fb10))
* **penguins:** change to flipper ([eec3706](https://github.com/ibis-project/ibis/commit/eec370606bf25ec2fbb53451036d5000f5b24afe))
* refresh environment setup pages ([b609571](https://github.com/ibis-project/ibis/commit/b6095714b8cfed2c11092fefd330470075b77f66))
* **selectors:** make doctests more complete and actually run them ([c8f2964](https://github.com/ibis-project/ibis/commit/c8f2964e224b07d5d676fd57106043883da4d207))
* style and review fixes in getting started guide ([3b0f8db](https://github.com/ibis-project/ibis/commit/3b0f8dbe93b6112101e084737e8733fa61c6558a))

## [5.0.0](https://github.com/ibis-project/ibis/compare/4.1.0...5.0.0) (2023-03-15)


4 changes: 2 additions & 2 deletions docs/stylesheets/extra.css
@@ -27,8 +27,8 @@
color: #ff9100;
}

.bug {
color: #f50057;
.caution {
color: #eed202;
}

.cancel {
80 changes: 80 additions & 0 deletions docs/why_ibis.md
@@ -0,0 +1,80 @@
---
hide:
- footer
---

# Why try Ibis?

Ibis is a dataframe interface to execution engines with support for [15+
backends](./backends/index.md). Ibis doesn't replace your existing execution
engine; it _extends_ it with powerful abstractions and intuitive syntax.

Ibis works with what you already have, so why not check out our [getting started
guide](./getting_started.md)?

# How does Ibis compare to...

Let us stop you there. Ibis is an interface that empowers you to craft complex
and powerful queries that execute on your engine of choice.

The answer to "how does Ibis compare to `X`?" is "Ibis helps you use `X`."

!!! tip "[Get in touch](https://github.com/ibis-project/ibis/issues) if you're having trouble using `X`!"

Now that we've said that, here are some other tools that you might compare Ibis
with:

## Big Data engines like `BigQuery`, `Snowflake`, `Spark`, ...

See above. Ibis works with your existing execution engine; it doesn't replace it.

## SQL

SQL is the 800 lb gorilla in the room. One of our developers [gave a whole
talk](https://www.youtube.com/watch?v=XdZklxTbCEA) comparing Ibis and SQL, but
we can summarize some key points:

- SQL fails at runtime; Ibis validates expressions as you construct them (see the sketch below)
- Ibis is written in Python and features some pretty killer tab-completion
- Ibis lets you use SQL when you want to (for our SQL-based backends)
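
For the first point, a minimal sketch (the table is unbound, so nothing ever touches a database):

```python
import ibis

t = ibis.table([("species", "string"), ("island", "string")], name="penguins")
t.species.lower()  # fine: builds an expression, validated against the schema
t.speces.lower()   # AttributeError at construction time -- no query ever ran
```
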

If your SQL-fu is strong, we might not convince you to leave it all behind, but
check out our [Ibis for SQL Programmers guide](./ibis-for-sql-programmers.ipynb)
and see if it whets your appetite.

## `pandas`

`pandas` is the 800 lb panda in the room. Ibis, like every dataframe API in the
PyData ecosystem, takes a fair bit of inspiration from `pandas`.

And as with the other engines above, Ibis doesn't replace `pandas`; it works _with_ `pandas`.

`pandas` is an in-memory analysis engine -- if your data are bigger than the
amount of RAM you have, things will go poorly.

Ibis defers execution, and is agnostic to the backend that runs a given query.
If your analysis is causing `pandas` to hit an out-of-memory error, you can use
Ibis to quickly and easily switch to a different backend that supports
out-of-core execution.
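
A sketch of that swap, using `ibis.memtable` to start from an in-memory frame
and the default (DuckDB) backend to execute it:

```python
import pandas as pd
import ibis

df = pd.DataFrame({"species": ["Adelie", "Gentoo"], "mass": [3750, 5000]})
t = ibis.memtable(df)  # wrap the frame in a deferred Ibis table
expr = t.group_by("species").aggregate(avg_mass=t.mass.mean())
print(expr.execute())  # executes only here, on whichever backend is configured
```
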

Ibis syntax is similar to `pandas` syntax, but it isn't a drop-in replacement.
Check out our [Ibis for Pandas Users guide](./ibis-for-pandas-users.ipynb) if
you'd like to give Ibis a try!

## `sqlalchemy` and `sqlglot`

[`sqlalchemy`](https://www.sqlalchemy.org/) and
[`sqlglot`](https://sqlglot.com/sqlglot.html) are amazing tools and we are big
fans. Ibis uses both of these heavily to validate and generate SQL to send to
our SQL backends.

If you need super-fine-grained control over which SQL primitives are used to
construct a query and you are using Python, SQLAlchemy is definitely the tool
for you.

If you are looking for a Python-based SQL transpiler, we strongly recommend
using SQLGlot.

If you are looking for a dataframe API to construct and execute your analytics
queries against a large collection of powerful execution engines, then allow us
to point you at the [Ibis Getting Started guide](./getting_started.md).
18 changes: 9 additions & 9 deletions flake.lock
9 changes: 6 additions & 3 deletions flake.nix
@@ -38,7 +38,6 @@
inherit localSystem;
overlays = [ self.overlays.default ];
};
inherit (pkgs) lib;

backendDevDeps = with pkgs; [
# impala UDFs
@@ -73,13 +72,15 @@

preCommitDeps = with pkgs; [
actionlint
deadnix
git
just
nixpkgs-fmt
prettier
nodePackages.prettier
shellcheck
shfmt
statix
taplo-cli
];

mkDevShell = env: pkgs.mkShell {
@@ -113,6 +114,8 @@
MYSQL_PWD = "ibis";
MSSQL_SA_PASSWORD = "1bis_Testing!";
DRUID_URL = "druid://localhost:8082/druid/v2/sql";

__darwinAllowLocalNetworking = true;
};
in
rec {
@@ -121,7 +124,7 @@

default = pkgs.ibis311;

inherit (pkgs) update-lock-files gen-all-extras gen-examples;
inherit (pkgs) update-lock-files gen-all-extras gen-examples check-poetry-version;
};

devShells = rec {
2 changes: 1 addition & 1 deletion ibis/__init__.py
@@ -1,7 +1,7 @@
"""Initialize Ibis module."""
from __future__ import annotations

__version__ = "5.0.0"
__version__ = "5.1.0"

from ibis import examples, util
from ibis.backends.base import BaseBackend
59 changes: 24 additions & 35 deletions ibis/backends/base/__init__.py
@@ -8,7 +8,6 @@
import re
import sys
import urllib.parse
from collections import Counter
from pathlib import Path
from typing import (
TYPE_CHECKING,
@@ -21,14 +20,13 @@
MutableMapping,
)

from bidict import MutableBidirectionalMapping, bidict

import ibis
import ibis.common.exceptions as exc
import ibis.config
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis import util
from ibis.common.caching import RefCountedCache

if TYPE_CHECKING:
import pandas as pd
@@ -460,11 +458,13 @@ def __init__(self, *args, **kwargs):
self._con_args: tuple[Any] = args
self._con_kwargs: dict[str, Any] = kwargs
# expression cache
self._query_cache: MutableBidirectionalMapping[
ops.TableNode, ops.PhysicalTable
] = bidict()

self._refs = Counter()
self._query_cache = RefCountedCache(
populate=self._load_into_cache,
lookup=lambda name: self.table(name).op(),
finalize=self._clean_up_cached_table,
generate_name=functools.partial(util.gen_name, "cache"),
key=lambda expr: expr.op(),
)

def __getstate__(self):
return dict(
@@ -530,7 +530,7 @@ def connect(self, *args, **kwargs) -> BaseBackend:
new_backend.reconnect()
return new_backend

def _from_url(self, url: str) -> BaseBackend:
def _from_url(self, url: str, **kwargs) -> BaseBackend:
"""Construct an ibis backend from a SQLAlchemy-conforming URL."""
raise NotImplementedError(
f"`_from_url` not implemented for the {self.name} backend"
@@ -902,7 +902,7 @@ def has_operation(cls, operation: type[ops.Value]) -> bool:
f"{cls.name} backend has not implemented `has_operation` API"
)

def _cached(self, expr):
def _cached(self, expr: ir.Table):
"""Cache the provided expression.
All subsequent operations on the returned expression will be performed on the cached data.
@@ -920,38 +920,25 @@ def _cached(self, expr):
"""
op = expr.op()
if (result := self._query_cache.get(op)) is None:
name = util.generate_unique_table_name("cache")
self._load_into_cache(name, expr)
self._query_cache[op] = result = self.table(name).op()
self._refs[op] += 1
self._query_cache.store(expr)
result = self._query_cache[op]
return ir.CachedTable(result)

def _release_cached(self, expr):
def _release_cached(self, expr: ir.CachedTable) -> None:
"""Releases the provided cached expression.
Parameters
----------
expr
Cached expression to release
"""
op = expr.op()
# we need to remove the expression representing the temp table as well
# as the expression that was used to create the temp table
#
# bidict automatically handles this for us; without it we'd have to
# do to the bookkeeping ourselves with two dicts
if (key := self._query_cache.inverse.get(op)) is None:
raise exc.IbisError(
"This expression has already been released. Did you call "
"`.release()` twice on the same expression?"
)
del self._query_cache[expr.op()]

self._refs[key] -= 1
def _load_into_cache(self, name, expr):
raise NotImplementedError(self.name)

if not self._refs[key]:
del self._query_cache[key]
del self._refs[key]
self._clean_up_cached_table(op)
def _clean_up_cached_table(self, op):
raise NotImplementedError(self.name)
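
The `populate`/`lookup`/`finalize` hooks above are what the user-facing
`Table.cache()` rides on. A rough sketch of the round trip, with DuckDB
standing in for any backend that implements the two hook methods:

```python
import ibis

con = ibis.duckdb.connect()  # in-memory database
t = con.create_table("t", ibis.memtable({"a": [1, 2, 3]}))
cached = t.filter(t.a > 1).cache()  # populate: results land in a temp table
print(cached.execute())
cached.release()  # refcount drops to zero -> _clean_up_cached_table runs
```
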


@functools.lru_cache(maxsize=None)
@@ -1004,12 +991,14 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
parsed = urllib.parse.urlparse(url)
scheme = parsed.scheme or "file"

# Merge explicit kwargs with query string, explicit kwargs
# taking precedence
kwargs = dict(urllib.parse.parse_qsl(parsed.query), **kwargs)
orig_kwargs = kwargs.copy()
kwargs = dict(urllib.parse.parse_qsl(parsed.query))

if scheme == "file":
path = parsed.netloc + parsed.path
# Merge explicit kwargs with query string, explicit kwargs
# taking precedence
kwargs.update(orig_kwargs)
if path.endswith(".duckdb"):
return ibis.duckdb.connect(path, **kwargs)
elif path.endswith((".sqlite", ".db")):
@@ -1051,4 +1040,4 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
except AttributeError:
raise ValueError(f"Don't know how to connect to {resource!r}") from None

return backend._from_url(url)
return backend._from_url(url, **orig_kwargs)
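
The net effect of the `orig_kwargs` shuffle: explicit keyword arguments to
`ibis.connect` are now forwarded to `_from_url` and take precedence over
anything parsed from the URL's query string. A hypothetical illustration:

```python
import ibis

# `user` appears both in the URL and as an explicit kwarg; the kwarg wins
# when the backend merges the two (see BaseSQLBackend._from_url below)
con = ibis.connect("postgres://ibis@localhost:5432/ibis_testing", user="admin")
```
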
13 changes: 9 additions & 4 deletions ibis/backends/base/sql/__init__.py
@@ -6,6 +6,8 @@
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Iterable, Mapping

import toolz

import ibis.common.exceptions as exc
import ibis.expr.analysis as an
import ibis.expr.operations as ops
@@ -31,13 +33,15 @@ class BaseSQLBackend(BaseBackend):
table_class = ops.DatabaseTable
table_expr_class = ir.Table

def _from_url(self, url: str) -> BaseBackend:
def _from_url(self, url: str, **kwargs: Any) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Parameters
----------
url
URL with which to connect to a backend.
kwargs
Additional keyword arguments passed to the `connect` method.
Returns
-------
@@ -47,7 +51,7 @@ def _from_url(self, url: str) -> BaseBackend:
import sqlalchemy as sa

url = sa.engine.make_url(url)

new_kwargs = kwargs.copy()
kwargs = {}

for name in ("host", "port", "database", "password"):
@@ -60,8 +64,9 @@
kwargs["user"] = username

kwargs.update(url.query)
self._convert_kwargs(kwargs)
return self.connect(**kwargs)
new_kwargs = toolz.merge(kwargs, new_kwargs)
self._convert_kwargs(new_kwargs)
return self.connect(**new_kwargs)

def table(self, name: str, database: str | None = None) -> ir.Table:
"""Construct a table expression.
24 changes: 16 additions & 8 deletions ibis/backends/base/sql/alchemy/__init__.py
@@ -69,7 +69,6 @@ class BaseAlchemyBackend(BaseSQLBackend):
database_class = AlchemyDatabase
table_class = AlchemyTable
compiler = AlchemyCompiler
quote_table_names = None

def _build_alchemy_url(self, url, host, port, user, password, database, driver):
if url is not None:
@@ -278,7 +277,7 @@ def _columns_from_schema(self, name: str, schema: sch.Schema) -> list[sa.Column]
colname,
to_sqla_type(dialect, dtype),
nullable=dtype.nullable,
quote=self.compiler.translator_class._always_quote_columns,
quote=self.compiler.translator_class._quote_column_names,
)
for colname, dtype in zip(schema.names, schema.types)
]
@@ -295,7 +294,7 @@ def _table_from_schema(
sa.MetaData(),
*columns,
prefixes=prefixes,
quote=self.quote_table_names,
quote=self.compiler.translator_class._quote_table_names,
)

def drop_table(
Expand Down Expand Up @@ -425,7 +424,7 @@ def _get_sqla_table(
sa.MetaData(),
schema=schema,
autoload_with=self.con if autoload else None,
quote=self.quote_table_names,
quote=self.compiler.translator_class._quote_table_names,
)
nulltype_cols = frozenset(
col.name for col in table.c if isinstance(col.type, sa.types.NullType)
@@ -453,7 +452,7 @@ def _handle_failed_column_type_inference(
colname,
to_sqla_type(dialect, type),
nullable=type.nullable,
quote=self.compiler.translator_class._always_quote_columns,
quote=self.compiler.translator_class._quote_column_names,
),
replace_existing=True,
)
@@ -620,7 +619,7 @@ def insert(

def _quote(self, name: str) -> str:
"""Quote an identifier."""
return self.con.dialect.identifier_preparer.quote(name)
preparer = self.con.dialect.identifier_preparer
if self.compiler.translator_class._quote_table_names:
return preparer.quote_identifier(name)
return preparer.quote(name)

def _get_temp_view_definition(
self, name: str, definition: sa.sql.compiler.Compiled
@@ -692,7 +694,7 @@ def create_view(
source = self.compile(obj)
view = sav.CreateView(
sa.Table(
name, sa.MetaData(), schema=database, quote=self.quote_table_names
name,
sa.MetaData(),
schema=database,
quote=self.compiler.translator_class._quote_table_names,
),
source,
or_replace=overwrite,
@@ -708,7 +713,10 @@ def drop_view(

view = sav.DropView(
sa.Table(
name, sa.MetaData(), schema=database, quote=self.quote_table_names
name,
sa.MetaData(),
schema=database,
quote=self.compiler.translator_class._quote_table_names,
),
if_exists=not force,
)
19 changes: 16 additions & 3 deletions ibis/backends/base/sql/alchemy/datatypes.py
@@ -13,6 +13,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported
from ibis.common.collections import FrozenDict

if geospatial_supported:
import geoalchemy2 as ga
@@ -29,10 +30,12 @@ def compiles_array(element, compiler, **kw):


class StructType(sat.UserDefinedType):
cache_ok = True

def __init__(self, fields: Mapping[str, sat.TypeEngine]) -> None:
self.fields = {
name: sat.to_instance(type) for name, type in dict(fields).items()
}
self.fields = FrozenDict(
{name: sat.to_instance(typ) for name, typ in fields.items()}
)


@compiles(StructType, "default")
Expand Down Expand Up @@ -115,6 +118,10 @@ def compiles_uuid(element, compiler, **kw):
return "UUID"


class Unknown(sa.Text):
pass


# TODO(cleanup)
ibis_type_to_sqla = {
dt.Null: sat.NullType,
Expand All @@ -141,6 +148,7 @@ def compiles_uuid(element, compiler, **kw):
dt.UInt64: UInt64,
dt.JSON: sa.JSON,
dt.Interval: sa.Interval,
dt.Unknown: Unknown,
}


Expand Down Expand Up @@ -262,6 +270,11 @@ def sa_json(_, satype, nullable=True):
return dt.JSON(nullable=nullable)


@dt.dtype.register(Dialect, Unknown)
def sa_unknown(_, satype, nullable=True):
return dt.Unknown(nullable=nullable)


if geospatial_supported:

@dt.dtype.register(Dialect, (ga.Geometry, ga.types._GISType))
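
With the `Unknown` passthrough, reflection no longer fails on columns ibis
can't map, such as the `TSVECTOR` column added to `awards_players` earlier in
this diff. A sketch against the compose postgres service (exact dtype repr may
differ):

```python
import ibis

con = ibis.postgres.connect(
    host="localhost", user="postgres", password="postgres", database="ibis_testing"
)
t = con.table("awards_players")
print(t.schema()["search"])  # -> unknown, rather than an inference error
```
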
51 changes: 24 additions & 27 deletions ibis/backends/base/sql/alchemy/query_builder.py
@@ -96,8 +96,11 @@ def _format_table(self, op):
result = ref_op.sqla_table
elif isinstance(ref_op, ops.UnboundTable):
# use SQLAlchemy's TableClause for unbound tables
result = sa.table(
ref_op.name, *translator._schema_to_sqlalchemy_columns(ref_op.schema)
result = sa.Table(
ref_op.name,
sa.MetaData(),
*translator._schema_to_sqlalchemy_columns(ref_op.schema),
quote=translator._quote_table_names,
)
elif isinstance(ref_op, ops.SQLQueryResult):
columns = translator._schema_to_sqlalchemy_columns(ref_op.schema)
@@ -109,8 +112,11 @@
# TODO(kszucs): avoid converting to expression
child_expr = ref_op.child.to_expr()
definition = child_expr.compile()
result = sa.table(
ref_op.name, *translator._schema_to_sqlalchemy_columns(ref_op.schema)
result = sa.Table(
ref_op.name,
sa.MetaData(),
*translator._schema_to_sqlalchemy_columns(ref_op.schema),
quote=translator._quote_table_names,
)
backend = child_expr._find_backend()
backend._create_temp_view(view=result, definition=definition)
@@ -141,7 +147,12 @@ def _format_table(self, op):
def _format_in_memory_table(self, op, ref_op, translator):
columns = translator._schema_to_sqlalchemy_columns(ref_op.schema)
if self.context.compiler.cheap_in_memory_tables:
result = sa.table(ref_op.name, *columns)
result = sa.Table(
ref_op.name,
sa.MetaData(),
*columns,
quote=translator._quote_table_names,
)
elif self.context.compiler.support_values_syntax_in_select:
rows = list(ref_op.data.to_frame().itertuples(index=False))
result = sa.values(*columns, name=ref_op.name).data(rows)
@@ -330,30 +341,16 @@ def _convert_group_by(self, exprs):
class AlchemySetOp(SetOp):
def compile(self):
context = self.context
selects = []

def call(distinct, *args):
return (
self.distinct_func(*args) if distinct else self.non_distinct_func(*args)
)
distincts = self.distincts

for table in self.tables:
table_set = context.get_compiled_expr(table)
selects.append(table_set.cte().select())
assert (
len(set(distincts)) == 1
), "more than one distinct found; this shouldn't be possible because all unions are projected"

if len(set(self.distincts)) == 1:
# distinct is either all True or all False, handle with a single
# call. This generates much more concise SQL.
return call(self.distincts[0], *selects)
else:
# We need to iteratively apply the set operations to handle
# disparate `distinct` values. Subqueries _must_ be converted using
# `.subquery().select()` to get sqlalchemy to put parenthesis in
# the proper places.
result = selects[0]
for select, distinct in zip(selects[1:], self.distincts):
result = call(distinct, result.subquery().select(), select)
return result
func = self.distinct_func if distincts[0] else self.non_distinct_func
return func(
*(context.get_compiled_expr(table).cte().select() for table in self.tables)
)


class AlchemyUnion(AlchemySetOp):
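
The recurring change in this file swaps `sa.table` (a bare `TableClause`) for `sa.Table`, which requires a `MetaData` but accepts a `quote=` flag, giving each backend explicit control over identifier quoting. A small illustration of the difference, independent of ibis:

```python
import sqlalchemy as sa

# old style: TableClause offers no quoting control
clause = sa.table("events", sa.column("userId"))

# new style: Table takes a MetaData plus an explicit quote flag
table = sa.Table(
    "events",
    sa.MetaData(),
    sa.Column("userId", sa.String, quote=True),
    quote=True,
)

# emits quoted identifiers even though neither name is reserved:
#   SELECT "events"."userId" FROM "events"
print(sa.select(table))
```
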
16 changes: 8 additions & 8 deletions ibis/backends/base/sql/alchemy/registry.py
@@ -23,16 +23,16 @@ class substr(GenericFunction):
inherit_cache = True


def variance_reduction(func_name):
suffix = {'sample': 'samp', 'pop': 'pop'}
def variance_reduction(func_name, suffix=None):
suffix = suffix or {'sample': '_samp', 'pop': '_pop'}

def variance_compiler(t, op):
arg = op.arg

if arg.output_dtype.is_boolean():
arg = ops.Cast(op.arg, to=dt.int32)

func = getattr(sa.func, f'{func_name}_{suffix[op.how]}')
func = getattr(sa.func, f'{func_name}{suffix[op.how]}')

if op.where is not None:
arg = ops.Where(op.where, arg, None)
@@ -117,7 +117,7 @@ def _table_column(t, op):
sa_table = get_sqla_table(ctx, table)

out_expr = get_col(sa_table, op)
out_expr.quote = t._always_quote_columns
out_expr.quote = t._quote_column_names

# If the column does not originate from the table set in the current SELECT
# context, we should format as a subquery
Expand Down Expand Up @@ -154,7 +154,7 @@ def _exists_subquery(t, op):
filtered = (
op.foreign_table.to_expr()
.filter([pred.to_expr() for pred in op.predicates])
.projection([ir.literal(1).name("")])
.select(ir.literal(1).name(""))
)

sub_ctx = ctx.subcontext()
@@ -175,7 +175,7 @@ def _cast(t, op):

# specialize going from an integer type to a timestamp
if arg_dtype.is_integer() and typ.is_timestamp():
return t.integer_to_timestamp(sa_arg)
return t.integer_to_timestamp(sa_arg, tz=typ.timezone)

if arg_dtype.is_binary() and typ.is_string():
return sa.func.encode(sa_arg, 'escape')
Expand Down Expand Up @@ -369,8 +369,8 @@ def _window_function(t, window):
end = _translate_window_boundary(window.frame.end)
additional_params = {how: (start, end)}

result = reduction.over(
partition_by=partition_by, order_by=order_by, **additional_params
result = sa.over(
reduction, partition_by=partition_by, order_by=order_by, **additional_params
)

if isinstance(window.func, (ops.RowNumber, ops.DenseRank, ops.MinRank, ops.NTile)):
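
Folding the separator into the suffix means dialects whose variance functions are not underscore-separated can reuse the helper instead of patching it. A hedged usage sketch; `variance_reduction` and `operation_registry` are the names from the module above:

```python
import ibis.expr.operations as ops

# default suffixes produce var_samp / var_pop
operation_registry[ops.Variance] = variance_reduction("var")

# a dialect with camelCase names (e.g. varSamp / varPop) can pass its own
# suffix mapping instead
operation_registry[ops.Variance] = variance_reduction(
    "var", suffix={"sample": "Samp", "pop": "Pop"}
)
```
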
21 changes: 16 additions & 5 deletions ibis/backends/base/sql/alchemy/translator.py
@@ -44,10 +44,18 @@ class AlchemyExprTranslator(ExprTranslator):

_bool_aggs_need_cast_to_int32 = True
_has_reduction_filter_syntax = False
_integer_to_timestamp = staticmethod(sa.func.to_timestamp)
_timestamp_type = sa.TIMESTAMP

def integer_to_timestamp(self, arg, tz: str | None = None):
return sa.cast(
self._integer_to_timestamp(arg),
self._timestamp_type(timezone=tz is not None),
)

integer_to_timestamp = sa.func.to_timestamp
native_json_type = True
_always_quote_columns = None # let the dialect decide how to quote
_quote_column_names = None # let the dialect decide how to quote
_quote_table_names = None

_require_order_by = (
ops.DenseRank,
@@ -70,11 +78,14 @@ def dialect(self) -> sa.engine.interfaces.Dialect:

def _schema_to_sqlalchemy_columns(self, schema):
return [
sa.column(name, self.get_sqla_type(dtype)) for name, dtype in schema.items()
sa.Column(name, self.get_sqla_type(dtype), quote=self._quote_column_names)
for name, dtype in schema.items()
]

def name(self, translated, name, force=True):
return translated.label(name)
def name(self, translated, name, force=False):
return translated.label(
sa.sql.quoted_name(name, quote=force or self._quote_column_names)
)

def get_sqla_type(self, data_type):
return to_sqla_type(self.dialect, data_type)
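
The net effect of these translator changes is that quoting flows through `_quote_column_names` and `_quote_table_names`, with `None` deferring to the dialect, and `name` now defaults to `force=False` so labels are no longer quoted unconditionally. A sketch of the `sa.sql.quoted_name` behavior that `name` relies on:

```python
import sqlalchemy as sa

expr = sa.literal(1)

# quote=None: the dialect decides (reserved words still get quoted)
lbl = expr.label(sa.sql.quoted_name("sum", quote=None))

# quote=True: always quoted, preserving case on case-folding backends
lbl_forced = expr.label(sa.sql.quoted_name("Total", quote=True))
```
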
2 changes: 1 addition & 1 deletion ibis/backends/base/sql/compiler/select_builder.py
@@ -301,7 +301,7 @@ def _collect_Selection(self, op, toplevel=False):
self.table_set = table
self.filters = filters

def _collect_PandasInMemoryTable(self, node, toplevel=False):
def _collect_InMemoryTable(self, node, toplevel=False):
if toplevel:
self.select_set = [node]
self.table_set = node
17 changes: 7 additions & 10 deletions ibis/backends/base/sql/compiler/translator.py
@@ -73,22 +73,19 @@ def get_compiled_expr(self, node):
self.top_context.subquery_memo[node] = result
return result

def make_alias(self, node):
i = self._alias_counter
def _next_alias(self) -> str:
alias = self._alias_counter
self._alias_counter += 1
return f"t{alias:d}"

def make_alias(self, node):
# check for existing tables that we're referencing from a parent context
for ctx in itertools.islice(self._contexts(), 1, None):
try:
alias = ctx.table_refs[node]
except KeyError:
pass
else:
if (alias := ctx.table_refs.get(node)) is not None:
self.set_ref(node, alias)
return

self._alias_counter += 1
alias = f't{i:d}'
self.set_ref(node, alias)
self.set_ref(node, self._next_alias())

def _contexts(
self,
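
The refactor isolates counter management in `_next_alias` and replaces the try/except over parent contexts with a walrus lookup. A condensed, self-contained model of the resulting behavior (the real context tracks much more state):

```python
class AliasContext:
    """Illustrative stand-in for the query context's alias bookkeeping."""

    def __init__(self, parent=None):
        self.table_refs = {}
        self.parent = parent
        self._alias_counter = 0

    def _next_alias(self) -> str:
        alias = self._alias_counter
        self._alias_counter += 1
        return f"t{alias:d}"

    def make_alias(self, node) -> None:
        # reuse an alias already assigned in an enclosing query context
        ctx = self.parent
        while ctx is not None:
            if (alias := ctx.table_refs.get(node)) is not None:
                self.table_refs[node] = alias
                return
            ctx = ctx.parent
        self.table_refs[node] = self._next_alias()
```
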
4 changes: 2 additions & 2 deletions ibis/backends/base/sql/registry/main.py
@@ -150,7 +150,7 @@ def table_column(translator, op):
# context, we should format as a subquery
if translator.permit_subquery and ctx.is_foreign_expr(op.table):
# TODO(kszucs): avoid the expression roundtrip
proj_expr = op.table.to_expr().projection([op.name]).to_array().op()
proj_expr = op.table.to_expr().select([op.name]).to_array().op()
return table_array_view(translator, proj_expr)

alias = ctx.get_ref(op.table, search_parents=True)
@@ -168,7 +168,7 @@ def exists_subquery(translator, op):
filtered = op.foreign_table.to_expr().filter(
[pred.to_expr() for pred in op.predicates]
)
node = filtered.projection([dummy]).op()
node = filtered.select(dummy).op()

subquery = ctx.get_compiled_expr(node)

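
Both hunks here are the same mechanical change: `projection` is the legacy spelling of `select`, and the two construct identical operations. For example:

```python
import ibis

t = ibis.table(dict(a="int64", b="string"), name="t")
assert t.select("a").equals(t.projection(["a"]))
```
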
66 changes: 37 additions & 29 deletions ibis/backends/bigquery/__init__.py
@@ -3,8 +3,7 @@
from __future__ import annotations

import contextlib
import warnings
from typing import TYPE_CHECKING, Any, Mapping
from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping
from urllib.parse import parse_qs, urlparse

import google.auth.credentials
@@ -31,12 +30,14 @@
rename_partitioned_column,
)
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.util import deprecated

with contextlib.suppress(ImportError):
from ibis.backends.bigquery.udf import udf # noqa: F401

if TYPE_CHECKING:
import pyarrow as pa
from google.cloud.bigquery.table import RowIterator

SCOPES = ["https://www.googleapis.com/auth/bigquery"]
EXTERNAL_DATA_SCOPES = [
@@ -68,12 +69,13 @@ class Backend(BaseSQLBackend):
database_class = BigQueryDatabase
table_class = BigQueryTable

def _from_url(self, url):
def _from_url(self, url: str, **kwargs):
result = urlparse(url)
params = parse_qs(result.query)
return self.connect(
project_id=result.netloc or params.get("project_id", [""])[0],
dataset_id=result.path[1:] or params.get("dataset_id", [""])[0],
**kwargs,
)

def do_connect(
Expand Down Expand Up @@ -305,19 +307,15 @@ def execute(self, expr, params=None, limit="default", **kwargs):

return result

@deprecated(
instead="use name in con.list_databases()", as_of="2.0", removed_in="6.0"
)
def exists_database(self, name):
"""Return whether a database name exists in the current connection.
Deprecated in Ibis 2.0. Use `name in client.list_databases()`
instead.
"""
warnings.warn(
"`client.exists_database(name)` is deprecated, and will be "
"removed in a future version of Ibis. Use "
"`name in client.list_databases()` instead.",
FutureWarning,
)

project, dataset = self._parse_project_and_dataset(name)
client = self.client
dataset_ref = client.dataset(dataset, project=project)
@@ -328,19 +326,15 @@ def exists_table(self, name: str, database: str | None = None) -> bool:
else:
return True

@deprecated(
instead="use `table in con.list_tables()`", as_of="2.0", removed_in="6.0"
)
def exists_table(self, name: str, database: str | None = None) -> bool:
"""Return whether a table name exists in the database.
Deprecated in Ibis 2.0. Use `name in client.list_tables()`
instead.
"""
warnings.warn(
"`client.exists_table(name)` is deprecated, and will be "
"removed in a future version of Ibis. Use "
"`name in client.list_tables()` instead.",
FutureWarning,
)

table_id = self._fully_qualified_name(name, database)
client = self.client
try:
@@ -355,22 +349,31 @@ def fetch_from_cursor(self, cursor, schema):
df = arrow_t.to_pandas(timestamp_as_object=True)
return schema.apply_to(df)

def _cursor_to_arrow(self, cursor):
def _cursor_to_arrow(
self,
cursor,
*,
method: Callable[[RowIterator], pa.Table | Iterable[pa.RecordBatch]]
| None = None,
chunk_size: int | None = None,
):
if method is None:
method = lambda result: result.to_arrow(
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=True,
)
query = cursor.query
query_result = query.result()
query_result = query.result(page_size=chunk_size)
# workaround potentially not having the ability to create read sessions
# in the dataset project
orig_project = query_result._project
query_result._project = self.billing_project
try:
arrow_table = query_result.to_arrow(
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=True,
)
arrow_obj = method(query_result)
finally:
query_result._project = orig_project
return arrow_table
return arrow_obj

def to_pyarrow(
self,
@@ -403,14 +406,19 @@ def to_pyarrow_batches(
chunk_size: int = 1_000_000,
**kwargs: Any,
):
self._import_pyarrow()
pa = self._import_pyarrow()

schema = expr.as_table().schema()

# kind of pointless, but it'll work if there's enough memory
query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params)
sql = query_ast.compile()
cursor = self.raw_sql(sql, params=params, **kwargs)
table = self._cursor_to_arrow(cursor)
return table.to_reader(chunk_size)
batch_iter = self._cursor_to_arrow(
cursor,
method=lambda result: result.to_arrow_iterable(),
chunk_size=chunk_size,
)
return pa.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter)

def get_schema(self, name, database=None):
table_id = self._fully_qualified_name(name, database)
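
With `_cursor_to_arrow` parameterized over a `method` callback, `to_pyarrow` keeps the materialize-everything `to_arrow` path, while `to_pyarrow_batches` passes `to_arrow_iterable` and rewraps the stream in a `RecordBatchReader`, so results arrive page by page. A hedged consumption sketch (project, dataset, table, and column names are placeholders):

```python
import ibis

con = ibis.bigquery.connect(project_id="my-project", dataset_id="my_dataset")
t = con.table("events")  # assumed to have a numeric `value` column
expr = t.filter(t.value > 0)

# batches are fetched lazily; peak memory is bounded by the page size
reader = con.to_pyarrow_batches(expr, chunk_size=100_000)
for batch in reader:
    print(batch.num_rows)  # each item is a pyarrow.RecordBatch
```
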
25 changes: 19 additions & 6 deletions ibis/backends/bigquery/tests/system/test_client.py
@@ -7,11 +7,13 @@
import pandas.testing as tm
import pytest
import pytz
import toolz

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.bigquery.client import bigquery_param
from ibis.util import guid


def test_column_execute(alltypes, df):
Expand Down Expand Up @@ -72,15 +74,12 @@ def test_count_distinct_with_filter(alltypes):
assert result == expected


@pytest.mark.parametrize("type", ["date", dt.date])
def test_cast_string_to_date(alltypes, df, type):
import toolz

def test_cast_string_to_date(alltypes, df):
string_col = alltypes.date_string_col
month, day, year = toolz.take(3, string_col.split("/"))

expr = "20" + ibis.literal("-").join([year, month, day])
expr = expr.cast(type)
expr = expr.cast("date")

result = (
expr.execute()
@@ -90,7 +89,7 @@ def test_cast_string_to_date(alltypes, df, type):
.rename("date_string_col")
)
expected = (
pd.to_datetime(df.date_string_col)
pd.to_datetime(df.date_string_col, format="%m/%d/%y")
.dt.normalize()
.sort_values()
.reset_index(drop=True)
@@ -337,3 +336,17 @@ def test_approx_median(alltypes):
# Since 6 and 7 are right on the edge for median in the range of months
# (1-12), accept either for the approximate function.
assert result in (6, 7)


def test_create_table_bignumeric(client):
temp_table_name = f"temp_to_table_{guid()[:6]}"
schema = ibis.schema({'col1': dt.Decimal(76, 38)})
temporary_table = client.create_table(temp_table_name, schema=schema)
try:
client.raw_sql(
f"INSERT {client.current_database}.{temp_table_name} (col1) VALUES (10.2)"
)
df = temporary_table.execute()
assert df.shape == (1, 1)
finally:
client.drop_table(temp_table_name)
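
For context on the new test: BigQuery's NUMERIC corresponds to `dt.Decimal(38, 9)` and BIGNUMERIC to `dt.Decimal(76, 38)`, so `create_table` here exercises the wider of the two types end to end. The mapping in schema form:

```python
import ibis
import ibis.expr.datatypes as dt

schema = ibis.schema(
    {
        "numeric_col": dt.Decimal(38, 9),      # BigQuery NUMERIC
        "bignumeric_col": dt.Decimal(76, 38),  # BigQuery BIGNUMERIC
    }
)
```
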
@@ -1,5 +1,8 @@
SELECT t0.*
FROM t0 t0
EXCEPT DISTINCT
SELECT t0.*
FROM t1 t0
SELECT t0.`a`
FROM (
SELECT t1.*
FROM t0 t1
EXCEPT DISTINCT
SELECT t1.*
FROM t1 t1
) t0
@@ -1,5 +1,8 @@
SELECT t0.*
FROM t0 t0
INTERSECT DISTINCT
SELECT t0.*
FROM t1 t0
SELECT t0.`a`
FROM (
SELECT t1.*
FROM t0 t1
INTERSECT DISTINCT
SELECT t1.*
FROM t1 t1
) t0
@@ -1,5 +1,8 @@
SELECT t0.*
FROM t0 t0
UNION ALL
SELECT t0.*
FROM t1 t0
SELECT t0.`a`
FROM (
SELECT t1.*
FROM t0 t1
UNION ALL
SELECT t1.*
FROM t1 t1
) t0
@@ -1,5 +1,8 @@
SELECT t0.*
FROM t0 t0
UNION DISTINCT
SELECT t0.*
FROM t1 t0
SELECT t0.`a`
FROM (
SELECT t1.*
FROM t0 t1
UNION DISTINCT
SELECT t1.*
FROM t1 t1
) t0
@@ -1,5 +1,11 @@
SELECT t0.*
FROM functional_alltypes t0
UNION ALL
SELECT t0.*
FROM functional_alltypes t0
SELECT t0.`index`, t0.`Unnamed_0`, t0.`id`, t0.`bool_col`, t0.`tinyint_col`,
t0.`smallint_col`, t0.`int_col`, t0.`bigint_col`, t0.`float_col`,
t0.`double_col`, t0.`date_string_col`, t0.`string_col`,
t0.`timestamp_col`, t0.`year`, t0.`month`
FROM (
SELECT t1.*
FROM functional_alltypes t1
UNION ALL
SELECT t1.*
FROM functional_alltypes t1
) t0
@@ -1,5 +1,11 @@
SELECT t0.*
FROM functional_alltypes t0
UNION DISTINCT
SELECT t0.*
FROM functional_alltypes t0
SELECT t0.`index`, t0.`Unnamed_0`, t0.`id`, t0.`bool_col`, t0.`tinyint_col`,
t0.`smallint_col`, t0.`int_col`, t0.`bigint_col`, t0.`float_col`,
t0.`double_col`, t0.`date_string_col`, t0.`string_col`,
t0.`timestamp_col`, t0.`year`, t0.`month`
FROM (
SELECT t1.*
FROM functional_alltypes t1
UNION DISTINCT
SELECT t1.*
FROM functional_alltypes t1
) t0
@@ -1,15 +1,35 @@
WITH t0 AS (
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION ALL
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
UNION ALL
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
SELECT t1.`string_col`, t1.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
),
t2 AS (
SELECT t3.`string_col`, t3.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION ALL
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t3
)
SELECT *
FROM t2
UNION ALL
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t1
@@ -1,15 +1,35 @@
WITH t0 AS (
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION DISTINCT
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
UNION ALL
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
SELECT t1.`string_col`, t1.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
),
t2 AS (
SELECT t3.`string_col`, t3.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION DISTINCT
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t3
)
SELECT *
FROM t2
UNION ALL
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t1
@@ -1,15 +1,35 @@
WITH t0 AS (
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION ALL
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
UNION DISTINCT
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
SELECT t1.`string_col`, t1.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
),
t2 AS (
SELECT t3.`string_col`, t3.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION ALL
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t3
)
SELECT *
FROM t2
UNION DISTINCT
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t1
@@ -1,15 +1,35 @@
WITH t0 AS (
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION DISTINCT
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
UNION DISTINCT
SELECT t1.`string_col`, sum(t1.`double_col`) AS `metric`
FROM functional_alltypes t1
GROUP BY 1
SELECT t1.`string_col`, t1.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
),
t2 AS (
SELECT t3.`string_col`, t3.`metric`
FROM (
WITH t0 AS (
SELECT t2.`string_col`, sum(t2.`double_col`) AS `metric`
FROM functional_alltypes t2
GROUP BY 1
)
SELECT *
FROM t0
UNION DISTINCT
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t3
)
SELECT *
FROM t2
UNION DISTINCT
SELECT t4.`string_col`, sum(t4.`double_col`) AS `metric`
FROM functional_alltypes t4
GROUP BY 1
) t1
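
All of these snapshot updates fall out of the set-operation changes above: a union, intersect, or except now compiles as a parenthesized subquery whose columns the outer SELECT lists explicitly, and chains with mixed `distinct` flags nest instead of flattening. An expression of roughly this shape reproduces the nested form (a sketch, compiled without a connection; the table name simply matches the snapshots):

```python
import ibis

t = ibis.table(
    dict(string_col="string", double_col="float64"),
    name="functional_alltypes",
)
metric = t.group_by("string_col").aggregate(metric=t.double_col.sum())

# mixed distinct flags force the nested-subquery form
expr = metric.union(metric, distinct=True).union(metric, distinct=False)
print(ibis.impala.compile(expr))
```
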
149 changes: 100 additions & 49 deletions ibis/backends/clickhouse/__init__.py
@@ -21,7 +21,6 @@
from ibis.backends.base import BaseBackend
from ibis.backends.clickhouse.compiler import translate
from ibis.backends.clickhouse.datatypes import parse, serialize
from ibis.config import options

if TYPE_CHECKING:
import pandas as pd
@@ -111,13 +110,15 @@ def sql(self, query: str, schema=None) -> ir.Table:
schema = self._get_schema_using_query(query)
return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr()

def _from_url(self, url: str) -> BaseBackend:
def _from_url(self, url: str, **kwargs) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Parameters
----------
url
URL with which to connect to a backend.
kwargs
Additional keyword arguments
Returns
-------
@@ -126,11 +127,14 @@ def _from_url(self, url: str) -> BaseBackend:
"""
url = sa.engine.make_url(url)

kwargs = {
name: value
for name in ("host", "port", "database", "password")
if (value := getattr(url, name, None))
}
kwargs = toolz.merge(
{
name: value
for name in ("host", "port", "database", "password")
if (value := getattr(url, name, None))
},
kwargs,
)
if username := url.username:
kwargs["user"] = username

@@ -255,10 +259,22 @@ def _normalize_external_tables(self, external_tables=None):
raise TypeError(f'Schema is empty for external table {name}')

df = obj.data.to_frame()
structure = list(zip(schema.names, map(serialize, schema.types)))
external_tables_list.append(
dict(name=name, data=df.to_dict("records"), structure=structure)
structure = list(
zip(
schema.names,
map(
serialize,
(
# unwrap nested structures because clickhouse does
# not accept nullable arrays, maps or structs
typ.copy(nullable=not typ.is_nested())
for typ in schema.types
),
),
)
)
data = dict(name=name, data=df.to_dict("records"), structure=structure)
external_tables_list.append(data)
return external_tables_list

def _client_execute(self, query, external_tables=None):
@@ -398,7 +414,8 @@ def table(self, name: str, database: str | None = None) -> ir.Table:
Table expression
"""
schema = self.get_schema(name, database=database)
return ClickhouseTable(ops.DatabaseTable(name, schema, self))
qname = self._fully_qualified_name(name, database)
return ClickhouseTable(ops.DatabaseTable(qname, schema, self))

def raw_sql(
self,
@@ -440,14 +457,12 @@ def close(self):
self._client.disconnect()
self.con.close()

def _fully_qualified_name(self, name, database):
return sg.table(name, db=database or self.current_database).sql()
def _fully_qualified_name(self, name: str, database: str | None) -> str:
return sg.table(name, db=database or self.current_database or None).sql(
dialect="clickhouse"
)

def get_schema(
self,
table_name: str,
database: str | None = None,
) -> sch.Schema:
def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema:
"""Return a Schema object for the indicated table and database.
Parameters
@@ -468,22 +483,16 @@ def get_schema(
f"DESCRIBE {qualified_name}"
)

return sch.Schema.from_tuples(zip(column_names, map(parse, types)))

def _ensure_temp_db_exists(self):
name = (options.clickhouse.temp_db,)
if name not in self.list_databases():
self.create_database(name, force=True)
return sch.Schema(dict(zip(column_names, map(parse, types))))

def _get_schema_using_query(self, query: str) -> sch.Schema:
[(raw_plans,)] = self._client.execute(
f"EXPLAIN json = 1, description = 0, header = 1 {query}"
)
[plan] = json.loads(raw_plans)
fields = [
(field["Name"], parse(field["Type"])) for field in plan["Plan"]["Header"]
]
return sch.Schema.from_tuples(fields)
return sch.Schema(
{field["Name"]: parse(field["Type"]) for field in plan["Plan"]["Header"]}
)

@classmethod
def has_operation(cls, operation: type[ops.Value]) -> bool:
@@ -494,21 +503,21 @@ def has_operation(cls, operation: type[ops.Value]) -> bool:
def create_database(
self, name: str, *, force: bool = False, engine: str = "Atomic"
) -> None:
self.raw_sql(
f"CREATE DATABASE {'IF NOT EXISTS ' * force}{name} ENGINE = {engine}"
)
if_not_exists = "IF NOT EXISTS " * force
self.raw_sql(f"CREATE DATABASE {if_not_exists}{name} ENGINE = {engine}")

def drop_database(self, name: str, *, force: bool = False) -> None:
self.raw_sql(f"DROP DATABASE {'IF EXISTS ' * force}{name}")
if_exists = "IF EXISTS " * force
self.raw_sql(f"DROP DATABASE {if_exists}{name}")

def truncate_table(self, name: str, database: str | None = None) -> None:
ident = ".".join(filter(None, (database, name)))
self.raw_sql(f"DELETE FROM {ident}")
ident = self._fully_qualified_name(name, database)
self.raw_sql(f"TRUNCATE TABLE {ident}")

def drop_table(
self, name: str, database: str | None = None, force: bool = False
) -> None:
ident = ".".join(filter(None, (database, name)))
ident = self._fully_qualified_name(name, database)
self.raw_sql(f"DROP TABLE {'IF EXISTS ' * force}{ident}")

def create_table(
@@ -521,30 +530,67 @@ def create_table(
temp: bool = False,
overwrite: bool = False,
# backend specific arguments
engine: str | None,
engine: str,
order_by: Iterable[str] | None = None,
partition_by: Iterable[str] | None = None,
sample_by: str | None = None,
settings: Mapping[str, Any] | None = None,
) -> ir.Table:
"""Create a table in a ClickHouse database.
Parameters
----------
name
Name of the table to create
obj
Optional data to create the table with
schema
Optional names and types of the table
database
Database to create the table in
temp
Create a temporary table. This is not yet supported, and exists for
API compatibility.
overwrite
Whether to overwrite the table
engine
The table engine to use. See [ClickHouse's `CREATE TABLE` documentation](https://clickhouse.com/docs/en/sql-reference/statements/create/table)
for specifics.
order_by
String column names to order by. Required for some table engines like `MergeTree`.
partition_by
String column names to partition by
sample_by
String column names to sample by
settings
Key-value pairs of settings for table creation
Returns
-------
Table
The new table
"""
if temp:
raise com.IbisError("ClickHouse temporary tables are not yet supported")

tmp = "TEMPORARY " * temp
replace = "OR REPLACE " * overwrite
code = f"CREATE {replace}{tmp}TABLE {name}"
code = (
f"CREATE {replace}{tmp}TABLE {self._fully_qualified_name(name, database)}"
)

if obj is None and schema is None:
raise com.IbisError("The schema or obj parameter is required")

if schema is not None:
code += f" ({schema})"
serialized_schema = ", ".join(
f"`{name}` {serialize(typ)}" for name, typ in schema.items()
)
code += f" ({serialized_schema})"

if isinstance(obj, pd.DataFrame):
if obj is not None and not isinstance(obj, ir.Expr):
obj = ibis.memtable(obj, schema=schema)

if obj is not None:
self._register_in_memory_tables(obj)
query = self.compile(obj)
code += f" AS {query}"

code += f" ENGINE = {engine}"

if order_by is not None:
@@ -560,6 +606,11 @@ def create_table(
kvs = ", ".join(f"{name}={value!r}" for name, value in settings.items())
code += f" SETTINGS {kvs}"

if obj is not None:
self._register_in_memory_tables(obj)
query = self.compile(obj)
code += f" AS {query}"

self.raw_sql(code)
return self.table(name, database=database)

@@ -571,16 +622,16 @@ def create_view(
database: str | None = None,
overwrite: bool = False,
) -> ir.Table:
name = ".".join(filter(None, (database, name)))
qualname = self._fully_qualified_name(name, database)
replace = "OR REPLACE " * overwrite
query = self.compile(obj)
code = f"CREATE {replace}VIEW {name} AS {query}"
code = f"CREATE {replace}VIEW {qualname} AS {query}"
self.raw_sql(code)
return self.table(name, database=database)

def drop_view(
self, name: str, *, database: str | None = None, force: bool = False
) -> None:
name = ".".join(filter(None, (database, name)))
if_not_exists = "IF EXISTS " * force
self.raw_sql(f"DROP VIEW {if_not_exists}{name}")
name = self._fully_qualified_name(name, database)
if_exists = "IF EXISTS " * force
self.raw_sql(f"DROP VIEW {if_exists}{name}")
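
A usage sketch of the expanded `create_table` signature, with `engine` now required and the MergeTree-specific clauses supplied (host, table, and column names are placeholders):

```python
import ibis

con = ibis.clickhouse.connect(host="localhost", port=9000)
events = con.create_table(
    "events",
    schema=ibis.schema(
        {"ts": "!timestamp", "user": "!string", "value": "float64"}
    ),
    engine="MergeTree",
    order_by=["ts"],  # MergeTree requires an ORDER BY clause
    partition_by=["user"],
    settings={"index_granularity": 8192},
)
```
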
9 changes: 5 additions & 4 deletions ibis/backends/clickhouse/compiler/relations.py
@@ -26,7 +26,7 @@ def _dummy(op: ops.DummyTable, **kw):

@translate_rel.register(ops.PhysicalTable)
def _physical_table(op: ops.PhysicalTable, **_):
return sg.table(op.name)
return sg.parse_one(op.name, into=sg.exp.Table)


@translate_rel.register(ops.Selection)
@@ -72,6 +72,7 @@ def _selection(op: ops.Selection, *, table, needs_alias=False, **kw):
@translate_rel.register(ops.Aggregation)
def _aggregation(op: ops.Aggregation, *, table, **kw):
tr_val = partial(translate_val, **kw)
tr_val_no_alias = partial(translate_val, render_aliases=False, **kw)

by = tuple(map(tr_val, op.by))
metrics = tuple(map(tr_val, op.metrics))
Expand All @@ -82,13 +83,13 @@ def _aggregation(op: ops.Aggregation, *, table, **kw):
sel = sel.group_by(*map(str, range(1, len(by) + 1)), dialect="clickhouse")

if predicates := op.predicates:
sel = sel.where(*map(tr_val, predicates), dialect="clickhouse")
sel = sel.where(*map(tr_val_no_alias, predicates), dialect="clickhouse")

if having := op.having:
sel = sel.having(*map(tr_val, having), dialect="clickhouse")
sel = sel.having(*map(tr_val_no_alias, having), dialect="clickhouse")

if sort_keys := op.sort_keys:
sel = sel.order_by(*map(tr_val, sort_keys), dialect="clickhouse")
sel = sel.order_by(*map(tr_val_no_alias, sort_keys), dialect="clickhouse")

return sel
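
Disabling alias rendering for predicates, `HAVING`, and sort keys matters because `expr AS name` is only valid in the projection list; with `render_aliases=False` the translator emits the bare expression in those clauses. Roughly (a sketch, compiled without a connection):

```python
import ibis

t = ibis.table(dict(g="string", a="int64"), name="t")
expr = t.group_by("g").having(t.a.sum() > 10).aggregate(total=t.a.sum())

# the HAVING clause renders the bare aggregate, with no `AS` alias attached
print(ibis.clickhouse.compile(expr))
```
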
