229 changes: 208 additions & 21 deletions .github/workflows/ibis-backends.yml
@@ -21,6 +21,7 @@ on:
branches:
- master
- "*.x.x"
merge_group:

permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
@@ -31,6 +32,9 @@ concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

env:
FORCE_COLOR: "1"

jobs:
test_backends:
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
@@ -45,7 +49,7 @@ jobs:
- windows-latest
python-version:
- "3.8"
- "3.10"
- "3.11"
backend:
- name: dask
title: Dask
@@ -67,11 +71,6 @@
title: Datafusion
extras:
- datafusion
- name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- name: polars
title: Polars
extras:
@@ -128,6 +127,36 @@ jobs:
- postgres
services:
- trino
- name: druid
title: Druid
extras:
- druid
services:
- druid
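# PySpark is handled below via explicit ubuntu-only matrix includes rather
# than the full os × python-version cross-product: serial on 3.8 and 3.10,
# parallel on 3.11.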
include:
- os: ubuntu-latest
python-version: "3.8"
backend:
name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- os: ubuntu-latest
python-version: "3.10"
backend:
name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- os: ubuntu-latest
python-version: "3.11"
backend:
name: pyspark
title: PySpark
extras:
- pyspark
exclude:
- os: windows-latest
backend:
@@ -148,13 +177,6 @@ jobs:
- clickhouse
services:
- clickhouse
- os: windows-latest
backend:
name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- os: windows-latest
backend:
name: postgres
@@ -200,6 +222,14 @@
extras:
- trino
- postgres
- os: windows-latest
backend:
name: druid
title: Druid
extras:
- druid
services:
- druid
steps:
- name: update and install system dependencies
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
@@ -239,17 +269,17 @@
with:
python-version: ${{ matrix.python-version }}

- uses: syphar/restore-virtualenv@v1
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }}

- uses: syphar/restore-pip-download-cache@v1
- run: python -m pip install --upgrade pip 'poetry<1.4'

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry>=1.2'
custom_cache_key_element: ${{ matrix.backend.name }}

- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
@@ -298,7 +328,7 @@ jobs:
- windows-latest
python-version:
- "3.8"
- "3.10"
- "3.11"
backend:
- name: dask
title: Dask
@@ -333,7 +363,7 @@
extras:
- postgres
- geospatial
- python-version: "3.10"
- python-version: "3.11"
backend:
name: postgres
title: PostgreSQL
@@ -372,7 +402,7 @@
with:
python-version: ${{ matrix.python-version }}

- run: python -m pip install --upgrade pip 'poetry>=1.2'
- run: python -m pip install --upgrade pip 'poetry<1.4'

- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
@@ -409,11 +439,168 @@ jobs:
if: ${{ failure() }}
run: docker compose logs

gen_lockfile_sqlalchemy2:
name: Generate Poetry Lockfile for SQLAlchemy 2
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v4
with:
python-version: "3.11"

- run: python -m pip install --upgrade pip 'poetry<1.4'

- name: remove deps that are not compatible with sqlalchemy 2
run: poetry remove duckdb-engine snowflake-sqlalchemy

- name: add sqlalchemy 2
run: poetry add --lock --optional 'sqlalchemy>=2,<3'

- name: checkout the lock file
run: git checkout poetry.lock

- name: lock with no updates
# poetry add is aggressive and will update other dependencies like
# numpy and pandas, so we keep the pyproject.toml edits and then relock
# without updating anything except the requested versions
run: poetry lock --no-update
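# Locally, the equivalent sequence is roughly (a sketch of the steps above):
#
#   poetry remove duckdb-engine snowflake-sqlalchemy  # drop deps incompatible with sqlalchemy 2
#   poetry add --lock --optional 'sqlalchemy>=2,<3'   # record the new constraint
#   git checkout poetry.lock                          # keep only the pyproject.toml edits
#   poetry lock --no-update                           # relock just the requested change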

- name: check the sqlalchemy version
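# extract the installed version from `poetry show` and fail unless it starts with "2."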
run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.'

- name: upload deps file
uses: actions/upload-artifact@v3
with:
name: deps
path: |
pyproject.toml
poetry.lock
test_backends_sqlalchemy2:
name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
needs: gen_lockfile_sqlalchemy2
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.11"
backend:
- name: mssql
title: MS SQL Server
services:
- mssql
extras:
- mssql
- name: mysql
title: MySQL
services:
- mysql
extras:
- geospatial
- mysql
- name: postgres
title: PostgreSQL
services:
- postgres
extras:
- geospatial
- postgres
- name: sqlite
title: SQLite
extras:
- sqlite
- name: trino
title: Trino
services:
- trino
extras:
- trino
- postgres
steps:
- name: checkout
uses: actions/checkout@v3

- name: install libgeos for shapely
if: ${{ matrix.backend.name == 'postgres' }}
run: sudo apt-get install -qq -y build-essential libgeos-dev

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: download backend data
run: just download-data

- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}

- name: download poetry lockfile
uses: actions/download-artifact@v3
with:
name: deps
path: deps

- name: pull out lockfile
run: |
set -euo pipefail
mv -f deps/* .
rm -r deps
- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-sqlalchemy2

- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-sqlalchemy2-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.4'

- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"

- name: run tests
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}

- name: publish test report
uses: actions/upload-artifact@v3
if: success() || failure()
with:
name: ${{ matrix.backend.name }}-sqlalchemy2-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml

- name: Show docker compose logs on fail
if: ${{ failure() }}
run: docker compose logs

backends:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
needs:
- test_backends_min_version
- test_backends
- test_backends_sqlalchemy2
steps:
- run: exit 0
19 changes: 10 additions & 9 deletions .github/workflows/ibis-docs-lint.yml
@@ -9,6 +9,7 @@ on:
branches:
- master
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -30,7 +31,7 @@ jobs:
fetch-depth: 0

- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
@@ -46,7 +47,7 @@ jobs:
uses: actions/checkout@v3

- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -73,22 +74,22 @@
uses: actions/setup-python@v4
id: install_python
with:
python-version: "3.10"
python-version: "3.11"

- name: install system dependencies
run: sudo apt-get install -qq -y build-essential libgeos-dev

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}
custom_cache_key_element: benchmarks

- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry>=1.2'
- run: python -m pip install --upgrade pip 'poetry<1.4'

- name: install ibis
run: poetry install --without dev --without docs --all-extras
@@ -121,7 +122,7 @@ jobs:
concurrency: docs-${{ github.repository }}-${{ github.head_ref || github.sha }}
steps:
- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -154,7 +155,7 @@
- benchmarks
steps:
- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -189,7 +190,7 @@
- name: build and push dev docs
run: |
nix develop --ignore-environment -c \
mike deploy --push --rebase --prefix docs --message 'docs(dev): ibis@${{ github.sha }}' dev
mkdocs gh-deploy --message 'docs: ibis@${{ github.sha }}'
simulate_release:
runs-on: ubuntu-latest
@@ -198,7 +199,7 @@
with:
fetch-depth: 0

- uses: cachix/install-nix-action@v18
- uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
5 changes: 3 additions & 2 deletions .github/workflows/ibis-docs-release.yml
@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20

- name: setup cachix
uses: cachix/cachix-action@v12
@@ -48,4 +48,5 @@ jobs:
- name: build and push docs on tag
run: |
nix develop --ignore-environment -c mike deploy --push --rebase --update-aliases --prefix docs --message "docs(release): ibis@${GITHUB_REF_NAME}" "${GITHUB_REF_NAME}" latest
nix develop --ignore-environment -c \
mkdocs gh-deploy --message "docs(release): ibis@${GITHUB_REF_NAME}" "${GITHUB_REF_NAME}"
11 changes: 10 additions & 1 deletion .github/workflows/ibis-main-skip-helper.yml
@@ -19,8 +19,17 @@ on:
branches:
- master
- "*.x.x"
merge_group:
jobs:
nix-lint:
test_core:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
test_shapely_duckdb_import:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
test_doctests:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
85 changes: 76 additions & 9 deletions .github/workflows/ibis-main.yml
@@ -19,6 +19,7 @@ on:
branches:
- master
- "*.x.x"
merge_group:

permissions:
contents: read
@@ -27,8 +28,11 @@ concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

env:
FORCE_COLOR: "1"

jobs:
test_no_backends:
test_core:
name: Test ${{ matrix.os }} python-${{ matrix.python-version }}
env:
SQLALCHEMY_WARN_20: "1"
@@ -43,6 +47,7 @@
- "3.8"
- "3.9"
- "3.10"
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
@@ -53,15 +58,17 @@
with:
python-version: ${{ matrix.python-version }}

- uses: syphar/restore-virtualenv@v1
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: no-backends-${{ steps.install_python.outputs.python-version }}

- uses: syphar/restore-pip-download-cache@v1
- run: python -m pip install --upgrade pip 'poetry<1.4'

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: no-backends-${{ steps.install_python.outputs.python-version }}
custom_cache_key_element: core

- name: install ${{ matrix.os }} system dependencies
if: matrix.os == 'ubuntu-latest'
@@ -75,8 +82,6 @@
if: matrix.os == 'windows-latest'
run: choco install graphviz

- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
run: poetry install --without dev --without docs --extras visualization

@@ -114,7 +119,7 @@ jobs:
os:
- ubuntu-latest
python-version:
- "3.10"
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
@@ -128,7 +133,7 @@
- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: shapely-duckdb-${{ steps.install_python.outputs.python-version }}
custom_cache_key_element: shapely-duckdb

- uses: syphar/restore-pip-download-cache@v1
with:
@@ -142,11 +147,73 @@
sudo apt-get update -y -q
sudo apt-get install -y -q build-essential libgeos-dev
- run: python -m pip install --upgrade pip 'poetry>=1.2'
- run: python -m pip install --upgrade pip 'poetry<1.4'

- name: install ibis
# install duckdb and geospatial because of https://github.com/ibis-project/ibis/issues/4856
run: poetry install --without dev --without docs --without test --extras duckdb --extras geospatial

- name: check shapely and duckdb imports
run: poetry run python -c 'import shapely.geometry, duckdb'

test_doctests:
name: Doctests
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.11"
steps:
- name: install system dependencies
run: |
set -euo pipefail
sudo apt-get update -y -q
sudo apt-get install -y -q build-essential graphviz libgeos-dev libkrb5-dev
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}

- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: doctests-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.4'

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: doctests

- name: install ibis with all extras
run: poetry install --without dev --without docs --extras all

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: run doctests
run: just doctest --junitxml=junit.xml --cov=ibis --cov-report=xml:coverage.xml

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: core,doctests,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}

- name: publish test report
uses: actions/upload-artifact@v3
if: success() || failure()
with:
name: doctest-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml
30 changes: 30 additions & 0 deletions .github/workflows/ibis-tpch-queries-skip-helper.yml
@@ -0,0 +1,30 @@
name: TPC-H

on:
push:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
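# Note: the `paths` filter above is the complement of the `paths-ignore`
# filter in ibis-tpch-queries.yml, so this stub job runs (and passes)
# exactly when the real TPC-H workflow is skipped for docs-only changes.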
tpch:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
11 changes: 10 additions & 1 deletion .github/workflows/ibis-tpch-queries.yml
@@ -2,13 +2,22 @@ name: TPC-H

on:
push:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
pull_request:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -32,7 +41,7 @@ jobs:
uses: actions/setup-python@v4
id: install_python
with:
python-version: "3.10"
python-version: "3.11"

- name: install tpc-queries dependencies
working-directory: tpc-queries
26 changes: 13 additions & 13 deletions .github/workflows/nix-skip-helper.yml
@@ -5,25 +5,22 @@ name: Nix

on:
push:
paths-ignore:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
pull_request:
paths-ignore:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
merge_group:

jobs:
nix:
@@ -33,10 +30,13 @@ jobs:
matrix:
os:
- ubuntu-latest
- macos-latest
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
include:
- os: macos-latest
python-version: "3.10"
steps:
- run: echo "No build required"
35 changes: 14 additions & 21 deletions .github/workflows/nix.yml
@@ -2,31 +2,22 @@ name: Nix

on:
push:
paths:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
- "!docs/**"
- "!mkdocs.yml"
- "!**/*.md"
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
- "!docs/**"
- "!mkdocs.yml"
- "!**/*.md"
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -40,17 +31,20 @@ jobs:
matrix:
os:
- ubuntu-latest
- macos-latest
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
include:
- os: macos-latest
python-version: "3.10"
steps:
- name: checkout
uses: actions/checkout@v3

- name: install nix
uses: cachix/install-nix-action@v18
uses: cachix/install-nix-action@v20
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
@@ -69,7 +63,6 @@ jobs:
version='${{ matrix.python-version }}'
nix build ".#ibis${version//./}" --fallback --keep-going --print-build-logs
# build the whole dev shell when pushing to upstream, so that the cachix cache is populated
- name: nix build devShell
if: github.event_name == 'push'
run: |
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -25,7 +25,7 @@ jobs:
fetch-depth: 0
token: ${{ steps.generate_token.outputs.token }}

- uses: cachix/install-nix-action@v18
- uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
6 changes: 3 additions & 3 deletions .github/workflows/update-deps.yml
@@ -12,7 +12,7 @@ jobs:
matrix: ${{ steps.get-flakes.outputs.matrix }}
steps:
- uses: actions/checkout@v3
- uses: cachix/install-nix-action@v18
- uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -34,7 +34,7 @@
steps:
- uses: actions/checkout@v3

- uses: cachix/install-nix-action@v18
- uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
author: "ibis-squawk-bot[bot] <ibis-squawk-bot[bot]@users.noreply.github.com>"
title: "chore(flake/${{ matrix.flake }}): `${{ steps.get_current_commit.outputs.short-rev }}` -> `${{ steps.get_new_commit.outputs.short-rev }}`"
body: ${{ steps.compare_commits.outputs.differences }}
labels: dependencies,nix,autorebase:opt-in
labels: dependencies,nix

- uses: juliangruber/approve-pull-request-action@v2.0.3
if: fromJSON(steps.needs_pr.outputs.did_change)
5 changes: 5 additions & 0 deletions .gitignore
@@ -97,3 +97,8 @@ tags
.DS_Store
prof/
.hypothesis
.RData
.Rhistory
ibis/examples/data
ibis/examples/descriptions
.coverage*
22 changes: 15 additions & 7 deletions .pre-commit-config.yaml
@@ -4,11 +4,12 @@ ci:
autoupdate_commit_msg: "chore(deps): pre-commit.ci autoupdate"
skip:
- actionlint
- just
- nixpkgs-fmt
- prettier
- ruff
- shellcheck
- shfmt
- just
- nixpkgs-fmt
- statix
default_stages:
- commit
@@ -18,14 +19,22 @@ repos:
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.231
- repo: local
hooks:
- id: ruff
args: ["--fix", "--show-source"]
name: ruff
description: "Run 'ruff' for extremely fast Python linting"
entry: ruff
language: system
types_or:
- python
- pyi
args: ["check", "--force-exclude", "--show-source", "--fix"]
require_serial: true
minimum_pre_commit_version: "2.9.2"
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
hooks:
@@ -35,7 +44,6 @@
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
176 changes: 97 additions & 79 deletions README.md

Large diffs are not rendered by default.

51 changes: 29 additions & 22 deletions ci/schema/clickhouse.sql
@@ -1,33 +1,17 @@
-- NB: The paths in this file are all relative to /var/lib/clickhouse/user_files

CREATE OR REPLACE TABLE diamonds ENGINE = Memory AS
SELECT * FROM file('parquet/diamonds/diamonds.parquet', 'Parquet');
SELECT * FROM file('ibis/diamonds.parquet', 'Parquet');

CREATE OR REPLACE TABLE batting ENGINE = Memory AS
SELECT * FROM file('parquet/batting/batting.parquet', 'Parquet');
SELECT * FROM file('ibis/batting.parquet', 'Parquet');

CREATE OR REPLACE TABLE awards_players ENGINE = Memory AS
SELECT * FROM file('parquet/awards_players/awards_players.parquet', 'Parquet');
SELECT * FROM file('ibis/awards_players.parquet', 'Parquet');

CREATE OR REPLACE TABLE functional_alltypes (
`index` Nullable(Int64),
`Unnamed: 0` Nullable(Int64),
id Nullable(Int32),
bool_col Nullable(Bool),
tinyint_col Nullable(Int8),
smallint_col Nullable(Int16),
int_col Nullable(Int32),
bigint_col Nullable(Int64),
float_col Nullable(Float32),
double_col Nullable(Float64),
date_string_col Nullable(String),
string_col Nullable(String),
-- TODO: clean this up when timestamp scale is supported
timestamp_col Nullable(DateTime),
year Nullable(Int32),
month Nullable(Int32)
) ENGINE = Memory AS
SELECT * FROM file('functional_alltypes.csv', 'CSVWithNames');
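-- `SELECT * REPLACE(<expr> AS <col>)` is a ClickHouse column modifier: it
-- keeps every column from the source file but substitutes the given
-- expression for the named column.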
CREATE OR REPLACE TABLE functional_alltypes ENGINE = Memory AS
SELECT * REPLACE(CAST(timestamp_col AS Nullable(DateTime)) AS timestamp_col)
FROM file('ibis/functional_alltypes.parquet', 'Parquet');

CREATE OR REPLACE TABLE tzone (
ts Nullable(DateTime),
@@ -52,6 +36,29 @@ INSERT INTO array_types VALUES
([2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, []),
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]]);

CREATE OR REPLACE TABLE time_df1 (
time Int64,
value Nullable(Float64),
key Nullable(String)
) ENGINE = Memory;
INSERT INTO time_df1 VALUES
(1, 1.0, 'x'),
(20, 20.0, 'x'),
(30, 30.0, 'x'),
(40, 40.0, 'x'),
(50, 50.0, 'x');

CREATE OR REPLACE TABLE time_df2 (
time Int64,
value Nullable(Float64),
key Nullable(String)
) ENGINE = Memory;
INSERT INTO time_df2 VALUES
(19, 19.0, 'x'),
(21, 21.0, 'x'),
(39, 39.0, 'x'),
(49, 49.0, 'x'),
(1000, 1000.0, 'x');

CREATE OR REPLACE TABLE struct (
abc Tuple(
47 changes: 47 additions & 0 deletions ci/schema/druid.sql
@@ -0,0 +1,47 @@
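-- Each EXTERN() call below takes three JSON arguments: an input source
-- (local files here), an input format (parquet), and a row signature
-- listing the column names and types to ingest.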
REPLACE INTO "diamonds"
OVERWRITE ALL
SELECT *
FROM TABLE(
EXTERN(
'{"type":"local","files":["/opt/shared/diamonds.parquet"]}',
'{"type":"parquet"}',
'[{"name":"carat","type":"double"},{"name":"cut","type":"string"},{"name":"color","type":"string"},{"name":"clarity","type":"string"},{"name":"depth","type":"double"},{"name":"table","type":"double"},{"name":"price","type":"long"},{"name":"x","type":"double"},{"name":"y","type":"double"},{"name":"z","type":"double"}]'
)
)
PARTITIONED BY ALL TIME;

REPLACE INTO "batting"
OVERWRITE ALL
SELECT *
FROM TABLE(
EXTERN(
'{"type":"local","files":["/opt/shared/batting.parquet"]}',
'{"type":"parquet"}',
'[{"name":"playerID","type":"string"},{"name":"yearID","type":"long"},{"name":"stint","type":"long"},{"name":"teamID","type":"string"},{"name":"lgID","type":"string"},{"name":"G","type":"long"},{"name":"AB","type":"long"},{"name":"R","type":"long"},{"name":"H","type":"long"},{"name":"X2B","type":"long"},{"name":"X3B","type":"long"},{"name":"HR","type":"long"},{"name":"RBI","type":"long"},{"name":"SB","type":"long"},{"name":"CS","type":"long"},{"name":"BB","type":"long"},{"name":"SO","type":"long"},{"name":"IBB","type":"long"},{"name":"HBP","type":"long"},{"name":"SH","type":"long"},{"name":"SF","type":"long"},{"name":"GIDP","type":"long"}]'
)
)
PARTITIONED BY ALL TIME;

REPLACE INTO "awards_players"
OVERWRITE ALL
SELECT *
FROM TABLE(
EXTERN(
'{"type":"local","files":["/opt/shared/awards_players.parquet"]}',
'{"type":"parquet"}',
'[{"name":"playerID","type":"string"},{"name":"awardID","type":"string"},{"name":"yearID","type":"long"},{"name":"lgID","type":"string"},{"name":"tie","type":"string"},{"name":"notes","type":"string"}]'
)
)
PARTITIONED BY ALL TIME;

REPLACE INTO "functional_alltypes"
OVERWRITE ALL
SELECT *
FROM TABLE(
EXTERN(
'{"type":"local","files":["/opt/shared/functional_alltypes.parquet"]}',
'{"type":"parquet"}',
'[{"name":"index","type":"long"},{"name":"Unnamed: 0","type":"long"},{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
)
)
PARTITIONED BY ALL TIME;
6 changes: 6 additions & 0 deletions ci/schema/postgresql.sql
@@ -208,3 +208,9 @@ CREATE TABLE map (kv HSTORE);
INSERT INTO map VALUES
('a=>1,b=>2,c=>3'),
('d=>4,e=>5,c=>6');

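-- Add a stored, generated full-text-search column over `notes`, with all
-- lexemes weighted 'A'.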
ALTER TABLE awards_players
ADD search tsvector
GENERATED always AS (
setweight(to_tsvector('simple', notes), 'A') :: tsvector
) stored;
22 changes: 11 additions & 11 deletions ci/schema/snowflake.sql
@@ -1,12 +1,12 @@
CREATE OR REPLACE FILE FORMAT ibis_testing
CREATE OR REPLACE TEMP FILE FORMAT ibis_testing
type = 'CSV'
field_delimiter = ','
skip_header = 1
field_optionally_enclosed_by = '"';

CREATE OR REPLACE STAGE ibis_testing file_format = ibis_testing;
CREATE OR REPLACE TEMP STAGE ibis_testing file_format = ibis_testing;

CREATE TEMP TABLE diamonds (
CREATE OR REPLACE TABLE diamonds (
"carat" FLOAT,
"cut" TEXT,
"color" TEXT,
@@ -19,7 +19,7 @@ CREATE TEMP TABLE diamonds (
"z" FLOAT
);

CREATE TEMP TABLE batting (
CREATE OR REPLACE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
"stint" BIGINT,
@@ -44,7 +44,7 @@ CREATE TEMP TABLE batting (
"GIDP" BIGINT
);

CREATE TEMP TABLE awards_players (
CREATE OR REPLACE TABLE awards_players (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
@@ -53,7 +53,7 @@ CREATE TEMP TABLE awards_players (
"notes" TEXT
);

CREATE TEMP TABLE functional_alltypes (
CREATE OR REPLACE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
"id" INTEGER,
@@ -71,7 +71,7 @@ CREATE TEMP TABLE functional_alltypes (
"month" INTEGER
);

CREATE TEMP TABLE array_types (
CREATE OR REPLACE TABLE array_types (
"x" ARRAY,
"y" ARRAY,
"z" ARRAY,
@@ -88,14 +88,14 @@ INSERT INTO array_types ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
SELECT [2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, NULL UNION
SELECT [4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]];

CREATE TEMP TABLE map ("kv" OBJECT);
CREATE OR REPLACE TABLE map ("kv" OBJECT);

INSERT INTO map ("kv")
SELECT object_construct('a', 1, 'b', 2, 'c', 3) UNION
SELECT object_construct('d', 4, 'e', 5, 'c', 6);


CREATE TEMP TABLE struct ("abc" OBJECT);
CREATE OR REPLACE TABLE struct ("abc" OBJECT);

INSERT INTO struct ("abc")
SELECT {'a': 1.0, 'b': 'banana', 'c': 2} UNION
@@ -106,7 +106,7 @@ INSERT INTO struct ("abc")
SELECT NULL UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': NULL};

CREATE TEMP TABLE json_t ("js" VARIANT);
CREATE OR REPLACE TABLE json_t ("js" VARIANT);

INSERT INTO json_t ("js")
SELECT parse_json('{"a": [1,2,3,4], "b": 1}') UNION
@@ -116,7 +116,7 @@ INSERT INTO json_t ("js")
SELECT parse_json('[42,47,55]') UNION
SELECT parse_json('[]');

CREATE TEMP TABLE win ("g" TEXT, "x" BIGINT, "y" BIGINT);
CREATE OR REPLACE TABLE win ("g" TEXT, "x" BIGINT, "y" BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
155 changes: 77 additions & 78 deletions conda-lock/linux-64-3.10.lock

Large diffs are not rendered by default.

156 changes: 78 additions & 78 deletions conda-lock/linux-64-3.8.lock

Large diffs are not rendered by default.

156 changes: 78 additions & 78 deletions conda-lock/linux-64-3.9.lock

Large diffs are not rendered by default.

172 changes: 85 additions & 87 deletions conda-lock/osx-64-3.10.lock

Large diffs are not rendered by default.

163 changes: 81 additions & 82 deletions conda-lock/osx-64-3.8.lock

Large diffs are not rendered by default.

173 changes: 87 additions & 86 deletions conda-lock/osx-64-3.9.lock

Large diffs are not rendered by default.

210 changes: 96 additions & 114 deletions conda-lock/osx-arm64-3.10.lock

Large diffs are not rendered by default.

164 changes: 82 additions & 82 deletions conda-lock/osx-arm64-3.8.lock

Large diffs are not rendered by default.

211 changes: 97 additions & 114 deletions conda-lock/osx-arm64-3.9.lock

Large diffs are not rendered by default.

210 changes: 94 additions & 116 deletions conda-lock/win-64-3.10.lock

Large diffs are not rendered by default.

212 changes: 95 additions & 117 deletions conda-lock/win-64-3.8.lock

Large diffs are not rendered by default.

212 changes: 95 additions & 117 deletions conda-lock/win-64-3.9.lock

Large diffs are not rendered by default.

195 changes: 185 additions & 10 deletions docker-compose.yml
@@ -1,13 +1,12 @@
version: "3.4"
services:
clickhouse:
build:
context: .
dockerfile: ./docker/clickhouse/Dockerfile
image: ibis-clickhouse
image: clickhouse/clickhouse-server:23.2.4.12-alpine
ports:
- 8123:8123
- 9000:9000
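# the named volume is mounted where ci/schema/clickhouse.sql expects its
# data, so file('ibis/<name>.parquet', ...) resolves under
# /var/lib/clickhouse/user_files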
volumes:
- clickhouse:/var/lib/clickhouse/user_files/ibis
networks:
- clickhouse
impala:
@@ -92,7 +91,7 @@ services:
- mysqladmin
- ping
timeout: 5s
image: mariadb:10.10.2
image: mariadb:10.11.2
ports:
- 3306:3306
networks:
@@ -117,6 +116,7 @@ services:
networks:
- postgres
mssql:
image: mcr.microsoft.com/mssql/server:2022-latest
environment:
MSSQL_SA_PASSWORD: 1bis_Testing!
ACCEPT_EULA: "Y"
@@ -127,12 +127,10 @@ services:
- CMD-SHELL
- /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "IF DB_ID('ibis_testing') IS NULL BEGIN CREATE DATABASE [ibis_testing] END"
timeout: 10s
build:
context: .
dockerfile: ./docker/mssql/Dockerfile
image: ibis-mssql
ports:
- 1433:1433
volumes:
- mssql:/data
networks:
- mssql
trino-postgres:
Expand Down Expand Up @@ -164,7 +162,7 @@ services:
- CMD-SHELL
- trino --execute 'SELECT 1 AS one'
timeout: 30s
image: trinodb/trino:405
image: trinodb/trino:410
ports:
- 8080:8080
networks:
@@ -174,10 +172,187 @@ services:
- $PWD/docker/trino/catalog/memory.properties:/etc/trino/catalog/memory.properties:ro
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

druid-postgres:
image: postgres:15.2-alpine
container_name: druid-postgres
volumes:
- metadata_data:/var/lib/postgresql/data
environment:
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 5432
networks:
- druid

# Need 3.5 or later for container nodes
druid-zookeeper:
hostname: zookeeper
container_name: zookeeper
image: zookeeper:3.8
environment:
- ZOO_MY_ID=1
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 2181
networks:
- druid

druid-coordinator:
image: apache/druid:25.0.0
hostname: coordinator
container_name: coordinator
volumes:
- druid:/opt/shared
- coordinator_var:/opt/druid/var
depends_on:
- druid-zookeeper
- druid-postgres
command:
- coordinator
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 8081
env_file:
- ./docker/druid/environment
networks:
- druid

druid-broker:
image: apache/druid:25.0.0
hostname: broker
container_name: broker
volumes:
- broker_var:/opt/druid/var
depends_on:
- druid-zookeeper
- druid-postgres
- druid-coordinator
command:
- broker
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 8082
ports:
- "8082:8082"
env_file:
- ./docker/druid/environment
networks:
- druid

druid-historical:
image: apache/druid:25.0.0
hostname: historical
container_name: historical
volumes:
- druid:/opt/shared
- historical_var:/opt/druid/var
depends_on:
- druid-zookeeper
- druid-postgres
- druid-coordinator
command:
- historical
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 8083
env_file:
- ./docker/druid/environment
networks:
- druid

druid-middlemanager:
image: apache/druid:25.0.0
hostname: middlemanager
container_name: middlemanager
volumes:
- druid:/opt/shared
- middle_var:/opt/druid/var
depends_on:
- druid-zookeeper
- druid-postgres
- druid-coordinator
command:
- middleManager
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 8091
env_file:
- ./docker/druid/environment
networks:
- druid

druid:
image: apache/druid:25.0.0
hostname: router
container_name: router
volumes:
- router_var:/opt/druid/var
depends_on:
- druid-zookeeper
- druid-postgres
- druid-coordinator
- druid-middlemanager
- druid-historical
- druid-broker
ports:
- "8888:8888"
command:
- router
healthcheck:
interval: 10s
retries: 9
timeout: 90s
test:
- CMD-SHELL
- nc -z 127.0.0.1 8888
env_file:
- ./docker/druid/environment
networks:
- druid

networks:
impala:
mysql:
mssql:
clickhouse:
postgres:
trino:
druid:

volumes:
metadata_data:
middle_var:
historical_var:
broker_var:
coordinator_var:
router_var:
clickhouse:
druid:
mssql:
2 changes: 0 additions & 2 deletions docker/clickhouse/Dockerfile

This file was deleted.

56 changes: 56 additions & 0 deletions docker/druid/environment
@@ -0,0 +1,56 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Java tuning
DRUID_XMX=1g
DRUID_XMS=64m
DRUID_MAXNEWSIZE=128m
DRUID_NEWSIZE=64m
DRUID_MAXDIRECTMEMORYSIZE=1g

druid_emitter_logging_logLevel=debug

druid_extensions_loadList=["postgresql-metadata-storage", "druid-multi-stage-query", "druid-parquet-extensions", "druid-avro-extensions"]

druid_zk_service_host=zookeeper

druid_worker_capacity=6
druid_generic_useDefaultValueForNull=true

druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://druid-postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword

druid_coordinator_balancer_strategy=cachingCost

druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms64m", "-XX:MaxDirectMemorySize=1g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=64MiB

druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs

druid_processing_numThreads=1
druid_processing_numMergeBuffers=1
druid_processing_buffer_sizeBytes=64m

DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
2 changes: 0 additions & 2 deletions docker/mssql/Dockerfile

This file was deleted.

2 changes: 1 addition & 1 deletion docker/postgres/Dockerfile
@@ -1,2 +1,2 @@
FROM postgis/postgis:15-3.3-alpine
RUN apk add postgresql14-plpython3
RUN apk add postgresql15-plpython3
1 change: 1 addition & 0 deletions docs/CNAME
@@ -0,0 +1 @@
ibis-project.org
8 changes: 4 additions & 4 deletions docs/CONTRIBUTING.md
@@ -4,7 +4,7 @@ We love new contributors!

To get started:

1. [Set up a development environment](https://ibis-project.org/docs/latest/community/contribute/01_environment/)
1. [Learn about the commit workflow](https://ibis-project.org/docs/latest/community/contribute/02_workflow/)
1. [Review the code style guidelines](https://ibis-project.org/docs/latest/community/contribute/03_style/)
1. [Dig into the nitty gritty of being a maintainer](https://ibis-project.org/docs/latest/community/contribute/05_maintainers_guide/)
1. [Set up a development environment](https://ibis-project.org/community/contribute/01_environment/)
1. [Learn about the commit workflow](https://ibis-project.org/community/contribute/02_workflow/)
1. [Review the code style guidelines](https://ibis-project.org/community/contribute/03_style/)
1. [Dig into the nitty gritty of being a maintainer](https://ibis-project.org/community/contribute/05_maintainers_guide/)
13 changes: 4 additions & 9 deletions docs/SUMMARY.md
@@ -1,15 +1,6 @@
* [Home](index.md)
* [Install](install.md)
* [Docs](docs/index.md)
* [Tutorial](tutorial/index.md)
* [Getting Started](tutorial/01-Introduction-to-Ibis.ipynb)
* [Aggregating and Joining](tutorial/02-Aggregates-Joins.ipynb)
* [Lazy Mode and Logging](tutorial/03-Expressions-Lazy-Mode-Logging.ipynb)
* [More Value Expressions](tutorial/04-More-Value-Expressions.ipynb)
* [Creating and Inserting External Data](tutorial/05-IO-Create-Insert-External-Data.ipynb)
* [Complex Filtering](tutorial/06-ComplexFiltering.ipynb)
* [Analytics Tools](tutorial/07-Analytics-Tools.ipynb)
* [Geospatial Analysis](tutorial/rendered/08-Geospatial-Analysis.ipynb)
* [How To Guide](how_to/)
* [Execution Backends](backends/)
* [User Guide](user_guide/)
@@ -33,6 +24,10 @@
* [Backend Operations Matrix](backends/support_matrix.md)
* [Releases](release_notes.md)
* Blog
* [Ibis Sneak Peek: Writing to Files](blog/ibis-to-file.md)
* [Ibis Sneak Peek: Examples](blog/ibis-examples.md)
* [Maximizing Productivity with Selectors](blog/selectors.md)
* [Ibis + DuckDB + Substrait](blog/ibis_substrait_to_duckdb.md)
* [Ibis v4.0.0](blog/ibis-version-4.0.0-release.md)
* [Analyzing Ibis's CI Data with Ibis](blog/rendered/ci-analysis.ipynb)
* [ffill and bfill using ibis](blog/ffill-and-bfill-using-ibis.md)
1 change: 1 addition & 0 deletions docs/api/expressions/top_level.md
@@ -13,6 +13,7 @@ These methods and objects are available directly in the `ibis` module.
::: ibis.coalesce
::: ibis.cumulative_window
::: ibis.date
::: ibis.deferred
::: ibis.desc
::: ibis.difference
::: ibis.get_backend
11 changes: 11 additions & 0 deletions docs/backends/Druid.md
@@ -0,0 +1,11 @@
---
backend_name: Druid
backend_url: https://druid.apache.org/
backend_module: druid
backend_param_style: a SQLAlchemy connection string
backend_connection_example: ibis.connect("druid://localhost:8082/druid/v2/sql")
is_experimental: true
version_added: "5.0"
---

{% include 'backends/template.md' %}
1 change: 0 additions & 1 deletion docs/backends/Impala.md
@@ -29,7 +29,6 @@ backend.
options:
heading_level: 3
members:
- set_database
- create_database
- drop_database
- list_databases
19 changes: 9 additions & 10 deletions docs/backends/app/backend_info_app.py
@@ -23,9 +23,7 @@

@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
def support_matrix_df():
resp = requests.get(
"https://ibis-project.org/docs/dev/backends/raw_support_matrix.csv"
)
resp = requests.get("https://ibis-project.org/backends/raw_support_matrix.csv")
resp.raise_for_status()

with tempfile.NamedTemporaryFile() as f:
@@ -47,8 +45,9 @@ def backends_info_df():
{
"bigquery": ["string", "sql"],
"clickhouse": ["string", "sql"],
'dask': ["dataframe"],
"datafusion": ["dataframe"],
"dask": ["dataframe"],
"datafusion": ["sql"],
"druid": ["sqlalchemy", "sql"],
"duckdb": ["sqlalchemy", "sql"],
"impala": ["string", "sql"],
"mssql": ["sqlalchemy", "sql"],
@@ -129,6 +128,7 @@ def get_selected_operation_categories():


current_backend_names = get_selected_backend_name()
sort_by_coverage = st.sidebar.checkbox('Sort by API Coverage', value=False)
current_ops_categories = get_selected_operation_categories()

hide_supported_by_all_backends = st.sidebar.selectbox(
@@ -184,7 +184,9 @@ def get_selected_operation_categories():
.T
)

table = pd.concat([coverage, df.replace({True: "✔", False: "🚫"})])
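    # When the 'Sort by API Coverage' checkbox is on, keep the existing
    # column order; otherwise sort the backend columns alphabetically.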
table = pd.concat([coverage, df.replace({True: "✔", False: "🚫"})]).loc[
:, slice(None) if sort_by_coverage else sorted(df.columns)
]
st.dataframe(table)
else:
st.write("No data")
@@ -194,10 +196,7 @@ def get_selected_operation_categories():
pretty_sql_query = sqlglot.transpile(
sql_query, read='duckdb', write='duckdb', pretty=True
)[0]
st.code(
pretty_sql_query,
language='sql',
)
st.code(pretty_sql_query, language='sql')

with st.expander("Source code"):
st.code(Path(__file__).read_text())
10 changes: 7 additions & 3 deletions docs/backends/index.md
@@ -1,5 +1,8 @@
# Backends

See the [configuration guide](../user_guide/configuration.md#default-backend)
to inspect or reconfigure the backend used by default.

## String Generating Backends

The first category of backends translates Ibis expressions into string queries.
@@ -20,14 +23,15 @@ system's expressions, for example, SQLAlchemy.
Instead of generating strings for each expression these backends produce
another kind of expression and typically have high-level APIs for execution.

- [Apache Arrow Datafusion](Datafusion.md)
- [Apache Druid](Druid.md)
- [Apache PySpark](PySpark.md)
- [Dask](Dask.md)
- [Datafusion](Datafusion.md)
- [DuckDB](DuckDB.md)
- [MySQL](MySQL.md)
- [MS SQL Server](MSSQL.md)
- [MySQL](MySQL.md)
- [Polars](Polars.md)
- [PostgreSQL](PostgreSQL.md)
- [PySpark](PySpark.md)
- [SQLite](SQLite.md)
- [Snowflake](Snowflake.md)
- [Trino](Trino.md)
Binary file added docs/blog/assets/tab_complete.png
50 changes: 50 additions & 0 deletions docs/blog/ibis-examples.md
@@ -0,0 +1,50 @@
# Ibis Sneak Peek: Examples

**by Kae Suarez**

Ibis has been moving quickly to provide a powerful but easy-to-use interface for interacting with analytical engines. However, as we’re approaching the 5.0 release of Ibis, we’ve realized that moving from not knowing Ibis to writing a first expression is not trivial.

As is, in our tutorial structure, work must be done on the user’s part — though we do provide the commands — to download a SQLite database onto disk, which can only be used with said backend. We feel that this puts too much emphasis on a single backend and front-loads too much effort into picking the right backend for the first tutorial. We want minimal steps between users and learning the Ibis API.

This is why we’ve added the `examples` module.

## Getting Started with Examples

This module offers in-Ibis access to multiple small tables (the largest is around only 30k rows), which are downloaded when requested and immediately read into the backend upon completion. We worked to keep pulling in examples simple, so it looks like this:

```python
>>> import ibis
>>> import ibis.examples as ex

>>> t = ex.penguins.fetch()
>>> t.head()
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │               181 │        3750 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │               186 │        3800 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │               195 │        3250 │ female │  2007 │
│ Adelie  │ Torgersen │            nan │           nan │                 ∅ │           ∅ │ ∅      │  2007 │
│ Adelie  │ Torgersen │           36.7 │          19.3 │               193 │        3450 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
```

Another advantage of this new method is that we were able to register all of them so you can tab-complete, as you can see here:

![Tab Complete](assets/tab_complete.png)

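You can also list what's registered programmatically (a quick sketch that relies only on each example being exposed as a module attribute, which is also what makes tab-completion work):

```python
import ibis.examples as ex

# Every registered example dataset is an attribute of the module, so a
# plain dir() enumerates them (names starting with "_" are internals).
datasets = [name for name in dir(ex) if not name.startswith("_")]
print(datasets[:5])
```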
Once you’ve retrieved an example table, you can get straight to learning and experimenting, instead of struggling with just getting the data itself.

In the future, our tutorials will use the _examples_ module to help speed up learning of the Ibis framework.

Interested in Ibis? Docs are available on this very website, at:

- [Ibis Docs](https://ibis-project.org/)

and the repo is always at:

- [Ibis GitHub](https://github.com/ibis-project/ibis)

Please feel free to reach out on GitHub!
56 changes: 56 additions & 0 deletions docs/blog/ibis-to-file.md
@@ -0,0 +1,56 @@
# Ibis Sneak Peek: Writing to Files

**by Kae Suarez**

Ibis 5.0 is coming soon and will offer new functionality and fixes to users. To enhance clarity around this process, we’re sharing a sneak peek into what we’re working on.

In Ibis 4.0, we added the ability to read CSVs and Parquet via the Ibis interface. We felt this was important because, well, the ability to read files is simply necessary, be it on a local scale, legacy data, data not yet in a database, and so on. However, for a user, the natural next question was “can I go ahead and write when I’m done?” The answer was no. We didn’t like that, especially since we do care about file-based use cases.

So, we’ve gone ahead and fixed that for Ibis 5.0.

## Files in, Files out

Before we can write a file, we need data — so let’s read in a file, to start this off:

```python
import ibis

t = ibis.read_csv(
    "https://storage.googleapis.com/ibis-examples/data/penguins.csv.gz"
)
```

Of course, we could just write out, but let’s do an operation first — how about using selectors, which you can read more about [here](https://ibis-project.org/blog/selectors/)? Self-promotion aside, here’s an operation:

```python
import ibis.selectors as s
from ibis import _

expr = t.group_by("species").mutate(
    s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std())
)
```

Now, finally, time to do the exciting part:

```python
expr.to_parquet("normalized.parquet")
```

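To sanity-check the round trip, you can read the file straight back in with the same read API (a minimal sketch; `normalized.parquet` is the file written above):

```python
# Read the freshly written Parquet file back into a table expression and
# materialize a few rows to confirm the write succeeded.
check = ibis.read_parquet("normalized.parquet")
print(check.head().execute())
```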
Like many things in Ibis, this is as simple and plain-looking as it is important. Being able to create files from Ibis instead of redirecting into other libraries first enables operation at larger scales and fewer steps. Where desired, you can address a backend directly to use its native export functionality — we want to make sure you have the flexibility to use Ibis or the backend as you see fit.

## Wrapping Up

Ibis is an interface tool for analytical engines that can reach scales far beyond a laptop. Files are important to Ibis because:

- Ibis also supports local execution, where files are the standard unit of data — we want to support all our users.
- Files are useful for moving between platforms, and long-term storage that isn’t tied to a particular backend.
- Files can move more easily between our backends than database files, so we think this adds some convenience for the multi-backend use case.

We’re excited to release this functionality in Ibis 5.0.

Interested in Ibis? Docs are available on this very website, at:

- [Ibis Docs](https://ibis-project.org/)

and the repo is always at:

- [Ibis GitHub](https://github.com/ibis-project/ibis)

Please feel free to reach out on GitHub!
8 changes: 4 additions & 4 deletions docs/blog/ibis-version-4.0.0-release.md
@@ -13,7 +13,7 @@ Let’s talk about some of the new changes 4.0 brings for Ibis users.

## Backends

Ibis 4.0 brings [Polars](https://ibis-project.org/docs/4.0.0/backends/Polars/), [Snowflake](https://ibis-project.org/docs/4.0.0/backends/Snowflake/), and [Trino](https://ibis-project.org/docs/4.0.0/backends/Trino/) into an already-impressive stock of supported backends.
Ibis 4.0 brings [Polars](https://ibis-project.org/backends/Polars/), [Snowflake](https://ibis-project.org/backends/Snowflake/), and [Trino](https://ibis-project.org/backends/Trino/) into an already-impressive stock of supported backends.
The [Polars](https://www.pola.rs/) backend adds another way for users to work locally with DataFrames.
The [Snowflake](https://www.snowflake.com/en/) and [Trino](https://trino.io/) backends add a free and familiar python API to popular data warehouses.

@@ -23,7 +23,7 @@ Alongside these new backends, Google BigQuery and Microsoft SQL have been moved

There are a lot of improvements incoming, but some notable changes include:

- [read API](https://github.com/ibis-project/ibis/pull/5005): allows users to read various file formats directly into their [configured `default_backend`](https://ibis-project.org/docs/dev/api/config/?h=default#ibis.config.Options) (default DuckDB) through `read_*` functions, which makes working with local files easier than ever.
- [read API](https://github.com/ibis-project/ibis/pull/5005): allows users to read various file formats directly into their [configured `default_backend`](https://ibis-project.org/api/config/?h=default#ibis.config.Options) (default DuckDB) through `read_*` functions, which makes working with local files easier than ever.
- [to_pyarrow and to_pyarrow_batches](https://github.com/ibis-project/ibis/pull/4454#issuecomment-1262640204): users can now return PyArrow objects (Tables, Arrays, Scalars, RecordBatchReader) and therefore grants all of the functionality that PyArrow provides
- [JSON getitem](https://github.com/ibis-project/ibis/pull/4525): users can now run getitem on a JSON field using Ibis expressions with some backends
- [Plotting support through `__array__`](https://github.com/ibis-project/ibis/pull/4547): allows users to plot Ibis expressions out of the box
@@ -36,6 +36,6 @@ Notable changes include removing intermediate expressions, improving the testing
## Additional Changes

As mentioned previously, additional functionality, bugfixes, and more have been included in the latest 4.0 release.
To stay up to date and learn more about recent changes: check out the project's homepage at [ibis-project.org](https://ibis-project.org/docs/latest/), follow [@IbisData](https://twitter.com/IbisData) on Twitter, find the source code and community on [GitHub](https://github.com/ibis-project/ibis), and join the discussion on [Gitter](https://gitter.im/ibis-dev/Lobby).
To stay up to date and learn more about recent changes: check out the project's homepage at [ibis-project.org](https://ibis-project.org/docs), follow [@IbisData](https://twitter.com/IbisData) on Twitter, find the source code and community on [GitHub](https://github.com/ibis-project/ibis), and join the discussion on [Gitter](https://gitter.im/ibis-dev/Lobby).

As always, try Ibis by [installing](https://ibis-project.org/docs/latest/install/) it today.
As always, try Ibis by [installing](https://ibis-project.org/install/) it today.
405 changes: 405 additions & 0 deletions docs/blog/ibis_substrait_to_duckdb.md

Large diffs are not rendered by default.

711 changes: 349 additions & 362 deletions docs/blog/rendered/ci-analysis.ipynb

Large diffs are not rendered by default.

448 changes: 448 additions & 0 deletions docs/blog/selectors.md

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions docs/community/contribute/05_maintainers_guide.md
@@ -20,6 +20,27 @@ Occasionally you may need to lock [`poetry`](https://python-poetry.org) dependen
poetry lock --no-update
```

## Adding Examples

If you're not a maintainer, please open an issue asking us to add your example.

### Requirements

You need the ability to write to the `gs://ibis-examples` GCS bucket to add an example.

### Instructions

Make sure you're in the root of the ibis git repository.

Assuming your file is called `example.csv`:

1. Add a gzip-compressed CSV file with the path `ibis/examples/data/example.csv.gz`.
1. Add a file named `ibis/examples/descriptions/example` that contains a
description of your example. One line is best, but not necessary.
1. Run one of the following **from the git root of an ibis clone**:
- `python ibis/examples/gen_registry.py` (doesn't include R dependencies)
- `nix run '.#gen-examples'` (includes R dependencies)
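Putting those steps together for a hypothetical `example.csv` (a sketch that mirrors the list above; adjust the names for your dataset):

```sh
# from the git root of an ibis clone
gzip -c example.csv > ibis/examples/data/example.csv.gz
echo "One-line description of the example" > ibis/examples/descriptions/example
python ibis/examples/gen_registry.py  # or: nix run '.#gen-examples'
```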

## Release

Ibis is released on [PyPI](https://pypi.org/project/ibis-framework/) and [Conda Forge](https://github.com/conda-forge/ibis-framework-feedstock).
2 changes: 1 addition & 1 deletion docs/docs/index.md
@@ -4,6 +4,6 @@ Welcome to the ibis documentation!

- **Coming from Pandas?**: Check out [ibis for pandas users](../ibis-for-pandas-users.ipynb)!
- **Coming from SQL?**: Take a look at [ibis for SQL programmers](../ibis-for-sql-programmers.ipynb)!
- **Want to see some more examples?**: We've got [a set of tutorial notebooks](../tutorial/index.md) for that!
- **Want to see some more examples?**: We've got [a set of tutorial notebooks](https://github.com/ibis-project/ibis-examples) for that!
- **Looking for API docs?**: Start [here](../api/expressions/top_level.md)!
- **Interested in contributing?**: Our [contribution section](../community/contribute/index.md) has what you need!
55 changes: 30 additions & 25 deletions docs/how_to/sessionize.md
@@ -2,73 +2,78 @@

Suppose you have entities (users, objects, actions, etc) that have event logs through polling or event triggers.

You might be interested in partitioning these logs by something called **sessions**, which can be defined as the duration of an event.
You might be interested in partitioning these logs by something called **sessions**, which can be defined as groups of consecutive event records without long interruptions for a given entity.

In the case of a user portal, it might be the time spent completing a task or navigating an app.
For games, it might be a time spent playing the game or remaining logged in.
For retail, it might be checking out or walking the premises.
In the case of a user portal, it might be grouping the navigation events that result in completing a task or buying a product.
For online games, it might be the grouping of activity events of a given user playing the game while remaining logged in.

This guide on sessionization is inspired by [_The Expressions API in Polars is Amazing_](https://www.pola.rs/posts/the-expressions-api-in-polars-is-amazing/),
a blog post in the [Polars](https://www.pola.rs/) community demonstrating the strength of polars expressions.
Sessionization can also be useful on longer time scales, for instance to reconstruct active subscription data from a raw payment or activity log, so as to model customer churn.
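
To make the boundary rule concrete, here is a toy sketch in plain Python with hypothetical timestamps, showing how a 30-minute threshold splits a single entity's event log into sessions (the real computation below is expressed in Ibis instead):

```python
# A toy illustration of the session boundary rule, assuming a 30-minute
# threshold and made-up timestamps for a single entity.
from datetime import datetime, timedelta

events = [
    datetime(2008, 1, 1, 10, 0),   # first event -> session 0
    datetime(2008, 1, 1, 10, 10),  # 10-minute gap -> still session 0
    datetime(2008, 1, 1, 11, 0),   # 50-minute gap -> session 1
]
threshold = timedelta(minutes=30)

session_ids = [0]
for prev, curr in zip(events, events[1:]):
    # A gap longer than the threshold starts a new session; booleans add as 0/1.
    session_ids.append(session_ids[-1] + (curr - prev > threshold))
print(session_ids)  # [0, 0, 1]
```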

This guide on sessionization is inspired by [_The Expressions API in Polars is Amazing_](https://www.pola.rs/posts/the-expressions-api-in-polars-is-amazing/), a blog post in the [Polars](https://www.pola.rs/) community demonstrating the strength of Polars expressions.

## Sessionizing Logs on a Cadence

For this example, we have a dataset that contains entities polled on a cadence.
The data used here can be found at `https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data.csv`.
You can use `ibis.read("https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data.csv")` to quickly get it into a table expression.
For this example, we use an activity log from the online game "World of Warcraft" with more than 10 million records for 37,354 unique players [made available](https://www.kaggle.com/datasets/mylesoneill/warcraft-avatar-history?select=wowah_data.csv) under the CC0 / Public Domain license. A copy of the data can be found at `https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data_raw.parquet` (75 MB) in Parquet format to reduce load times. You can use `ibis.read_parquet` to quickly get it into a table expression via the default `DuckDB` backend.

Our data contains the following:
This data contains the following fields:

- `char` : a unique identifier for a character (or a player). This is our entity column
- `timestamp`: a timestamp denoting when a `char` was polled. This occurs every ~10 minutes
- `char` : a unique identifier for a character (or a player). This is our entity column.
- `timestamp`: a timestamp denoting when a `char` was polled. This occurs every ~10 minutes.

We can take this information, along with a definition of what separates two sessions for an entity, and break our dataset up into sessions **without using any joins**:

```python
# Imports
import ibis
from ibis import _ as c
from ibis import deferred as c

# Read files into table expressions with ibis.read:
data = ibis.read("https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data_raw.parquet")
# Read files into table expressions with ibis.read_parquet:
data = ibis.read_parquet(
"https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data_raw.parquet"
)

# integer delay in seconds noting if a row should be included in the previous session for an entity
# Integer threshold in seconds; gaps longer than this start a new session
# for an entity.
session_boundary_threshold = 30 * 60

# Window for finding session ids per character
entity_window = ibis.cumulative_window(group_by=c.char, order_by=c.timestamp)

# Take the previous timestamp within a window (by character ordered by timestamp):
# Note: the first value in a window will be null
# Note: the first value in a window will be null.
ts_lag = c.timestamp.lag().over(entity_window)

# Subtract the lag from the current timestamp to get a timedelta
# Subtract the lag from the current timestamp to get a timedelta.
ts_delta = c.timestamp - ts_lag

# Compare timedelta to our session delay in seconds to determine if the
# current timestamp falls outside of the session.
# Cast as int for aggregation
# The boolean result sums as 0/1 when aggregated below.
is_new_session = (ts_delta > ibis.interval(seconds=session_boundary_threshold))

# Window for finding session min/max
# Window to compute session min/max and duration.
session_window = ibis.window(group_by=[c.char, c.session_id])

# Generate all of the data we need to analyze sessions:
sessionized = (
data
# Flag rows that start a new session, treating the first row for each
# character (where the lagged timestamp is null) as a session start.
.mutate(new_session=is_new_session.fillna(True))
# Create a session id for each character by using a cumulative sum
# over the `new_session` column
# over the `new_session` column.
.mutate(session_id=c.new_session.sum().over(entity_window))
# Drop `new_session` because it is no longer needed
# Drop `new_session` because it is no longer needed.
.drop("new_session")
.mutate(
# Get session duration using max(timestamp) - min(timestamp) over our window
# Get session duration using max(timestamp) - min(timestamp) over our window.
session_duration=c.timestamp.max().over(session_window) - c.timestamp.min().over(session_window)
)
# Sort for convenience
# Sort for convenience.
.order_by([c.char, c.timestamp])
)
```

Calling `ibis.show_sql(sessionized)` displays the SQL query and can be used to confirm that this Ibis expression does not rely on any join operations.

Calling `sessionized.execute()` should complete in less than a minute, depending on the speed of the internet connection to download the data and the number of CPU cores available to parallelize the processing of this nested query.
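
To peek at the output, a minimal sketch like the following should work (the column values it prints are illustrative, not exact):

```python
# Materialize a small preview of the sessionized table as a pandas DataFrame.
preview = sessionized.limit(5).execute()
print(preview[["char", "timestamp", "session_id", "session_duration"]])

# Sessions can then be aggregated like any other column, e.g. counting the
# number of distinct sessions per character.
sessions_per_char = sessionized.group_by("char").aggregate(
    n_sessions=c.session_id.nunique()
)
print(sessions_per_char.limit(5).execute())
```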
3,275 changes: 1,067 additions & 2,208 deletions docs/ibis-for-pandas-users.ipynb

Large diffs are not rendered by default.

1,017 changes: 678 additions & 339 deletions docs/ibis-for-sql-programmers.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ hide:

<div class="install-tutorial-button" markdown>
[Install](./install.md){ .md-button .md-button--primary }
[Tutorial](./tutorial/index.md){ .md-button }
[Tutorial](https://github.com/ibis-project/ibis-examples){ .md-button }
</div>

---
Expand Down
10 changes: 7 additions & 3 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ hide:
=== "pip"

```sh
pip install ibis-framework # (1)
pip install 'ibis-framework[duckdb]' # (1) (2)
```

1. Note that the `ibis-framework` package is *not* the same as the `ibis` package in PyPI. These two libraries cannot coexist in the same Python environment, as they are both imported with the `ibis` module name.
1. We suggest starting with the DuckDB backend. It's performant and fully featured. If you would like to use a different backend, all of the available options are listed below.

2. Note that the `ibis-framework` package is *not* the same as the `ibis` package in PyPI. These two libraries cannot coexist in the same Python environment, as they are both imported with the `ibis` module name.

{% for mgr in ["conda", "mamba"] %}
=== "{{ mgr }}"
Expand All @@ -27,18 +29,20 @@ hide:
## Install backend dependencies

{% for backend in sorted(ibis.backends.base._get_backend_names()) %}
{% if backend != "spark" %}
=== "{{ backend }}"

```sh
pip install 'ibis-framework[{{ backend }}]'
```

{% endif %}
{% endfor %}

---

After you've successfully installed Ibis, try going through the tutorial:

<div class="install-tutorial-button" markdown>
[Go to the Tutorial](./tutorial/index.md){ .md-button .md-button--primary }
[Go to the Tutorial](https://github.com/ibis-project/ibis-examples){ .md-button .md-button--primary }
</div>
345 changes: 345 additions & 0 deletions docs/release_notes.md

Large diffs are not rendered by default.

561 changes: 0 additions & 561 deletions docs/tutorial/01-Introduction-to-Ibis.ipynb

This file was deleted.

709 changes: 0 additions & 709 deletions docs/tutorial/02-Aggregates-Joins.ipynb

This file was deleted.

689 changes: 0 additions & 689 deletions docs/tutorial/03-Expressions-Lazy-Mode-Logging.ipynb

This file was deleted.

526 changes: 0 additions & 526 deletions docs/tutorial/04-More-Value-Expressions.ipynb

This file was deleted.

279 changes: 0 additions & 279 deletions docs/tutorial/05-IO-Create-Insert-External-Data.ipynb

This file was deleted.

227 changes: 0 additions & 227 deletions docs/tutorial/06-ComplexFiltering.ipynb

This file was deleted.

238 changes: 0 additions & 238 deletions docs/tutorial/07-Analytics-Tools.ipynb

This file was deleted.

11 changes: 0 additions & 11 deletions docs/tutorial/index.md

This file was deleted.

653 changes: 0 additions & 653 deletions docs/tutorial/rendered/08-Geospatial-Analysis.ipynb

This file was deleted.

22 changes: 22 additions & 0 deletions docs/user_guide/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,25 @@ def cowsay(msg):

ibis.options.verbose_log = cowsay
```

## Default backend

`ibis.options.default_backend` controls which backend is used by table
expressions returned by top-level functions such as `ibis.memtable`,
`ibis.read_csv` or `ibis.read_parquet`.

By default, it points to an instance of the DuckDB backend. Assuming the [backend
dependencies have been installed](../install.md), it can be updated by passing
the name of the backend to `ibis.set_backend` as follows:

```python
import ibis

expr = ibis.memtable({"column": [0, 1, 2, 3, 4]})
ibis.get_backend(expr)
# <ibis.backends.duckdb.Backend at 0x12fa0fb50>

ibis.set_backend("sqlite")
ibis.get_backend(expr)
# <ibis.backends.sqlite.Backend at 0x158411d10>
```
18 changes: 9 additions & 9 deletions flake.lock
17 changes: 6 additions & 11 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
git
just
nixpkgs-fmt
pre-commit
prettier
shellcheck
shfmt
Expand Down Expand Up @@ -113,21 +112,23 @@
PGPASSWORD = "postgres";
MYSQL_PWD = "ibis";
MSSQL_SA_PASSWORD = "1bis_Testing!";
DRUID_URL = "druid://localhost:8082/druid/v2/sql";
};
in
rec {
packages = {
inherit (pkgs) ibis38 ibis39 ibis310;
inherit (pkgs) ibis38 ibis39 ibis310 ibis311;

default = pkgs.ibis310;
default = pkgs.ibis311;

inherit (pkgs) update-lock-files gen-all-extras;
inherit (pkgs) update-lock-files gen-all-extras gen-examples;
};

devShells = rec {
ibis38 = mkDevShell pkgs.ibisDevEnv38;
ibis39 = mkDevShell pkgs.ibisDevEnv39;
ibis310 = mkDevShell pkgs.ibisDevEnv310;
ibis311 = mkDevShell pkgs.ibisDevEnv311;

default = ibis310;

Expand All @@ -143,13 +144,7 @@

release = pkgs.mkShell {
name = "release";
nativeBuildInputs = with pkgs; [
git
poetry
nodejs
unzip
gnugrep
];
nativeBuildInputs = with pkgs; [ git poetry nodejs unzip gnugrep ];
};
};
}
Expand Down
19 changes: 4 additions & 15 deletions gen_matrix.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
from __future__ import annotations

import io
from pathlib import Path

import mkdocs_gen_files
import pandas as pd
import tomli

import ibis
import ibis.expr.operations as ops


def get_backends():
pyproject = tomli.loads(Path("pyproject.toml").read_text())
backends = pyproject["tool"]["poetry"]["plugins"]["ibis.backends"]
del backends["spark"]
return [(backend, getattr(ibis, backend)) for backend in sorted(backends.keys())]
entry_points = sorted(ep.name for ep in ibis.util.backend_entry_points())
return [(backend, getattr(ibis, backend)) for backend in entry_points]


def get_leaf_classes(op):
Expand Down Expand Up @@ -45,15 +41,8 @@ def main():

df = pd.DataFrame(support).set_index("operation").sort_index()

file_path = Path("backends", "raw_support_matrix.csv")
local_path = Path(__file__).parent / "docs" / file_path

buf = io.StringIO()
df.to_csv(buf, index_label="FullOperation")

local_path.write_text(buf.getvalue())
with mkdocs_gen_files.open(file_path, "w") as f:
f.write(buf.getvalue())
with mkdocs_gen_files.open(Path("backends", "raw_support_matrix.csv"), "w") as f:
df.to_csv(f, index_label="FullOperation")


main()
53 changes: 53 additions & 0 deletions gen_redirects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pathlib

import mkdocs_gen_files

HTML_TEMPLATE = """
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Redirecting...</title>
<link rel="canonical" href="{url}">
<meta name="robots" content="noindex">
<script>var anchor=window.location.hash.substr(1);location.href="{url}"+(anchor?"#"+anchor:"")</script>
<meta http-equiv="refresh" content="0; url={url}">
</head>
<body>
Redirecting...
</body>
</html>
"""

# Versions for templated redirects
VERSIONS = ["latest", "dev", "4.1.0", "4.0.0", "3.2.0", "3.1.0"]

# Templated redirects
TEMPLATED_REDIRECTS = {
"/docs/{version}/": "/",
"/docs/{version}/install/": "/install/",
"/docs/{version}/docs/": "/docs/",
"/docs/{version}/backends/": "/backends/",
}

# Untemplated redirects
REDIRECTS = {}

# Fill in templates
REDIRECTS.update(
{
old.format(version=version): new
for version in VERSIONS
for old, new in TEMPLATED_REDIRECTS.items()
}
)

# Write all redirect files
for old, new in REDIRECTS.items():
if old.endswith("/"):
old = old + "index.html"

html = HTML_TEMPLATE.format(url=new)

with mkdocs_gen_files.open(pathlib.Path(old.lstrip("/")), "w") as f:
f.write(html)
64 changes: 54 additions & 10 deletions ibis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
"""Initialize Ibis module."""
from __future__ import annotations

from ibis import util
__version__ = "5.0.0"

from ibis import examples, util
from ibis.backends.base import BaseBackend
from ibis.common.exceptions import IbisError
from ibis.config import options
from ibis.expr import api
from ibis.expr import types as ir
from ibis.expr.api import * # noqa: F403

__all__ = ['api', 'ir', 'util', 'BaseBackend', 'IbisError', 'options']
__all__ += api.__all__

__version__ = "4.1.0"
__all__ = [ # noqa: PLE0604
'api',
'examples',
'ir',
'util',
'BaseBackend',
'IbisError',
'options',
*api.__all__,
]

_KNOWN_BACKENDS = ['heavyai']

Expand Down Expand Up @@ -57,15 +65,51 @@ def __getattr__(name: str) -> BaseBackend:
"and just leave the one that needs to be used."
)

import types

import ibis

(entry_point,) = entry_points
module = entry_point.load()
backend = module.Backend()

# The first time a backend is loaded, we register its options, and we set
# it as an attribute of `ibis`, so `__getattr__` is not called again for it
backend.register_options()

import ibis

setattr(ibis, name, backend)
return backend
# We don't want to expose all the methods on an unconnected backend to the user.
# In lieu of a full redesign, we create a proxy module and add only the methods
# that are valid to call without a connect call. These are:
#
# - connect
# - compile
# - has_operation
# - add_operation
# - _from_url
# - _to_sql
# - _sqlglot_dialect (if defined)
#
# We also copy over the docstring from `do_connect` to the proxy `connect`
# method, since that's where all the backend-specific kwargs are currently
# documented. This is all admittedly gross, but it works and doesn't
# require a backend redesign yet.

def connect(*args, **kwargs):
return backend.connect(*args, **kwargs)

connect.__doc__ = backend.do_connect.__doc__
connect.__wrapped__ = backend.do_connect
connect.__module__ = f"ibis.{name}"

proxy = types.ModuleType(f"ibis.{name}")
setattr(ibis, name, proxy)
proxy.connect = connect
proxy.compile = backend.compile
proxy.has_operation = backend.has_operation
proxy.add_operation = backend.add_operation
proxy.name = name
proxy._from_url = backend._from_url
proxy._to_sql = backend._to_sql
if hasattr(backend, "_sqlglot_dialect"):
proxy._sqlglot_dialect = backend._sqlglot_dialect

return proxy
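
For example, after this change a hypothetical interactive session might behave as follows (assuming the `duckdb` extra is installed; the exact output is illustrative):

```python
# A sketch of the proxy behavior; `ibis.duckdb` is the ModuleType proxy
# constructed above, not a connected backend.
import ibis
import ibis.expr.operations as ops

print(type(ibis.duckdb))            # <class 'module'>
print(ibis.duckdb.connect.__doc__)  # docstring copied from Backend.do_connect

con = ibis.duckdb.connect()         # creates an in-memory DuckDB connection
print(ibis.duckdb.has_operation(ops.Sum))  # True
```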