136 changes: 49 additions & 87 deletions .github/workflows/ibis-backends.yml
@@ -5,19 +5,21 @@ on:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
branches:
- master
- "*.x.x"
pull_request:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
branches:
- master
- "*.x.x"
@@ -47,7 +49,7 @@ jobs:
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
id: install_python
@@ -56,7 +58,7 @@
python-version: "${{ matrix.python-version }}"

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
run: python -m pip install --upgrade pip 'poetry==1.6.1'

- uses: syphar/restore-pip-download-cache@v1
with:
@@ -199,6 +201,15 @@ jobs:
- oracle
services:
- oracle
- name: flink
title: Flink
serial: true
extras:
- flink
additional_deps:
- apache-flink
even_more_deps:
- pandas~=1.5
exclude:
- os: windows-latest
backend:
@@ -295,6 +306,17 @@ jobs:
- oracle
services:
- oracle
- python-version: "3.11"
backend:
name: flink
title: Flink
serial: true
extras:
- flink
additional_deps:
- apache-flink
even_more_deps:
- pandas~=1.5
steps:
- name: update and install system dependencies
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
@@ -309,7 +331,7 @@
run: choco install sqlite

- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- uses: extractions/setup-just@v1
env:
@@ -348,7 +370,7 @@
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
run: python -m pip install --upgrade pip 'poetry==1.6.1'

- uses: syphar/restore-virtualenv@v1
with:
@@ -360,7 +382,12 @@

- name: install other deps
if: matrix.backend.additional_deps != null
run: poetry run pip install "${{ join(matrix.backend.additional_deps, ' ') }}"
run: poetry run pip install ${{ join(matrix.backend.additional_deps, ' ') }}

# FIXME(deepyaman)
- name: install even more deps
if: matrix.backend.even_more_deps != null
run: poetry run pip install ${{ join(matrix.backend.even_more_deps, ' ') }}

- name: show installed deps
run: poetry run pip list
@@ -381,7 +408,13 @@
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

- name: "run serial tests: ${{ matrix.backend.name }}"
if: matrix.backend.serial && matrix.backend.name != 'impala'
if: matrix.backend.serial && matrix.backend.name == 'flink'
run: just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests
env:
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}

- name: "run serial tests: ${{ matrix.backend.name }}"
if: matrix.backend.serial && matrix.backend.name != 'impala' && matrix.backend.name != 'flink'
run: just ci-check -m ${{ matrix.backend.name }}
env:
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
@@ -465,7 +498,7 @@ jobs:
- geospatial
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install libgeos for shapely
if: matrix.backend.name == 'postgres'
@@ -489,7 +522,7 @@
python-version: ${{ matrix.python-version }}

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
@@ -553,7 +586,7 @@ jobs:
- snowflake-connector-python
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- uses: actions/setup-java@v3
with:
@@ -574,7 +607,7 @@
python-version: ${{ matrix.python-version }}

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: remove conflicting deps
if: matrix.pandas.conflicts != null
@@ -613,14 +646,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v4
with:
python-version: "3.11"

- run: python -m pip install --upgrade pip 'poetry<1.4'
- run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: remove deps that are not compatible with sqlalchemy 2
run: poetry remove snowflake-sqlalchemy
@@ -704,7 +737,7 @@ jobs:
- oracle
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install libgeos for shapely
if: ${{ matrix.backend.name == 'postgres' }}
@@ -755,7 +788,7 @@ jobs:
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'
run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
@@ -773,76 +806,6 @@ jobs:
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}

test_flink:
name: Flink ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
needs:
- gen_lockfile_backends
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
python-version:
- "3.9"
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}

- name: download poetry lockfile
uses: actions/download-artifact@v3
with:
name: backend-deps-${{ matrix.python-version }}
path: deps

- name: pull out lockfile
shell: bash
run: |
set -euo pipefail
mv -f deps/* .
rm -r deps
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: ${{ steps.install_python.outputs.python-version }}

- name: install poetry
run: python -m pip install --upgrade pip 'poetry<1.4'

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: flink-${{ steps.install_python.outputs.python-version }}

- name: install ibis
run: poetry install --without dev --without docs --extras flink

- name: show installed deps
run: poetry run pip list

- name: "run parallel tests: flink"
run: poetry run pytest --junitxml=junit.xml --cov=ibis --cov-report=xml:coverage.xml ibis/backends/flink/tests --numprocesses auto --dist=loadgroup

- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,flink,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}

backends:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
@@ -851,6 +814,5 @@ jobs:
- test_backends
- test_backends_sqlalchemy2
- test_pyspark
- test_flink
steps:
- run: exit 0
117 changes: 58 additions & 59 deletions .github/workflows/ibis-docs-lint.yml
@@ -26,12 +26,12 @@ jobs:
if: github.event_name == 'pull_request'
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
@@ -44,10 +44,10 @@
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -67,12 +67,12 @@
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -92,7 +92,7 @@
if: github.event_name == 'push'
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v4
@@ -113,7 +113,7 @@
requirement_files: poetry.lock
custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.4'
- run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: install ibis
run: poetry install --without dev --without docs --all-extras
@@ -124,7 +124,7 @@
- name: benchmark
run: poetry run pytest --benchmark-enable --benchmark-json .benchmarks/output.json ibis/tests/benchmarks

- uses: tibdex/github-app-token@v1
- uses: tibdex/github-app-token@v2
id: generate-token
with:
app_id: ${{ secrets.SQUAWK_BOT_APP_ID }}
@@ -137,26 +137,32 @@
output-file-path: .benchmarks/output.json
benchmark-data-dir-path: ./bench
auto-push: false
comment-on-alert: true
alert-threshold: "300%"
comment-on-alert: false

- name: checkout gh-pages
run: git checkout gh-pages

- name: upload benchmark data
uses: actions/upload-artifact@v3
- uses: google-github-actions/auth@v1
with:
name: bench
path: ./bench
if-no-files-found: error
credentials_json: ${{ secrets.GCP_CREDENTIALS }}

- uses: google-github-actions/setup-gcloud@v1

- name: show gcloud info
run: gcloud info

- name: copy benchmark data to gcs
run: |
# remove whitespace and compress
jq -rcM < ./.benchmarks/output.json | gzip -c > output.json.gz
timestamp="$(date --iso-8601=ns --utc | tr ',' '.')"
gsutil cp output.json.gz "gs://ibis-benchmark-data/ci/${timestamp}.json.gz"
docs_pr:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
concurrency: docs-${{ github.repository }}-${{ github.head_ref || github.sha }}
concurrency: docs_pr-${{ github.repository }}-${{ github.head_ref || github.sha }}
steps:
- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -169,12 +175,19 @@
extraPullNames: nix-community,poetry2nix

- name: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
uses: actions/checkout@v4

- name: run doctest
run: nix develop --ignore-environment -c just doctest

- name: generate api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose

- name: build docs
run: nix develop --ignore-environment -c mkdocs build --strict
run: nix develop --ignore-environment -c just docs-render

- name: check that all frozen computations were done before push
run: git diff --exit-code --stat

- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress
@@ -183,13 +196,9 @@
runs-on: ubuntu-latest
if: github.event_name == 'push'
concurrency: docs-${{ github.repository }}
needs:
# wait on benchmarks to prevent a race condition when pushing to the
# gh-pages branch
- benchmarks
steps:
- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -201,47 +210,37 @@
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: Generate a GitHub token
uses: tibdex/github-app-token@v1
id: generate_token
with:
app_id: ${{ secrets.DOCS_BOT_APP_ID }}
private_key: ${{ secrets.DOCS_BOT_APP_PRIVATE_KEY }}

- name: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ steps.generate_token.outputs.token }}
uses: actions/checkout@v4

- name: Configure git info
run: |
set -euo pipefail
- name: run doctests
run: nix develop --ignore-environment -c just doctest

git config user.name 'ibis-docs-bot[bot]'
git config user.email 'ibis-docs-bot[bot]@users.noreply.github.com'
git config http.postBuffer 157286400
git config http.version 'HTTP/1.1'
- name: build api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose

- name: download benchmark data
uses: actions/download-artifact@v3
with:
name: bench
path: docs/bench
- name: build docs
run: nix develop --ignore-environment -c just docs-render

- name: build and push dev docs
run: |
nix develop --ignore-environment -c \
mkdocs gh-deploy --message 'docs: ibis@${{ github.sha }}' --ignore-version
- name: check that all frozen computations were done before push
run: git diff --exit-code --stat

- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress

- name: build and push quarto docs
run: nix develop --ignore-environment --keep NETLIFY_AUTH_TOKEN -c just docs-deploy
env:
NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}

simulate_release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: cachix/install-nix-action@v22
- uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
6 changes: 4 additions & 2 deletions .github/workflows/ibis-main-skip-helper.yml
@@ -6,16 +6,18 @@ on:
push:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
18 changes: 10 additions & 8 deletions .github/workflows/ibis-main.yml
@@ -5,17 +5,19 @@ on:
# Skip the test suite if all changes are in the docs directory
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
pull_request:
# Skip the test suite if all changes are in the docs directory
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
@@ -49,7 +51,7 @@ jobs:
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v4
@@ -62,7 +64,7 @@
requirement_files: poetry.lock
custom_cache_key_element: no-backends-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.4'
- run: python -m pip install --upgrade pip 'poetry==1.6.1'

- uses: syphar/restore-virtualenv@v1
with:
@@ -114,7 +116,7 @@ jobs:
- "3.11"
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v4
@@ -139,7 +141,7 @@
sudo apt-get update -y -q
sudo apt-get install -y -q build-essential libgeos-dev
- run: python -m pip install --upgrade pip 'poetry<1.4'
- run: python -m pip install --upgrade pip 'poetry==1.6.1'

- name: install ibis
# install duckdb and geospatial because of https://github.com/ibis-project/ibis/issues/4856
@@ -167,7 +169,7 @@ jobs:
sudo apt-get install -y -q build-essential graphviz libgeos-dev libkrb5-dev freetds-dev
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v4
@@ -180,7 +182,7 @@
requirement_files: poetry.lock
custom_cache_key_element: doctests-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.4'
- run: python -m pip install --upgrade pip 'poetry==1.6.1'

- uses: syphar/restore-virtualenv@v1
with:
30 changes: 0 additions & 30 deletions .github/workflows/ibis-tpch-queries-skip-helper.yml

This file was deleted.

61 changes: 0 additions & 61 deletions .github/workflows/ibis-tpch-queries.yml

This file was deleted.

6 changes: 4 additions & 2 deletions .github/workflows/nix-skip-helper.yml
@@ -7,16 +7,18 @@ on:
push:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
12 changes: 7 additions & 5 deletions .github/workflows/nix.yml
@@ -4,16 +4,18 @@ on:
push:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
pull_request:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "**/*.qmd"
- ".envrc"
branches:
- master
- "*.x.x"
@@ -40,17 +42,17 @@ jobs:
python-version: "3.10"
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: install nix
uses: cachix/install-nix-action@v22
uses: cachix/install-nix-action@v23
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: check poetry version
run: nix run '.#check-poetry-version' -- "1.3"
run: nix run '.#check-poetry-version' -- "1.6.1"

- name: setup cachix
uses: cachix/cachix-action@v12
4 changes: 2 additions & 2 deletions .github/workflows/pre-release.yml
@@ -19,7 +19,7 @@ jobs:
pre-release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0

@@ -32,7 +32,7 @@
run: python -m pip install --upgrade pip

- name: install poetry
run: python -m pip install 'poetry<1.4' poetry-dynamic-versioning
run: python -m pip install 'poetry==1.6.1' poetry-dynamic-versioning

- name: compute ibis version
id: get_version
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
@@ -14,18 +14,18 @@ jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: tibdex/github-app-token@v1
- uses: tibdex/github-app-token@v2
id: generate_token
with:
app_id: ${{ secrets.APP_ID }}
private_key: ${{ secrets.APP_PRIVATE_KEY }}

- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ steps.generate_token.outputs.token }}

- uses: cachix/install-nix-action@v22
- uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
18 changes: 9 additions & 9 deletions .github/workflows/update-deps.yml
@@ -11,8 +11,8 @@ jobs:
outputs:
matrix: ${{ steps.get-flakes.outputs.matrix }}
steps:
- uses: actions/checkout@v3
- uses: cachix/install-nix-action@v22
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -32,9 +32,9 @@
fail-fast: false
matrix: ${{ fromJSON(needs.get-flakes.outputs.matrix) }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: cachix/install-nix-action@v22
- uses: cachix/install-nix-action@v23
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
@@ -45,15 +45,15 @@
name: ibis
extraPullNames: nix-community,poetry2nix

- uses: cpcloud/flake-dep-info-action@v2.0.10
- uses: cpcloud/flake-dep-info-action@v2.0.11
id: get_current_commit
with:
input: ${{ matrix.flake }}

- name: update ${{ matrix.flake }}
run: nix flake lock --update-input ${{ matrix.flake }}

- uses: cpcloud/flake-dep-info-action@v2.0.10
- uses: cpcloud/flake-dep-info-action@v2.0.11
id: get_new_commit
with:
input: ${{ matrix.flake }}
@@ -62,21 +62,21 @@
id: needs_pr
run: echo "did_change=${{ steps.get_current_commit.outputs.rev != steps.get_new_commit.outputs.rev }}" >> "$GITHUB_OUTPUT"

- uses: tibdex/github-app-token@v1
- uses: tibdex/github-app-token@v2
if: fromJSON(steps.needs_pr.outputs.did_change)
id: generate_pr_token
with:
app_id: ${{ secrets.SQUAWK_BOT_APP_ID }}
private_key: ${{ secrets.SQUAWK_BOT_APP_PRIVATE_KEY }}

- uses: tibdex/github-app-token@v1
- uses: tibdex/github-app-token@v2
if: fromJSON(steps.needs_pr.outputs.did_change)
id: generate_pr_approval_token
with:
app_id: ${{ secrets.PR_APPROVAL_BOT_APP_ID }}
private_key: ${{ secrets.PR_APPROVAL_BOT_APP_PRIVATE_KEY }}

- uses: cpcloud/compare-commits-action@v5.0.34
- uses: cpcloud/compare-commits-action@v5.0.37
if: fromJSON(steps.needs_pr.outputs.did_change)
id: compare_commits
with:
44 changes: 32 additions & 12 deletions .gitignore
@@ -55,8 +55,6 @@ venv.bak/
Icon?

# documentation files
docs/source/generated
docs/source/generated-notebooks
*.org

# Ibis testing data
@@ -73,14 +71,9 @@ ci/udf/Makefile
scripts/ibis-testing*
ibis_testing*
.tox/
.asv/
.ipynb_checkpoints/
.pytest_cache
.mypy_cache
docs/source/backends/generated

# temporary doc build
docbuild

# nix generated files
.direnv
@@ -89,25 +82,52 @@ result-*

# generated mkdocs website
.benchmarks
site

# tutorial data
geography.db
*.pbf
*.pbf.zst

# build artifacts
ci/udf/.ninja_deps
ci/udf/.ninja_log
ci/udf/build.ninja

# coverage
junit.xml

# spark generated files
spark-warehouse
docs/backends/*support_matrix.csv

# bytecode
__pycache__

# ctags
tags

# macos junk
.DS_Store

# profiling data
prof/

# hypothesis generated files
.hypothesis

# r junk
.RData
.Rhistory

# coverage files
.coverage*

# quarto generated files
.local

# lang server generated
.null-ls*

# generated examples data
ibis/examples/data
ibis/examples/descriptions
.coverage*

# chat
*zuliprc*
15 changes: 8 additions & 7 deletions .pre-commit-config.yaml
@@ -17,18 +17,18 @@ default_stages:
- commit
repos:
- repo: https://github.com/rhysd/actionlint
rev: v1.6.25
rev: v1.6.26
hooks:
- id: actionlint-system
- repo: https://github.com/psf/black
rev: 23.7.0
rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
- repo: https://github.com/keewis/blackdoc
rev: v0.3.8
hooks:
- id: nbstripout
exclude: .+/rendered/.+
- id: blackdoc
exclude: ibis/examples/__init__\.py
- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
hooks:
@@ -56,6 +56,7 @@ repos:
rev: v4.4.0
hooks:
- id: check-added-large-files
args: ["--maxkb=700"]
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-merge-conflict
@@ -66,7 +67,7 @@
args: ["--allow-missing-credentials"]
- id: detect-private-key
- id: end-of-file-fixer
exclude: .+/snapshots/.+
exclude: (.+/snapshots/.+)|(.*docs/_freeze/.+\.json)|(.+\.excalidraw)
- id: fix-byte-order-marker
- id: mixed-line-ending
- id: trailing-whitespace
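As a quick local check of the hook changes above (assuming `pre-commit` is installed), the hooks can be exercised directly; `blackdoc` is the hook id added in this diff:

```bash
# run every configured hook against the whole repository
pre-commit run --all-files

# or run only the newly added blackdoc hook
pre-commit run blackdoc --all-files
```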
9 changes: 6 additions & 3 deletions .prettierignore
@@ -10,8 +10,11 @@ docs/SUMMARY.md
docs/api/expressions/top_level.md
docs/overrides/*.html
docs/release_notes.md
docs/_freeze
docs/_publish.yml
docs/.quarto
docs/_output
ibis
poetry.lock
result*
site
*.ipynb
*.delta
*.lock
2 changes: 1 addition & 1 deletion .releaserc.js
@@ -35,7 +35,7 @@ module.exports = {
[
"@semantic-release/changelog",
{
changelogTitle: "Release Notes\n---",
changelogTitle: "Release notes\n---",
changelogFile: "docs/release_notes.md",
},
],
3 changes: 3 additions & 0 deletions .yamllint.yaml
@@ -1,6 +1,9 @@
---
extends: default

ignore:
- docs/_publish.yml

rules:
document-start: disable
line-length: disable # we already enforce this with prettier
37 changes: 0 additions & 37 deletions LICENSES/pandas.txt

This file was deleted.

7 changes: 2 additions & 5 deletions README.md
@@ -1,6 +1,7 @@
# Ibis

[![Documentation Status](https://img.shields.io/badge/docs-docs.ibis--project.org-blue.svg)](http://ibis-project.org)
[![Project Chat](https://img.shields.io/badge/zulip-join_chat-purple.svg?logo=zulip)](https://ibis-project.zulipchat.com)
[![Anaconda-Server Badge](https://anaconda.org/conda-forge/ibis-framework/badges/version.svg)](https://anaconda.org/conda-forge/ibis-framework)
[![PyPI](https://img.shields.io/pypi/v/ibis-framework.svg)](https://pypi.org/project/ibis-framework)
[![Build status](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml/badge.svg)](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml?query=branch%3Amaster)
@@ -187,10 +188,6 @@ Ibis is an open source project and welcomes contributions from anyone in the community
- We care about keeping the community welcoming for all. Check out [the code of conduct](https://github.com/ibis-project/ibis/blob/master/docs/CODE_OF_CONDUCT.md).
- The Ibis project is open sourced under the [Apache License](https://github.com/ibis-project/ibis/blob/master/LICENSE.txt).

Join our community here:

- Twitter: https://twitter.com/IbisData
- Gitter: https://gitter.im/ibis-dev/Lobby
- StackOverflow: https://stackoverflow.com/questions/tagged/ibis
Join our community by interacting on GitHub or chatting with us on [Zulip](https://ibis-project.zulipchat.com/).

For more information visit https://ibis-project.org/.
2 changes: 1 addition & 1 deletion ci/check_disallowed_imports.py
@@ -39,7 +39,7 @@ def check_dependency_rules(dependency_graph, disallowed_imports):
}


if __name__ == '__main__':
if __name__ == "__main__":
dependency_graph = generate_dependency_graph(*sys.argv[1:])
prohibited_deps = check_dependency_rules(dependency_graph, disallowed_imports)

5 changes: 4 additions & 1 deletion ci/make_geography_db.py
@@ -19,12 +19,15 @@
import datetime
import tempfile
from pathlib import Path
from typing import Any, Mapping
from typing import TYPE_CHECKING, Any

import requests
import sqlalchemy as sa
import toolz

if TYPE_CHECKING:
from collections.abc import Mapping

SCHEMAS = {
"countries": [
("iso_alpha2", sa.TEXT),
2 changes: 1 addition & 1 deletion ci/release/dry_run.sh
@@ -42,7 +42,7 @@ nix develop '.#release' -c npx --yes \
-p "@semantic-release/exec" \
-p "@semantic-release/git" \
-p "semantic-release-replace-plugin@1.2.0" \
-p "conventional-changelog-conventionalcommits" \
-p "conventional-changelog-conventionalcommits@6.1.0" \
semantic-release \
--ci \
--dry-run \
2 changes: 1 addition & 1 deletion ci/release/run.sh
@@ -11,5 +11,5 @@ nix develop '.#release' -c npx --yes \
-p "@semantic-release/exec" \
-p "@semantic-release/git" \
-p "semantic-release-replace-plugin@1.2.0" \
-p "conventional-changelog-conventionalcommits" \
-p "conventional-changelog-conventionalcommits@6.1.0" \
semantic-release --ci
3 changes: 3 additions & 0 deletions ci/schema/clickhouse.sql
@@ -12,6 +12,9 @@ CREATE OR REPLACE TABLE ibis_testing.functional_alltypes ENGINE = Memory AS
SELECT * REPLACE(CAST(timestamp_col AS Nullable(DateTime)) AS timestamp_col)
FROM file('ibis/functional_alltypes.parquet', 'Parquet');

CREATE OR REPLACE TABLE ibis_testing.astronauts ENGINE = Memory AS
SELECT * FROM file('ibis/astronauts.parquet', 'Parquet');

CREATE OR REPLACE TABLE ibis_testing.tzone (
ts Nullable(DateTime),
key Nullable(String),
12 changes: 12 additions & 0 deletions ci/schema/druid.sql
@@ -45,3 +45,15 @@
)
)
PARTITIONED BY ALL TIME;

REPLACE INTO "astronauts"
OVERWRITE ALL
SELECT *
FROM TABLE(
EXTERN(
'{"type":"local","files":["/data/astronauts.parquet"]}',
'{"type":"parquet"}',
'[{"name":"id","type":"long"},{"name":"number","type":"long"},{"name":"nationwide_number","type":"long"},{"name":"name","type":"string"},{"name":"original_name","type":"string"},{"name":"sex","type":"string"},{"name":"year_of_birth","type":"long"},{"name":"nationality","type":"string"},{"name":"military_civilian","type":"string"},{"name":"selection","type":"string"},{"name":"year_of_selection","type":"long"},{"name":"mission_number","type":"long"},{"name":"total_number_of_missions","type":"long"},{"name":"occupation","type":"string"},{"name":"year_of_mission","type":"long"},{"name":"mission_title","type":"string"},{"name":"ascend_shuttle","type":"string"},{"name":"in_orbit","type":"string"},{"name":"descend_shuttle","type":"string"},{"name":"hours_mission","type":"double"},{"name":"total_hrs_sum","type":"double"},{"name":"field21","type":"long"},{"name":"eva_hrs_mission","type":"double"},{"name":"total_eva_hrs","type":"double"}]'
)
)
PARTITIONED BY ALL TIME;
33 changes: 33 additions & 0 deletions ci/schema/mssql.sql
@@ -21,6 +21,39 @@ BULK INSERT diamonds
FROM '/data/diamonds.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS astronauts;

CREATE TABLE astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
"name" VARCHAR(MAX),
"original_name" VARCHAR(MAX),
"sex" VARCHAR(MAX),
"year_of_birth" BIGINT,
"nationality" VARCHAR(MAX),
"military_civilian" VARCHAR(MAX),
"selection" VARCHAR(MAX),
"year_of_selection" BIGINT,
"mission_number" BIGINT,
"total_number_of_missions" BIGINT,
"occupation" VARCHAR(MAX),
"year_of_mission" BIGINT,
"mission_title" VARCHAR(MAX),
"ascend_shuttle" VARCHAR(MAX),
"in_orbit" VARCHAR(MAX),
"descend_shuttle" VARCHAR(MAX),
"hours_mission" DOUBLE PRECISION,
"total_hrs_sum" DOUBLE PRECISION,
"field21" BIGINT,
"eva_hrs_mission" DOUBLE PRECISION,
"total_eva_hrs" DOUBLE PRECISION
);

BULK INSERT astronauts
FROM '/data/astronauts.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS batting;

CREATE TABLE batting (
29 changes: 29 additions & 0 deletions ci/schema/mysql.sql
@@ -13,6 +13,35 @@ CREATE TABLE diamonds (
z FLOAT
) DEFAULT CHARACTER SET = utf8;

DROP TABLE IF EXISTS astronauts;

CREATE TABLE astronauts (
`id` BIGINT,
`number` BIGINT,
`nationwide_number` BIGINT,
`name` TEXT,
`original_name` TEXT,
`sex` TEXT,
`year_of_birth` BIGINT,
`nationality` TEXT,
`military_civilian` TEXT,
`selection` TEXT,
`year_of_selection` BIGINT,
`mission_number` BIGINT,
`total_number_of_missions` BIGINT,
`occupation` TEXT,
`year_of_mission` BIGINT,
`mission_title` TEXT,
`ascend_shuttle` TEXT,
`in_orbit` TEXT,
`descend_shuttle` TEXT,
`hours_mission` FLOAT,
`total_hrs_sum` FLOAT,
`field21` BIGINT,
`eva_hrs_mission` FLOAT,
`total_eva_hrs` FLOAT
);

DROP TABLE IF EXISTS batting;

CREATE TABLE batting (
29 changes: 29 additions & 0 deletions ci/schema/oracle.sql
@@ -18,6 +18,35 @@ CREATE TABLE "diamonds" (
"z" BINARY_FLOAT
);

DROP TABLE IF EXISTS "astronauts";

CREATE TABLE "astronauts" (
"id" NUMBER(18),
"number" NUMBER(18),
"nationwide_number" NUMBER(18),
"name" VARCHAR2(255),
"original_name" VARCHAR2(255),
"sex" VARCHAR2(255),
"year_of_birth" NUMBER(18),
"nationality" VARCHAR2(255),
"military_civilian" VARCHAR2(255),
"selection" VARCHAR2(255),
"year_of_selection" NUMBER(18),
"mission_number" NUMBER(18),
"total_number_of_missions" NUMBER(18),
"occupation" VARCHAR2(255),
"year_of_mission" NUMBER(18),
"mission_title" VARCHAR2(255),
"ascend_shuttle" VARCHAR2(255),
"in_orbit" VARCHAR2(255),
"descend_shuttle" VARCHAR2(255),
"hours_mission" BINARY_FLOAT,
"total_hrs_sum" BINARY_FLOAT,
"field21" NUMBER(18),
"eva_hrs_mission" BINARY_FLOAT,
"total_eva_hrs" BINARY_FLOAT
);

DROP TABLE IF EXISTS "batting";

CREATE TABLE "batting" (
29 changes: 29 additions & 0 deletions ci/schema/oracle/astronauts.ctl
@@ -0,0 +1,29 @@
options (SKIP=1)
load data
infile '/opt/oracle/data/astronauts.csv'
into table "astronauts"
fields csv with embedded
("id",
"number",
"nationwide_number",
"name",
"original_name",
"sex",
"year_of_birth",
"nationality",
"military_civilian",
"selection",
"year_of_selection",
"mission_number",
"total_number_of_missions",
"occupation",
"year_of_mission",
"mission_title",
"ascend_shuttle",
"in_orbit",
"descend_shuttle",
"hours_mission",
"total_hrs_sum",
"field21",
"eva_hrs_mission",
"total_eva_hrs")
2 changes: 1 addition & 1 deletion ci/schema/oracle/awards_players.ctl
@@ -2,6 +2,6 @@ options (SKIP=1)
load data
infile '/opt/oracle/data/awards_players.csv'
into table "awards_players"
fields terminated by "," optionally enclosed by '"'
fields csv without embedded
TRAILING NULLCOLS
( "playerID", "awardID", "yearID", "lgID", "tie", "notes" )
2 changes: 1 addition & 1 deletion ci/schema/oracle/batting.ctl
@@ -2,6 +2,6 @@ options (SKIP=1)
load data
infile '/opt/oracle/data/batting.csv'
into table "batting"
fields terminated by "," optionally enclosed by '"'
fields csv without embedded
TRAILING NULLCOLS
( "playerID", "yearID", "stint", "teamID", "lgID", "G", "AB", "R", "H", "X2B", "X3B", "HR", "RBI", "SB", "CS", "BB", "SO", "IBB", "HBP", "SH", "SF", "GIDP" )
2 changes: 1 addition & 1 deletion ci/schema/oracle/diamonds.ctl
@@ -2,5 +2,5 @@ options (SKIP=1)
load data
infile '/opt/oracle/data/diamonds.csv'
into table "diamonds"
fields terminated by "," optionally enclosed by '"'
fields csv without embedded
( "carat", "cut", "color", "clarity", "depth", "table", "price", "x", "y", "z" )
2 changes: 1 addition & 1 deletion ci/schema/oracle/functional_alltypes.ctl
@@ -2,7 +2,7 @@ options (SKIP=1)
load data
infile '/opt/oracle/data/functional_alltypes.csv'
into table "functional_alltypes"
fields terminated by "," optionally enclosed by '"'
fields csv without embedded
TRAILING NULLCOLS
( "id",
"bool_col",
32 changes: 32 additions & 0 deletions ci/schema/postgres.sql
@@ -3,6 +3,7 @@ CREATE EXTENSION IF NOT EXISTS postgis;
CREATE EXTENSION IF NOT EXISTS plpython3u;
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS first_last_agg;
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;

DROP TABLE IF EXISTS diamonds CASCADE;

@@ -21,6 +22,37 @@ CREATE TABLE diamonds (

COPY diamonds FROM '/data/diamonds.csv' WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',');

DROP TABLE IF EXISTS astronauts CASCADE;

CREATE TABLE astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
"name" VARCHAR,
"original_name" VARCHAR,
"sex" VARCHAR,
"year_of_birth" BIGINT,
"nationality" VARCHAR,
"military_civilian" VARCHAR,
"selection" VARCHAR,
"year_of_selection" BIGINT,
"mission_number" BIGINT,
"total_number_of_missions" BIGINT,
"occupation" VARCHAR,
"year_of_mission" BIGINT,
"mission_title" VARCHAR,
"ascend_shuttle" VARCHAR,
"in_orbit" VARCHAR,
"descend_shuttle" VARCHAR,
"hours_mission" DOUBLE PRECISION,
"total_hrs_sum" DOUBLE PRECISION,
"field21" BIGINT,
"eva_hrs_mission" DOUBLE PRECISION,
"total_eva_hrs" DOUBLE PRECISION
);

COPY astronauts FROM '/data/astronauts.csv' WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',');

DROP TABLE IF EXISTS batting CASCADE;

CREATE TABLE batting (
35 changes: 27 additions & 8 deletions ci/schema/snowflake.sql
@@ -1,11 +1,3 @@
CREATE OR REPLACE TEMP FILE FORMAT ibis_testing
type = 'CSV'
field_delimiter = ','
skip_header = 1
field_optionally_enclosed_by = '"';

CREATE OR REPLACE TEMP STAGE ibis_testing file_format = ibis_testing;

CREATE OR REPLACE TABLE diamonds (
"carat" FLOAT,
"cut" TEXT,
@@ -19,6 +11,33 @@ CREATE OR REPLACE TABLE diamonds (
"z" FLOAT
);

CREATE OR REPLACE TABLE astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
"name" TEXT,
"original_name" TEXT,
"sex" TEXT,
"year_of_birth" BIGINT,
"nationality" TEXT,
"military_civilian" TEXT,
"selection" TEXT,
"year_of_selection" BIGINT,
"mission_number" BIGINT,
"total_number_of_missions" BIGINT,
"occupation" TEXT,
"year_of_mission" BIGINT,
"mission_title" TEXT,
"ascend_shuttle" TEXT,
"in_orbit" TEXT,
"descend_shuttle" TEXT,
"hours_mission" FLOAT,
"total_hrs_sum" FLOAT,
"field21" BIGINT,
"eva_hrs_mission" FLOAT,
"total_eva_hrs" FLOAT
);

CREATE OR REPLACE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
29 changes: 29 additions & 0 deletions ci/schema/sqlite.sql
@@ -17,6 +17,35 @@ CREATE TABLE functional_alltypes (
CHECK (bool_col IN (0, 1))
);

DROP TABLE IF EXISTS astronauts;

CREATE TABLE astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
"name" TEXT,
"original_name" TEXT,
"sex" TEXT,
"year_of_birth" BIGINT,
"nationality" TEXT,
"military_civilian" TEXT,
"selection" TEXT,
"year_of_selection" BIGINT,
"mission_number" BIGINT,
"total_number_of_missions" BIGINT,
"occupation" TEXT,
"year_of_mission" BIGINT,
"mission_title" TEXT,
"ascend_shuttle" TEXT,
"in_orbit" TEXT,
"descend_shuttle" TEXT,
"hours_mission" FLOAT,
"total_hrs_sum" FLOAT,
"field21" BIGINT,
"eva_hrs_mission" FLOAT,
"total_eva_hrs" FLOAT
);

DROP TABLE IF EXISTS awards_players;

CREATE TABLE awards_players (
4 changes: 3 additions & 1 deletion codecov.yml
@@ -7,7 +7,9 @@ codecov:
comment: false

ignore:
- "docs/**"
- "docs/**/*"
- "ibis/backends/bigquery/**/*"
- "ibis/backends/snowflake/**/*"

coverage:
status:
18 changes: 9 additions & 9 deletions docker-compose.yml
@@ -1,7 +1,7 @@
version: "3.4"
services:
clickhouse:
image: clickhouse/clickhouse-server:23.7.1.2470-alpine
image: clickhouse/clickhouse-server:23.9.1.1854-alpine
ports:
- 8123:8123
- 9000:9000
@@ -59,7 +59,7 @@ services:
- CMD
- pg_isready
timeout: 5s
image: postgres:13.11-alpine
image: postgres:13.12-alpine
networks:
- impala

@@ -117,7 +117,7 @@ services:
- mariadb-admin
- ping
timeout: 5s
image: mariadb:10.11.4
image: mariadb:10.11.5
ports:
- 3306:3306
networks:
@@ -189,7 +189,7 @@ services:
- trino-postgres:/data

hive-metastore-mariadb:
image: mariadb:10.11.4
image: mariadb:10.11.5
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
MYSQL_USER: admin
@@ -234,9 +234,9 @@ services:
retries: 10
test:
- CMD-SHELL
- trino --execute 'SELECT 1 AS one'
- trino --output-format null --execute 'show schemas in postgresql; show schemas in hive; show schemas in memory'
timeout: 30s
image: trinodb/trino:422
image: trinodb/trino:427
ports:
- 8080:8080
networks:
@@ -248,7 +248,7 @@
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

druid-postgres:
image: postgres:15.3-alpine
image: postgres:16.0-alpine
container_name: druid-postgres
environment:
- POSTGRES_PASSWORD=FoolishPassword
@@ -268,7 +268,7 @@
druid-zookeeper:
hostname: zookeeper
container_name: zookeeper
image: zookeeper:3.8
image: zookeeper:3.9
environment:
- ZOO_MY_ID=1
healthcheck:
@@ -412,7 +412,7 @@ services:
- druid

oracle:
image: gvenzl/oracle-free:23
image: gvenzl/oracle-free:23.3-slim
environment:
ORACLE_PASSWORD: ibis
ORACLE_DATABASE: IBIS_TESTING
2 changes: 1 addition & 1 deletion docker/trino/Dockerfile
@@ -1,4 +1,4 @@
FROM openjdk:8u342-jre
FROM eclipse-temurin:8u342-b07-jre-jammy

WORKDIR /opt

2 changes: 1 addition & 1 deletion docker/trino/catalog/hive.properties
@@ -1,6 +1,7 @@
connector.name=hive

hive.allow-drop-table=true
hive.allow-rename-table=true
hive.ignore-absent-partitions=true
hive.metastore.thrift.delete-files-on-drop=true
hive.metastore.uri=thrift://hive-metastore:9083
@@ -10,6 +11,5 @@ hive.s3.aws-access-key=accesskey
hive.s3.aws-secret-key=secretkey
hive.s3.endpoint=http://hive-metastore-minio:9000
hive.s3.path-style-access=true
hive.s3select-pushdown.enabled=true
hive.storage-format=PARQUET
hive.timestamp-precision=MICROSECONDS
2 changes: 1 addition & 1 deletion docker/trino/entrypoint.sh
@@ -2,7 +2,7 @@

export HADOOP_HOME=/opt/hadoop-3.2.0
export HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.375.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.2.0.jar
export JAVA_HOME=/usr/local/openjdk-8
export JAVA_HOME=/opt/java/openjdk

# Make sure mariadb is ready
MAX_TRIES=8
27 changes: 27 additions & 0 deletions docs/.gitignore
@@ -0,0 +1,27 @@
.quarto
_output

*.ddb

site_libs

*.csv
*.parquet
*.delta
*.zip
*.db
diamonds.json
*.ndjson
reference/
objects.json
*support_matrix.csv

# generated notebooks and files
*.ipynb
*_files

# inventories
_inv
objects.txt

/.quarto/
5 changes: 5 additions & 0 deletions docs/404.qmd
@@ -0,0 +1,5 @@
# Page not found

The page you requested cannot be found (perhaps it was moved or renamed).

You may want to try searching to find the page's new location.
1 change: 0 additions & 1 deletion docs/CNAME

This file was deleted.

12 changes: 12 additions & 0 deletions docs/README.md
@@ -0,0 +1,12 @@
# Ibis documentation with Quarto

TODO: update this README closer to merging.

## Setup

Check out this PR/branch.

0. Create a Python environment with everything installed
1. Install Quarto
2. Install `just`
3. `just preview`
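
A minimal sketch of those steps, assuming a pip-managed virtual environment (the poetry pin matches the workflows in this diff, and `just preview` is the recipe named above):

```bash
# create and activate an isolated environment
python -m venv .venv && source .venv/bin/activate

# match the poetry version pinned by this PR's workflows
python -m pip install --upgrade pip 'poetry==1.6.1'
poetry install  # the default dependency groups include the docs tooling

# install the Quarto CLI (https://quarto.org) and just (https://github.com/casey/just),
# then serve a live preview
just preview
```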
55 changes: 0 additions & 55 deletions docs/SUMMARY.md

This file was deleted.

3 changes: 3 additions & 0 deletions docs/_callouts/experimental_backend.qmd
@@ -0,0 +1,3 @@
::: {.callout-warning}
This backend is experimental and is subject to backwards incompatible changes.
:::
3 changes: 3 additions & 0 deletions docs/_callouts/pypi_warning.qmd
@@ -0,0 +1,3 @@
::: {.callout-warning}
Note that the `ibis-framework` package is _not_ the same as the `ibis` package on PyPI. These two libraries cannot coexist in the same Python environment, as they are both imported with the `ibis` module name.
:::
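
For example, a working install (shown here with the DuckDB backend, as used elsewhere in these docs) looks like:

```bash
# correct: the Ibis project is published on PyPI as ibis-framework
pip install 'ibis-framework[duckdb]'

# incorrect: `ibis` is an unrelated package that also provides the ibis module
# pip install ibis
```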
198 changes: 198 additions & 0 deletions docs/_code/input_output_penguins.qmd
@@ -0,0 +1,198 @@
## Data platforms

You can connect Ibis to any supported backend to read and write data in backend-native tables.

```{python}
# | code-fold: true
con = ibis.duckdb.connect("penguins.ddb")
t = con.create_table("penguins", t.to_pyarrow(), overwrite=True)
```

```{python}
con = ibis.duckdb.connect("penguins.ddb") # <1>
t = con.table("penguins") # <2>
t.head(3) # <3>
```

1. Connect to a backend.
2. Load a table.
3. Display the table.

```{python}
grouped = ( # <1>
t.group_by(["species", "island"]) # <1>
.aggregate(count=ibis._.count()) # <1>
.order_by(ibis.desc("count")) # <1>
) # <1>
con.create_table("penguins_grouped", grouped.to_pyarrow(), overwrite=True) # <2>
```

1. Create a lazily evaluated Ibis expression.
2. Write to a table.

## File formats

Depending on the backend, you can read and write data in several file formats.

::: {.panel-tabset}

## CSV

```{.bash}
pip install 'ibis-framework[duckdb]'
```

```{python}
t.to_csv("penguins.csv") # <1>
ibis.read_csv("penguins.csv").head(3) # <2>
```
1. Write the table to a CSV file (support varies by backend).
2. Read the CSV file into a table (support varies by backend).

## Delta Lake

```{.bash}
pip install 'ibis-framework[duckdb,deltalake]'
```

```{python}
t.to_delta("penguins.delta", mode="overwrite") # <1>
ibis.read_delta("penguins.delta").head(3) # <2>
```

1. Write the table to a Delta Lake table (support varies by backend).
2. Read the Delta Lake table into a table (support varies by backend).

## Parquet

```{.bash}
pip install 'ibis-framework[duckdb]'
```

```{python}
t.to_parquet("penguins.parquet") # <1>
ibis.read_parquet("penguins.parquet").head(3) # <2>
```

1. Write the table to a Parquet file (support varies by backend).
2. Read the Parquet file into a table (support varies by backend).

:::

## With other Python libraries

Ibis uses [Apache Arrow](https://arrow.apache.org/) for efficient data transfer
to and from other libraries. Ibis tables implement the `__dataframe__` and
`__array__` protocols, so you can pass them to any library that supports these
protocols.

::: {.panel-tabset}

## `pandas`

You can convert Ibis tables to pandas dataframes.

```bash
pip install pandas
```

```{python}
df = t.to_pandas() # <1>
df.head(3)
```

1. Returns a pandas dataframe.

Or you can convert pandas dataframes to Ibis tables.

```{python}
t = ibis.memtable(df) # <1>
t.head(3)
```

1. Returns an Ibis table.

## `polars`

You can convert Ibis tables to Polars dataframes.

```bash
pip install polars
```

```{python}
import polars as pl
df = pl.from_arrow(t.to_pyarrow())
df.head(3)
```

Or Polars dataframes to Ibis tables.

```{python}
t = ibis.memtable(df)
t.head(3)
```

## `pyarrow`

You can convert Ibis tables to PyArrow tables.

```bash
pip install pyarrow
```

```{python}
t.to_pyarrow()
```

Or PyArrow batches:

```{python}
t.to_pyarrow_batches()
```

And you can convert PyArrow tables to Ibis tables.

```{python}
ibis.memtable(t.to_pyarrow()).head(3)
```

## `torch`

You can convert Ibis tables to torch tensors.

```bash
pip install torch
```

```python
t.select(s.numeric()).limit(3).to_torch()
```

```
{'col2': tensor([39.1000, 39.5000, 40.3000], dtype=torch.float64),
'col3': tensor([18.7000, 17.4000, 18.0000], dtype=torch.float64),
'col4': tensor([181., 186., 195.], dtype=torch.float64),
'col5': tensor([3750., 3800., 3250.], dtype=torch.float64),
'col7': tensor([2007, 2007, 2007], dtype=torch.int16)}
```

## `__dataframe__`

You can directly call the `__dataframe__` protocol on Ibis tables, though this is typically handled by the library you're using.

```{python}
t.__dataframe__()
```

## `__array__`

You can directly call the `__array__` protocol on Ibis tables, though this is typically handled by the library you're using.

```{python}
t.__array__()
```

:::
14 changes: 14 additions & 0 deletions docs/_code/setup_penguins.qmd
@@ -0,0 +1,14 @@
```{python}
import ibis # <1>
import ibis.selectors as s # <1>
ibis.options.interactive = True # <2>
t = ibis.examples.penguins.fetch() # <3>
t.head(3) # <4>
```

1. Ensure you install Ibis first.
2. Use interactive mode for exploratory data analysis (EDA) or demos.
3. Load a dataset from the built-in examples.
4. Display the table.
3 changes: 3 additions & 0 deletions docs/_extensions/machow/interlinks/.gitignore
@@ -0,0 +1,3 @@
*.html
*.pdf
*_files/
7 changes: 7 additions & 0 deletions docs/_extensions/machow/interlinks/_extension.yml
@@ -0,0 +1,7 @@
title: Interlinks
author: Michael Chow
version: 1.1.0
quarto-required: ">=1.2.0"
contributes:
filters:
- interlinks.lua
252 changes: 252 additions & 0 deletions docs/_extensions/machow/interlinks/interlinks.lua
@@ -0,0 +1,252 @@
local function read_inv_text(filename)
-- read file
local file = io.open(filename, "r")
if file == nil then
return nil
end
local str = file:read("a")
file:close()


local project = str:match("# Project: (%S+)")
local version = str:match("# Version: (%S+)")

local data = {project = project, version = version, items = {}}

local ptn_data =
"^" ..
"(.-)%s+" .. -- name
"([%S:]-):" .. -- domain
"([%S]+)%s+" .. -- role
"(%-?%d+)%s+" .. -- priority
"(%S*)%s+" .. -- uri
"(.-)\r?$" -- dispname


-- Iterate through each line in the file content
for line in str:gmatch("[^\r\n]+") do
if not line:match("^#") then
-- Match each line against the pattern
local name, domain, role, priority, uri, dispName = line:match(ptn_data)

-- if name is nil, raise an error
if name == nil then
error("Error parsing line: " .. line)
end

data.items[#data.items + 1] = {
name = name,
domain = domain,
role = role,
priority = priority,
uri = uri,
dispName = dispName
}
end
end
return data
end

local function read_json(filename)

local file = io.open(filename, "r")
if file == nil then
return nil
end
local str = file:read("a")
file:close()

local decoded = quarto.json.decode(str)
return decoded
end

local function read_inv_text_or_json(base_name)
local file = io.open(base_name .. ".txt", "r")
if file then
-- TODO: refactors so we don't just close the file immediately
io.close(file)
json = read_inv_text(base_name .. ".txt")

else
json = read_json(base_name .. ".json")
end

return json
end

local inventory = {}

local function lookup(search_object)

local results = {}
for _, inv in ipairs(inventory) do
for _, item in ipairs(inv.items) do
-- e.g. :external+<inv_name>:<domain>:<role>:`<name>`
if item.inv_name and item.inv_name ~= search_object.inv_name then
goto continue
end

if item.name ~= search_object.name then
goto continue
end

if search_object.role and item.role ~= search_object.role then
goto continue
end

if search_object.domain and item.domain ~= search_object.domain then
goto continue
else
if search_object.domain or item.domain == "py" then
table.insert(results, item)
end

goto continue
end

::continue::
end
end

if #results == 1 then
return results[1]
end
if #results > 1 then
quarto.log.warning("Found multiple matches for " .. search_object.name .. ", using the first match.")
return results[1]
end
if #results == 0 then
quarto.log.warning("Found no matches for object:\n", search_object)
end

return nil
end

local function mysplit (inputstr, sep)
if sep == nil then
sep = "%s"
end
local t={}
for str in string.gmatch(inputstr, "([^"..sep.."]+)") do
table.insert(t, str)
end
return t
end

local function normalize_role(role)
if role == "func" then
return "function"
end
return role
end

local function build_search_object(str)
local starts_with_colon = str:sub(1, 1) == ":"
local search = {}
if starts_with_colon then
local t = mysplit(str, ":")
if #t == 2 then
-- e.g. :func:`my_func`
search.role = normalize_role(t[1])
search.name = t[2]:match("%%60(.*)%%60")
elseif #t == 3 then
-- e.g. :py:func:`my_func`
search.domain = t[1]
search.role = normalize_role(t[2])
search.name = t[3]:match("%%60(.*)%%60")
elseif #t == 4 then
-- e.g. :ext+inv:py:func:`my_func`
search.external = true

search.inv_name = t[1]:match("external%+(.*)")
search.domain = t[2]
search.role = normalize_role(t[3])
search.name = t[4]:match("%%60(.*)%%60")
else
quarto.log.warning("couldn't parse this link: " .. str)
return {}
end
else
search.name = str:match("%%60(.*)%%60")
end

if search.name == nil then
quarto.log.warning("couldn't parse this link: " .. str)
return {}
end

if search.name:sub(1, 1) == "~" then
search.shortened = true
search.name = search.name:sub(2, -1)
end
return search
end

local function report_broken_link(link, search_object, replacement)
-- TODO: how to unescape html elements like [?
return pandoc.Code(pandoc.utils.stringify(link.content))
end

function Link(link)
-- do not process regular links ----
if not link.target:match("%%60") then
return link
end

-- lookup item ----
local search = build_search_object(link.target)
local item = lookup(search)

-- determine replacement, used if no link text specified ----
local original_text = pandoc.utils.stringify(link.content)
local replacement = search.name
if search.shortened then
local t = mysplit(search.name, ".")
replacement = t[#t]
end

-- set link text ----
if original_text == "" and replacement ~= nil then
link.content = pandoc.Code(replacement)
end

-- report broken links ----
if item == nil then
return report_broken_link(link, search)
end
link.target = item.uri:gsub("%$$", search.name)


return link
end

local function fixup_json(json, prefix)
for _, item in ipairs(json.items) do
item.uri = prefix .. item.uri
end
table.insert(inventory, json)
end

return {
{
Meta = function(meta)
local json
local prefix
for k, v in pairs(meta.interlinks.sources) do
local base_name = quarto.project.offset .. "/_inv/" .. k .. "_objects"
json = read_inv_text_or_json(base_name)
prefix = pandoc.utils.stringify(v.url)
if json ~= nil then
fixup_json(json, prefix)
end
end
json = read_inv_text_or_json(quarto.project.offset .. "/objects")
if json ~= nil then
fixup_json(json, "/")
end
end
},
{
Link = Link
}
}
7 changes: 7 additions & 0 deletions docs/_extensions/quarto-ext/fontawesome/_extension.yml
@@ -0,0 +1,7 @@
title: Font Awesome support
author: Carlos Scheidegger
version: 1.1.0
quarto-required: ">=1.2.269"
contributes:
shortcodes:
- fontawesome.lua
7,831 changes: 7,831 additions & 0 deletions docs/_extensions/quarto-ext/fontawesome/assets/css/all.css

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions docs/_extensions/quarto-ext/fontawesome/assets/css/latex-fontsize.css
@@ -0,0 +1,30 @@
.fa-tiny {
font-size: 0.5em;
}
.fa-scriptsize {
font-size: 0.7em;
}
.fa-footnotesize {
font-size: 0.8em;
}
.fa-small {
font-size: 0.9em;
}
.fa-normalsize {
font-size: 1em;
}
.fa-large {
font-size: 1.2em;
}
.fa-Large {
font-size: 1.5em;
}
.fa-LARGE {
font-size: 1.75em;
}
.fa-huge {
font-size: 2em;
}
.fa-Huge {
font-size: 2.5em;
}
8 binary files not shown.
84 changes: 84 additions & 0 deletions docs/_extensions/quarto-ext/fontawesome/fontawesome.lua
@@ -0,0 +1,84 @@
local function ensureLatexDeps()
quarto.doc.use_latex_package("fontawesome5")
end

local function ensureHtmlDeps()
quarto.doc.add_html_dependency({
name = 'fontawesome6',
version = '0.1.0',
stylesheets = {'assets/css/all.css', 'assets/css/latex-fontsize.css'}
})
end

local function isEmpty(s)
return s == nil or s == ''
end

local function isValidSize(size)
local validSizes = {
"tiny",
"scriptsize",
"footnotesize",
"small",
"normalsize",
"large",
"Large",
"LARGE",
"huge",
"Huge"
}
for _, v in ipairs(validSizes) do
if v == size then
return size
end
end
return ""
end

return {
["fa"] = function(args, kwargs)

local group = "solid"
local icon = pandoc.utils.stringify(args[1])
if #args > 1 then
group = icon
icon = pandoc.utils.stringify(args[2])
end

local title = pandoc.utils.stringify(kwargs["title"])
if not isEmpty(title) then
title = " title=\"" .. title .. "\""
end

local label = pandoc.utils.stringify(kwargs["label"])
if isEmpty(label) then
label = " aria-label=\"" .. icon .. "\""
else
label = " aria-label=\"" .. label .. "\""
end

local size = pandoc.utils.stringify(kwargs["size"])

-- detect html (excluding epub which won't handle fa)
if quarto.doc.is_format("html:js") then
ensureHtmlDeps()
if not isEmpty(size) then
size = " fa-" .. size
end
return pandoc.RawInline(
'html',
"<i class=\"fa-" .. group .. " fa-" .. icon .. size .. "\"" .. title .. label .. "></i>"
)
-- detect pdf / beamer / latex / etc
elseif quarto.doc.is_format("pdf") then
ensureLatexDeps()
if isEmpty(isValidSize(size)) then
return pandoc.RawInline('tex', "\\faIcon{" .. icon .. "}")
else
return pandoc.RawInline('tex', "{\\" .. size .. "\\faIcon{" .. icon .. "}}")
end
else
return pandoc.Null()
end
end
}
15 changes: 15 additions & 0 deletions docs/_freeze/how-to/extending/builtin/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/bigquery-arrays/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/ci-analysis/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/ibis-examples/index/execute-results/html.json

Some generated files are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/ibis-to-file/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/selectors/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/snowflake-io/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/torch/index/execute-results/html.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions docs/_freeze/posts/v6.1.0-release/index/execute-results/html.json

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions docs/_publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- source: project
netlify:
- id: 3832aec3-a89a-4f2e-b185-232eb2f077da
url: 'https://ibis-quarto.netlify.app'
530 changes: 530 additions & 0 deletions docs/_quarto.yml

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions docs/_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from __future__ import annotations

import quartodoc as qd
import toolz
from plum import dispatch


class Renderer(qd.MdRenderer):
style = "ibis"

@dispatch
def render(self, el: qd.ast.ExampleCode) -> str:
lines = el.value.splitlines()

result = []

prompt = ">>> "
continuation = "..."

skip_doctest = "doctest: +SKIP"
expect_failure = "quartodoc: +EXPECTED_FAILURE"
quartodoc_skip_doctest = "quartodoc: +SKIP"

chunker = lambda line: line.startswith((prompt, continuation))
should_skip = (
lambda line: quartodoc_skip_doctest in line or skip_doctest in line
)

for chunk in toolz.partitionby(chunker, lines):
first, *rest = chunk

# only attempt to execute or render code blocks that start with the
# >>> prompt
if first.startswith(prompt):
# check whether to skip execution and if so, render the code
# block as `python` (not `{python}`) if it's marked with
# skip_doctest, expect_failure or quartodoc_skip_doctest
if not any(map(should_skip, chunk)):
start, end = "{}"
else:
start = end = ""

result.append(f"```{start}python{end}")

# if we expect failures, don't fail the notebook execution and
# render the error message
if expect_failure in first or any(
expect_failure in line for line in rest
):
assert (
start and end
), "expected failure should never occur alongside a skipped doctest example"
result.append("#| error: true")

# remove the quartodoc markers from the rendered code
result.append(
first.replace(f"# {quartodoc_skip_doctest}", "")
.replace(quartodoc_skip_doctest, "")
.replace(f"# {expect_failure}", "")
.replace(expect_failure, "")
)
result.extend(rest)
result.append("```\n")

return "\n".join(result)
68 changes: 68 additions & 0 deletions docs/_tabsets/install.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
You can install Ibis and a supported backend with `pip`, `conda`, `mamba`, or `pixi`.

::: {.panel-tabset}

```{python}
#| echo: false
#| output: asis

backends = [
{"name": "BigQuery", "module": "bigquery"},
{"name": "ClickHouse", "module": "clickhouse"},
{"name": "Dask", "module": "dask"},
{"name": "DataFusion", "module": "datafusion"},
{"name": "Druid", "module": "druid"},
{"name": "DuckDB", "module": "duckdb"},
{"name": "Impala", "module": "impala"},
{"name": "MSSQL", "module": "mssql"},
{"name": "MySQL", "module": "mysql"},
{"name": "Oracle", "module": "oracle"},
{"name": "pandas", "module": "pandas"},
{"name": "Polars", "module": "polars"},
{"name": "PostgreSQL", "module": "postgres"},
{"name": "PySpark", "module": "pyspark"},
{"name": "Snowflake", "module": "snowflake"},
{"name": "SQLite", "module": "sqlite"},
{"name": "Trino", "module": "trino"},
]

installers = [
{"name": "pip", "line": "Install with the `{extra}` extra:", "cmd": "pip install 'ibis-framework[{extra}]'"},
{"name": "conda", "line": "Install the `ibis-{extra}` package:", "cmd": "conda install -c conda-forge ibis-{extra}"},
{"name": "mamba", "line": "Install the `ibis-{extra}` package:", "cmd": "mamba install -c conda-forge ibis-{extra}"},
{"name": "pixi", "line": "Add the `ibis-{extra}` package:", "cmd": "pixi add ibis-{extra}"},
]

for installer in installers:
name = installer["name"]
cmd = installer["cmd"]
line = installer["line"]

print(f"## `{name}`")

print("::: {.panel-tabset}")
print()

for backend in backends:
name = backend["name"]
mod = backend["module"]
extra = backend.get("extra", mod)

print(f"## {name}")
print()
print(line.format(extra=extra))
print()
print(f"```bash\n{cmd.format(extra=extra)}\n```")
print()
print(f"Connect using [`ibis.{mod}.connect`](./backends/{name.lower()}.qmd#ibis.{mod}.connect).")
print()

    # NB: the backend loop above rebinds `name`, so check the installer here
    if installer["name"] == "pip":
print("{{< include /_callouts/pypi_warning.qmd >}}")

print()
print(":::")
print()
```

:::
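
Each tab body comes from plain `str.format` on the templates above; for example (DuckDB under `pip`, chosen arbitrarily):

```python
# The same expansion the loop above performs, shown for one combination.
cmd = "pip install 'ibis-framework[{extra}]'"
line = "Install with the `{extra}` extra:"
print(line.format(extra="duckdb"))  # Install with the `duckdb` extra:
print(cmd.format(extra="duckdb"))   # pip install 'ibis-framework[duckdb]'
```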
31 changes: 31 additions & 0 deletions docs/_tabsets/install_default.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
We recommend starting with the default (DuckDB) backend for a performant, fully-featured local experience. You can install Ibis with `pip`, `conda`, `mamba`, or `pixi`.

::: {.panel-tabset}

## Using `pip`

```bash
pip install 'ibis-framework[duckdb]'
```

{{< include /_callouts/pypi_warning.qmd >}}

## Using `conda`

```bash
conda install -c conda-forge ibis-duckdb
```

## Using `mamba`

```bash
mamba install -c conda-forge ibis-duckdb
```

## Using `pixi`

```bash
pixi add ibis-duckdb
```

:::
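
Whichever installer you use, a quick smoke test might look like the following; this is a sketch assuming the default in-memory DuckDB connection (`connect()` with no arguments):

```python
import ibis

con = ibis.duckdb.connect()  # no arguments: an in-memory DuckDB database
t = ibis.memtable({"x": [1, 2, 3]})
print(con.execute(t.x.sum()))  # 6
```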
36 changes: 0 additions & 36 deletions docs/backends/_support_matrix.md

This file was deleted.

21 changes: 21 additions & 0 deletions docs/backends/_templates/api.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
```{python}
#| echo: false
#| output: asis

from _utils import get_backend, render_methods

# defined in the backend qmd, e.g., ../bigquery.qmd
module = BACKEND.lower()
backend = get_backend(module)

print(f"## `{module}.Backend` {{ #{backend.canonical_path} }}")

methods = sorted(
key for key, value in backend.members.items()
if value.is_function
if not value.name.startswith("_")
if value.name != "do_connect"
)

render_methods(backend, *methods, level=3)
```
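
The stacked `if` clauses in that comprehension behave as a logical AND: keep public functions, drop `do_connect` (it is documented separately). A small stand-in for `backend.members` makes the filter concrete; the member objects below are fabricated, but they expose the same `is_function`/`name` attributes used above:

```python
# Fabricated stand-ins for quartodoc/griffe member objects.
from types import SimpleNamespace

members = {
    "do_connect": SimpleNamespace(is_function=True, name="do_connect"),
    "_private": SimpleNamespace(is_function=True, name="_private"),
    "table": SimpleNamespace(is_function=True, name="table"),
    "con": SimpleNamespace(is_function=False, name="con"),
}

methods = sorted(
    key
    for key, value in members.items()
    if value.is_function
    if not value.name.startswith("_")
    if value.name != "do_connect"
)
print(methods)  # ['table']
```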
77 changes: 77 additions & 0 deletions docs/backends/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from __future__ import annotations

from functools import cache, partial
from typing import TYPE_CHECKING

from quartodoc import MdRenderer, get_object

if TYPE_CHECKING:
from collections.abc import Iterator


@cache
def get_renderer(level: int) -> MdRenderer:
return MdRenderer(header_level=level)


@cache
def get_backend(backend: str):
return get_object(f"ibis.backends.{backend}", "Backend")


def get_callable(obj, name):
try:
return obj.get_member(name)
except KeyError:
return obj.functions[name]


def find_member_with_docstring(member):
"""Find the first inherited member with a docstring."""
if member.docstring is not None:
return member

cls = member.parent
for base in cls.resolved_bases:
try:
parent_member = get_callable(base, member.name)
except KeyError:
continue
else:
if parent_member.docstring is not None:
return parent_member
return member


def render_method(*, member, renderer: MdRenderer) -> Iterator[str]:
header_level = renderer.crnt_header_level
header = "#" * header_level
name = member.name
try:
params = renderer.render(member.parameters)
except AttributeError:
params = None
yield "\n"
yield f"{header} {name} {{ #{member.path} }}"
yield "\n"
if params is not None:
yield f"`{name}({params})`"
yield "\n"

yield renderer.render(find_member_with_docstring(member))


def render_methods(obj, *methods: str, level: int) -> None:
renderer = get_renderer(level)
get = partial(get_callable, obj)
print( # noqa: T201
"\n".join(
line
for member in map(get, methods)
for line in render_method(member=member, renderer=renderer)
)
)


def render_do_connect(backend, level: int = 4) -> None:
render_methods(get_backend(backend), "do_connect", level=level)
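
A backend page would drive these helpers from an executable cell; a sketch of the intended call for a hypothetical DuckDB page (mirroring the `api.qmd` template above):

```python
# Hypothetical cell in a backend page such as docs/backends/duckdb.qmd.
from _utils import get_backend, render_do_connect

backend = get_backend("duckdb")       # griffe object for ibis.backends.duckdb.Backend
print(backend.canonical_path)
render_do_connect("duckdb", level=4)  # prints the do_connect docs at header level 4
```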
42 changes: 21 additions & 21 deletions docs/backends/app/backend_info_app.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import datetime
import tempfile
from pathlib import Path
from typing import List, Optional
from typing import Optional

import pandas as pd
import requests
@@ -15,7 +15,7 @@

ONE_HOUR_IN_SECONDS = datetime.timedelta(hours=1).total_seconds()

st.set_page_config(layout='wide')
st.set_page_config(layout="wide")

# Track all queries. We display them at the bottom of the page.
ibis.options.verbose = True
@@ -32,7 +32,7 @@ def support_matrix_df():
f.write(resp.content)
return (
ibis.read_csv(f.name)
.relabel({'FullOperation': 'full_operation'})
.relabel({"FullOperation": "full_operation"})
.mutate(
short_operation=_.full_operation.split(".")[-1],
operation_category=_.full_operation.split(".")[-2],
@@ -63,7 +63,7 @@ def backends_info_df():
"sqlite": ["sqlalchemy", "sql"],
"trino": ["sqlalchemy", "sql"],
}.items(),
columns=['backend_name', 'categories'],
columns=["backend_name", "categories"],
)


@@ -76,7 +76,7 @@ def get_all_backend_categories():
return (
backend_info_table.select(category=_.categories.unnest())
.distinct()
.order_by('category')['category']
.order_by("category")["category"]
.to_pandas()
.tolist()
)
@@ -86,14 +86,14 @@ def get_all_backend_categories():
def get_all_operation_categories():
return (
support_matrix_table.select(_.operation_category)
.distinct()['operation_category']
.distinct()["operation_category"]
.to_pandas()
.tolist()
)


@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def get_backend_names(categories: Optional[List[str]] = None):
def get_backend_names(categories: Optional[list[str]] = None):
backend_expr = backend_info_table.mutate(category=_.categories.unnest())
if categories:
backend_expr = backend_expr.filter(_.category.isin(categories))
@@ -105,43 +105,43 @@ def get_backend_names(categories: Optional[List[str]] = None):
def get_selected_backend_name():
backend_categories = get_all_backend_categories()
selected_categories_names = st.sidebar.multiselect(
'Backend category',
"Backend category",
options=backend_categories,
default=backend_categories,
)
return get_backend_names(selected_categories_names)


def get_backend_subset(subset):
return st.sidebar.multiselect('Backend name', options=subset, default=subset)
return st.sidebar.multiselect("Backend name", options=subset, default=subset)


def get_selected_operation_categories():
all_ops_categories = get_all_operation_categories()

selected_ops_categories = st.sidebar.multiselect(
'Operation category',
"Operation category",
options=sorted(all_ops_categories),
default=None,
)
if not selected_ops_categories:
selected_ops_categories = all_ops_categories
show_geospatial = st.sidebar.checkbox('Include Geospatial ops', value=True)
if not show_geospatial and 'geospatial' in selected_ops_categories:
show_geospatial = st.sidebar.checkbox("Include Geospatial ops", value=True)
if not show_geospatial and "geospatial" in selected_ops_categories:
selected_ops_categories.remove("geospatial")
return selected_ops_categories


current_backend_names = get_backend_subset(get_selected_backend_name())
sort_by_coverage = st.sidebar.checkbox('Sort by API Coverage', value=False)
sort_by_coverage = st.sidebar.checkbox("Sort by API Coverage", value=False)
current_ops_categories = get_selected_operation_categories()

hide_supported_by_all_backends = st.sidebar.selectbox(
'Operation compatibility',
['Show all', 'Show supported by all backends', 'Hide supported by all backends'],
"Operation compatibility",
["Show all", "Show supported by all backends", "Hide supported by all backends"],
0,
)
show_full_ops_name = st.sidebar.checkbox('Show full operation name', False)
show_full_ops_name = st.sidebar.checkbox("Show full operation name", False)

# Start ibis expression
table_expr = support_matrix_table
Expand All @@ -161,11 +161,11 @@ def get_selected_operation_categories():
getattr(table_expr, backend_name).ifelse(1, 0)
for backend_name in current_backend_names
)
if hide_supported_by_all_backends == 'Show supported by all backends':
if hide_supported_by_all_backends == "Show supported by all backends":
table_expr = table_expr.filter(
supported_backend_count == len(current_backend_names)
)
elif hide_supported_by_all_backends == 'Hide supported by all backends':
elif hide_supported_by_all_backends == "Hide supported by all backends":
table_expr = table_expr.filter(
supported_backend_count != len(current_backend_names)
)
@@ -175,7 +175,7 @@ def get_selected_operation_categories():

# Execute query
df = table_expr.to_pandas()
df = df.set_index('index')
df = df.set_index("index")

# Display result
all_visible_ops_count = len(df.index)
@@ -199,9 +199,9 @@ def get_selected_operation_categories():
with st.expander("SQL queries"):
for sql_query in sql_queries:
pretty_sql_query = sqlglot.transpile(
sql_query, read='duckdb', write='duckdb', pretty=True
sql_query, read="duckdb", write="duckdb", pretty=True
)[0]
st.code(pretty_sql_query, language='sql')
st.code(pretty_sql_query, language="sql")

with st.expander("Source code"):
st.code(Path(__file__).read_text())
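
The compatibility filter above hinges on one trick: summing per-backend `ifelse(1, 0)` flags yields an integer column counting supporting backends, which can then be compared against the number of selected backends. A standalone sketch with a toy matrix (the table contents are made up):

```python
import ibis

# Toy support matrix; the real app reads raw_support_matrix.csv instead.
t = ibis.memtable(
    {
        "full_operation": ["ops.Sum", "ops.ArrayMap"],
        "duckdb": [True, True],
        "trino": [True, False],
    }
)
backends = ["duckdb", "trino"]
supported_backend_count = sum(t[name].ifelse(1, 0) for name in backends)
# Keep only operations supported by every selected backend.
print(t.filter(supported_backend_count == len(backends)).to_pandas())
```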
88 changes: 0 additions & 88 deletions docs/backends/bigquery.md

This file was deleted.
