343 changes: 118 additions & 225 deletions .github/workflows/ibis-backends.yml

Large diffs are not rendered by default.

29 changes: 20 additions & 9 deletions .github/workflows/ibis-docs-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ on:
push:
branches:
- master
- "*.x.x"
pull_request:
branches:
- master
- "*.x.x"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
Expand All @@ -29,7 +31,7 @@ jobs:
nix_path: nixpkgs=channel:nixos-unstable-small

- name: lint commits
run: nix shell -f '<nixpkgs>' commitlint -c commitlint --from=${{ github.event.pull_request.base.sha }} --to=${{ github.sha }} --verbose
run: nix run 'nixpkgs#commitlint' -- --from=${{ github.event.pull_request.base.sha }} --to=${{ github.sha }} --verbose

lint:
runs-on: ubuntu-latest
Expand All @@ -49,8 +51,15 @@ jobs:
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: pre-commit checks
run: nix develop -f shell.nix --ignore-environment --keep-going -c pre-commit run -a
# run against a smaller shell for speed for pull requests
- name: pre-commit checks pull_request
if: ${{ github.event_name == 'pull_request' }}
run: nix develop -f nix preCommitShell --ignore-environment --keep-going -c pre-commit run --all-files

# run against the full shell.nix on push so it gets pushed to cachix
- name: pre-commit checks push
if: ${{ github.event_name == 'push' }}
run: nix develop -f shell.nix --ignore-environment --keep-going -c pre-commit run --all-files

benchmarks:
runs-on: ubuntu-latest
Expand All @@ -59,13 +68,13 @@ jobs:
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v3
uses: actions/setup-python@v4
id: install_python
with:
python-version: "3.10"

- name: install system dependencies
run: sudo apt-get install -qq -y build-essential krb5-config libkrb5-dev
run: sudo apt-get install -qq -y build-essential krb5-config libkrb5-dev libgeos-dev

- uses: syphar/restore-virtualenv@v1
with:
Expand Down Expand Up @@ -148,11 +157,11 @@ jobs:

- name: build docs
if: ${{ github.event_name != 'push' }}
run: nix develop -f shell.nix --ignore-environment --keep-going -c mkdocs build
run: nix run -f nix ibisDevEnv310 -- -m mkdocs build

- name: verify internal links
if: ${{ github.event_name != 'push' }}
run: nix develop -f shell.nix --ignore-environment --keep-going -c just checklinks --offline --no-progress
run: nix shell -f nix --ignore-environment bash findutils just lychee -c just checklinks --offline --no-progress

- name: Configure git info
if: ${{ github.event_name == 'push' }}
Expand All @@ -167,13 +176,15 @@ jobs:
run: |
set -euo pipefail
nix develop -f shell.nix --keep-going -c \
mic deploy \
nix run -f nix mic -- \
deploy \
--push \
--rebase \
--prefix docs \
--message 'docs(dev): ibis@${{ github.sha }}' \
dev
env:
PYTHONPATH: .

simulate_release:
runs-on: ubuntu-latest
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/ibis-docs-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@ jobs:
run: |
set -euo pipefail
nix develop -f shell.nix --keep-going -c \
mic deploy \
nix run -f nix mic -- \
deploy \
--push \
--rebase \
--update-aliases \
--prefix docs \
--message "docs(release): ibis@${GITHUB_REF_NAME}" \
"${GITHUB_REF_NAME}" latest
env:
PYTHONPATH: .
2 changes: 2 additions & 0 deletions .github/workflows/ibis-main-skip-helper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ on:
- "docs/**"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
branches:
- master
- "*.x.x"
jobs:
nix-lint:
runs-on: ubuntu-latest
Expand Down
27 changes: 3 additions & 24 deletions .github/workflows/ibis-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,20 @@ on:
- "docs/**"
branches:
- master
- "*.x.x"
pull_request:
# Skip the test suite if all changes are in the docs directory
paths-ignore:
- "docs/**"
branches:
- master
- "*.x.x"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
nix-lint:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v3

- name: install nix
uses: cachix/install-nix-action@v17
with:
nix_path: nixpkgs=channel:nixos-unstable-small

- name: setup cachix
uses: cachix/cachix-action@v10
with:
name: ibis
extraPullNames: nix-community,poetry2nix

- name: nixpkgs-fmt
run: nix shell -f ./nix nixpkgs-fmt -c nixpkgs-fmt --check $(find . -name '*.nix' -and \( -not -wholename '*nix/sources.nix' \))

- name: nix-linter
run: nix shell -f ./nix nix-linter -c nix-linter $(find . -name '*.nix' -and \( -not -wholename '*nix/sources.nix' \))

test_no_backends:
name: Test ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
Expand All @@ -61,7 +40,7 @@ jobs:
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v3
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}
Expand Down
43 changes: 32 additions & 11 deletions .github/workflows/ibis-tpch-queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ on:
push:
branches:
- master
- "*.x.x"
pull_request:
branches:
- master
- "*.x.x"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
Expand All @@ -20,27 +22,46 @@ jobs:
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v3
with:
python-version: "3.10"

- run: python -m pip install --upgrade pip click sqlparse

- name: install ibis
run: python -m pip install "ibis-framework[duckdb]"

- name: clone tpc-queries
uses: actions/checkout@v3
with:
repository: ibis-project/tpc-queries
path: ./tpc-queries
ref: master

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: "3.10"

- run: python -m pip install --upgrade pip coverage

- name: install tpc-queries dependencies
working-directory: tpc-queries
run: |
python -m pip install -r requirements.txt
python -m pip install -U duckdb>=0.4
- name: install ibis
run: python -m pip install ".[sqlite,duckdb]"

- name: generate tpc-h data
working-directory: tpc-queries
run: python -c "import duckdb; con = duckdb.connect('tpch.ddb'); con.execute('CALL dbgen(sf=0.1);')"

- name: run tpc-h queries
working-directory: tpc-queries
run: ./runtpc -i ibis -i duckdb -d 'tpch.ddb' -b 'duckdb'
run: coverage run --rcfile=../.coveragerc ./runtpc -i ibis -i duckdb -d 'tpch.ddb' -b 'duckdb'

- name: generate coverage report
working-directory: tpc-queries
run: coverage xml --rcfile=../.coveragerc -o ./junit.xml

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
files: ./tpc-queries/junit.xml
fail_ci_if_error: true
flags: tpc,tpch,duckdb,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
2 changes: 2 additions & 0 deletions .github/workflows/nix-skip-helper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ on:
- "nix/**"
branches:
- master
- "*.x.x"
pull_request:
paths-ignore:
- "**/*.nix"
Expand All @@ -20,6 +21,7 @@ on:
- "nix/**"
branches:
- master
- "*.x.x"

jobs:
nix:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/nix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
- "nix/**"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "**/*.nix"
Expand All @@ -18,6 +19,7 @@ on:
- "nix/**"
branches:
- master
- "*.x.x"

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test-report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
- completed
branches-ignore:
- master
- "*.x.x"

concurrency: report

Expand All @@ -23,7 +24,7 @@ jobs:
path: artifacts

- name: publish test report
uses: EnricoMi/publish-unit-test-result-action@v1
uses: EnricoMi/publish-unit-test-result-action@v2
with:
commit: ${{ github.event.workflow_run.head_sha }}
files: artifacts/**/junit.xml
12 changes: 6 additions & 6 deletions .github/workflows/update-deps.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: Update Dependencies
on:
schedule:
# run every 24 hours at midnight
- cron: "0 */24 * * *"
# run every 3 days at midnight
- cron: "0 0 * * */3"
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -40,15 +40,15 @@ jobs:
name: ibis
extraPullNames: nix-community,poetry2nix

- uses: cpcloud/niv-dep-info-action@main
- uses: cpcloud/niv-dep-info-action@v2.0.6
id: get_current_commit
with:
dependency: ${{ matrix.dep }}

- name: update ${{ matrix.dep }}
run: nix shell -f '<nixpkgs>' niv -c niv update ${{ matrix.dep }}
run: nix run 'nixpkgs#niv' -- update ${{ matrix.dep }}

- uses: cpcloud/niv-dep-info-action@main
- uses: cpcloud/niv-dep-info-action@v2.0.6
id: get_new_commit
with:
dependency: ${{ matrix.dep }}
Expand All @@ -74,7 +74,7 @@ jobs:
app_id: ${{ secrets.PR_APPROVAL_BOT_APP_ID }}
private_key: ${{ secrets.PR_APPROVAL_BOT_APP_PRIVATE_KEY }}

- uses: cpcloud/compare-commits-action@v5.0.14
- uses: cpcloud/compare-commits-action@v5.0.21
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
id: compare_commits
with:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/update-setup-py.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ jobs:
- name: generate setup.py
run: ./dev/poetry2setup -o setup.py

- name: generate requirements.txt
run: nix run -f nix poetry -- export --dev --extras all --without-hashes --no-ansi > requirements.txt

- name: setup git credentials
uses: OleksiyRudenko/gha-git-credentials@v2.1
with:
Expand All @@ -58,9 +61,9 @@ jobs:
run: |
set -euo pipefail
git add setup.py
git add setup.py requirements.txt
if git commit -m 'chore(setup.py): regenerate'; then
if git commit -m 'chore(setuptools-files): regenerate'; then
# pull in case another commit happened in the meantime
#
# `ours` is actually the *other* changeset, not the current branch, per
Expand Down
7 changes: 0 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,6 @@ docbuild
result
result-*

# generate this with:
#
# poetry export --dev --without-hashes --no-ansi > requirements.txt
#
# if you need it
requirements.txt

# generated mkdocs website
.benchmarks
site
Expand Down
170 changes: 72 additions & 98 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,79 +1,57 @@
ci:
autofix_commit_msg: "style: auto fixes from pre-commit.ci hooks"
autofix_prs: false
autoupdate_commit_msg: "style: pre-commit.ci autoupdate"
skip:
- prettier
- shellcheck
- shfmt
- just
- nixpkgs-fmt
- nix-linter
default_stages:
- commit
repos:
- hooks:
- entry: black --check
exclude: ^$
files: ""
id: black
language: system
name: black
pass_filenames: true
stages:
- commit
types:
- python
- file
- python
types_or: []
- entry: flake8
exclude: ^$
files: ""
id: flake8
language: python
name: flake8
pass_filenames: true
stages:
- commit
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
- repo: https://github.com/psf/black
rev: 22.6.0
hooks:
- id: black
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
- repo: https://github.com/asottile/pyupgrade
rev: v2.37.1
hooks:
- id: pyupgrade
exclude: setup.py
entry: pyupgrade --py38-plus
types:
- python
types_or: []
- entry: absolufy-imports
name: absolufy
id: absolufy
language: python
files: ""
stages:
- commit
types:
- python
- entry: isort --check
exclude: ^$
files: ""
id: isort
language: python
name: isort
pass_filenames: true
stages:
- commit
types:
- file
- python
- repo: local
hooks:
- id: prettier
name: prettier
language: system
entry: prettier --write
types_or:
- cython
- pyi
- python
- entry: pyupgrade --py38-plus
exclude: (setup.py)
files: ""
id: pyupgrade
- json
- toml
- yaml
- markdown
- id: shellcheck
name: shellcheck
language: system
name: pyupgrade
pass_filenames: true
stages:
- commit
types:
- python
types_or: []
- entry: shellcheck
exclude: ^$
entry: shellcheck
files: \.sh$
id: shellcheck
language: system
name: shellcheck
pass_filenames: true
stages:
- commit
types:
- shell
types_or:
Expand All @@ -84,44 +62,40 @@ repos:
- bats
- dash
- ksh
- entry: shfmt -i 2 -sr -d -s -l
exclude: ^$
files: \.sh$
id: shfmt
language: system
- id: shfmt
name: shfmt
pass_filenames: true
stages:
- commit
language: system
entry: shfmt -i 2 -sr -s
files: \.sh$
types:
- file
types_or:
- file
- entry: prettier --check
exclude: ^$
files: ""
id: prettier
- id: just
name: just
language: system
name: prettier
pass_filenames: true
stages:
- commit
entry: just --fmt --unstable --check
files: ^justfile$
pass_filenames: false
types:
- file
- id: nixpkgs-fmt
name: nixpkgs-fmt
language: system
entry: nixpkgs-fmt
exclude: nix/sources\.nix
files: \.nix$
types:
- text
- file
types_or:
- json
- toml
- yaml
- markdown
- entry: just --fmt --unstable --check
exclude: ^$
files: ^justfile$
id: just
- file
- id: nix-linter
name: nix-linter
language: system
name: just
pass_filenames: false
stages:
- commit
entry: nix-linter --check="no-FreeLetInFunc"
exclude: nix/sources\.nix
files: \.nix$
types:
- file
repo: local
types_or:
- file
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ docs/overrides/*.html
docs/SUMMARY.md
site
ci/udf/CMakeFiles
poetry.lock
28 changes: 28 additions & 0 deletions LICENSES/odo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Copyright (c) 2014, Continuum Analytics, Inc. and contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

Neither the name of Continuum Analytics nor the names of any contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
788 changes: 0 additions & 788 deletions ci/datamgr.py

This file was deleted.

9 changes: 7 additions & 2 deletions ci/release/prepare.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
#!/usr/bin/env nix-shell
#!nix-shell -p poetry nix -i bash
#!nix-shell -p gnugrep unzip poetry nix -i bash
# shellcheck shell=bash

set -euo pipefail

version="${1}"

# set version
poetry version "$1"
poetry version "$version"

./dev/poetry2setup -o setup.py

# build artifacts
poetry build

# ensure that the built wheel has the correct version number
unzip -p "dist/ibis_framework-${version}-py3-none-any.whl" ibis/__init__.py | grep -q "__version__ = \"$version\""
2 changes: 1 addition & 1 deletion ci/release/verify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -euo pipefail
poetry check

# verify that the lock file is up to date
poetry lock --no-update
PYTHONHASHSEED=42 poetry lock --no-update
git diff --exit-code poetry.lock

# verify that we have a token available to push to pypi using set -u
Expand Down
35 changes: 28 additions & 7 deletions ci/schema/clickhouse.sql
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,34 @@ CREATE TABLE IF NOT EXISTS array_types (
y Array(Nullable(String)),
z Array(Nullable(Float64)),
grouper Nullable(String),
scalar_column Nullable(Float64)
scalar_column Nullable(Float64),
multi_dim Array(Array(Nullable(Int64)))
) ENGINE = Memory;

INSERT INTO array_types VALUES
([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0),
([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0),
([6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0),
([NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0),
([2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0),
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0);
([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0, [[], [1, 2, 3], []]),
([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0, []),
([6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0, [[], [], []]),
([NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0, [[1], [2], [], [3, 4, 5]]),
([2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, []),
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]]);


CREATE TABLE IF NOT EXISTS struct (
abc Tuple(
a Nullable(Float64),
b Nullable(String),
c Nullable(Int64)
)
) ENGINE = Memory;

-- NULL is the same as tuple(NULL, NULL, NULL) because clickhouse doesn't
-- support Nullable(Tuple(...))
INSERT INTO struct VALUES
(tuple(1.0, 'banana', 2)),
(tuple(2.0, 'apple', 3)),
(tuple(3.0, 'orange', 4)),
(tuple(NULL, 'banana', 2)),
(tuple(2.0, NULL, 3)),
(tuple(NULL, NULL, NULL)),
(tuple(3.0, 'orange', NULL));
31 changes: 24 additions & 7 deletions ci/schema/duckdb.sql
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,30 @@ CREATE TABLE IF NOT EXISTS array_types (
y TEXT[],
z DOUBLE PRECISION[],
grouper TEXT,
scalar_column DOUBLE PRECISION
scalar_column DOUBLE PRECISION,
multi_dim BIGINT[][]
);

INSERT INTO array_types VALUES
([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0),
([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0),
([6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0),
([NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0),
([2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0),
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0);
([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0, [[], [1, 2, 3], NULL]),
([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0, []),
([6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0, [NULL, [], NULL]),
([NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0, [[1], [2], [], [3, 4, 5]]),
([2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, NULL),
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]]);


DROP TABLE IF EXISTS struct CASCADE;

CREATE TABLE IF NOT EXISTS struct (
abc STRUCT(a DOUBLE, b STRING, c BIGINT)
);

INSERT INTO struct VALUES
({'a': 1.0, 'b': 'banana', 'c': 2}),
({'a': 2.0, 'b': 'apple', 'c': 3}),
({'a': 3.0, 'b': 'orange', 'c': 4}),
({'a': NULL, 'b': 'banana', 'c': 2}),
({'a': 2.0, 'b': NULL, 'c': 3}),
(NULL),
({'a': 3.0, 'b': 'orange', 'c': NULL});
15 changes: 8 additions & 7 deletions ci/schema/postgresql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,17 @@ CREATE TABLE IF NOT EXISTS array_types (
y TEXT[],
z DOUBLE PRECISION[],
grouper TEXT,
scalar_column DOUBLE PRECISION
scalar_column DOUBLE PRECISION,
multi_dim BIGINT[][]
);

INSERT INTO array_types VALUES
(ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0),
(ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0),
(ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0),
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::DOUBLE PRECISION[], 'b', 4.0),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0);
(ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0, ARRAY[ARRAY[NULL::BIGINT, NULL, NULL], ARRAY[1, 2, 3]]),
(ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0, ARRAY[]::BIGINT[][]),
(ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0, ARRAY[NULL, ARRAY[]::BIGINT[], NULL]),
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::DOUBLE PRECISION[], 'b', 4.0, ARRAY[ARRAY[1], ARRAY[2], ARRAY[NULL::BIGINT], ARRAY[3]]),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0, NULL),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0, ARRAY[ARRAY[1, 2, 3]]);

DROP TABLE IF EXISTS films CASCADE;

Expand Down
31 changes: 27 additions & 4 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
codecov:
notify:
# a high number to try to delay codecov reporting until most of the test
# runs have finished. Should match with comment.after_n_builds below.
after_n_builds: 32

comment:
layout: "reach, diff, flags, files"
after_n_builds: 32
layout: "reach, diff, files"
behavior: default
require_changes: false # if true: only post the comment if coverage changes
require_base: no # [yes :: must have a base report to post]
require_head: yes # [yes :: must have a head report to post]
require_changes: true # if true: only post the comment if coverage changes
require_base: false # [yes :: must have a base report to post]
require_head: true # [yes :: must have a head report to post]
branches: null

ignore:
- "docs/**"

coverage:
status:
project:
default:
# Allow for slight decreases in code coverage, makes
# the coverage status checks a little less finicky
threshold: 0.5%
only_pulls: true
patch:
default:
threshold: 0.5%
only_pulls: true
388 changes: 388 additions & 0 deletions conda-lock/linux-64-3.10.lock

Large diffs are not rendered by default.

347 changes: 179 additions & 168 deletions conda-lock/linux-64-3.8.lock

Large diffs are not rendered by default.

345 changes: 179 additions & 166 deletions conda-lock/linux-64-3.9.lock

Large diffs are not rendered by default.

372 changes: 372 additions & 0 deletions conda-lock/osx-64-3.10.lock

Large diffs are not rendered by default.

339 changes: 175 additions & 164 deletions conda-lock/osx-64-3.8.lock

Large diffs are not rendered by default.

337 changes: 175 additions & 162 deletions conda-lock/osx-64-3.9.lock

Large diffs are not rendered by default.

363 changes: 363 additions & 0 deletions conda-lock/win-64-3.10.lock

Large diffs are not rendered by default.

315 changes: 163 additions & 152 deletions conda-lock/win-64-3.8.lock

Large diffs are not rendered by default.

315 changes: 164 additions & 151 deletions conda-lock/win-64-3.9.lock

Large diffs are not rendered by default.

25 changes: 8 additions & 17 deletions default.nix
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
{ python ? "3.10"
, doCheck ? true
, backends ? [
"dask"
"datafusion"
"duckdb"
"pandas"
"sqlite"
]
}:
let
pkgs = import ./nix;
Expand All @@ -10,15 +17,6 @@ let
}:

let
backends = [
"dask"
"datafusion"
"duckdb"
"pandas"
"sqlite"
];

backendsString = lib.concatStringsSep " " backends;
buildInputs = with pkgs; [ gdal_2 graphviz-nox proj sqlite ];
checkInputs = buildInputs;
in
Expand All @@ -29,10 +27,7 @@ let
src = pkgs.gitignoreSource ./.;

overrides = pkgs.poetry2nix.overrides.withDefaults (
import ./poetry-overrides.nix {
inherit pkgs;
inherit (pkgs) lib stdenv;
}
import ./poetry-overrides.nix
);

preConfigure = ''
Expand All @@ -52,10 +47,6 @@ let
find "$tempdir" -type d -exec chmod u+rwx {} +
ln -s "$tempdir" ci/ibis-testing-data
for backend in ${backendsString}; do
python ci/datamgr.py load "$backend"
done
'';

checkPhase = ''
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "3.4"
services:
clickhouse:
image: yandex/clickhouse-server:22-alpine
image: clickhouse/clickhouse-server:22-alpine
ports:
- 8123:8123
- 9000:9000
Expand Down Expand Up @@ -48,7 +48,7 @@ services:
- CMD
- pg_isready
timeout: 5s
image: postgres:13.6-alpine
image: postgres:13.7-alpine
networks:
- impala
kudu:
Expand Down
2 changes: 1 addition & 1 deletion docs/about/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ functionality of that specific backend.
A concrete example of this is the [FARM_FINGERPRINT](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#farm_fingerprint)
function in BigQuery.

It is unlikely that the main ValueExpr API will ever grow such a method, but a
It is unlikely that the main Value API will ever grow such a method, but a
BigQuery user shouldn’t be restricted to using only the methods this API
provides. Moreover, users should be able to bring their own methods to this API
without having to consult the ibis developers and without the addition of such
Expand Down
2 changes: 1 addition & 1 deletion docs/about/team.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Contributors

{{ config.extra.project_name }} is developed and maintained by a [community of
volunteer contributors]({{ config.repo_url }}graphs/contributors).
volunteer contributors]({{ config.repo_url }}/graphs/contributors).

{% for group in config.extra.team %}

Expand Down
11 changes: 2 additions & 9 deletions docs/backends/Impala.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,6 @@ The best way to interact with a single table is through the
Ibis delegates all HDFS interaction to the
[`fsspec`](https://filesystem-spec.readthedocs.io/en/latest/) library.

<!-- prettier-ignore-start -->
::: ibis.backends.impala.hdfs_connect
rendering:
heading_level: 3

<!-- prettier-ignore-end -->

## The Impala client object

To use Ibis with Impala, you first must connect to a cluster using the
Expand Down Expand Up @@ -181,9 +174,9 @@ expression referencing a physical Impala table:
table = client.table('functional_alltypes', database='ibis_testing')
```

`ImpalaTable` is a Python subclass of the more general Ibis `TableExpr`
`ImpalaTable` is a Python subclass of the more general Ibis `Table`
that has additional Impala-specific methods. So you can use it
interchangeably with any code expecting a `TableExpr`.
interchangeably with any code expecting a `Table`.

Like all table expressions in Ibis, `ImpalaTable` has a `schema` method
you can use to examine its schema:
Expand Down
227 changes: 227 additions & 0 deletions docs/blog/Ibis-version-3.1.0-release.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
# Ibis v3.1.0

Marlene Mhangami

25 July 2022

## Introduction

Ibis 3.1 has officially been released as the latest version of the package.
With this release comes new convenience features, increased backend operation coverage and a plethora of bug fixes.
As usual, a full list of the changes can be found in the project release notes [here](../release_notes.md). Let's talk about some of the new changes 3.1 brings for Ibis users.

## `ibis.connect`

The first significant change to note is that Ibis now provides a more convenient way to connect to a backend using the `ibis.connect` method.
You can now use this function to connect to an appropriate backend using a connection string.

Here are some examples:

<!-- prettier-ignore-start -->

=== "DuckDB"

Initialize a DuckDB instance using `'duckdb://:memory:'`
~~~python
conn = ibis.connect('duckdb://:memory:')
~~~
And begin registering your tables:
~~~python
conn.register('csv://farm_data/dates.csv', 'dates')
conn.register('csv://farm_data/farmer_groups.csv', 'farmer_groups')
conn.register('csv://farm_data/crops.csv', 'crops')
conn.register('csv://farm_data/farms.csv', 'farms')
conn.register('csv://farm_data/harvest.csv', 'harvest')
conn.register('csv://farm_data/farmers.csv', 'farmers')
conn.register('csv://farm_data/tracts.csv', 'tracts')
conn.register('csv://farm_data/fields.csv', 'fields')
~~~
You can also do this programmatically:
~~~python
files = glob.glob('farm_data/*.csv')

for file in files:
fname = 'csv://' + file
tname = file.replace('farm_data/', '').replace('.csv', '')
conn.register(fname, tname)
~~~
This method isn’t limited to `csv://`. It works with `parquet://` and `csv.gz://` as well.
Give it a try!

=== "Postgres"

~~~python
conn = ibis.connect('postgres://<username>:<password>@<host>:<port>/<database>')
~~~
Or, using a [.pgpass file](https://www.postgresql.org/docs/9.3/libpq-pgpass.html):
~~~python
conn = ibis.connect('postgres://<username>@<host>:<port>/<database>')
~~~
<!-- prettier-ignore-end -->

## Unnest Support

One of the trickier parts about working with data is that it doesn’t usually come organized in neat, predictable rows and columns.
Instead data often consists of rows that could contain a single bit of data or arrays of it.
When data is organized in layers, as with arrays, it can sometimes be difficult to work with.
Ibis 3.1 introduces the `unnest` function as a way to flatten arrays of data.

Unnest takes a column containing an array of values and separates the individual values into rows as shown:

Before Unnest:

```
| col |
| ------ |
| [1, 2] |
```

After Unnest:

```
| col |
| --- |
| 1 |
| 2 |
```

Here is a self-contained example of creating a dataset with an array and then unnesting it:

<!-- prettier-ignore-start -->

=== "DuckDB"

~~~python
import ibis
import pandas as pd

# Parquet save path
fname = 'array_data.parquet'

# Mock Data
data = [
['array_id', 'array_value']
,[1, [1, 3, 4]]
,[2, [2, 4, 5]]
,[3, [6, 8]]
,[4, [1, 6]]
]

# Save as parquet
pd.DataFrame(data[1:], columns=data[0]).to_parquet(fname)

# Connect to the file using a DuckDB backend
conn = ibis.connect(f"duckdb://{fname}")

# Create a table expression for your loaded data
array_data = conn.table("array_data")

# Optionally execute the array data to preview
array_data.execute()

# select the unnested values with their corresponding IDs
array_data.select(['array_id', array_data['array_value'].unnest()]).execute()
~~~

=== "Postgres"

~~~python
import ibis
import pandas as pd

# Postgres connection string for user 'ibistutorials' with a valid .pgpass file in ~/
# See https://www.postgresql.org/docs/9.3/libpq-pgpass.html for details on ~/.pgpass
cstring = 'postgres://ibistutorials@localhost:5432/pg-ibis'

# Mock Data
data = [
['array_id', 'array_value']
,[1, [1, 3, 4]]
,[2, [2, 4, 5]]
,[3, [6, 8]]
,[4, [1, 6]]
]

# Create a dataframe for easy loading
df = pd.DataFrame(data[1:], columns=data[0])

# Postgres backend connection
conn = ibis.connect(cstring)

# SQLAlchemy Types
# Integer type
int_type = ibis.backends.postgres.sa.types.INT()
# Array type function
arr_f = ibis.backends.postgres.sa.types.ARRAY

# Load data to table using pd.DataFrame.to_sql
df.to_sql(
name='array_data'
,con=conn.con.connect()
,if_exists='replace'
,index=False
,dtype={
'array_id': int_type
,'array_value': arr_f(int_type)
}
)

# Array Data Table Expression
array_data = conn.table("array_data")

# Optionally execute to preview entire table
# array_data.execute()

# Unnest
array_data.select(['array_id', array_data['array_value'].unnest()]).execute()
~~~

<!-- prettier-ignore-end -->

## `_` API

There is now a shorthand for lambda functions using underscore (`_`).
This is useful for chaining expressions together, and helps reduce both line length and the number of explicit lambdas.

For example, let’s use `array_data` from above.
We will unnest `array_value`, find the weighted average, and then sum in one expression:

```python
from ibis import _

(
array_data
.select([
'array_id'
# array_data returns a TableExpr, `_` here is shorthand
# for that returned expression
,_['array_value'].unnest().name('arval')
# we can use it instead of saying `array_data`
,(_['array_value'].length().cast('float')
/ _['array_value'].length().sum().cast('float')).name('wgt')
])
# Since the above `select` statement returns a TableExpr, we can use
# `_` to reference that one as well:
.mutate(wgt_prod=_.arval * _.wgt)
# And again:
.aggregate(vsum=_.wgt_prod.sum(), vcount=_.wgt_prod.count())
# And again:
.mutate(wgt_mean=_.vsum / _.vcount)
).execute()
```

Note that if you import `_` directly from `ibis` (`from ibis import _`), the default `_`
object will lose its functionality, so be mindful if you have a habit of using it outside of Ibis.

## Additional Changes

Along with these changes, the operation matrix has had a few more holes filled.
Contributors should note that backend test data is now loaded dynamically.
Most users won’t be exposed to this update, but it should make contribution a bit more streamlined.

To see the full patch notes, go to the [patch notes page](../release_notes.md).

As always, Ibis is free and open source.
Contributions are welcome and encouraged–drop into the discussions, raise an issue, or put in a pull request.

Download ibis 3.1 today!
55 changes: 42 additions & 13 deletions docs/contribute/01_environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,6 @@ hide:

- [`git`](https://git-scm.com/)

!!! note "Python 3.10 is supported on a best-effort basis"

As of 2022-02-17 there is support for Python 3.10 when using `nix` for development.

`conda-forge` is still in [the process of migrating packages to Python
3.10](https://conda-forge.org/status/#python310).

=== "Nix"

#### Support Matrix
Expand Down Expand Up @@ -72,12 +65,12 @@ hide:

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :--------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }}[^2] |
| **macOS** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} |
| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |

{% set managers = {"conda": {"name": "Miniconda", "url": "https://docs.conda.io/en/latest/miniconda.html"}, "mamba": {"name": "Mamba", "url": "https://github.com/mamba-org/mamba"}} %}
{% for manager, params in managers.items() %}
Expand Down Expand Up @@ -131,6 +124,42 @@ hide:

{% endfor %}

=== "pip"

!!! warning "`pip` will not handle installation of system dependencies"

`pip` will not install system dependencies needed for some packages
such as `psycopg2` and `kerberos`.

For a better development experience see the `conda` or `nix` setup
instructions.

1. [Install `gh`](https://cli.github.com/manual/installation)

1. Fork and clone the ibis repository:

```sh
gh repo fork --clone --remote ibis-project/ibis
```

1. Change directory into `ibis`:

```sh
cd ibis
```

1. Install development dependencies

```sh
pip install -r requirements.txt
```

1. Install ibis in development mode

```sh
pip install -e .
```

Once you've set up an environment, try building the documentation:

```sh
Expand Down
21 changes: 7 additions & 14 deletions docs/contribute/04_backend_tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,16 @@ success

Congrats, you now have a PostgreSQL server running and are ready to run tests!

#### Load Data
#### Download Data

The backend needs to be populated with test data:
The backend needs to be populated with test data.
The data will be loaded automatically when the tests are run, but it needs to be downloaded first.

1. Download the data
To download the data, run:

```sh
python ci/datamgr.py download
```

2. In the original terminal, run

```sh
python ci/datamgr.py load postgres
```

You should see a bit of logging, and the command should complete shortly thereafter.
```sh
just download-data
```

#### Run the test suite

Expand Down
250 changes: 150 additions & 100 deletions docs/ibis-for-sql-programmers.ipynb

Large diffs are not rendered by default.

155 changes: 154 additions & 1 deletion docs/release_notes.md

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions docs/stylesheets/extra.css
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,60 @@ body
> th:nth-child(1) {
min-width: 9.8rem;
}
/* Make the operation-support matrix scrollable: cap the article's table
   wrapper at a fixed height so the very tall matrix doesn't dominate the
   page. */
body
  > div.md-container
  > main
  > div
  > div.md-content
  > article
  > div.md-typeset__scrollwrap {
  overflow-y: auto;
  height: 750px;
}

/* Restore normal table layout inside the scroll wrapper (the theme's
   default display would break the sticky header/column rules below). */
body
  > div.md-container
  > main
  > div
  > div.md-content
  > article
  > div.md-typeset__scrollwrap
  > div
  > table {
  display: table;
}

/* Keep the header row visible while scrolling vertically.
   NOTE(review): background-color: black assumes a dark theme — confirm
   this renders acceptably in the light color scheme. */
body
  > div.md-container
  > main
  > div
  > div.md-content
  > article
  > div.md-typeset__scrollwrap
  > div
  > table
  > thead {
  position: sticky;
  top: 0;
  z-index: 2;
  background-color: black;
}

/* Pin the first column (operation names) while scrolling horizontally;
   lower z-index than the header so the header wins at the corner.
   NOTE(review): same dark-theme background assumption as above. */
body
  > div.md-container
  > main
  > div
  > div.md-content
  > article
  > div.md-typeset__scrollwrap
  > div
  > table
  > tbody
  > tr
  > td:nth-of-type(1) {
  position: sticky;
  left: 0;
  z-index: 1;
  background-color: black;
  text-align: right;
}
15 changes: 9 additions & 6 deletions docs/tutorial/01-Introduction-to-Ibis.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 33 additions & 23 deletions docs/tutorial/02-Aggregates-Joins.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 13 additions & 9 deletions docs/tutorial/03-Expressions-Lazy-Mode-Logging.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

112 changes: 63 additions & 49 deletions docs/tutorial/04-More-Value-Expressions.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 18 additions & 14 deletions docs/tutorial/05-IO-Create-Insert-External-Data.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 29 additions & 20 deletions docs/tutorial/06-ComplexFiltering.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 18 additions & 13 deletions docs/tutorial/07-Analytics-Tools.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 14 additions & 9 deletions docs/tutorial/08-Geospatial-Analysis.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/user_guide/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ columns, or scalars.
<!-- prettier-ignore-start -->
Examples of expression types include
[`StringValue`][ibis.expr.types.StringValue] and
[`TableExpr`][ibis.expr.types.TableExpr].
[`Table`][ibis.expr.types.Table].
<!-- prettier-ignore-end -->

<!-- prettier-ignore-start -->
Expand All @@ -74,9 +74,9 @@ An example of usage is a node that represents a logarithm operation:
```python

import ibis.expr.rules as rlz
from ibis.expr.operations import ValueOp
from ibis.expr.operations import Value

class Log(ValueOp):
class Log(Value):
# A double scalar or column
arg = rlz.double
# Optional argument, defaults to None
Expand Down
20 changes: 11 additions & 9 deletions docs/user_guide/extending/elementwise.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 18 additions & 10 deletions docs/user_guide/extending/reduction.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 17 additions & 4 deletions gen_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,31 @@ def get_leaf_classes(op):
yield from get_leaf_classes(child_class)


# Operations deliberately left out of the generated support matrix.
EXCLUDED_OPS = {
    # Never translates into anything
    ops.UnresolvedExistsSubquery,
    ops.UnresolvedNotExistsSubquery,
    ops.ScalarParameter,
}

# Operations force-included even though class discovery would skip them.
INCLUDED_OPS = {
    # Parent class of MultiQuantile so it's ignored by `get_backends()`
    ops.Quantile,
}


# Markdown icons used to render supported/unsupported cells in the matrix.
ICONS = {
    True: ":material-check-decagram:{ .verified }",
    False: ":material-cancel:{ .cancel }",
}


def main():
possible_ops = frozenset(get_leaf_classes(ops.ValueOp))
possible_ops = (
frozenset(get_leaf_classes(ops.Value)) | INCLUDED_OPS
) - EXCLUDED_OPS

support = {
"operation": [f"`{op.__name__}`" for op in possible_ops],
}
support = {"operation": [f"`{op.__name__}`" for op in possible_ops]}
support.update(
(name, list(map(backend.has_operation, possible_ops)))
for name, backend in get_backends()
Expand Down
36 changes: 23 additions & 13 deletions ibis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""Initialize Ibis module."""
from __future__ import annotations

import importlib.metadata as _importlib_metadata

# Converting an Ibis schema to a pandas DataFrame requires registering
# some type conversions that are currently registered in the pandas backend
Expand All @@ -12,16 +15,27 @@
from ibis.expr import api
from ibis.expr.api import * # noqa: F401,F403

try:
import importlib.metadata as importlib_metadata
except ImportError:
# TODO: remove this when Python 3.9 support is dropped
import importlib_metadata

__all__ = ['api', 'ir', 'util', 'IbisError', 'options']
__all__ = ['api', 'ir', 'util', 'BaseBackend', 'IbisError', 'options']
__all__ += api.__all__

__version__ = "3.0.2"
__version__ = "3.1.0"


def _get_backend_entrypoints() -> list[_importlib_metadata.EntryPoint]:
"""Get the list of installed `ibis.backend` entrypoints"""
import sys

if sys.version_info < (3, 10):
return list(_importlib_metadata.entry_points()['ibis.backends'])
else:
return list(_importlib_metadata.entry_points(group="ibis.backends"))


def __dir__() -> list[str]:
    """Expose installed ibis backend names for top-level tab completion."""
    # Public API names plus one attribute per installed backend entrypoint.
    backend_names = {ep.name for ep in _get_backend_entrypoints()}
    return sorted(set(__all__) | backend_names)


def __getattr__(name: str) -> BaseBackend:
Expand All @@ -39,11 +53,7 @@ def __getattr__(name: str) -> BaseBackend:
the `ibis.backends` entrypoints. If successful, the `ibis.sqlite`
attribute is "cached", so this function is only called the first time.
"""
entry_points = {
entry_point
for entry_point in importlib_metadata.entry_points()["ibis.backends"]
if name == entry_point.name
}
entry_points = {ep for ep in _get_backend_entrypoints() if ep.name == name}

if not entry_points:
raise AttributeError(
Expand Down
219 changes: 202 additions & 17 deletions ibis/backends/base/__init__.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
from __future__ import annotations

import abc
import collections.abc
import functools
import keyword
import re
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Iterable, Mapping
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Callable,
ClassVar,
Iterable,
Iterator,
Mapping,
)

if TYPE_CHECKING:
import pandas as pd

from cached_property import cached_property

import ibis
import ibis.common.exceptions as exc
import ibis.config
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.common.dispatch import RegexDispatcher
from ibis.common.exceptions import TranslationError
from ibis.util import deprecated

__all__ = ('BaseBackend', 'Database')
__all__ = ('BaseBackend', 'Database', 'connect')


class Database:
Expand Down Expand Up @@ -70,8 +81,8 @@ def tables(self) -> list[str]:
"""
return self.list_tables()

def __getitem__(self, table: str) -> ir.TableExpr:
"""Return a TableExpr for the given table name.
def __getitem__(self, table: str) -> ir.Table:
"""Return a Table for the given table name.
Parameters
----------
Expand All @@ -80,13 +91,13 @@ def __getitem__(self, table: str) -> ir.TableExpr:
Returns
-------
TableExpr
Table
Table expression
"""
return self.table(table)

def __getattr__(self, table: str) -> ir.TableExpr:
"""Return a TableExpr for the given table name.
def __getattr__(self, table: str) -> ir.Table:
"""Return a Table for the given table name.
Parameters
----------
Expand All @@ -95,7 +106,7 @@ def __getattr__(self, table: str) -> ir.TableExpr:
Returns
-------
TableExpr
Table
Table expression
"""
return self.table(table)
Expand All @@ -117,7 +128,7 @@ def drop(self, force: bool = False) -> None:
"""
self.client.drop_database(self.name, force=force)

def table(self, name: str) -> ir.TableExpr:
def table(self, name: str) -> ir.Table:
"""Return a table expression referencing a table in this database.
Parameters
Expand All @@ -127,7 +138,7 @@ def table(self, name: str) -> ir.TableExpr:
Returns
-------
TableExpr
Table
Table expression
"""
qualified_name = self._qualify(name)
Expand All @@ -144,6 +155,55 @@ def list_tables(self, like=None):
return self.client.list_tables(like, database=self.name)


class TablesAccessor(collections.abc.Mapping):
    """Mapping-like accessor for the tables of a backend.

    Tables can be retrieved either by indexing or by attribute access:

    Examples
    --------
    >>> con = ibis.sqlite.connect("example.db")
    >>> people = con.tables['people']  # index access
    >>> people = con.tables.people     # attribute access
    """

    def __init__(self, backend: BaseBackend):
        # The backend supplies `table(name)` and `list_tables()`.
        self._backend = backend

    def __getitem__(self, name) -> ir.Table:
        # Any backend failure surfaces as the mapping-conventional KeyError.
        try:
            return self._backend.table(name)
        except Exception as lookup_error:
            raise KeyError(name) from lookup_error

    def __getattr__(self, name) -> ir.Table:
        # Leading-underscore names are never treated as table lookups, so
        # internal attributes and dunder protocol probes fail fast.
        if name.startswith("_"):
            raise AttributeError(name)
        try:
            return self._backend.table(name)
        except Exception as lookup_error:
            raise AttributeError(name) from lookup_error

    def __iter__(self) -> Iterator[str]:
        # Deterministic, sorted iteration order over table names.
        yield from sorted(self._backend.list_tables())

    def __len__(self) -> int:
        return len(self._backend.list_tables())

    def __dir__(self) -> list[str]:
        # Class attributes plus any table name usable as an identifier.
        completions = set(dir(type(self)))
        completions.update(
            table_name
            for table_name in self._backend.list_tables()
            if table_name.isidentifier() and not keyword.iskeyword(table_name)
        )
        return list(completions)

    def _ipython_key_completions_(self) -> list[str]:
        # IPython uses this for `con.tables["<TAB>`-style completion.
        return self._backend.list_tables()


class BaseBackend(abc.ABC):
"""Base backend class.
Expand Down Expand Up @@ -173,7 +233,7 @@ def __hash__(self):
def __eq__(self, other):
return self.db_identity == other.db_identity

@cached_property
@functools.cached_property
def db_identity(self) -> str:
"""Return the identity of the database.
Expand Down Expand Up @@ -364,9 +424,23 @@ def exists_table(self, name: str, database: str | None = None) -> bool:
version='2.0',
instead='change the current database before calling `.table()`',
)
def table(self, name: str, database: str | None = None) -> ir.TableExpr:
def table(self, name: str, database: str | None = None) -> ir.Table:
"""Return a table expression from the database."""

@functools.cached_property
def tables(self):
"""An accessor for tables in the database.
Tables may be accessed by name using either index or attribute access:
Examples
--------
>>> con = ibis.sqlite.connect("example.db")
>>> people = con.tables['people'] # access via index
>>> people = con.tables.people # access via attribute
"""
return TablesAccessor(self)

@deprecated(version='2.0', instead='use `.table(name).schema()`')
def get_schema(self, table_name: str, database: str = None) -> sch.Schema:
"""Return the schema of `table_name`."""
Expand Down Expand Up @@ -478,7 +552,7 @@ def create_database(self, name: str, force: bool = False) -> None:
def create_table(
self,
name: str,
obj: pd.DataFrame | ir.TableExpr | None = None,
obj: pd.DataFrame | ir.Table | None = None,
schema: ibis.Schema | None = None,
database: str | None = None,
) -> None:
Expand Down Expand Up @@ -529,7 +603,7 @@ def drop_table(
def create_view(
self,
name: str,
expr: ir.TableExpr,
expr: ir.Table,
database: str | None = None,
) -> None:
"""Create a view.
Expand Down Expand Up @@ -568,7 +642,7 @@ def drop_view(
)

@classmethod
def has_operation(cls, operation: type[ops.ValueOp]) -> bool:
def has_operation(cls, operation: type[ops.Value]) -> bool:
"""Return whether the backend implements support for `operation`.
Parameters
Expand All @@ -593,3 +667,114 @@ def has_operation(cls, operation: type[ops.ValueOp]) -> bool:
raise NotImplementedError(
f"{cls.name} backend has not implemented `has_operation` API"
)


# Regex-based dispatcher: each handler below is chosen by matching the
# connection string against its registered pattern; higher `priority`
# values win when several patterns match.
_connect = RegexDispatcher("_connect")


@_connect.register(r"(?P<backend>.+)://(?P<path>.*)", priority=10)
def _(_: str, *, backend: str, path: str, **kwargs: Any) -> BaseBackend:
    """Connect to given `backend` with `path`.

    Examples
    --------
    >>> con = ibis.connect("duckdb://relative/path/to/data.db")
    >>> con = ibis.connect("postgres://user:pass@hostname:port/database")
    """
    # Resolve the backend module lazily through `ibis.__getattr__`.
    instance = getattr(ibis, backend)
    # Accept the common "postgres" scheme by rewriting it to "postgresql";
    # the concatenation only fires when the equality check is True.
    backend += (backend == "postgres") * "ql"
    try:
        # URL-style backends accept a full `url=` keyword argument...
        return instance.connect(url=f"{backend}://{path}", **kwargs)
    except TypeError:
        # ...while path-style backends take the bare path positionally.
        return instance.connect(path, **kwargs)


@_connect.register(r"file://(?P<path>.*)", priority=10)
def _(_: str, *, path: str, **kwargs: Any) -> BaseBackend:
    """Connect to file located at `path`."""
    # Strip the file:// scheme and re-dispatch on the bare path.
    return _connect(path, **kwargs)


@_connect.register(r".+\.(?P<backend>.+)", priority=1)
def _(path: str, *, backend: str, **kwargs: Any) -> BaseBackend:
    """Connect to given path.

    The extension is assumed to be the name of an ibis backend.

    Examples
    --------
    >>> con = ibis.connect("file://relative/path/to/data.duckdb")
    """
    return getattr(ibis, backend).connect(path, **kwargs)


@functools.singledispatch
def connect(resource: Path | str, **_: Any) -> BaseBackend:
    """Connect to `resource`.

    `resource` can be a `pathlib.Path` or a `str` specifying a URL or path.

    Examples
    --------
    >>> con = ibis.connect("duckdb://relative/path/to/data.db")
    >>> con = ibis.connect("relative/path/to/data.duckdb")
    """
    # Base case of the singledispatch: unsupported resource types.
    raise NotImplementedError(type(resource))


@connect.register
def _(path: Path, **kwargs: Any) -> BaseBackend:
    # Paths are normalized to strings and re-dispatched by regex.
    return _connect(str(path), **kwargs)


@connect.register
def _(url: str, **kwargs: Any) -> BaseBackend:
    # Strings go straight to the regex dispatcher.
    return _connect(url, **kwargs)


@_connect.register(
    r"(?P<backend>.+)://(?P<filename>.+\.(?P<extension>.+))",
    priority=11,
)
def _(
    _: str,
    *,
    backend: str,
    filename: str,
    extension: str,
    **kwargs: Any,
) -> BaseBackend:
    """Connect to `backend` and register a file.

    The extension of the file will be used to register the file with
    the backend.

    Examples
    --------
    >>> con = ibis.connect("duckdb://relative/path/to/data.csv")
    >>> con = ibis.connect("duckdb://relative/path/to/more/data.parquet")
    """
    con = getattr(ibis, backend).connect(**kwargs)
    # NOTE(review): "(unknown)" reads like a placeholder — presumably this
    # should interpolate the captured `filename`
    # (i.e. f"{extension}://{filename}"); confirm against upstream.
    con.register(f"{extension}://(unknown)")
    return con


@_connect.register(
    r"(?P<filename>.+\.(?P<extension>parquet|csv))",
    priority=8,
)
def _(
    _: str,
    *,
    filename: str,
    extension: str,
    **kwargs: Any,
) -> BaseBackend:
    """Connect to `duckdb` and register a parquet or csv file.

    Examples
    --------
    >>> con = ibis.connect("relative/path/to/data.csv")
    >>> con = ibis.connect("relative/path/to/more/data.parquet")
    """
    # NOTE(review): "(unknown)" reads like a placeholder — presumably the
    # intended URL is f"duckdb://{filename}"; confirm against upstream.
    return _connect(f"duckdb://(unknown)", **kwargs)
49 changes: 30 additions & 19 deletions ibis/backends/base/sql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import abc
import contextlib
from functools import lru_cache
from typing import Any, Mapping

import ibis.expr.operations as ops
Expand All @@ -21,9 +23,9 @@ class BaseSQLBackend(BaseBackend):

compiler = Compiler
table_class = ops.DatabaseTable
table_expr_class = ir.TableExpr
table_expr_class = ir.Table

def table(self, name: str, database: str | None = None) -> ir.TableExpr:
def table(self, name: str, database: str | None = None) -> ir.Table:
"""Construct a table expression.
Parameters
Expand All @@ -35,7 +37,7 @@ def table(self, name: str, database: str | None = None) -> ir.TableExpr:
Returns
-------
TableExpr
Table
Table expression
"""
qualified_name = self._fully_qualified_name(name, database)
Expand All @@ -47,7 +49,7 @@ def _fully_qualified_name(self, name, database):
# XXX
return name

def sql(self, query: str) -> ir.TableExpr:
def sql(self, query: str) -> ir.Table:
"""Convert a SQL query to an Ibis table expression.
Parameters
Expand All @@ -57,7 +59,7 @@ def sql(self, query: str) -> ir.TableExpr:
Returns
-------
TableExpr
Table
Table expression
"""
# Get the schema by adding a LIMIT 0 on to the end of the query. If
Expand Down Expand Up @@ -97,10 +99,14 @@ def raw_sql(self, query: str, results: bool = False) -> Any:
return cursor
cursor.release()

@contextlib.contextmanager
def _safe_raw_sql(self, *args, **kwargs):
yield self.raw_sql(*args, **kwargs)

def execute(
self,
expr: ir.Expr,
params: Mapping[ir.ScalarExpr, Any] | None = None,
params: Mapping[ir.Scalar, Any] | None = None,
limit: str = 'default',
**kwargs: Any,
):
Expand All @@ -126,9 +132,9 @@ def execute(
Returns
-------
DataFrame | Series | Scalar
* `TableExpr`: pandas.DataFrame
* `ColumnExpr`: pandas.Series
* `ScalarExpr`: Python scalar value
* `Table`: pandas.DataFrame
* `Column`: pandas.Series
* `Scalar`: Python scalar value
"""
# TODO Reconsider having `kwargs` here. It's needed to support
# `external_tables` in clickhouse, but better to deprecate that
Expand All @@ -140,9 +146,11 @@ def execute(
)
sql = query_ast.compile()
self._log(sql)
cursor = self.raw_sql(sql, **kwargs)

schema = self.ast_schema(query_ast, **kwargs)
result = self.fetch_from_cursor(cursor, schema)

with self._safe_raw_sql(sql, **kwargs) as cursor:
result = self.fetch_from_cursor(cursor, schema)

if hasattr(getattr(query_ast, 'dml', query_ast), 'result_handler'):
result = query_ast.dml.result_handler(result)
Expand Down Expand Up @@ -176,9 +184,9 @@ def ast_schema(self, query_ast, **kwargs) -> sch.Schema:
dml = getattr(query_ast, 'dml', query_ast)
expr = getattr(dml, 'parent_expr', getattr(dml, 'table_set', None))

if isinstance(expr, (ir.TableExpr, sch.HasSchema)):
if isinstance(expr, (ir.Table, sch.HasSchema)):
return expr.schema()
elif isinstance(expr, ir.ValueExpr):
elif isinstance(expr, ir.Value):
return sch.schema([(expr.get_name(), expr.type())])
else:
raise ValueError(
Expand Down Expand Up @@ -251,17 +259,20 @@ def explain(

statement = f'EXPLAIN {query}'

cur = self.raw_sql(statement)
result = self._get_list(cur)
cur.release()
with self._safe_raw_sql(statement) as cur:
result = self._get_list(cur)

return '\n'.join(['Query:', util.indent(query, 2), '', *result])

@classmethod
def has_operation(cls, operation: type[ops.ValueOp]) -> bool:
@lru_cache
def _get_operations(cls):
translator = cls.compiler.translator_class
op_classes = translator._registry.keys() | translator._rewrites.keys()
return operation in op_classes
return translator._registry.keys() | translator._rewrites.keys()

@classmethod
def has_operation(cls, operation: type[ops.Value]) -> bool:
return operation in cls._get_operations()

def _create_temp_view(self, view, definition):
raise NotImplementedError(
Expand Down
37 changes: 21 additions & 16 deletions ibis/backends/base/sql/alchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,31 +116,36 @@ def inspector(self):
self._inspector.info_cache.clear()
return self._inspector

@contextlib.contextmanager
def _safe_raw_sql(self, *args, **kwargs):
with self.begin() as con:
yield con.execute(*args, **kwargs)

@staticmethod
def _to_geodataframe(df, schema):
"""Convert `df` to a `GeoDataFrame`.
Required libraries for geospatial support must be installed and a
geospatial column is present in the dataframe.
"""
import geopandas
import geopandas as gpd
from geoalchemy2 import shape

def to_shapely(row, name):
return shape.to_shape(row[name]) if row[name] is not None else None

geom_col = None
for name, dtype in schema.items():
if isinstance(dtype, dt.GeoSpatial):
geom_col = geom_col or name
df[name] = df.apply(lambda x: to_shapely(x, name), axis=1)
df[name] = df[name].map(
lambda row: None if row is None else shape.to_shape(row)
)
if geom_col:
df = geopandas.GeoDataFrame(df, geometry=geom_col)
df[geom_col] = gpd.array.GeometryArray(df[geom_col].values)
df = gpd.GeoDataFrame(df, geometry=geom_col)
return df

def fetch_from_cursor(self, cursor, schema):
def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
df = pd.DataFrame.from_records(
cursor.fetchall(),
cursor,
columns=cursor.keys(),
coerce_float=True,
)
Expand All @@ -157,7 +162,7 @@ def begin(self):
def create_table(
self,
name: str,
expr: pd.DataFrame | ir.TableExpr | None = None,
expr: pd.DataFrame | ir.Table | None = None,
schema: sch.Schema | None = None,
database: str | None = None,
force: bool = False,
Expand Down Expand Up @@ -361,7 +366,7 @@ def _get_sqla_table(
) -> sa.Table:
return sa.Table(name, self.meta, schema=schema, autoload=autoload)

def _sqla_table_to_expr(self, table: sa.Table) -> ir.TableExpr:
def _sqla_table_to_expr(self, table: sa.Table) -> ir.Table:
schema = self._schemas.get(table.name)
node = self.table_class(
source=self,
Expand All @@ -376,7 +381,7 @@ def table(
name: str,
database: str | None = None,
schema: str | None = None,
) -> ir.TableExpr:
) -> ir.Table:
"""Create a table expression from a table in the database.
Parameters
Expand All @@ -395,7 +400,7 @@ def table(
Returns
-------
TableExpr
Table
Table expression
"""
if database is not None and database != self.current_database:
Expand All @@ -414,7 +419,7 @@ def table(
def insert(
self,
table_name: str,
obj: pd.DataFrame | ir.TableExpr,
obj: pd.DataFrame | ir.Table,
database: str | None = None,
overwrite: bool = False,
) -> None:
Expand Down Expand Up @@ -457,7 +462,7 @@ def insert(
if_exists='replace' if overwrite else 'append',
schema=self._current_schema,
)
elif isinstance(obj, ir.TableExpr):
elif isinstance(obj, ir.Table):
to_table_expr = self.table(table_name)
to_table_schema = to_table_expr.schema()

Expand All @@ -484,7 +489,7 @@ def insert(
else:
raise ValueError(
"No operation is being performed. Either the obj parameter "
"is not a pandas DataFrame or is not a ibis TableExpr."
"is not a pandas DataFrame or is not a ibis Table."
f"The given obj is of type {type(obj).__name__} ."
)

Expand Down Expand Up @@ -514,6 +519,6 @@ def _create_temp_view(
compiled = definition.compile()
defn = self._get_temp_view_definition(name, definition=compiled)
query = sa.text(defn).bindparams(**compiled.params)
self.con.execute(query, definition)
self.con.execute(query)
self._temp_views.add(raw_name)
self._register_temp_view_cleanup(name, raw_name)
43 changes: 35 additions & 8 deletions ibis/backends/base/sql/alchemy/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

import functools
from typing import Optional
from typing import Iterable

import sqlalchemy as sa
from sqlalchemy.dialects import mysql, postgresql, sqlite
from sqlalchemy.dialects.mysql.base import MySQLDialect
from sqlalchemy.dialects.postgresql.base import PGDialect
from sqlalchemy.dialects.sqlite.base import SQLiteDialect
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.types import UserDefinedType

import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
Expand All @@ -16,7 +19,21 @@
import geoalchemy2 as ga


def table_from_schema(name, meta, schema, database: Optional[str] = None):
class StructType(UserDefinedType):
    """SQLAlchemy column type for a struct of named, typed fields.

    Renders in DDL as ``STRUCT(name type, ...)`` via `get_col_spec`.
    """

    def __init__(
        self,
        pairs: Iterable[tuple[str, sa.types.TypeEngine]],
    ):
        # Normalize each field type to a TypeEngine *instance* (callers may
        # pass type classes); preserve order, since it defines the struct.
        self.pairs = [
            (name, sa.types.to_instance(type)) for name, type in pairs
        ]

    def get_col_spec(self, **_):
        # Column DDL spec, e.g. "STRUCT(a INTEGER, b TEXT)".
        pairs = ", ".join(f"{k} {v}" for k, v in self.pairs)
        return f"STRUCT({pairs})"


def table_from_schema(name, meta, schema, database: str | None = None):
# Convert Ibis schema to SQLA table
columns = []

Expand Down Expand Up @@ -45,6 +62,7 @@ def table_from_schema(name, meta, schema, database: Optional[str] = None):
dt.Int16: sa.SmallInteger,
dt.Int32: sa.Integer,
dt.Int64: sa.BigInteger,
dt.JSON: sa.JSON,
}


Expand Down Expand Up @@ -77,15 +95,18 @@ def _(itype, **kwargs):

@to_sqla_type.register(dt.Array)
def _(itype, **kwargs):
    """Convert an ibis array dtype to a SQLAlchemy ``ARRAY`` type."""
    # The diff hunk fused the superseded implementation (which rejected
    # non-primitive element types) with its replacement; keep only the
    # current implementation below.
    #
    # Unwrap the array element type because sqlalchemy doesn't allow arrays
    # of arrays. This doesn't affect the underlying data.
    while isinstance(itype, dt.Array):
        itype = itype.value_type
    return sa.ARRAY(to_sqla_type(itype, **kwargs))


@to_sqla_type.register(dt.Struct)
def _(itype, **_):
    """Convert an ibis struct dtype to the custom ``StructType``."""
    # The diff hunk fused the superseded `sa.TupleType` implementation with
    # its replacement; keep only the current implementation below.
    # Field order is preserved from the ibis struct definition.
    return StructType(
        [(name, to_sqla_type(type)) for name, type in itype.pairs.items()]
    )


@to_sqla_type.register(dt.GeoSpatial)
Expand Down Expand Up @@ -274,6 +295,12 @@ def sa_array(dialect, satype, nullable=True):
return dt.Array(value_dtype, nullable=nullable)


@dt.dtype.register(Dialect, StructType)
def sa_struct(dialect, satype, nullable=True):
    """Convert a SQLAlchemy ``StructType`` back to an ibis struct dtype."""
    # Recursively convert each field's SQLAlchemy type to an ibis dtype.
    pairs = [(name, dt.dtype(dialect, typ)) for name, typ in satype.pairs]
    return dt.Struct.from_tuples(pairs, nullable=nullable)


@sch.infer.register((sa.Table, sa.sql.TableClause))
def schema_from_table(table, schema=None):
"""Retrieve an ibis schema from a SQLAlchemy ``Table``.
Expand Down
6 changes: 3 additions & 3 deletions ibis/backends/base/sql/alchemy/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _can_lower_sort_column(table_set, expr):
# in the generic SQL compiler that "fuses" the sort with the
# aggregation so they appear in same query. It's generally for
# cosmetics and doesn't really affect query semantics.
bases = ops.find_all_base_tables(expr)
bases = {op: op.to_expr() for op in expr.op().root_tables()}
if len(bases) > 1:
return False

Expand Down Expand Up @@ -207,9 +207,9 @@ def _add_select(self, table_set):

has_select_star = False
for expr in self.select_set:
if isinstance(expr, ir.ValueExpr):
if isinstance(expr, ir.Value):
arg = self._translate(expr, named=True)
elif isinstance(expr, ir.TableExpr):
elif isinstance(expr, ir.Table):
if expr.equals(self.table_set):
cached_table = self.context.get_ref(expr)
if cached_table is None:
Expand Down
Loading