89 changes: 8 additions & 81 deletions .github/workflows/conda-lock.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: Generate Conda Lockfiles

on:
@@ -36,85 +35,13 @@ jobs:
miniforge-variant: Mambaforge
activate-environment: conda-lock
python-version: ${{ matrix.python-version }}
condarc-file: ci/condarc
condarc-file: ci/conda-lock/condarc

- name: install conda-lock
run: mamba install 'conda-lock <1.0'
run: mamba install conda-lock

- name: generate lock file
run: |
set -euo pipefail
python_version_file="$(mktemp --suffix=.yml)"
{
echo 'name: conda-lock'
echo 'dependencies:'
echo ' - python=${{ matrix.python-version }}'
} > "${python_version_file}"
template='conda-lock/{platform}-${{ matrix.python-version }}.lock'
conda lock \
--kind explicit \
--file pyproject.toml \
--file "${python_version_file}" \
--platform linux-64 \
--platform osx-64 \
--filename-template "${template}" \
--extras all \
--mamba
# not great, but conda-forge is missing packages for duckdb and
# clickhouse-cityhash for windows
conda lock \
--kind explicit \
--file pyproject.toml \
--file "${python_version_file}" \
--platform win-64 \
--filename-template "${template}" \
-e dask \
-e datafusion \
-e geospatial \
-e impala \
-e mysql \
-e pandas \
-e postgres \
-e pyspark \
-e sqlite \
-e visualization \
--mamba
- name: generate lock file for osx-arm64
continue-on-error: true
run: |
set -euo pipefail
python_version_file="$(mktemp --suffix=.yml)"
{
echo 'name: conda-lock'
echo 'dependencies:'
echo ' - python=${{ matrix.python-version }}'
} > "${python_version_file}"
template='conda-lock/{platform}-${{ matrix.python-version }}.lock'
conda lock \
--kind explicit \
--file pyproject.toml \
--file "${python_version_file}" \
--platform osx-arm64 \
--filename-template "${template}" \
-e dask \
-e datafusion \
-e geospatial \
-e mysql \
-e pandas \
-e postgres \
-e pyspark \
-e sqlite \
-e visualization \
--mamba
run: ./ci/conda-lock/generate.sh "${{ matrix.python-version }}"

- name: create conda environment
run: mamba create --name ibis${{ matrix.python-version }} --file conda-lock/linux-64-${{ matrix.python-version }}.lock
@@ -126,7 +53,7 @@ jobs:
path: conda-lock/*-${{ matrix.python-version }}.lock

condalock_pr:
if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
needs:
- condalock
@@ -169,21 +96,21 @@ jobs:
dependencies
autorebase:opt-in
- uses: juliangruber/approve-pull-request-action@v1.1.1
if: ${{ steps.create_pr.outputs.pull-request-operation == 'created' }}
- uses: juliangruber/approve-pull-request-action@v2.0.3
if: steps.create_pr.outputs.pull-request-operation == 'created'
with:
github-token: ${{ steps.generate_pr_approval_token.outputs.token }}
number: ${{ steps.create_pr.outputs.pull-request-number }}

- uses: peter-evans/enable-pull-request-automerge@v2
if: ${{ steps.create_pr.outputs.pull-request-operation == 'created' }}
if: steps.create_pr.outputs.pull-request-operation == 'created'
with:
token: ${{ steps.generate_pr_token.outputs.token }}
pull-request-number: ${{ steps.create_pr.outputs.pull-request-number }}
merge-method: rebase

condalock_push:
if: ${{ github.event_name == 'repository_dispatch' }}
if: github.event_name == 'repository_dispatch'
runs-on: ubuntu-latest
needs:
- condalock
84 changes: 84 additions & 0 deletions .github/workflows/ibis-backends-cloud.yml
@@ -0,0 +1,84 @@
name: Cloud Backends

on:
push:
# Skip the backend suite if all changes are in the docs directory
paths-ignore:
- "docs/**"
- "mkdocs.yml"
branches:
- master

permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
# rate limit while restricting GITHUB_TOKEN permissions elsewhere
contents: read

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
test_backends:
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.10"
backend:
- name: snowflake
title: Snowflake
- name: bigquery
title: BigQuery
steps:
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}

- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }}

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: download backend data
run: just download-data

- uses: google-github-actions/auth@v1
if: matrix.backend.name == 'bigquery'
with:
credentials_json: ${{ secrets.GCP_CREDENTIALS }}

- name: setup snowflake credentials
if: matrix.backend.name == 'snowflake'
run: echo "SNOWFLAKE_URL=${SNOWFLAKE_URL}" >> "$GITHUB_ENV"
env:
SNOWFLAKE_URL: ${{ secrets.SNOWFLAKE_URL }}

- name: "run parallel tests: ${{ matrix.backend.name }}"
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}

- name: publish test report
uses: actions/upload-artifact@v3
if: success() || failure()
with:
name: ${{ matrix.backend.name }}-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml
4 changes: 4 additions & 0 deletions .github/workflows/ibis-backends-skip-helper.yml
@@ -7,13 +7,17 @@ on:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"
230 changes: 177 additions & 53 deletions .github/workflows/ibis-backends.yml
@@ -1,24 +1,32 @@
# vim: filetype=yaml
name: Backends

on:
push:
# Skip the backend suite if all changes are in the docs directory
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"
pull_request:
# Skip the backend suite if all changes are in the docs directory
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
- "codecov.yml"
branches:
- master
- "*.x.x"

permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
# rate limit while restricting GITHUB_TOKEN permissions elsewhere
contents: read

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
@@ -39,94 +47,188 @@ jobs:
backend:
- name: dask
title: Dask
extras:
- dask
- name: duckdb
title: DuckDB
extras:
- duckdb
- name: pandas
title: Pandas
extras:
- pandas
- name: sqlite
title: SQLite
extras:
- sqlite
- name: datafusion
title: Datafusion
extras:
- datafusion
- name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- name: polars
title: Polars
extras:
- polars
- name: mysql
title: MySQL
services:
- mysql
extras:
- mysql
- geospatial
sys-deps:
- libgeos-dev
- name: clickhouse
title: ClickHouse
services:
- clickhouse
extras:
- clickhouse
- name: postgres
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- name: mssql
title: MS SQL Server
serial: true
extras:
- mssql
services:
- mssql
sys-deps:
- libkrb5-dev
- krb5-config
- name: trino
title: Trino
extras:
- trino
- postgres
services:
- trino
exclude:
- os: windows-latest
backend:
name: mysql
title: MySQL
extras:
- mysql
- geospatial
services:
- mysql
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: clickhouse
title: ClickHouse
extras:
- clickhouse
services:
- clickhouse
- os: windows-latest
backend:
name: pyspark
title: PySpark
serial: true
extras:
- pyspark
- os: windows-latest
backend:
name: postgres
include:
- os: ubuntu-latest
python-version: "3.8"
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- krb5-config
- libkrb5-dev
- os: ubuntu-latest
python-version: "3.9"
- os: windows-latest
backend:
name: impala
title: Impala
name: mssql
title: MS SQL Server
serial: true
extras:
- mssql
services:
- impala
- kudu
- mssql
sys-deps:
- cmake
- ninja-build
- krb5-config
- libkrb5-dev
- krb5-config
- os: windows-latest
backend:
name: trino
title: Trino
services:
- trino
extras:
- trino
- postgres
steps:
- name: update and install system dependencies
if: ${{ matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null }}
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
run: |
set -euo pipefail
sudo apt-get update -qq -y
sudo apt-get install -qq -y build-essential python-dev ${{ join(matrix.backend.sys-deps, ' ') }}
sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }}
- name: install sqlite
if: ${{ matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' }}
if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite'
run: choco install sqlite

- uses: actions/setup-java@v3
if: ${{ matrix.backend.name == 'pyspark' }}
if: matrix.backend.name == 'pyspark'
with:
distribution: temurin
java-version: 11
distribution: microsoft
java-version: 17

- name: checkout
uses: actions/checkout@v3

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: download backend data
run: just download-data

- name: start services
if: ${{ matrix.backend.services != null }}
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}

- name: install python
@@ -145,27 +247,17 @@ jobs:
requirement_files: poetry.lock
custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.2'

- name: install ibis
if: ${{ matrix.backend.name != 'postgres' }}
run: poetry install --extras ${{ matrix.backend.name }}
- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
if: ${{ matrix.backend.name == 'postgres' }}
run: poetry install --extras ${{ matrix.backend.name }} --extras geospatial

- uses: extractions/setup-just@v1

- name: download backend data
run: just download-data
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"

- name: "run parallel tests: ${{ matrix.backend.name }}"
if: ${{ matrix.backend.name != 'pyspark' && matrix.backend.name != 'impala' }}
if: ${{ !matrix.backend.serial }}
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup

- name: "run serial tests: ${{ matrix.backend.name }}"
if: ${{ matrix.backend.name == 'pyspark' || matrix.backend.name == 'impala' }}
if: matrix.backend.serial
run: just ci-check -m ${{ matrix.backend.name }}
env:
IBIS_TEST_NN_HOST: localhost
Expand All @@ -187,6 +279,10 @@ jobs:
name: ${{ matrix.backend.name }}-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml

- name: Show docker compose logs on fail
if: ${{ failure() }}
run: docker compose logs

test_backends_min_version:
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
@@ -203,7 +299,9 @@ jobs:
- name: dask
title: Dask
deps:
- "dask[array,dataframe]@2021.10.0"
- "dask[array,dataframe]@2022.9.1"
extras:
- dask
- name: postgres
title: PostgreSQL
deps:
@@ -213,23 +311,55 @@ jobs:
- "Shapely@1.6"
services:
- postgres
extras:
- postgres
- geospatial
exclude:
- os: windows-latest
backend:
name: postgres
title: PostgreSQL
deps:
- "psycopg2@2.8.4"
- "GeoAlchemy2@0.6.3"
- "geopandas@0.6"
- "Shapely@1.6"
services:
- postgres
extras:
- postgres
- geospatial
- python-version: "3.10"
backend:
name: postgres
title: PostgreSQL
deps:
- "psycopg2@2.8.4"
- "GeoAlchemy2@0.6.3"
- "geopandas@0.6"
- "Shapely@1.6"
services:
- postgres
extras:
- postgres
- geospatial
steps:
- name: checkout
uses: actions/checkout@v3

- name: install libgeos for shapely
if: ${{ matrix.backend.name == 'postgres' }}
if: matrix.backend.name == 'postgres'
run: sudo apt-get install -qq -y build-essential libgeos-dev

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: download backend data
run: just download-data

- name: start services
if: ${{ matrix.backend.services != null }}
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}

- name: install python
@@ -238,7 +368,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.2'
- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
@@ -252,18 +382,8 @@ jobs:
# without updating anything except the requested versions
run: poetry lock --no-update

- uses: extractions/setup-just@v1

- name: install ibis
if: ${{ matrix.backend.name != 'postgres' }}
run: poetry install --extras ${{ matrix.backend.name }}

- name: install ibis
if: ${{ matrix.backend.name == 'postgres' }}
run: poetry install --extras ${{ matrix.backend.name }} --extras geospatial

- name: download backend data
run: just download-data
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"

- name: run tests
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
@@ -281,6 +401,10 @@ jobs:
name: ${{ matrix.backend.name }}-min-version-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml

- name: Show docker compose logs on fail
if: ${{ failure() }}
run: docker compose logs

backends:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
79 changes: 37 additions & 42 deletions .github/workflows/ibis-docs-lint.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: Docs/Linting/Benchmarks

on:
@@ -15,20 +14,27 @@ concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

permissions:
# increase the rate limit for nix operations hitting github, but limit the
# permissions to reading things
contents: read

jobs:
commitlint:
runs-on: ubuntu-latest
if: ${{ github.event_name == 'pull_request' }}
if: github.event_name == 'pull_request'
steps:
- name: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: install nix
uses: cachix/install-nix-action@v17
uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: lint commits
run: nix run 'nixpkgs#commitlint' -- --from=${{ github.event.pull_request.base.sha }} --to=${{ github.sha }} --verbose
@@ -40,30 +46,25 @@ jobs:
uses: actions/checkout@v3

- name: install nix
uses: cachix/install-nix-action@v17
uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

# run against a smaller shell for speed for pull requests
- name: pre-commit checks pull_request
if: ${{ github.event_name == 'pull_request' }}
run: nix develop -f nix preCommitShell --ignore-environment --keep-going -c pre-commit run --all-files

# run against the full shell.nix on push so it gets pushed to cachix
- name: pre-commit checks push
if: ${{ github.event_name == 'push' }}
run: nix develop -f shell.nix --ignore-environment --keep-going -c pre-commit run --all-files
- name: pre-commit checks
run: nix develop '.#preCommit' --ignore-environment --keep-going -c pre-commit run --all-files

benchmarks:
runs-on: ubuntu-latest
if: ${{ github.event_name == 'push' }}
if: github.event_name == 'push'
steps:
- name: checkout
uses: actions/checkout@v3
@@ -75,7 +76,7 @@ jobs:
python-version: "3.10"

- name: install system dependencies
run: sudo apt-get install -qq -y build-essential krb5-config libkrb5-dev libgeos-dev
run: sudo apt-get install -qq -y build-essential libgeos-dev

- uses: syphar/restore-virtualenv@v1
with:
@@ -87,10 +88,10 @@ jobs:
requirement_files: poetry.lock
custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}

- run: python -m pip install --upgrade pip 'poetry<1.2'
- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
run: poetry install --extras all
run: poetry install --without dev --without docs --all-extras

- name: make benchmark output dir
run: mkdir .benchmarks
@@ -116,16 +117,17 @@ jobs:

docs_pr:
runs-on: ubuntu-latest
if: ${{ github.event_name != 'push' }}
if: github.event_name == 'pull_request'
concurrency: docs-${{ github.repository }}-${{ github.head_ref || github.sha }}
steps:
- name: install nix
uses: cachix/install-nix-action@v17
uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
@@ -137,27 +139,28 @@ jobs:
fetch-depth: 0

- name: build docs
run: nix run -f nix ibisDevEnv310 -- -m mkdocs build
run: nix develop --ignore-environment -c mkdocs build --strict

- name: verify internal links
run: nix shell -f nix --ignore-environment bash findutils just lychee -c just checklinks --offline --no-progress
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress

docs_push:
runs-on: ubuntu-latest
if: ${{ github.event_name == 'push' }}
if: github.event_name == 'push'
concurrency: docs-${{ github.repository }}-${{ github.head_ref || github.sha }}
needs:
# wait on benchmarks to prevent a race condition when pushing to the
# gh-pages branch
- benchmarks
steps:
- name: install nix
uses: cachix/install-nix-action@v17
uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
@@ -185,17 +188,8 @@ jobs:
- name: build and push dev docs
run: |
set -euo pipefail
nix run -f nix mic -- \
deploy \
--push \
--rebase \
--prefix docs \
--message 'docs(dev): ibis@${{ github.sha }}' \
dev
env:
PYTHONPATH: .
nix develop --ignore-environment -c \
mike deploy --push --rebase --prefix docs --message 'docs(dev): ibis@${{ github.sha }}' dev
simulate_release:
runs-on: ubuntu-latest
@@ -204,11 +198,12 @@ jobs:
with:
fetch-depth: 0

- uses: cachix/install-nix-action@v17
- uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- uses: cachix/cachix-action@v10
- uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
20 changes: 3 additions & 17 deletions .github/workflows/ibis-docs-release.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: Docs Release Build

on:
@@ -11,12 +10,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: install nix
uses: cachix/install-nix-action@v17
with:
nix_path: nixpkgs=channel:nixos-unstable-small
uses: cachix/install-nix-action@v18

- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
@@ -51,15 +48,4 @@ jobs:
- name: build and push docs on tag
run: |
set -euo pipefail
nix run -f nix mic -- \
deploy \
--push \
--rebase \
--update-aliases \
--prefix docs \
--message "docs(release): ibis@${GITHUB_REF_NAME}" \
"${GITHUB_REF_NAME}" latest
env:
PYTHONPATH: .
nix develop --ignore-environment -c mike deploy --push --rebase --update-aliases --prefix docs --message "docs(release): ibis@${GITHUB_REF_NAME}" "${GITHUB_REF_NAME}" latest
2 changes: 2 additions & 0 deletions .github/workflows/ibis-main-skip-helper.yml
@@ -7,13 +7,15 @@ on:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
pull_request:
paths:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
81 changes: 64 additions & 17 deletions .github/workflows/ibis-main.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: Ibis

on:
@@ -7,6 +6,7 @@ on:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"
@@ -15,10 +15,14 @@ on:
paths-ignore:
- "docs/**"
- "mkdocs.yml"
- "**/*.md"
branches:
- master
- "*.x.x"

permissions:
contents: read

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
@@ -58,36 +62,33 @@ jobs:
custom_cache_key_element: no-backends-${{ steps.install_python.outputs.python-version }}

- name: install ${{ matrix.os }} system dependencies
if: ${{ matrix.os == 'ubuntu-latest' }}
if: matrix.os == 'ubuntu-latest'
run: |
set -euo pipefail
sudo apt-get update -y -q
sudo apt-get install -y -q build-essential graphviz krb5-config libkrb5-dev libgeos-dev
sudo apt-get install -y -q build-essential graphviz libgeos-dev
- name: install ${{ matrix.os }} system dependencies
if: ${{ matrix.os == 'windows-latest' }}
if: matrix.os == 'windows-latest'
run: choco install graphviz

- run: python -m pip install --upgrade pip 'poetry<1.2'

- name: install ibis
if: ${{ matrix.os == 'ubuntu-latest' }}
run: poetry install --extras all
- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
if: ${{ matrix.os == 'windows-latest' }}
run: poetry install --extras visualization
run: poetry install --without dev --without docs --extras visualization

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: run core tests
run: just ci-check -m core
- name: run all core tests and run benchmarks once parallel
if: matrix.os != 'windows-latest'
run: just ci-check -m "'core or benchmark'" -n auto

- name: run benchmarks once
if: ${{ matrix.os == 'ubuntu-latest' }}
# run benchmarks once to make sure they aren't broken
run: just ci-check -m benchmark
- name: run all core tests and run benchmarks once serial
if: matrix.os == 'windows-latest'
run: just ci-check -m "'core or benchmark'"

- name: upload code coverage
if: success()
Expand All @@ -101,3 +102,49 @@ jobs:
with:
name: no-backends-${{ matrix.os }}-${{ matrix.python-version }}
path: junit.xml

test_shapely_duckdb_import:
name: Test shapely and duckdb import
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.10"
steps:
- name: checkout
uses: actions/checkout@v3

- name: install python
uses: actions/setup-python@v4
id: install_python
with:
python-version: ${{ matrix.python-version }}

- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: shapely-duckdb-${{ steps.install_python.outputs.python-version }}

- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: shapely-duckdb-${{ steps.install_python.outputs.python-version }}

- name: install ${{ matrix.os }} system dependencies
run: |
set -euo pipefail
sudo apt-get update -y -q
sudo apt-get install -y -q build-essential libgeos-dev
- run: python -m pip install --upgrade pip 'poetry>=1.2'

- name: install ibis
# install duckdb and geospatial because of https://github.com/ibis-project/ibis/issues/4856
run: poetry install --without dev --without docs --without test --extras duckdb --extras geospatial

- name: check shapely and duckdb imports
run: poetry run python -c 'import shapely.geometry, duckdb'
19 changes: 2 additions & 17 deletions .github/workflows/ibis-tpch-queries.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: TPC-H

on:
@@ -35,8 +34,6 @@ jobs:
with:
python-version: "3.10"

- run: python -m pip install --upgrade pip coverage

- name: install tpc-queries dependencies
working-directory: tpc-queries
run: |
@@ -48,20 +45,8 @@ jobs:

- name: generate tpc-h data
working-directory: tpc-queries
run: python -c "import duckdb; con = duckdb.connect('tpch.ddb'); con.execute('CALL dbgen(sf=0.1);')"
run: python -c "import duckdb; duckdb.connect('tpch.ddb').execute('CALL dbgen(sf=0.1)')"

- name: run tpc-h queries
working-directory: tpc-queries
run: coverage run --rcfile=../.coveragerc ./runtpc -i ibis -i duckdb -d 'tpch.ddb' -b 'duckdb'

- name: generate coverage report
working-directory: tpc-queries
run: coverage xml --rcfile=../.coveragerc -o ./junit.xml

- name: upload code coverage
if: success()
uses: codecov/codecov-action@v3
with:
files: ./tpc-queries/junit.xml
fail_ci_if_error: true
flags: tpc,tpch,duckdb,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
run: ./runtpc -i ibis -i duckdb -d 'tpch.ddb' -b 'duckdb'
2 changes: 2 additions & 0 deletions .github/workflows/nix-skip-helper.yml
@@ -9,6 +9,7 @@ on:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
branches:
- master
@@ -18,6 +19,7 @@ on:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
branches:
- master
34 changes: 28 additions & 6 deletions .github/workflows/nix.yml
@@ -1,4 +1,3 @@
# vim: filetype=yaml
name: Nix

on:
@@ -7,7 +6,11 @@ on:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
- "!docs/**"
- "!mkdocs.yml"
- "!**/*.md"
branches:
- master
- "*.x.x"
@@ -16,7 +19,11 @@ on:
- "**/*.nix"
- "pyproject.toml"
- "poetry.lock"
- "flake.lock"
- "nix/**"
- "!docs/**"
- "!mkdocs.yml"
- "!**/*.md"
branches:
- master
- "*.x.x"
@@ -43,17 +50,32 @@ jobs:
uses: actions/checkout@v3

- name: install nix
uses: cachix/install-nix-action@v17
uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: nix build and test
run: |
set -euo pipefail
- name: nix build and run tests
continue-on-error: ${{ matrix.os == 'macos-latest' && matrix.python-version == '3.8' }}
run: nix build --keep-going --print-build-logs --file . --argstr python ${{ matrix.python-version }}
version='${{ matrix.python-version }}'
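# ${version//./} deletes the dots ("3.10" -> "310") to form the flake attribute name, e.g. ibis310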
nix build ".#ibis${version//./}" --fallback --keep-going --print-build-logs
# build the whole dev shell when pushing to upstream, so that the cachix cache is populated
- name: nix build devShell
if: github.event_name == 'push'
run: |
set -euo pipefail
version='${{ matrix.python-version }}'
host_system="$(nix eval --raw 'nixpkgs#stdenv.hostPlatform.system')"
flake=".#devShells.${host_system}.ibis${version//./}"
nix build "$flake" --fallback --keep-going --print-build-logs
5 changes: 2 additions & 3 deletions .github/workflows/release.yml
@@ -25,13 +25,12 @@ jobs:
fetch-depth: 0
token: ${{ steps.generate_token.outputs.token }}

- uses: cachix/install-nix-action@v17
- uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- uses: cachix/cachix-action@v10
- uses: cachix/cachix-action@v12
with:
name: ibis
extraPullNames: nix-community,poetry2nix
2 changes: 1 addition & 1 deletion .github/workflows/test-report.yml
@@ -12,7 +12,7 @@ concurrency: report

jobs:
report:
if: ${{ github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure' }}
if: github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure'
runs-on: ubuntu-latest
steps:
- name: Download artifact
74 changes: 38 additions & 36 deletions .github/workflows/update-deps.yml
@@ -1,81 +1,83 @@
name: Update Dependencies
name: Update Nix Flakes
on:
schedule:
# run at midnight on every third day of the week (Sunday, Wednesday, Saturday)
- cron: "0 0 * * */3"
workflow_dispatch:

jobs:
generate_updates:
get-flakes:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix: ${{ steps.get-flakes.outputs.matrix }}
steps:
- uses: actions/checkout@v3

- name: output dependency list
id: set-matrix
- uses: cachix/install-nix-action@v18
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: generate flake matrix
id: get-flakes
run: |
set -euo pipefail
deps="$(jq -rcM '{dep: keys}' < nix/sources.json)"
echo "::set-output name=matrix::$deps"
flakes="$(nix flake metadata --json | jq -rcM '.locks.nodes.root.inputs | {flake: keys}')"
echo "matrix=${flakes}" >> "$GITHUB_OUTPUT"
niv_update:
flake-update:
runs-on: ubuntu-latest
needs:
- generate_updates
- get-flakes
strategy:
matrix: ${{ fromJSON(needs.generate_updates.outputs.matrix) }}
fail-fast: false
matrix: ${{ fromJSON(needs.get-flakes.outputs.matrix) }}
steps:
- uses: actions/checkout@v3

- uses: cachix/install-nix-action@v17
- uses: cachix/install-nix-action@v18
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v10
uses: cachix/cachix-action@v12
with:
name: ibis
extraPullNames: nix-community,poetry2nix

- uses: cpcloud/niv-dep-info-action@v2.0.7
- uses: cpcloud/flake-dep-info-action@v2.0.10
id: get_current_commit
with:
dependency: ${{ matrix.dep }}
input: ${{ matrix.flake }}

- name: update ${{ matrix.dep }}
run: nix run 'nixpkgs#niv' -- update ${{ matrix.dep }}
- name: update ${{ matrix.flake }}
run: nix flake lock --update-input ${{ matrix.flake }}

- uses: cpcloud/niv-dep-info-action@v2.0.7
- uses: cpcloud/flake-dep-info-action@v2.0.10
id: get_new_commit
with:
dependency: ${{ matrix.dep }}
input: ${{ matrix.flake }}

- name: create an output indicating whether a PR is needed
id: needs_pr
run: |
set -euo pipefail
echo "::set-output name=did_change::${{ steps.get_current_commit.outputs.rev != steps.get_new_commit.outputs.rev }}"
run: echo "did_change=${{ steps.get_current_commit.outputs.rev != steps.get_new_commit.outputs.rev }}" >> "$GITHUB_OUTPUT"

- uses: tibdex/github-app-token@v1
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
if: fromJSON(steps.needs_pr.outputs.did_change)
id: generate_pr_token
with:
app_id: ${{ secrets.SQUAWK_BOT_APP_ID }}
private_key: ${{ secrets.SQUAWK_BOT_APP_PRIVATE_KEY }}

- uses: tibdex/github-app-token@v1
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
if: fromJSON(steps.needs_pr.outputs.did_change)
id: generate_pr_approval_token
with:
app_id: ${{ secrets.PR_APPROVAL_BOT_APP_ID }}
private_key: ${{ secrets.PR_APPROVAL_BOT_APP_PRIVATE_KEY }}

- uses: cpcloud/compare-commits-action@v5.0.23
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
- uses: cpcloud/compare-commits-action@v5.0.27
if: fromJSON(steps.needs_pr.outputs.did_change)
id: compare_commits
with:
token: ${{ steps.generate_pr_token.outputs.token }}
@@ -85,26 +87,26 @@ jobs:
include-merge-commits: false

- uses: peter-evans/create-pull-request@v4
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
if: fromJSON(steps.needs_pr.outputs.did_change)
id: create_pr
with:
token: ${{ steps.generate_pr_token.outputs.token }}
commit-message: "chore(deps/${{ matrix.dep }}): update"
branch: "create-pull-request/update-${{ matrix.dep }}"
commit-message: "chore(flake/${{ matrix.flake }}): `${{ steps.get_current_commit.outputs.short-rev }}` -> `${{ steps.get_new_commit.outputs.short-rev }}`"
branch: "create-pull-request/update-${{ matrix.flake }}"
delete-branch: true
author: "ibis-squawk-bot[bot] <ibis-squawk-bot[bot]@users.noreply.github.com>"
title: "chore(deps/${{ matrix.dep }}): update"
title: "chore(flake/${{ matrix.flake }}): `${{ steps.get_current_commit.outputs.short-rev }}` -> `${{ steps.get_new_commit.outputs.short-rev }}`"
body: ${{ steps.compare_commits.outputs.differences }}
labels: dependencies,autorebase:opt-in
labels: dependencies,nix,autorebase:opt-in

- uses: juliangruber/approve-pull-request-action@v1.1.1
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
- uses: juliangruber/approve-pull-request-action@v2.0.3
if: fromJSON(steps.needs_pr.outputs.did_change)
with:
github-token: ${{ steps.generate_pr_approval_token.outputs.token }}
number: ${{ steps.create_pr.outputs.pull-request-number }}

- uses: peter-evans/enable-pull-request-automerge@v2
if: ${{ fromJSON(steps.needs_pr.outputs.did_change) }}
if: fromJSON(steps.needs_pr.outputs.did_change)
with:
token: ${{ steps.generate_pr_token.outputs.token }}
pull-request-number: ${{ steps.create_pr.outputs.pull-request-number }}
73 changes: 0 additions & 73 deletions .github/workflows/update-setup-py.yml

This file was deleted.

4 changes: 3 additions & 1 deletion .gitignore
@@ -91,7 +91,9 @@ ci/udf/.ninja_log
ci/udf/build.ninja
junit.xml
spark-warehouse
docs/backends/support_matrix.csv
docs/backends/*support_matrix.csv
__pycache__
tags
.DS_Store
prof/
.hypothesis
105 changes: 60 additions & 45 deletions .pre-commit-config.yaml
@@ -1,47 +1,68 @@
ci:
autofix_commit_msg: "style: auto fixes from pre-commit.ci hooks"
autofix_prs: false
autoupdate_commit_msg: "style: pre-commit.ci autoupdate"
autoupdate_commit_msg: "chore(deps): pre-commit.ci autoupdate"
skip:
- actionlint
- prettier
- shellcheck
- shfmt
- just
- nixpkgs-fmt
- nix-linter
- statix
default_stages:
- commit
repos:
- repo: https://github.com/pycqa/isort
rev: 5.10.1
- repo: https://github.com/rhysd/actionlint
rev: v1.6.22
hooks:
- id: isort
- id: actionlint
- repo: https://github.com/psf/black
rev: 22.8.0
rev: 22.12.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.215
hooks:
- id: flake8
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
- id: ruff
# exclude a file (if configured to do so), even if it's passed in explicitly
args: ["--force-exclude"]
- repo: https://github.com/adrienverge/yamllint
rev: v1.28.0
hooks:
- id: absolufy-imports
- repo: https://github.com/asottile/pyupgrade
rev: v2.37.3
- id: yamllint
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: pyupgrade
exclude: setup.py
entry: pyupgrade --py38-plus
types:
- python
- id: check-added-large-files
- id: check-case-conflict
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
- id: check-vcs-permalinks
- id: destroyed-symlinks
- id: detect-aws-credentials
args: ["--allow-missing-credentials"]
- id: detect-private-key
- id: end-of-file-fixer
exclude: .+/snapshots/.+
- id: fix-byte-order-marker
- id: mixed-line-ending
- id: trailing-whitespace
args: ["--markdown-linebreak-ext=md"]
exclude: .+/snapshots/.+
- repo: meta
hooks:
- id: check-hooks-apply
- id: check-useless-excludes
- repo: local
hooks:
- id: prettier
name: prettier
language: system
entry: prettier --write
entry: prettier
args: ["--write"]
types_or:
- json
- toml
@@ -51,12 +72,9 @@ repos:
name: shellcheck
language: system
entry: shellcheck
files: \.sh$
types:
- shell
types_or:
- file
- sh
- shell
- ash
- bash
- bats
@@ -65,37 +83,34 @@ repos:
- id: shfmt
name: shfmt
language: system
entry: shfmt -i 2 -sr -s
files: \.sh$
types:
- file
entry: shfmt
args: ["-i", "2", "-sr", "-s"]
types_or:
- file
- sh
- shell
- ash
- bash
- bats
- dash
- ksh
- id: just
name: just
language: system
entry: just --fmt --unstable --check
entry: just
args: ["--fmt", "--unstable"]
files: ^justfile$
pass_filenames: false
types:
- file
- id: nixpkgs-fmt
name: nixpkgs-fmt
language: system
entry: nixpkgs-fmt
exclude: nix/sources\.nix
files: \.nix$
types:
- file
types_or:
- file
- id: nix-linter
name: nix-linter
- nix
- id: statix
name: statix
language: system
entry: nix-linter --check="no-FreeLetInFunc"
exclude: nix/sources\.nix
files: \.nix$
entry: statix
args: ["fix"]
pass_filenames: false
types:
- file
types_or:
- file
- nix
6 changes: 3 additions & 3 deletions .prettierignore
@@ -2,13 +2,13 @@
.direnv
.mypy_cache
.pytest_cache
nix/sources.json
result
result-*
.ruff_cache
result*
docs/release_notes.md
docs/overrides/*.html
docs/api/expressions/top_level.md
docs/SUMMARY.md
site
ci/udf/CMakeFiles
poetry.lock
ibis
9 changes: 2 additions & 7 deletions .releaserc.js
@@ -24,7 +24,7 @@ module.exports = {
{ type: "chore", hidden: true },
{ type: "docs", section: "Documentation" },
{ type: "style", hidden: true },
{ type: "refactor", hidden: true },
{ type: "refactor", section: "Refactors" },
{ type: "perf", section: "Performance" },
{ type: "test", hidden: true },
{ type: "depr", section: "Deprecations" },
@@ -78,12 +78,7 @@ module.exports = {
[
"@semantic-release/git",
{
assets: [
"pyproject.toml",
"docs/release_notes.md",
"setup.py",
"ibis/__init__.py",
],
assets: ["pyproject.toml", "docs/release_notes.md", "ibis/__init__.py"],
message: "chore(release): ${nextRelease.version}",
},
],
9 changes: 9 additions & 0 deletions .yamllint.yaml
@@ -0,0 +1,9 @@
---
extends: default

rules:
document-start: disable
line-length: disable # we already enforce this with prettier
truthy: disable
comments:
min-spaces-from-content: 1
File renamed without changes.
28 changes: 0 additions & 28 deletions LICENSES/odo.txt

This file was deleted.

183 changes: 159 additions & 24 deletions README.md
@@ -1,42 +1,177 @@
# Ibis: Expressive analytics in Python at any scale
# Ibis

| Service | Status |
| -------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Documentation | [![Documentation Status](https://img.shields.io/badge/docs-docs.ibis--project.org-blue.svg)](http://ibis-project.org) |
| Conda packages | [![Anaconda-Server Badge](https://anaconda.org/conda-forge/ibis-framework/badges/version.svg)](https://anaconda.org/conda-forge/ibis-framework) |
| PyPI | [![PyPI](https://img.shields.io/pypi/v/ibis-framework.svg)](https://pypi.org/project/ibis-framework) |
| Ibis CI | [![Build status](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml/badge.svg)](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml?query=branch%3Amaster) |
| Backend CI | [![Build status](https://github.com/ibis-project/ibis/actions/workflows/ibis-backends.yml/badge.svg)](https://github.com/ibis-project/ibis/actions/workflows/ibis-backends.yml?query=branch%3Amaster) |
| Coverage | [![Codecov branch](https://img.shields.io/codecov/c/github/ibis-project/ibis/master.svg)](https://codecov.io/gh/ibis-project/ibis) |
[![Documentation Status](https://img.shields.io/badge/docs-docs.ibis--project.org-blue.svg)](http://ibis-project.org)
[![Anaconda-Server Badge](https://anaconda.org/conda-forge/ibis-framework/badges/version.svg)](https://anaconda.org/conda-forge/ibis-framework)
[![PyPI](https://img.shields.io/pypi/v/ibis-framework.svg)](https://pypi.org/project/ibis-framework)
[![Build status](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml/badge.svg)](https://github.com/ibis-project/ibis/actions/workflows/ibis-main.yml?query=branch%3Amaster)
[![Build status](https://github.com/ibis-project/ibis/actions/workflows/ibis-backends.yml/badge.svg)](https://github.com/ibis-project/ibis/actions/workflows/ibis-backends.yml?query=branch%3Amaster)
[![Codecov branch](https://img.shields.io/codecov/c/github/ibis-project/ibis/master.svg)](https://codecov.io/gh/ibis-project/ibis)

Ibis is a Python library to help you write expressive analytics at any scale,
small to large. Its goal is to simplify analytical workflows and make you more
productive.
## What is Ibis?

Install Ibis from PyPI with:
Ibis is a Python library that provides a lightweight, universal interface for data wrangling. It helps Python users explore and transform data of any size, stored anywhere.

```sh
pip install ibis-framework
```
Ibis has three primary components:

or from conda-forge with
1. **A dataframe API for Python**.
This means that Python users can write Ibis code to manipulate tabular data.
2. **Interfaces to 10+ query engines.**
This means that wherever data is stored, data scientists can use Ibis as their API of choice to communicate with any of those query engines.
3. **Deferred execution**.
Ibis uses deferred execution, meaning that execution of code is pushed to the query engine.
This means users can execute at the speed of their backend, not their local computer; a short sketch follows below.

```sh
conda install ibis-framework -c conda-forge
```
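As a minimal sketch of deferred execution in practice (assuming the tutorial's `geography.db` SQLite database, introduced later in this README), building an expression runs no query; the backend only does work when `execute()` is called:

```python
import ibis

con = ibis.sqlite.connect('geo_dir/geography.db')
countries = con.table('countries')

# building the expression is instantaneous and runs no query...
expr = countries.filter(countries.continent == 'AS').count()

# ...the database does the work only when results are requested
n_asian_countries = expr.execute()
```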
## Why Use Ibis?

Ibis aims to be a future-proof solution for interacting with data using Python and can accomplish this goal through its main features:

- **Familiar API**: Ibis’s API design borrows from popular APIs like pandas and dplyr that most users already know and like to use.
- **Consistent syntax**: Ibis aims to be a universal Python API for tabular data, big or small (sketched after this list).
- **Deferred execution**: Ibis pushes code execution to the query engine and only moves required data into memory when it has to.
This leads to faster, more efficient analytics workflows.
- **Interactive mode**: Ibis also provides an interactive mode, in which users can quickly diagnose problems, do exploratory data analysis, and mock up workflows locally.
- **10+ supported backends**: Ibis supports multiple query engines and DataFrame APIs.
Use one interface to transform your data wherever it lives: from DataFrames in pandas to Parquet files through DuckDB to tables in BigQuery.
- **Minimize rewrites**: Depending on backend capabilities, teams can often keep most of their Ibis code the same whether a team changes anything on the backend, like increasing or decreasing computing power, changing the number or size of their databases, or switching backend engines.
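Here is a short, hedged sketch of what consistent syntax buys you (the connection targets are hypothetical): swapping engines changes the connection line, not the expression code.

```python
import ibis

# pick an engine; the expression code below is identical either way
con = ibis.duckdb.connect('local.ddb')         # hypothetical local file
# con = ibis.postgres.connect(host='prod-db')  # or a production database

t = con.table('countries')
expr = t.group_by('continent').aggregate(total_pop=t.population.sum())
```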

Ibis provides tools for interacting with the following systems:
## Common Use Cases

- **Speed up the path from prototype to production.**
Scale code written and tested locally to the cloud or to distributed systems with minimal rewrites.
- **Boost performance of existing Python or pandas code.**
For example, a general rule of thumb for pandas is "Have 5 to 10 times as much RAM as the size of your dataset".
When a dataset exceeds this threshold, in-memory frameworks like pandas can be slow.
Instead, using Ibis will significantly speed up your workflows because of its deferred execution.
Ibis also empowers you to switch to a faster database engine without changing much of your code.
- **Get rid of long, error-prone f-strings.**
Ibis provides one syntax for multiple query engines and dataframe APIs that lets you avoid learning new flavors of SQL or other framework-specific code.
Learn the syntax once and use that syntax anywhere.

## Backends

Ibis acts as a universal frontend to the following systems:

- [Apache Impala](https://ibis-project.org/docs/latest/backends/Impala/)
- [Google BigQuery](https://github.com/ibis-project/ibis-bigquery)
- [ClickHouse](https://ibis-project.org/docs/latest/backends/ClickHouse/)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)
- [Dask](https://ibis-project.org/docs/latest/backends/Dask/)
- [DuckDB](https://ibis-project.org/docs/latest/backends/DuckDB/)
- [Google BigQuery](https://ibis-project.org/docs/dev/backends/BigQuery/)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)
- [MySQL](https://ibis-project.org/docs/latest/backends/MySQL/)
- [Microsoft SQL Server](https://ibis-project.org/dev/latest/backends/MSSQL/)
- [Pandas](https://ibis-project.org/docs/latest/backends/Pandas/)
- [Polars](https://ibis-project.org/docs/dev/backends/Polars/)
- [PostgreSQL](https://ibis-project.org/docs/latest/backends/PostgreSQL/)
- [PySpark](https://ibis-project.org/docs/latest/backends/PySpark/)
- [Snowflake](https://ibis-project.org/docs/dev/backends/Snowflake) (experimental)
- [SQLite](https://ibis-project.org/docs/latest/backends/SQLite/)
- [Trino](https://ibis-project.org/docs/dev/backends/Trino/) (experimental)

The list of supported backends is continuously growing. Anyone can get involved
in adding new ones! Learn more about contributing to ibis in our contributing
docs at https://github.com/ibis-project/ibis/blob/master/docs/CONTRIBUTING.md

## Installation

Install Ibis from PyPI with:

```
pip install ibis-framework
```

Or from conda-forge with:

```
conda install ibis-framework -c conda-forge
```

(It’s a common mistake to `pip install ibis`. If you try to use Ibis and get errors early on, try uninstalling `ibis` and installing `ibis-framework`.)

For specific backends, include the backend name in brackets for PyPI:

```
pip install ibis-framework[duckdb]
```

Or use `ibis-$BACKEND`, where `$BACKEND` is the specific backend you want to use:

```
conda install ibis-postgres -c conda-forge
```

## Getting Started with Ibis

You can find a number of helpful tutorials on the Ibis website
[here](https://ibis-project.org/docs/latest/tutorial/01-Introduction-to-Ibis/),
including:

- [Introduction to Ibis](https://ibis-project.org/docs/latest/tutorial/01-Introduction-to-Ibis/)
- [Aggregating and Joining Data](https://ibis-project.org/docs/latest/tutorial/02-Aggregates-Joins/)
- [Creating and Inserting Data](https://ibis-project.org/docs/latest/tutorial/05-IO-Create-Insert-External-Data/)

You can also get started analyzing any dataset, anywhere with just a few lines of Ibis code.
Here’s an example of how to use Ibis with an SQLite database.

Download the SQLite database from the ibis-tutorial-data GCS (Google Cloud Storage) bucket, then connect to it using ibis.

```bash
# make a directory called geo_dir and add the geography database to that folder
mkdir -p geo_dir
curl -LsS -o geo_dir/geography.db 'https://storage.googleapis.com/ibis-tutorial-data/geography.db'
```

Connect to the database and show the available tables:

```python
>>> import ibis
>>> ibis.options.interactive = True
>>> connection = ibis.sqlite.connect('geo_dir/geography.db')
>>> connection.list_tables()
['countries', 'gdp', 'independence']
```

Choose the `countries` table and preview its first few rows:

```python
>>> countries = connection.table('countries')
>>> countries.head()
```

| | iso_alpha2 | iso_alpha3 | iso_numeric | fips | name | capital | area_km2 | population | continent |
| :-- | :--------- | :--------- | :---------- | :--- | :------------------- | :--------------- | :------- | :--------- | :-------- |
| 0 | AD | AND | 20 | AN | Andorra | Andorra la Vella | 468 | 84000 | EU |
| 1 | AE | ARE | 784 | AE | United Arab Emirates | Abu Dhabi | 82880 | 4975593 | AS |
| 2 | AF | AFG | 4 | AF | Afghanistan | Kabul | 647500 | 29121286 | AS |
| 3 | AG | ATG | 28 | AC | Antigua and Barbuda | St. Johns | 443 | 86754 | NA |
| 4 | AI | AIA | 660 | AV | Anguilla | The Valley | 102 | 13254 | NA |

```python
# Select the name, continent and population columns and filter them to only return countries from Asia

asian_countries = countries['name', 'continent', 'population'].filter(countries['continent'] == 'AS')
asian_countries.limit(6)
```

| | name | continent | population |
| :-- | :------------------- | :-------- | :--------- |
| 0 | United Arab Emirates | AS | 4975593 |
| 1 | Afghanistan | AS | 29121286 |
| 2 | Armenia | AS | 2968000 |
| 3 | Azerbaijan | AS | 8303512 |
| 4 | Bangladesh | AS | 156118464 |
| 5 | Bahrain | AS | 738004 |
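
Because execution is deferred, any expression can also be materialized as a pandas DataFrame when you need one; a quick sketch:

```python
>>> df = asian_countries.execute()  # run the query and fetch a pandas DataFrame
>>> type(df)
<class 'pandas.core.frame.DataFrame'>
```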

## Community and Contributing

Ibis is an open source project and welcomes contributions from anyone in the community.
Read more about how you can contribute [here](https://github.com/ibis-project/ibis/blob/master/docs/CONTRIBUTING.md).
We care about keeping our community welcoming for all to participate and have a [code of conduct](https://github.com/ibis-project/ibis/blob/master/docs/CODE_OF_CONDUCT.md) to ensure this.
The Ibis project is open sourced under the [Apache License](https://github.com/ibis-project/ibis/blob/master/LICENSE.txt).

Join our community here:

- Twitter: https://twitter.com/IbisData
- Gitter: https://gitter.im/ibis-dev/Lobby
- StackOverflow: https://stackoverflow.com/questions/tagged/ibis

Learn more about using the library at https://ibis-project.org.
For more information, visit our official website [here](https://ibis-project.org/docs/latest/).
File renamed without changes.
52 changes: 52 additions & 0 deletions ci/conda-lock/generate.sh
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

set -euo pipefail
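# usage: ./ci/conda-lock/generate.sh <python-version>
# e.g. ./ci/conda-lock/generate.sh 3.10 (the workflow passes matrix.python-version)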

python_version="${1}"
python_version_file="$(mktemp --suffix=.yml)"

{
echo 'name: conda-lock'
echo 'dependencies:'
echo " - python=${python_version}"
} > "${python_version_file}"

extras=(
-e bigquery
-e dask
-e duckdb
-e impala
-e mssql
-e mysql
-e pandas
-e polars
-e postgres
-e pyspark
-e snowflake
-e sqlite
-e trino
-e visualization
-e decompiler
)
template="conda-lock/{platform}-${python_version}.lock"
conda lock \
--file pyproject.toml \
--file "${python_version_file}" \
--kind explicit \
--platform linux-64 \
--platform osx-64 \
--filename-template "${template}" \
--filter-extras \
--mamba \
"${extras[@]}" -e clickhouse

conda lock \
--file pyproject.toml \
--file "${python_version_file}" \
--kind explicit \
--platform osx-arm64 \
--platform win-64 \
--filename-template "${template}" \
--filter-extras \
--mamba \
"${extras[@]}"
60 changes: 29 additions & 31 deletions ci/make_geography_db.py
@@ -13,12 +13,14 @@
#
# The source of the `independence` table has been obtained from
# [Wikipedia](https://en.wikipedia.org/wiki/List_of_national_independence_days).
from __future__ import annotations

import argparse
import datetime
import tempfile
from pathlib import Path
from typing import Any, Mapping

import click
import requests
import sqlalchemy as sa
import toolz
@@ -70,10 +72,7 @@ def make_geography_db(
table = sa.Table(
table_name,
metadata,
*(
sa.Column(col_name, col_type)
for col_name, col_type in schema
),
*(sa.Column(col_name, col_type) for col_name, col_type in schema),
)
table_columns = table.c.keys()
post_parse = POST_PARSE_FUNCTIONS.get(table_name, toolz.identity)
@@ -82,39 +81,38 @@ def make_geography_db(
table.create(bind=bind)
bind.execute(
table.insert().values(),
[
post_parse(dict(zip(table_columns, row)))
for row in data[table_name]
],
[post_parse(dict(zip(table_columns, row))) for row in data[table_name]],
)


@click.command(
help="Create the geography SQLite database for the Ibis tutorial"
)
@click.option(
"-d",
"--output-directory",
default=Path(tempfile.gettempdir()),
type=click.Path(dir_okay=True, path_type=Path),
help="The directory to which the database will be output",
show_default=True,
)
@click.option(
"-u",
"--input-data-url",
default="https://storage.googleapis.com/ibis-tutorial-data/geography.json",
type=str,
help="The URL containing the data with which to populate the database",
)
def main(output_directory: Path, input_data_url: str) -> None:
response = requests.get(input_data_url)
def main() -> None:
parser = argparse.ArgumentParser(
description="Create the geography SQLite database for the Ibis tutorial"
)
parser.add_argument(
"-d",
"--output-directory",
default=tempfile.gettempdir(),
type=str,
help="The directory to which the database will be output",
)
parser.add_argument(
"-u",
"--input-data-url",
default="https://storage.googleapis.com/ibis-tutorial-data/geography.json",
type=str,
help="The URL containing the data with which to populate the database",
)

args = parser.parse_args()

response = requests.get(args.input_data_url)
response.raise_for_status()
input_data = response.json()
db_path = output_directory.joinpath("geography.db")
db_path = Path(args.output_directory).joinpath("geography.db")
con = sa.create_engine(f"sqlite:///{db_path}")
make_geography_db(input_data, con)
click.echo(db_path)
print(db_path) # noqa: T201


if __name__ == "__main__":
32 changes: 18 additions & 14 deletions ci/release/dry_run.sh
@@ -1,36 +1,40 @@
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=channel:nixos-unstable-small --pure -p git jq nodejs nix -i bash
# shellcheck shell=bash
#!/usr/bin/env bash

set -euo pipefail

curdir="$PWD"
worktree="$(mktemp -d)"
branch="$(basename "$worktree")"

git worktree add "$worktree"
nix develop '.#release' -c git worktree add "$worktree"

function cleanup() {
cd "$curdir" || exit 1
git worktree remove --force "$worktree"
git worktree prune
git branch -D "$branch"
nix develop '.#release' -c git worktree remove --force "$worktree"
nix develop '.#release' -c git worktree prune
nix develop '.#release' -c git branch -D "$branch"
}

trap cleanup EXIT ERR

cd "$worktree" || exit 1

node <<< 'console.log(JSON.stringify(require("./.releaserc.js")))' |
jq '.plugins |= [.[] | select(.[0] != "@semantic-release/github")]' > .releaserc.json
nix develop '.#release' -c node <<< 'console.log(JSON.stringify(require("./.releaserc.js")))' |
nix develop '.#release' -c jq '.plugins |= [.[] | select(.[0] != "@semantic-release/github")]' > .releaserc.json

git rm .releaserc.js
nix develop '.#release' -c git rm .releaserc.js
nix develop '.#release' -c git add .releaserc.json
nix develop '.#release' -c git commit -m 'test: semantic-release dry run' --no-verify --no-gpg-sign

git add .releaserc.json
# If this is set then semantic-release will assume the release is running
# against a PR.
#
# Normally this would be fine, except that most of the release process that is
# useful to test is prevented from running, even in dry-run mode, so we `unset`
# this variable here and pass `--dry-run` ourselves
unset GITHUB_ACTIONS

git commit -m 'test: semantic-release dry run' --no-verify --no-gpg-sign

npx --yes \
nix develop '.#release' -c npx --yes \
-p semantic-release \
-p "@semantic-release/commit-analyzer" \
-p "@semantic-release/release-notes-generator" \
13 changes: 5 additions & 8 deletions ci/release/prepare.sh
@@ -1,18 +1,15 @@
#!/usr/bin/env nix-shell
#!nix-shell -p gnugrep unzip poetry nix -i bash
# shellcheck shell=bash
#!/usr/bin/env bash

set -euo pipefail

version="${1}"

# set version
poetry version "$version"

./dev/poetry2setup -o setup.py
nix develop '.#release' -c poetry version "$version"

# build artifacts
poetry build
nix develop '.#release' -c poetry build

# ensure that the built wheel has the correct version number
unzip -p "dist/ibis_framework-${version}-py3-none-any.whl" ibis/__init__.py | grep -q "__version__ = \"$version\""
nix develop '.#release' -c unzip -p "dist/ibis_framework-${version}-py3-none-any.whl" ibis/__init__.py | \
nix develop '.#release' -c grep -q "__version__ = \"$version\""
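The final pipeline asserts that the version baked into the built wheel matches the release version. The same check in Python, reading ibis/__init__.py straight out of the wheel (the version value here is hypothetical; the script receives it as $1):

import zipfile

version = "4.0.0"  # hypothetical; the script takes this as its first argument
wheel = f"dist/ibis_framework-{version}-py3-none-any.whl"

with zipfile.ZipFile(wheel) as zf:
    source = zf.read("ibis/__init__.py").decode()

assert f'__version__ = "{version}"' in source, "wheel version mismatch"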
6 changes: 2 additions & 4 deletions ci/release/publish.sh
@@ -1,7 +1,5 @@
#!/usr/bin/env nix-shell
#!nix-shell --pure --keep POETRY_PYPI_TOKEN_PYPI -p poetry -i bash
# shellcheck shell=bash
#!/usr/bin/env bash

set -euo pipefail

poetry publish
nix develop '.#release' -c poetry publish
6 changes: 2 additions & 4 deletions ci/release/run.sh
@@ -1,10 +1,8 @@
#!/usr/bin/env nix-shell
#!nix-shell -p cacert poetry git nodejs nix -i bash
# shellcheck shell=bash
#!/usr/bin/env bash

set -euo pipefail

npx --yes \
nix develop '.#release' -c npx --yes \
-p semantic-release \
-p "@semantic-release/commit-analyzer" \
-p "@semantic-release/release-notes-generator" \
14 changes: 6 additions & 8 deletions ci/release/verify.sh
@@ -1,19 +1,17 @@
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=channel:nixos-unstable-small --pure --keep POETRY_PYPI_TOKEN_PYPI -p dyff git poetry yj -i bash
# shellcheck shell=bash
#!/usr/bin/env bash

set -euo pipefail

dry_run="${1:-false}"

# verify pyproject.toml
poetry check
nix develop '.#release' -c poetry check

# verify that the lock file is up to date
# verify that the lock file matches pyproject.toml
#
# go through the rigamarole of yj and dyff because poetry is sensitive to
# PYTHONHASHSEED
bash ./dev/lockfile_diff.sh
# the lock file might not be the freshest, but that's okay: it need only be
# consistent with pyproject.toml
nix develop '.#release' -c poetry lock --check

# verify that a token to push to PyPI is available (enforced via set -u)
if [ "${dry_run}" = "false" ]; then
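`poetry lock --check` exits non-zero when poetry.lock no longer corresponds to pyproject.toml, which is exactly the consistency property the old yj/dyff dance approximated. A sketch of the same gate driven from Python, using the command line from the script above:

import subprocess
import sys

result = subprocess.run(
    ["nix", "develop", ".#release", "-c", "poetry", "lock", "--check"]
)
if result.returncode != 0:
    sys.exit("poetry.lock is out of sync with pyproject.toml")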
28 changes: 21 additions & 7 deletions ci/schema/clickhouse.sql
@@ -1,4 +1,4 @@
CREATE TABLE diamonds (
CREATE OR REPLACE TABLE diamonds (
carat Nullable(Float64),
cut Nullable(String),
color Nullable(String),
@@ -11,7 +11,7 @@ CREATE TABLE diamonds (
z Nullable(Float64)
) ENGINE = Memory;

CREATE TABLE batting (
CREATE OR REPLACE TABLE batting (
`playerID` Nullable(String),
`yearID` Nullable(Int64),
stint Nullable(Int64),
@@ -36,7 +36,7 @@ CREATE TABLE batting (
`GIDP` Nullable(Int64)
) ENGINE = Memory;

CREATE TABLE awards_players (
CREATE OR REPLACE TABLE awards_players (
`playerID` Nullable(String),
`awardID` Nullable(String),
`yearID` Nullable(Int64),
@@ -45,7 +45,7 @@ CREATE TABLE awards_players (
notes Nullable(String)
) ENGINE = Memory;

CREATE TABLE functional_alltypes (
CREATE OR REPLACE TABLE functional_alltypes (
`index` Nullable(Int64),
`Unnamed: 0` Nullable(Int64),
id Nullable(Int32),
@@ -63,13 +63,13 @@ CREATE TABLE functional_alltypes (
month Nullable(Int32)
) ENGINE = Memory;

CREATE TABLE tzone (
CREATE OR REPLACE TABLE tzone (
ts Nullable(DateTime),
key Nullable(String),
value Nullable(Float64)
) ENGINE = Memory;

CREATE TABLE IF NOT EXISTS array_types (
CREATE OR REPLACE TABLE array_types (
x Array(Nullable(Int64)),
y Array(Nullable(String)),
z Array(Nullable(Float64)),
@@ -87,7 +87,7 @@ INSERT INTO array_types VALUES
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]]);


CREATE TABLE IF NOT EXISTS struct (
CREATE OR REPLACE TABLE struct (
abc Tuple(
a Nullable(Float64),
b Nullable(String),
@@ -105,3 +105,17 @@ INSERT INTO struct VALUES
(tuple(2.0, NULL, 3)),
(tuple(NULL, NULL, NULL)),
(tuple(3.0, 'orange', NULL));

CREATE OR REPLACE TABLE map (kv Map(String, Nullable(Int64))) ENGINE = Memory;

INSERT INTO map VALUES
(map('a', 1, 'b', 2, 'c', 3)),
(map('d', 4, 'e', 5, 'c', 6));

CREATE OR REPLACE TABLE win (g String, x Int64, y Int64) ENGINE = Memory;
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
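A quick smoke test of the new map fixture, assuming the ClickHouse service from docker-compose.yml is up locally and the clickhouse-driver package is installed. Subscripting a Map with a missing key yields NULL here because the value type is Nullable(Int64):

from clickhouse_driver import Client

client = Client(host="localhost")
rows = client.execute("SELECT kv['a'], kv['d'] FROM map")
print(rows)  # expect [(1, None), (None, 4)] given the insertion order above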
42 changes: 24 additions & 18 deletions ci/schema/duckdb.sql
@@ -1,6 +1,4 @@
DROP TABLE IF EXISTS diamonds CASCADE;

CREATE TABLE diamonds (
CREATE OR REPLACE TABLE diamonds (
carat FLOAT,
cut TEXT,
color TEXT,
@@ -13,9 +11,7 @@ CREATE TABLE diamonds (
z FLOAT
);

DROP TABLE IF EXISTS batting CASCADE;

CREATE TABLE batting (
CREATE OR REPLACE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
stint BIGINT,
@@ -40,9 +36,7 @@ CREATE TABLE batting (
"GIDP" BIGINT
);

DROP TABLE IF EXISTS awards_players CASCADE;

CREATE TABLE awards_players (
CREATE OR REPLACE TABLE awards_players (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
@@ -51,9 +45,7 @@ CREATE TABLE awards_players (
notes TEXT
);

DROP TABLE IF EXISTS functional_alltypes CASCADE;

CREATE TABLE functional_alltypes (
CREATE OR REPLACE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
id INTEGER,
@@ -71,9 +63,7 @@ CREATE TABLE functional_alltypes (
month INTEGER
);

DROP TABLE IF EXISTS array_types CASCADE;

CREATE TABLE IF NOT EXISTS array_types (
CREATE OR REPLACE TABLE array_types (
x BIGINT[],
y TEXT[],
z DOUBLE PRECISION[],
@@ -91,9 +81,7 @@ INSERT INTO array_types VALUES
([4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]]);


DROP TABLE IF EXISTS struct CASCADE;

CREATE TABLE IF NOT EXISTS struct (
CREATE OR REPLACE TABLE struct (
abc STRUCT(a DOUBLE, b STRING, c BIGINT)
);

@@ -105,3 +93,21 @@ INSERT INTO struct VALUES
({'a': 2.0, 'b': NULL, 'c': 3}),
(NULL),
({'a': 3.0, 'b': 'orange', 'c': NULL});

CREATE OR REPLACE TABLE json_t (js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');

CREATE OR REPLACE TABLE win (g TEXT, x BIGINT, y BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
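The json_t fixture is easy to poke at with the duckdb Python package. A sketch (the INSTALL/LOAD pair may be unnecessary on builds that bundle the JSON extension):

import duckdb

con = duckdb.connect()
con.execute("INSTALL json")
con.execute("LOAD json")
con.execute("CREATE TABLE json_t (js JSON)")
con.execute("""INSERT INTO json_t VALUES ('{"a": [1,2,3,4], "b": 1}'), ('{"a":null,"b":2}')""")
# json_extract_string pulls a field out as text
print(con.execute("SELECT json_extract_string(js, '$.b') FROM json_t").fetchall())
# [('1',), ('2',)]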
84 changes: 84 additions & 0 deletions ci/schema/mssql.sql
@@ -0,0 +1,84 @@
DROP TABLE IF EXISTS diamonds;

CREATE TABLE diamonds (
carat FLOAT,
cut VARCHAR(MAX),
color VARCHAR(MAX),
clarity VARCHAR(MAX),
depth FLOAT,
"table" FLOAT,
price BIGINT,
x FLOAT,
y FLOAT,
z FLOAT
);

DROP TABLE IF EXISTS batting;

CREATE TABLE batting (
"playerID" VARCHAR(MAX),
"yearID" BIGINT,
stint BIGINT,
"teamID" VARCHAR(MAX),
"lgID" VARCHAR(MAX),
"G" BIGINT,
"AB" BIGINT,
"R" BIGINT,
"H" BIGINT,
"X2B" BIGINT,
"X3B" BIGINT,
"HR" BIGINT,
"RBI" BIGINT,
"SB" BIGINT,
"CS" BIGINT,
"BB" BIGINT,
"SO" BIGINT,
"IBB" BIGINT,
"HBP" BIGINT,
"SH" BIGINT,
"SF" BIGINT,
"GIDP" BIGINT
);

DROP TABLE IF EXISTS awards_players;

CREATE TABLE awards_players (
"playerID" VARCHAR(MAX),
"awardID" VARCHAR(MAX),
"yearID" BIGINT,
"lgID" VARCHAR(MAX),
tie VARCHAR(MAX),
notes VARCHAR(MAX)
);

DROP TABLE IF EXISTS functional_alltypes;

CREATE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
id INTEGER,
bool_col BIT,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INTEGER,
bigint_col BIGINT,
float_col REAL,
double_col DOUBLE PRECISION,
date_string_col VARCHAR(MAX),
string_col VARCHAR(MAX),
timestamp_col DATETIME2,
year INTEGER,
month INTEGER
);

CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");

DROP TABLE IF EXISTS win;

CREATE TABLE win (g VARCHAR(MAX), x BIGINT, y BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
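To poke at these fixtures from Python, one option is pymssql against the mssql service defined in docker-compose.yml below; the SA password comes from that file, while the database name is an assumption:

import pymssql

conn = pymssql.connect(
    server="localhost",
    user="sa",
    password="1bis_Testing!",  # MSSQL_SA_PASSWORD in docker-compose.yml
    database="master",  # an assumption; adjust to wherever the schema was loaded
)
cur = conn.cursor()
cur.execute("SELECT g, COUNT(*) FROM win GROUP BY g")
print(cur.fetchall())  # [('a', 5)]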
22 changes: 22 additions & 0 deletions ci/schema/mysql.sql
@@ -72,3 +72,25 @@ CREATE TABLE functional_alltypes (
) DEFAULT CHARACTER SET = utf8;

CREATE INDEX `ix_functional_alltypes_index` ON functional_alltypes (`index`);

DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');

DROP TABLE IF EXISTS win CASCADE;

CREATE TABLE win (g TEXT, x BIGINT, y BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
21 changes: 21 additions & 0 deletions ci/schema/postgresql.sql
@@ -183,3 +183,24 @@ CREATE INDEX IF NOT EXISTS idx_geo_geo_linestring ON geo USING GIST (geo_linestr
CREATE INDEX IF NOT EXISTS idx_geo_geo_multipolygon ON geo USING GIST (geo_multipolygon);
CREATE INDEX IF NOT EXISTS idx_geo_geo_point ON geo USING GIST (geo_point);
CREATE INDEX IF NOT EXISTS idx_geo_geo_polygon ON geo USING GIST (geo_polygon);

DROP TABLE IF EXISTS json_t CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');

DROP TABLE IF EXISTS win CASCADE;
CREATE TABLE win (g TEXT, x BIGINT, y BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
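Postgres gets the same json_t and win fixtures. With psycopg2 and the postgres service defined later in docker-compose.yml, field extraction is one operator away (js ->> 'b' returns text; the user and database come from the compose file, the password is an assumption):

import psycopg2

conn = psycopg2.connect(
    host="localhost",
    user="postgres",
    password="postgres",  # an assumption; not shown in the compose file
    dbname="ibis_testing",
)
cur = conn.cursor()
cur.execute("SELECT js ->> 'b' FROM json_t")
print(cur.fetchall())  # [('1',), ('2',), (None,), (None,), (None,), (None,)]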
110 changes: 110 additions & 0 deletions ci/schema/snowflake.sql
@@ -0,0 +1,110 @@
CREATE OR REPLACE TABLE diamonds (
"carat" FLOAT,
"cut" TEXT,
"color" TEXT,
"clarity" TEXT,
"depth" FLOAT,
"table" FLOAT,
"price" BIGINT,
"x" FLOAT,
"y" FLOAT,
"z" FLOAT
);

CREATE OR REPLACE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
"stint" BIGINT,
"teamID" TEXT,
"lgID" TEXT,
"G" BIGINT,
"AB" BIGINT,
"R" BIGINT,
"H" BIGINT,
"X2B" BIGINT,
"X3B" BIGINT,
"HR" BIGINT,
"RBI" BIGINT,
"SB" BIGINT,
"CS" BIGINT,
"BB" BIGINT,
"SO" BIGINT,
"IBB" BIGINT,
"HBP" BIGINT,
"SH" BIGINT,
"SF" BIGINT,
"GIDP" BIGINT
);

CREATE OR REPLACE TABLE awards_players (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
"lgID" TEXT,
"tie" TEXT,
"notes" TEXT
);

CREATE OR REPLACE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" SMALLINT,
"smallint_col" SMALLINT,
"int_col" INTEGER,
"bigint_col" BIGINT,
"float_col" REAL,
"double_col" DOUBLE PRECISION,
"date_string_col" TEXT,
"string_col" TEXT,
"timestamp_col" TIMESTAMP WITHOUT TIME ZONE,
"year" INTEGER,
"month" INTEGER
);

CREATE OR REPLACE TABLE array_types (
"x" ARRAY,
"y" ARRAY,
"z" ARRAY,
"grouper" TEXT,
"scalar_column" DOUBLE PRECISION,
"multi_dim" ARRAY
);

INSERT INTO array_types ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
SELECT [1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0, [[], [1, 2, 3], NULL] UNION
SELECT [4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0, [] UNION
SELECT [6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0, [NULL, [], NULL] UNION
SELECT [NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0, [[1], [2], [], [3, 4, 5]] UNION
SELECT [2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, NULL UNION
SELECT [4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]];

CREATE OR REPLACE TABLE struct ("abc" OBJECT);

INSERT INTO struct ("abc")
SELECT {'a': 1.0, 'b': 'banana', 'c': 2} UNION
SELECT {'a': 2.0, 'b': 'apple', 'c': 3} UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': 4} UNION
SELECT {'a': NULL, 'b': 'banana', 'c': 2} UNION
SELECT {'a': 2.0, 'b': NULL, 'c': 3} UNION
SELECT NULL UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': NULL};

CREATE OR REPLACE TABLE json_t ("js" VARIANT);

INSERT INTO json_t ("js")
SELECT parse_json('{"a": [1,2,3,4], "b": 1}') UNION
SELECT parse_json('{"a":null,"b":2}') UNION
SELECT parse_json('{"a":"foo", "c":null}') UNION
SELECT parse_json('null') UNION
SELECT parse_json('[42,47,55]') UNION
SELECT parse_json('[]');

CREATE OR REPLACE TABLE win ("g" TEXT, "x" BIGINT, "y" BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
21 changes: 21 additions & 0 deletions ci/schema/sqlite.sql
@@ -73,3 +73,24 @@ CREATE TABLE diamonds (
y FLOAT,
z FLOAT
);

DROP TABLE IF EXISTS json_t;

CREATE TABLE json_t (js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');

DROP TABLE IF EXISTS win;
CREATE TABLE win (g TEXT, x BIGINT, y BIGINT);
INSERT INTO win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);
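The win fixture exists to exercise window functions, and the expected frame semantics are easy to verify with the stdlib sqlite3 driver (SQLite 3.25+ is required for window-function support):

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE win (g TEXT, x BIGINT, y BIGINT)")
con.executemany(
    "INSERT INTO win VALUES (?, ?, ?)",
    [("a", 0, 3), ("a", 1, 2), ("a", 2, 0), ("a", 3, 1), ("a", 4, 1)],
)
# cumulative sum of y ordered by x within each group
query = "SELECT x, SUM(y) OVER (PARTITION BY g ORDER BY x) FROM win"
print(con.execute(query).fetchall())
# [(0, 3), (1, 5), (2, 5), (3, 6), (4, 7)]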
21 changes: 6 additions & 15 deletions codecov.yml
@@ -4,27 +4,18 @@ codecov:
# runs have finished. Should match with comment.after_n_builds below.
after_n_builds: 32

comment:
after_n_builds: 32
layout: "reach, diff, files"
behavior: default
require_changes: true # if true: only post the comment if coverage changes
require_base: false # [yes :: must have a base report to post]
require_head: true # [yes :: must have a head report to post]
branches: null
comment: false

ignore:
- "docs/**"

coverage:
status:
project:
default:
# Allow for slight decreases in code coverage, makes
# the coverage status checks a little less finicky
threshold: 0.5%
only_pulls: true
patch:
default:
threshold: 0.5%
target: auto
threshold: 1%
only_pulls: true
project:
default:
enabled: false
547 changes: 272 additions & 275 deletions conda-lock/linux-64-3.10.lock

Large diffs are not rendered by default.

563 changes: 280 additions & 283 deletions conda-lock/linux-64-3.8.lock

Large diffs are not rendered by default.

561 changes: 279 additions & 282 deletions conda-lock/linux-64-3.9.lock

Large diffs are not rendered by default.

535 changes: 266 additions & 269 deletions conda-lock/osx-64-3.10.lock

Large diffs are not rendered by default.

557 changes: 277 additions & 280 deletions conda-lock/osx-64-3.8.lock

Large diffs are not rendered by default.

555 changes: 276 additions & 279 deletions conda-lock/osx-64-3.9.lock

Large diffs are not rendered by default.

366 changes: 366 additions & 0 deletions conda-lock/osx-arm64-3.10.lock

Large diffs are not rendered by default.

367 changes: 367 additions & 0 deletions conda-lock/osx-arm64-3.8.lock

Large diffs are not rendered by default.

366 changes: 366 additions & 0 deletions conda-lock/osx-arm64-3.9.lock

Large diffs are not rendered by default.

532 changes: 269 additions & 263 deletions conda-lock/win-64-3.10.lock

Large diffs are not rendered by default.

558 changes: 283 additions & 275 deletions conda-lock/win-64-3.8.lock

Large diffs are not rendered by default.

558 changes: 282 additions & 276 deletions conda-lock/win-64-3.9.lock

Large diffs are not rendered by default.

78 changes: 11 additions & 67 deletions default.nix
@@ -1,69 +1,13 @@
{ python ? "3.10"
, doCheck ? true
, backends ? [
"dask"
"datafusion"
"duckdb"
"pandas"
"sqlite"
]
}:
let
pkgs = import ./nix;
drv =
{ poetry2nix
, python
, lib
}:

(import
(
let
buildInputs = with pkgs; [ gdal_2 graphviz-nox proj sqlite ];
checkInputs = buildInputs;
lock = builtins.fromJSON (builtins.readFile ./flake.lock);
in
poetry2nix.mkPoetryApplication {
inherit python;

projectDir = ./.;
src = pkgs.gitignoreSource ./.;

overrides = pkgs.poetry2nix.overrides.withDefaults (
import ./poetry-overrides.nix
);

preConfigure = ''
rm setup.py
'';

inherit buildInputs checkInputs;

preCheck = ''
set -euo pipefail
tempdir="$(mktemp -d)"
cp -r ${pkgs.ibisTestingData}/* "$tempdir"
find "$tempdir" -type f -exec chmod u+rw {} +
find "$tempdir" -type d -exec chmod u+rwx {} +
ln -s "$tempdir" ci/ibis-testing-data
'';

checkPhase = ''
set -euo pipefail
runHook preCheck
pytest --numprocesses auto --dist loadgroup -m '${lib.concatStringsSep " or " backends} or core'
runHook postCheck
'';

inherit doCheck;

pythonImportsCheck = [ "ibis" ] ++ (map (backend: "ibis.backends.${backend}") backends);
};
in
pkgs.callPackage drv {
python = pkgs."python${builtins.replaceStrings [ "." ] [ "" ] python}";
}
fetchTarball {
url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
sha256 = lock.nodes.flake-compat.locked.narHash;
}
)
{
src = ./.;
}).defaultNix
21 changes: 0 additions & 21 deletions dev/lockfile_diff.sh

This file was deleted.

15 changes: 0 additions & 15 deletions dev/poetry2setup

This file was deleted.

55 changes: 0 additions & 55 deletions dev/poetry2setup.py

This file was deleted.

14 changes: 0 additions & 14 deletions dev/update-lock-files.sh

This file was deleted.

64 changes: 61 additions & 3 deletions docker-compose.yml
@@ -1,7 +1,7 @@
version: "3.4"
services:
clickhouse:
image: clickhouse/clickhouse-server:22.8.5.29-alpine
image: clickhouse/clickhouse-server:22.12.2.25-alpine
ports:
- 8123:8123
- 9000:9000
@@ -48,7 +48,7 @@ services:
- CMD
- pg_isready
timeout: 5s
image: postgres:13.8-alpine
image: postgres:13.9-alpine
networks:
- impala
kudu:
@@ -89,7 +89,7 @@ services:
- mysqladmin
- ping
timeout: 5s
image: mariadb:10.9.2
image: mariadb:10.10.2
ports:
- 3306:3306
networks:
@@ -101,6 +101,7 @@ services:
POSTGRES_DB: ibis_testing
POSTGRES_USER: postgres
build: ./docker/postgres
image: ibis-postgres
healthcheck:
interval: 10s
retries: 3
@@ -112,9 +113,66 @@ services:
- 5432:5432
networks:
- postgres
mssql:
environment:
MSSQL_SA_PASSWORD: 1bis_Testing!
ACCEPT_EULA: "Y"
healthcheck:
interval: 10s
retries: 3
test:
- CMD-SHELL
- /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "SELECT 1 AS one"
timeout: 10s
build:
context: .
dockerfile: ./docker/mssql/Dockerfile
image: ibis-mssql
ports:
- 1433:1433
networks:
- mssql
trino-postgres:
user: postgres
environment:
POSTGRES_PASSWORD: postgres
healthcheck:
interval: 10s
retries: 3
test:
- CMD
- pg_isready
timeout: 5s
build: ./docker/postgres
image: ibis-postgres
ports:
- 5433:5432
networks:
- trino
trino:
depends_on:
- trino-postgres
healthcheck:
interval: 5s
retries: 6
test:
- CMD-SHELL
- trino --execute 'SELECT 1 AS one'
timeout: 30s
image: trinodb/trino:405
ports:
- 8080:8080
networks:
- trino
volumes:
- $PWD/docker/trino/catalog/postgresql.properties:/etc/trino/catalog/postgresql.properties:ro
- $PWD/docker/trino/catalog/memory.properties:/etc/trino/catalog/memory.properties:ro
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

networks:
impala:
mysql:
mssql:
clickhouse:
postgres:
trino:
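Once `docker compose up trino` reports healthy, the memory and postgresql catalogs wired in above are queryable. A sketch with the trino Python client (the client choice is an assumption; any Trino driver works):

import trino

conn = trino.dbapi.connect(host="localhost", port=8080, user="ibis")
cur = conn.cursor()
cur.execute("SHOW CATALOGS")
print(cur.fetchall())  # expect memory and postgresql among the rows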
2 changes: 2 additions & 0 deletions docker/mssql/Dockerfile
@@ -0,0 +1,2 @@
FROM mcr.microsoft.com/mssql/server:2022-latest
COPY ./ci/ibis-testing-data /data
2 changes: 1 addition & 1 deletion docker/postgres/Dockerfile
@@ -1,2 +1,2 @@
FROM postgis/postgis:14-3.2-alpine
FROM postgis/postgis:15-3.3-alpine
RUN apk add postgresql14-plpython3
1 change: 1 addition & 0 deletions docker/trino/catalog/memory.properties
@@ -0,0 +1 @@
connector.name=memory
5 changes: 5 additions & 0 deletions docker/trino/catalog/postgresql.properties
@@ -0,0 +1,5 @@
connector.name=postgresql
connection-url=jdbc:postgresql://trino-postgres:5432/ibis_testing
connection-user=postgres
connection-password=postgres
postgresql.array-mapping=AS_ARRAY
16 changes: 16 additions & 0 deletions docker/trino/jvm.config
@@ -0,0 +1,16 @@
-server
-Xmx2G
-XX:InitialRAMPercentage=80
-XX:MaxRAMPercentage=80
-XX:G1HeapRegionSize=32M
-XX:+ExplicitGCInvokesConcurrent
-XX:+ExitOnOutOfMemoryError
-XX:+HeapDumpOnOutOfMemoryError
-XX:-OmitStackTraceInFastThrow
-XX:ReservedCodeCacheSize=512M
-XX:PerMethodRecompilationCutoff=10000
-XX:PerBytecodeRecompilationCutoff=10000
-Djdk.attach.allowAttachSelf=true
-Djdk.nio.maxCachedBufferSize=2000000
-XX:+UnlockDiagnosticVMOptions
-XX:+UseAESCTRIntrinsics
76 changes: 40 additions & 36 deletions docs/SUMMARY.md
@@ -1,37 +1,41 @@
* [Home](index.md)
* [Blog](blog/)
* Tutorial
* [Introduction to Ibis](tutorial/01-Introduction-to-Ibis.ipynb)
* [Aggregating and Joining](tutorial/02-Aggregates-Joins.ipynb)
* [Lazy Mode and Logging](tutorial/03-Expressions-Lazy-Mode-Logging.ipynb)
* [More Value Expressions](tutorial/04-More-Value-Expressions.ipynb)
* [Creating and Inserting External Data](tutorial/05-IO-Create-Insert-External-Data.ipynb)
* [Complex Filtering](tutorial/06-ComplexFiltering.ipynb)
* [Analytics Tools](tutorial/07-Analytics-Tools.ipynb)
* [Geospatial Analysis](tutorial/08-Geospatial-Analysis.ipynb)
* [Ibis for SQL Programmers](ibis-for-sql-programmers.ipynb)
* [User Guide](user_guide/)
* [Execution Backends](backends/)
* [How To Guide](how_to/)
* [Contribute](contribute/)
* [Code of Conduct](CODE_OF_CONDUCT.md)
* Community
* [About](about/)
* [Ask a question (StackOverflow)](https://stackoverflow.com/questions/tagged/ibis)
* [Chat (Gitter)](https://gitter.im/ibis-dev/Lobby)
* community/*.md
* [Release Notes](release_notes.md)
* API Reference
* [Expressions](api/expressions/index.md)
* [Top Level](api/expressions/top_level.md)
* [Tables](api/expressions/tables.md)
* [Generic Values](api/expressions/generic.md)
* [Numeric + Boolean](api/expressions/numeric.md)
* [Strings](api/expressions/strings.md)
* [Timestamps + Dates + Times](api/expressions/timestamps.md)
* [Collections](api/expressions/collections.md)
* [Geospatial](api/expressions/geospatial.md)
* [Data Types](api/datatypes.md)
* [Schemas](api/schemas.md)
* [Backend Interfaces](api/backends/)
* [Configuration](api/config.md)
* [Install](install.md)
* [Docs](docs/index.md)
* [Tutorial](tutorial/index.md)
* [Getting Started](tutorial/01-Introduction-to-Ibis.ipynb)
* [Aggregating and Joining](tutorial/02-Aggregates-Joins.ipynb)
* [Lazy Mode and Logging](tutorial/03-Expressions-Lazy-Mode-Logging.ipynb)
* [More Value Expressions](tutorial/04-More-Value-Expressions.ipynb)
* [Creating and Inserting External Data](tutorial/05-IO-Create-Insert-External-Data.ipynb)
* [Complex Filtering](tutorial/06-ComplexFiltering.ipynb)
* [Analytics Tools](tutorial/07-Analytics-Tools.ipynb)
* [Geospatial Analysis](tutorial/rendered/08-Geospatial-Analysis.ipynb)
* [How To Guide](how_to/)
* [Execution Backends](backends/)
* [User Guide](user_guide/)
* API Reference
* [Expressions](api/expressions/index.md)
* [Top Level](api/expressions/top_level.md)
* [Tables](api/expressions/tables.md)
* [Generic Values](api/expressions/generic.md)
* [Numeric + Boolean](api/expressions/numeric.md)
* [Strings](api/expressions/strings.md)
* [Timestamps + Dates + Times](api/expressions/timestamps.md)
* [Collections](api/expressions/collections.md)
* [Geospatial](api/expressions/geospatial.md)
* [Data Types](api/datatypes.md)
* [Schemas](api/schemas.md)
* [Backend Interfaces](api/backends/)
* [Configuration](api/config.md)
* [Ibis for SQL Programmers](ibis-for-sql-programmers.ipynb)
* [Ibis for pandas Users](ibis-for-pandas-users.ipynb)
* [Backend Operations Matrix](backends/support_matrix.md)
* [Releases](release_notes.md)
* Blog
* [Ibis v4.0.0](blog/ibis-version-4.0.0-release.md)
* [Analyzing Ibis's CI Data with Ibis](blog/rendered/ci-analysis.ipynb)
* [ffill and bfill using ibis](blog/ffill-and-bfill-using-ibis.md)
* [Ibis v3.1.0](blog/Ibis-version-3.1.0-release.md)
* [Ibis v3.0.0](blog/Ibis-version-3.0.0-release.md)
* [Community](community/index.md)
* [Contribute](community/contribute/)