76 changes: 64 additions & 12 deletions .github/workflows/ibis-backends-cloud.yml
@@ -11,16 +11,15 @@ on:
- ".envrc"
branches:
- main
pull_request_target:
types:
- labeled

permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
# rate limit while restricting GITHUB_TOKEN permissions elsewhere
contents: read

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

env:
FORCE_COLOR: "1"
SQLALCHEMY_WARN_20: "1"
@@ -32,38 +31,85 @@ jobs:
# only a single bigquery or snowflake run at a time, otherwise test data is
# clobbered by concurrent runs
concurrency:
group: ${{ matrix.backend.name }}-${{ matrix.python-version }}
group: ${{ matrix.backend.title }}-${{ matrix.python-version }}-${{ github.event.label.name || 'ci-run-cloud' }}
cancel-in-progress: false

runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event.label.name == 'ci-run-cloud'
strategy:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.11"
backend:
- name: bigquery
title: BigQuery
- name: snowflake
title: Snowflake
extras:
- snowflake
include:
- python-version: "3.9"
backend:
name: bigquery
title: BigQuery
extras:
- bigquery
- python-version: "3.11"
backend:
name: bigquery
title: BigQuery
extras:
- bigquery
- geospatial
- python-version: "3.10"
backend:
name: snowflake
title: Snowflake + Snowpark
key: snowflake-snowpark
extras:
- snowflake
steps:
- name: checkout
uses: actions/checkout@v4
if: github.event.label.name != 'ci-run-cloud'

- name: install poetry
run: pipx install 'poetry==1.7.1'
- name: checkout
if: github.event.label.name == 'ci-run-cloud'
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}

- uses: actions/create-github-app-token@v1.9.3
id: generate_token
with:
app-id: ${{ secrets.DOCS_BOT_APP_ID }}
private-key: ${{ secrets.DOCS_BOT_APP_PRIVATE_KEY }}

- name: reset cloud ci run label
uses: actions-ecosystem/action-remove-labels@v1
if: github.event.label.name == 'ci-run-cloud'
with:
labels: ci-run-cloud
github_token: ${{ steps.generate_token.outputs.token }}

- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
cache: poetry

- name: install poetry
run: pip install 'poetry==1.8.2'

- name: install additional deps
if: matrix.backend.key == 'snowflake-snowpark'
run: poetry add snowflake-snowpark-python --python="==${{ steps.install_python.outputs.python-version }}"

- name: install ibis
run: poetry install --without dev --without docs --extras ${{ matrix.backend.name }}
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"

- uses: extractions/setup-just@v1
- uses: extractions/setup-just@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

@@ -94,11 +140,17 @@ jobs:
SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}

- name: enable snowpark testing
if: matrix.backend.key == 'snowflake-snowpark'
run: echo "SNOWFLAKE_SNOWPARK=1" >> "$GITHUB_ENV"

- name: "run parallel tests: ${{ matrix.backend.name }}"
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup

- name: upload code coverage
if: success()
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
token: ${{ secrets.CODECOV_TOKEN }}
7 changes: 7 additions & 0 deletions .github/workflows/ibis-backends-skip-helper.yml
@@ -10,6 +10,7 @@ on:
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
- ".codespellrc"
branches:
- main
- "*.x.x"
@@ -20,6 +21,7 @@ on:
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
- ".codespellrc"
branches:
- main
- "*.x.x"
@@ -33,11 +35,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
test_pyspark:
runs-on: ubuntu-latest
steps:
- run: echo "No build required"
backends:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
needs:
- test_backends_min_version
- test_backends
- test_pyspark
steps:
- run: exit 0
384 changes: 185 additions & 199 deletions .github/workflows/ibis-backends.yml

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions .github/workflows/ibis-benchmarks.yml
@@ -0,0 +1,84 @@
name: Benchmarks

on:
push:
branches:
- main
- "*.x.x"
merge_group:

# since we're writing to cloud storage, we don't want to have multiple
# instances of this job running at one time
concurrency: benchmarks-${{ github.repository }}

permissions:
# increase the rate limit for github operations, but limit token permissions
# to read-only
contents: read

jobs:
benchmarks:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: "3.11"

- name: install poetry
run: pip install 'poetry==1.8.2'

- name: install system dependencies
run: sudo apt-get install -qq -y build-essential libgeos-dev freetds-dev unixodbc-dev

- name: install ibis
run: poetry install --without dev --without docs --all-extras

- name: make benchmark output dir
run: mkdir .benchmarks

- name: benchmark
run: poetry run pytest --benchmark-enable --benchmark-json .benchmarks/output.json ibis/tests/benchmarks

- uses: google-github-actions/auth@v2
with:
credentials_json: ${{ secrets.GCP_CREDENTIALS }}

- uses: google-github-actions/setup-gcloud@v2

- name: show gcloud info
run: gcloud info

- name: download the latest duckdb release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
gh release download -R duckdb/duckdb --pattern 'duckdb_cli-linux-amd64.zip'
unzip duckdb_cli-linux-amd64.zip
- name: convert json data to parquet
run: |
set -euo pipefail
# sort json keys
jq --sort-keys -rcM < "$PWD/.benchmarks/output.json" > output.json
# connect to a file to allow spilling to disk
./duckdb json2parquet.ddb <<EOF
COPY (
SELECT * FROM read_ndjson_auto('output.json', maximum_object_size=2**27)
) TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD)
EOF
- name: copy data to gcs
run: |
set -euo pipefail
timestamp="$(date --iso-8601=ns --utc | tr ',' '.')"
gsutil cp output.parquet "gs://ibis-benchmark-data/ci/${timestamp}.parquet"
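
> The convert step above shells out to the duckdb CLI via a heredoc, connecting to a file-backed database so large NDJSON records can spill to disk. For prototyping, a minimal sketch of the same conversion with the duckdb Python package (same file names as the workflow) might look like this:

```python
# Sketch: the workflow's NDJSON -> Parquet conversion, done via the duckdb
# Python package instead of the CLI. Assumes output.json exists in the CWD.
import duckdb

# connect to a file-backed database so large objects can spill to disk
con = duckdb.connect("json2parquet.ddb")
con.execute(
    """
    COPY (
        SELECT * FROM read_ndjson_auto('output.json', maximum_object_size=2**27)
    ) TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD)
    """
)
con.close()
```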
252 changes: 0 additions & 252 deletions .github/workflows/ibis-docs-lint.yml

This file was deleted.

78 changes: 78 additions & 0 deletions .github/workflows/ibis-docs-main.yml
@@ -0,0 +1,78 @@
name: Docs main

on:
push:
branches:
- main
merge_group:

# only a single docs job that pushes to `main` can run at any given time
concurrency: docs-${{ github.repository }}

permissions:
# increase the rate limit for github operations, but limit token permissions
# to read-only
contents: read

jobs:
docs:
runs-on: ubuntu-latest
steps:
- name: install nix
uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: checkout
uses: actions/checkout@v4

- name: run doctests
# keep HOME because duckdb (which we use for doctests) wants to use
# that for extensions
run: nix develop --ignore-environment --keep HOME --keep HYPOTHESIS_PROFILE -c just doctest

- name: build api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose

- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render

- name: build jupyterlite
run: nix develop --ignore-environment --keep HOME -c just build-jupyterlite

- name: check that all frozen computations were done before push
run: git diff --exit-code --stat

- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress

- name: deploy docs
run: nix develop --ignore-environment --keep NETLIFY_AUTH_TOKEN -c just docs-deploy
env:
NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}

# Upload the search index to Algolia
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Install Algolia API Dependencies
run: |
python -m pip install --upgrade algoliasearch
- name: Create and Upload Index
run: |
python .github/workflows/upload-algolia.py
env:
ALGOLIA_WRITE_API_KEY: ${{ secrets.ALGOLIA_WRITE_API_KEY }}
ALGOLIA_APP_ID: HS77W8GWM1
ALGOLIA_INDEX: prod_ibis
57 changes: 57 additions & 0 deletions .github/workflows/ibis-docs-pr.yml
@@ -0,0 +1,57 @@
name: Docs PR

on:
pull_request:
branches:
- main
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

permissions:
# increase the rate limit for github operations, but limit token permissions
# to read-only
contents: read

jobs:
docs:
runs-on: ubuntu-latest
steps:
- name: install nix
uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: checkout
uses: actions/checkout@v4

- name: run doctest
# keep HOME because duckdb (which we use for doctests) wants to use
# that for extensions
run: nix develop --ignore-environment --keep HOME --keep HYPOTHESIS_PROFILE -c just doctest

- name: generate api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose

- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render

- name: build jupyterlite
run: nix develop --ignore-environment --keep HOME -c just build-jupyterlite

- name: check that all frozen computations were done before push
run: git diff --exit-code --stat

- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress
103 changes: 103 additions & 0 deletions .github/workflows/ibis-lint.yml
@@ -0,0 +1,103 @@
name: Lint

on:
push:
branches:
- main
- "*.x.x"
pull_request:
branches:
- main
- "*.x.x"
merge_group:

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

permissions:
# increase the rate limit for github operations, but limit token permissions
# to read-only
contents: read

jobs:
lint:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4

- name: install nix
uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

# run against the full shell.nix on push so it gets pushed to cachix
- name: pre-commit checks
run: nix develop '.#preCommit' --ignore-environment --keep-going -c pre-commit run --all-files --show-diff-on-failure --color=always

release_notes_spellcheck:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: install nix
uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: check generated release notes spelling
run: nix run '.#check-release-notes-spelling'

simulate_release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix

- name: Configure git info
run: |
set -euo pipefail
# not incredibly important what user we use here
#
# we're making a commit in a temporary worktree that is thrown away
# if the process exits successfully
#
# git requires user information to make commits
git config user.name 'ibis-squawk-bot[bot]'
git config user.email 'ibis-squawk-bot[bot]@users.noreply.github.com'
- name: run semantic-release
run: ./ci/release/dry_run.sh
62 changes: 26 additions & 36 deletions .github/workflows/ibis-main.yml
@@ -49,19 +49,19 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
steps:
- name: checkout
uses: actions/checkout@v4

- name: install poetry
run: pipx install 'poetry==1.7.1'

- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
cache: poetry

- name: install poetry
run: pip install 'poetry==1.8.2'

- name: install ${{ matrix.os }} system dependencies
if: matrix.os == 'ubuntu-latest'
@@ -78,49 +78,43 @@ jobs:
- name: install ibis
run: poetry install --without dev --without docs --extras "visualization decompiler"

- uses: extractions/setup-just@v1
- uses: extractions/setup-just@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: run all core tests and run benchmarks once parallel
if: matrix.os != 'windows-latest'
run: just ci-check -m "'core or benchmark'" -n auto
run: just ci-check -m "'core or benchmarks'" --numprocesses auto

- name: run all core tests and run benchmarks once serial
if: matrix.os == 'windows-latest'
run: just ci-check -m "'core or benchmark'"
run: just ci-check -m "'core or benchmarks'"

- name: upload code coverage
if: success()
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: core,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
token: ${{ secrets.CODECOV_TOKEN }}

test_shapely_duckdb_import:
name: Test shapely and duckdb import
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.11"
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4

- name: install poetry
run: pipx install 'poetry==1.7.1'

- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
cache: poetry
python-version: "3.12"

- name: install ${{ matrix.os }} system dependencies
- name: install poetry
run: pip install 'poetry==1.8.2'

- name: install system dependencies
run: |
set -euo pipefail
@@ -135,15 +129,10 @@ jobs:
run: poetry run python -c 'import shapely.geometry, duckdb'

test_doctests:
# FIXME(kszucs): re-enable this build
if: false
name: Doctests
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
python-version:
- "3.11"
runs-on: ubuntu-latest
steps:
- name: install system dependencies
run: |
@@ -155,20 +144,19 @@ jobs:
- name: checkout
uses: actions/checkout@v4

- name: install poetry
run: pipx install 'poetry==1.7.1'

- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
cache: poetry
python-version: "3.12"

- name: install poetry
run: pip install 'poetry==1.8.2'

- name: install ibis with all extras
run: poetry install --without dev --without docs --extras all
run: poetry install --without dev --without docs --all-extras

- uses: extractions/setup-just@v1
- uses: extractions/setup-just@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

@@ -177,6 +165,8 @@ jobs:

- name: upload code coverage
if: success()
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: core,doctests,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
token: ${{ secrets.CODECOV_TOKEN }}
4 changes: 1 addition & 3 deletions .github/workflows/nix-skip-helper.yml
@@ -36,8 +36,6 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
include:
- os: macos-latest
python-version: "3.10"
- "3.12"
steps:
- run: echo "No build required"
3 changes: 2 additions & 1 deletion .github/workflows/nix.yml
@@ -37,12 +37,13 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
steps:
- name: checkout
uses: actions/checkout@v4

- name: install nix
uses: cachix/install-nix-action@v25
uses: cachix/install-nix-action@v26
with:
nix_path: nixpkgs=channel:nixos-unstable-small
extra_nix_config: |
2 changes: 1 addition & 1 deletion .github/workflows/pre-release.yml
@@ -33,7 +33,7 @@ jobs:
run: python -m pip install --upgrade pip

- name: install poetry
run: python -m pip install 'poetry==1.7.1' poetry-dynamic-versioning
run: python -m pip install 'poetry==1.8.2' poetry-dynamic-versioning

- name: compute ibis version
id: get_version
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
@@ -14,18 +14,18 @@ jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: tibdex/github-app-token@v2
- uses: actions/create-github-app-token@v1.9.3
id: generate_token
with:
app_id: ${{ secrets.APP_ID }}
private_key: ${{ secrets.APP_PRIVATE_KEY }}
app-id: ${{ secrets.APP_ID }}
private-key: ${{ secrets.APP_PRIVATE_KEY }}

- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ steps.generate_token.outputs.token }}

- uses: cachix/install-nix-action@v25
- uses: cachix/install-nix-action@v26
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
60 changes: 60 additions & 0 deletions .github/workflows/upload-algolia.py
@@ -0,0 +1,60 @@
from __future__ import annotations # noqa: INP001

import json
import os
from urllib.request import urlopen

from algoliasearch.search_client import SearchClient

api_key = os.environ["ALGOLIA_WRITE_API_KEY"]
app_id = os.environ["ALGOLIA_APP_ID"]
index_name = os.environ["ALGOLIA_INDEX"]


def truncate_string(string, max_size):
# Encode the string to bytes using UTF-8 encoding
encoded_string = string.encode("utf-8")

# Truncate the bytes to ensure the size is smaller than max_size
truncated_bytes = encoded_string[: max_size - 1]

# Decode the truncated bytes back to string
truncated_string = truncated_bytes.decode("utf-8", errors="ignore")

return truncated_string


def main():
client = SearchClient.create(app_id, api_key)
index = client.init_index(index_name)

# Download the index generated by quarto from the ibis website
with urlopen("https://ibis-project.org/search.json") as response:
search = json.loads(response.read())

# According to the Algolia docs, on the Build plan each record (in our case
# this is search[i]) has a limit of 10KB
# (see https://support.algolia.com/hc/en-us/articles/4406981897617-Is-there-a-size-limit-for-my-index-records and
# https://www.algolia.com/doc/guides/scaling/algolia-service-limits/).
# Every key in our record is pretty small except for the "text" one.
# I tried truncating it to < 10_000 and even though we don't get a record
# size error, we keep hitting an AlgoliaUnreachableHostException.
# I opened an issue because the error is unhelpful and unclear:
# https://github.com/algolia/algoliasearch-client-python/issues/565

# It wasn't until I cut the "text" field to max_size=1000 that I was able
# to get an index. My guess is that we are hitting another limitation, but
# I was not able to find anything in the docs.

max_size = 1_000
for obj in search:
if len(obj["text"].encode("utf-8")) > max_size:
obj["text"] = truncate_string(obj["text"], max_size)
size = len(obj["text"].encode("utf-8"))
assert size < max_size

index.replace_all_objects(search)


if __name__ == "__main__":
main()
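
> Because `truncate_string` slices the encoded bytes and then decodes with `errors="ignore"`, a multi-byte UTF-8 character that straddles the byte cutoff is dropped rather than corrupted. A quick self-contained check (the helper is copied from the script above so the snippet runs standalone):

```python
# Copied from upload-algolia.py above, condensed to one expression.
def truncate_string(string: str, max_size: int) -> str:
    return string.encode("utf-8")[: max_size - 1].decode("utf-8", errors="ignore")

s = "caf" + "é" * 3          # 'é' is 2 bytes in UTF-8, so s encodes to 9 bytes
out = truncate_string(s, 7)  # the byte cutoff lands mid-character
assert out == "café"         # the half 'é' is dropped, not mangled
assert len(out.encode("utf-8")) < 7
```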
20 changes: 18 additions & 2 deletions .gitignore
@@ -40,7 +40,6 @@ dist
coverage.xml

# Environments
.env
.venv
env/
venv/
@@ -80,11 +79,11 @@ ibis_testing*
result
result-*

# generated mkdocs website
.benchmarks

# tutorial data
geography.db
geography.duckdb

# build artifacts
ci/udf/.ninja_deps
@@ -135,3 +134,20 @@ ibis/examples/descriptions
# automatically generated odbc file for ci
ci/odbc/odbc.ini
*-citibike-tripdata.tar.xz

# data downloaded by the geospatial tutorial
docs/posts/ibis-duckdb-geospatial/nyc_data.db.wal

# pixi environment directory
.pixi

# jupyter cache directories
docs/**/.jupyter_cache

# quarto generated files
docs/posts-listing.json
docs/posts.feed-full-staged
docs/**/*.html

# jupyterlite stuff
.jupyterlite.doit.db
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -18,7 +18,7 @@ default_stages:
- commit
repos:
- repo: https://github.com/rhysd/actionlint
rev: v1.6.26
rev: v1.6.27
hooks:
- id: actionlint-system
- repo: https://github.com/codespell-project/codespell
@@ -37,7 +37,7 @@ repos:
types_or:
- python
- pyi
args: ["check", "--force-exclude", "--show-source", "--fix"]
args: ["check", "--force-exclude", "--output-format=full", "--fix"]
require_serial: true
minimum_pre_commit_version: "2.9.2"
- repo: local
@@ -54,11 +54,11 @@ repos:
require_serial: true
minimum_pre_commit_version: "2.9.2"
- repo: https://github.com/adrienverge/yamllint
rev: v1.33.0
rev: v1.35.1
hooks:
- id: yamllint
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: check-added-large-files
args: ["--maxkb=710"]
1 change: 1 addition & 0 deletions .prettierignore
@@ -12,6 +12,7 @@ docs/overrides/*.html
docs/release_notes.md
docs/_freeze
docs/_publish.yml
docs/_extensions
docs/.quarto
docs/_output
ibis
2 changes: 1 addition & 1 deletion .prettierrc.toml
@@ -3,4 +3,4 @@ semi = true
singleQuote = false
arrowParens = "avoid"
useTabs = false
trailingComma = "all"
trailingComma = "none"
52 changes: 28 additions & 24 deletions .releaserc.js
@@ -10,8 +10,8 @@ module.exports = {
{
// deprecations are patch releases
releaseRules: [{ type: "depr", release: "patch" }],
preset: "conventionalcommits",
},
preset: "conventionalcommits"
}
],
[
"@semantic-release/release-notes-generator",
@@ -27,17 +27,17 @@ module.exports = {
{ type: "refactor", section: "Refactors" },
{ type: "perf", section: "Performance" },
{ type: "test", hidden: true },
{ type: "depr", section: "Deprecations" },
],
},
},
{ type: "depr", section: "Deprecations" }
]
}
}
],
[
"@semantic-release/changelog",
{
changelogTitle: "Release notes\n---",
changelogFile: "docs/release_notes.md",
},
changelogTitle: "---\n---",
changelogFile: "docs/release_notes_generated.qmd"
}
],
[
"semantic-release-replace-plugin",
@@ -52,13 +52,13 @@ module.exports = {
file: "ibis/__init__.py",
hasChanged: true,
numMatches: 1,
numReplacements: 1,
},
numReplacements: 1
}
],
countMatches: true,
},
],
},
countMatches: true
}
]
}
],
[
"@semantic-release/exec",
@@ -67,22 +67,26 @@ module.exports = {
"ci/release/verify_conditions.sh ${options.dryRun}",
verifyReleaseCmd: "ci/release/verify_release.sh ${nextRelease.version}",
prepareCmd: "ci/release/prepare.sh ${nextRelease.version}",
publishCmd: "ci/release/publish.sh",
},
publishCmd: "ci/release/publish.sh"
}
],
[
"@semantic-release/github",
{
successComment: false,
assets: ["dist/*.whl"],
},
assets: ["dist/*.whl"]
}
],
[
"@semantic-release/git",
{
assets: ["pyproject.toml", "docs/release_notes.md", "ibis/__init__.py"],
message: "chore(release): ${nextRelease.version}",
},
],
],
assets: [
"pyproject.toml",
"docs/release_notes_generated.qmd",
"ibis/__init__.py"
],
message: "chore(release): ${nextRelease.version}"
}
]
]
};
299 changes: 162 additions & 137 deletions README.md

Large diffs are not rendered by default.

31 changes: 0 additions & 31 deletions ci/conda-lock/condarc

This file was deleted.

60 changes: 0 additions & 60 deletions ci/conda-lock/generate.sh

This file was deleted.

436 changes: 0 additions & 436 deletions ci/conda-lock/linux-64/3.10.lock

This file was deleted.

430 changes: 0 additions & 430 deletions ci/conda-lock/linux-64/3.11.lock

This file was deleted.

417 changes: 0 additions & 417 deletions ci/conda-lock/osx-64/3.10.lock

This file was deleted.

411 changes: 0 additions & 411 deletions ci/conda-lock/osx-64/3.11.lock

This file was deleted.

417 changes: 0 additions & 417 deletions ci/conda-lock/osx-arm64/3.10.lock

This file was deleted.

411 changes: 0 additions & 411 deletions ci/conda-lock/osx-arm64/3.11.lock

This file was deleted.

418 changes: 0 additions & 418 deletions ci/conda-lock/win-64/3.10.lock

This file was deleted.

412 changes: 0 additions & 412 deletions ci/conda-lock/win-64/3.11.lock

This file was deleted.

87 changes: 35 additions & 52 deletions ci/make_geography_db.py
@@ -16,72 +16,56 @@
from __future__ import annotations

import argparse
import datetime
import json
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any

import requests
import sqlalchemy as sa
import toolz

import ibis

if TYPE_CHECKING:
from collections.abc import Mapping

SCHEMAS = {
"countries": [
("iso_alpha2", sa.TEXT),
("iso_alpha3", sa.TEXT),
("iso_numeric", sa.INT),
("fips", sa.TEXT),
("name", sa.TEXT),
("capital", sa.TEXT),
("area_km2", sa.REAL),
("population", sa.INT),
("continent", sa.TEXT),
],
"gdp": [
("country_code", sa.TEXT),
("year", sa.INT),
("value", sa.REAL),
],
"independence": [
("country_code", sa.TEXT),
("independence_date", sa.DATE),
("independence_from", sa.TEXT),
],
}

POST_PARSE_FUNCTIONS = {
"independence": lambda row: toolz.assoc(
row,
"independence_date",
datetime.datetime.fromisoformat(row["independence_date"]).date(),
)
"countries": {
"iso_alpha2": "string",
"iso_alpha3": "string",
"iso_numeric": "int",
"fips": "string",
"name": "string",
"capital": "string",
"area_km2": "float",
"population": "int",
"continent": "string",
},
"gdp": {
"country_code": "string",
"year": "int",
"value": "float",
},
"independence": {
"country_code": "string",
"independence_date": "date",
"independence_from": "string",
},
}


def make_geography_db(
data: Mapping[str, Any],
con: sa.engine.Engine,
data: Mapping[str, Any], con: ibis.backends.duckdb.Backend
) -> None:
metadata = sa.MetaData(bind=con)

with con.begin() as bind:
with tempfile.TemporaryDirectory() as d:
for table_name, schema in SCHEMAS.items():
table = sa.Table(
table_name,
metadata,
*(sa.Column(col_name, col_type) for col_name, col_type in schema),
ibis_schema = ibis.schema(schema)
cols = ibis_schema.names
path = Path(d, f"{table_name}.jsonl")
path.write_text(
"\n".join(json.dumps(dict(zip(cols, row))) for row in data[table_name])
)
table_columns = table.c.keys()
post_parse = POST_PARSE_FUNCTIONS.get(table_name, toolz.identity)

table.drop(bind=bind, checkfirst=True)
table.create(bind=bind)
bind.execute(
table.insert().values(),
[post_parse(dict(zip(table_columns, row))) for row in data[table_name]],
con.create_table(
table_name, obj=con.read_json(path), schema=ibis_schema, overwrite=True
)


@@ -109,9 +93,8 @@ def main() -> None:
response = requests.get(args.input_data_url)
response.raise_for_status()
input_data = response.json()
db_path = Path(args.output_directory).joinpath("geography.db")
con = sa.create_engine(f"sqlite:///{db_path}")
make_geography_db(input_data, con)
db_path = Path(args.output_directory).joinpath("geography.duckdb")
make_geography_db(input_data, ibis.duckdb.connect(db_path))
print(db_path) # noqa: T201


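
> The rewrite above drops the SQLAlchemy engine in favor of loading each table through the ibis DuckDB backend: rows are dumped to a temporary `.jsonl` file, read back with `read_json`, and materialized with `create_table`. A condensed sketch of that pattern for a single table (hypothetical input file name; the schema is taken from `SCHEMAS` above):

```python
# Condensed sketch of the new load path in make_geography_db.py.
# "gdp.jsonl" is a hypothetical newline-delimited JSON file on disk.
import ibis

con = ibis.duckdb.connect("geography.duckdb")
schema = ibis.schema({"country_code": "string", "year": "int", "value": "float"})

# read_json returns an ibis table expression; create_table materializes it
# in the DuckDB file with the declared schema, replacing any existing table.
con.create_table("gdp", obj=con.read_json("gdp.jsonl"), schema=schema, overwrite=True)
```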
170 changes: 170 additions & 0 deletions ci/schema/bigquery.sql
@@ -0,0 +1,170 @@
CREATE OR REPLACE TABLE {dataset}.struct (
abc STRUCT<a FLOAT64, b STRING, c INT64>
);

INSERT INTO {dataset}.struct VALUES
(STRUCT(1.0, 'banana', 2)),
(STRUCT(2.0, 'apple', 3)),
(STRUCT(3.0, 'orange', 4)),
(STRUCT(NULL, 'banana', 2)),
(STRUCT(2.0, NULL, 3)),
(NULL),
(STRUCT(3.0, 'orange', NULL));

CREATE OR REPLACE TABLE {dataset}.array_types (
x ARRAY<INT64>,
y ARRAY<STRING>,
z ARRAY<FLOAT64>,
grouper STRING,
scalar_column FLOAT64,
);

INSERT INTO {dataset}.array_types VALUES
([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0),
([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0),
([6], ['f'], [6.0], 'a', 3.0),
([1], ['a'], [], 'b', 4.0),
([2, 3], ['b', 'c'], NULL, 'b', 5.0),
([4, 5], ['d', 'e'], [4.0, 5.0], 'c', 6.0);

CREATE OR REPLACE TABLE {dataset}.win (
g STRING,
x INT64,
y INT64
);

INSERT INTO {dataset}.win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

CREATE OR REPLACE TABLE {dataset}.topk (
x INT64
);

INSERT INTO {dataset}.topk VALUES (1), (1), (NULL);

CREATE OR REPLACE TABLE {dataset}.numeric_table (
string_col STRING,
numeric_col NUMERIC
);

INSERT INTO {dataset}.numeric_table VALUES
('1st value', 0.999999999),
('2nd value', 0.000000002);

CREATE OR REPLACE TABLE {dataset}.json_t (
js JSON
);

INSERT INTO {dataset}.json_t VALUES
(JSON '{{"a": [1,2,3,4], "b": 1}}'),
(JSON '{{"a":null,"b":2}}'),
(JSON '{{"a":"foo", "c":null}}'),
(JSON 'null'),
(JSON '[42,47,55]'),
(JSON '[]'),
(JSON '"a"'),
(JSON '""'),
(JSON '"b"'),
(NULL),
(JSON 'true'),
(JSON 'false'),
(JSON '42'),
(JSON '37.37');


LOAD DATA OVERWRITE {dataset}.functional_alltypes (
id INT64,
bool_col BOOLEAN,
tinyint_col INT64,
smallint_col INT64,
int_col INT64,
bigint_col INT64,
float_col FLOAT64,
double_col FLOAT64,
date_string_col STRING,
string_col STRING,
timestamp_col DATETIME,
year INT64,
month INT64
)
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/functional_alltypes.parquet']
);

LOAD DATA OVERWRITE {dataset}.awards_players
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/awards_players.parquet']
);

LOAD DATA OVERWRITE {dataset}.batting
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/batting.parquet']
);

LOAD DATA OVERWRITE {dataset}.diamonds
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/diamonds.parquet']
);

LOAD DATA OVERWRITE {dataset}.astronauts
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/astronauts.parquet']
);

LOAD DATA OVERWRITE {dataset}.functional_alltypes_parted (
id INT64,
bool_col BOOLEAN,
tinyint_col INT64,
smallint_col INT64,
int_col INT64,
bigint_col INT64,
float_col FLOAT64,
double_col FLOAT64,
date_string_col STRING,
string_col STRING,
timestamp_col DATETIME,
year INT64,
month INT64
)
PARTITION BY _PARTITIONDATE
FROM FILES (
format = 'PARQUET',
uris = ['gs://ibis-ci-data/functional_alltypes.parquet']
);

CREATE OR REPLACE TABLE {dataset}.timestamp_column_parted (
my_timestamp_parted_col TIMESTAMP,
string_col STRING,
int_col INT64
)
PARTITION BY DATE(my_timestamp_parted_col);

CREATE OR REPLACE TABLE {dataset}.date_column_parted (
my_date_parted_col DATE,
string_col STRING,
int_col INT64
)
PARTITION BY my_date_parted_col;

CREATE OR REPLACE TABLE {dataset}.struct_table (
array_of_structs_col ARRAY<STRUCT<int_field INTEGER, string_field STRING>>,
nested_struct_col STRUCT<sub_struct STRUCT<timestamp_col TIMESTAMP>>,
struct_col STRUCT<string_field STRING>
);

INSERT INTO {dataset}.struct_table VALUES
([(12345, 'abcdefg'), (NULL, NULL)],
STRUCT(STRUCT(NULL)),
STRUCT(NULL)),
([(12345, 'abcdefg'), (NULL, 'hijklmnop')],
STRUCT(STRUCT('2017-10-20 16:37:50.000000')),
STRUCT('a'));
3 changes: 3 additions & 0 deletions ci/schema/clickhouse.sql
@@ -94,3 +94,6 @@ INSERT INTO ibis_testing.win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

CREATE OR REPLACE TABLE ibis_testing.topk (x Nullable(Int64)) ENGINE = Memory;
INSERT INTO ibis_testing.topk VALUES (1), (1), (NULL);
20 changes: 18 additions & 2 deletions ci/schema/duckdb.sql
@@ -29,15 +29,23 @@ INSERT INTO struct VALUES
(NULL),
({'a': 3.0, 'b': 'orange', 'c': NULL});

CREATE OR REPLACE TABLE json_t (js TEXT);
CREATE OR REPLACE TABLE json_t (js JSON);

INSERT INTO json_t VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');

CREATE OR REPLACE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
INSERT INTO win VALUES
@@ -51,3 +59,11 @@ CREATE OR REPLACE TABLE map (idx BIGINT, kv MAP(STRING, BIGINT));
INSERT INTO map VALUES
(1, MAP(['a', 'b', 'c'], [1, 2, 3])),
(2, MAP(['d', 'e', 'f'], [4, 5, 6]));


CREATE OR REPLACE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);

CREATE SCHEMA shops;
CREATE TABLE shops.ice_cream (flavor TEXT, quantity INT);
INSERT INTO shops.ice_cream values ('vanilla', 2), ('chocolate', 3);
45 changes: 31 additions & 14 deletions ci/schema/exasol.sql
@@ -1,7 +1,7 @@
DROP SCHEMA IF EXISTS EXASOL CASCADE;
CREATE SCHEMA EXASOL;

CREATE OR REPLACE TABLE EXASOL.diamonds
CREATE OR REPLACE TABLE EXASOL."diamonds"
(
"carat" DOUBLE,
"cut" VARCHAR(256),
@@ -15,13 +15,13 @@ CREATE OR REPLACE TABLE EXASOL.diamonds
"z" DOUBLE
);

CREATE OR REPLACE TABLE EXASOL.batting
CREATE OR REPLACE TABLE EXASOL."batting"
(
"playerID" VARCHAR(256),
"yearID" BIGINT,
"stint" BIGINT,
"teamID" VARCHAR(256),
"logID" VARCHAR(256),
"lgID" VARCHAR(256),
"G" BIGINT,
"AB" BIGINT,
"R" BIGINT,
@@ -41,22 +41,22 @@ CREATE OR REPLACE TABLE EXASOL.batting
"GIDP" BIGINT
);

CREATE OR REPLACE TABLE EXASOL.awards_players
CREATE OR REPLACE TABLE EXASOL."awards_players"
(
"playerId" VARCHAR(256),
"playerID" VARCHAR(256),
"awardID" VARCHAR(256),
"yearID" VARCHAR(256),
"logID" VARCHAR(256),
"yearID" BIGINT,
"lgID" VARCHAR(256),
"tie" VARCHAR(256),
"notest" VARCHAR(256)
"notes" VARCHAR(256)
);

CREATE OR REPLACE TABLE EXASOL.functional_alltypes
CREATE OR REPLACE TABLE EXASOL."functional_alltypes"
(
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" SHORTINT,
"small_int" SMALLINT,
"smallint_col" SMALLINT,
"int_col" INTEGER,
"bigint_col" BIGINT,
"float_col" FLOAT,
@@ -69,7 +69,24 @@ CREATE OR REPLACE TABLE EXASOL.functional_alltypes
);


IMPORT INTO EXASOL.diamonds FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL.batting FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL.awards_players FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL.functional_alltypes FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."diamonds" FROM LOCAL CSV FILE '/data/diamonds.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."batting" FROM LOCAL CSV FILE '/data/batting.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."awards_players" FROM LOCAL CSV FILE '/data/awards_players.csv' COLUMN SEPARATOR = ',' SKIP = 1;
IMPORT INTO EXASOL."functional_alltypes" FROM LOCAL CSV FILE '/data/functional_alltypes.csv' COLUMN SEPARATOR = ',' SKIP = 1;

CREATE OR REPLACE TABLE EXASOL."win"
(
"g" VARCHAR(1),
"x" BIGINT,
"y" BIGINT
);

INSERT INTO "win" VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

CREATE OR REPLACE TABLE EXASOL."topk" ("x" BIGINT);
INSERT INTO "topk" VALUES (1), (1), (NULL);
5 changes: 5 additions & 0 deletions ci/schema/mssql.sql
@@ -131,3 +131,8 @@ INSERT INTO win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS topk;

CREATE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);
15 changes: 14 additions & 1 deletion ci/schema/mysql.sql
@@ -108,7 +108,15 @@ INSERT INTO json_t VALUES
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');

DROP TABLE IF EXISTS win CASCADE;

@@ -119,3 +127,8 @@ INSERT INTO win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS topk CASCADE;

CREATE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);
5 changes: 5 additions & 0 deletions ci/schema/oracle.sql
@@ -116,4 +116,9 @@ INSERT INTO "win" VALUES
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS "topk";

CREATE TABLE "topk" ("x" NUMBER(18));
INSERT INTO "topk" VALUES (1), (1), (NULL);

COMMIT;
45 changes: 29 additions & 16 deletions ci/schema/postgres.sql
@@ -121,21 +121,6 @@ CREATE TABLE awards_players (

COPY awards_players FROM '/data/awards_players.csv' WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',');

DROP TYPE IF EXISTS vector CASCADE;
CREATE TYPE vector AS (
x FLOAT8,
y FLOAT8,
z FLOAT8
);

DROP VIEW IF EXISTS awards_players_special_types CASCADE;
CREATE VIEW awards_players_special_types AS
SELECT
*,
setweight(to_tsvector('simple', notes), 'A')::TSVECTOR AS search,
NULL::vector AS simvec
FROM awards_players;

DROP TABLE IF EXISTS functional_alltypes CASCADE;

CREATE TABLE functional_alltypes (
@@ -273,7 +258,15 @@ INSERT INTO json_t VALUES
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');

DROP TABLE IF EXISTS win CASCADE;
CREATE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
@@ -289,3 +282,23 @@ CREATE TABLE map (idx BIGINT, kv HSTORE);
INSERT INTO map VALUES
(1, 'a=>1,b=>2,c=>3'),
(2, 'd=>4,e=>5,c=>6');

DROP TABLE IF EXISTS topk;

CREATE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);

CREATE EXTENSION IF NOT EXISTS vector;

DROP VIEW IF EXISTS awards_players_special_types CASCADE;
CREATE VIEW awards_players_special_types AS
SELECT
*,
setweight(to_tsvector('simple', notes), 'A')::TSVECTOR AS search,
NULL::vector AS simvec
FROM awards_players;


DROP TABLE IF EXISTS items CASCADE;
CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]');
145 changes: 79 additions & 66 deletions ci/schema/risingwave.sql
@@ -1,27 +1,27 @@
SET RW_IMPLICIT_FLUSH=true;

DROP TABLE IF EXISTS diamonds CASCADE;

CREATE TABLE diamonds (
carat FLOAT,
cut TEXT,
color TEXT,
clarity TEXT,
depth FLOAT,
DROP TABLE IF EXISTS "diamonds" CASCADE;

CREATE TABLE "diamonds" (
"carat" FLOAT,
"cut" TEXT,
"color" TEXT,
"clarity" TEXT,
"depth" FLOAT,
"table" FLOAT,
price BIGINT,
x FLOAT,
y FLOAT,
z FLOAT
"price" BIGINT,
"x" FLOAT,
"y" FLOAT,
"z" FLOAT
) WITH (
connector = 'posix_fs',
match_pattern = 'diamonds.csv',
posix_fs.root = '/data',
) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' );

DROP TABLE IF EXISTS astronauts CASCADE;
DROP TABLE IF EXISTS "astronauts" CASCADE;

CREATE TABLE astronauts (
CREATE TABLE "astronauts" (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
@@ -52,12 +52,12 @@ CREATE TABLE astronauts (
posix_fs.root = '/data',
) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' );

DROP TABLE IF EXISTS batting CASCADE;
DROP TABLE IF EXISTS "batting" CASCADE;

CREATE TABLE batting (
CREATE TABLE "batting" (
"playerID" TEXT,
"yearID" BIGINT,
stint BIGINT,
"stint" BIGINT,
"teamID" TEXT,
"lgID" TEXT,
"G" BIGINT,
@@ -83,95 +83,108 @@ CREATE TABLE batting (
posix_fs.root = '/data',
) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' );

DROP TABLE IF EXISTS awards_players CASCADE;
DROP TABLE IF EXISTS "awards_players" CASCADE;

CREATE TABLE awards_players (
CREATE TABLE "awards_players" (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
"lgID" TEXT,
tie TEXT,
notes TEXT
"tie" TEXT,
"notes" TEXT
) WITH (
connector = 'posix_fs',
match_pattern = 'awards_players.csv',
posix_fs.root = '/data',
) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' );

DROP TABLE IF EXISTS functional_alltypes CASCADE;

CREATE TABLE functional_alltypes (
id INTEGER,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INTEGER,
bigint_col BIGINT,
float_col REAL,
double_col DOUBLE PRECISION,
date_string_col TEXT,
string_col TEXT,
timestamp_col TIMESTAMP WITHOUT TIME ZONE,
year INTEGER,
month INTEGER
DROP TABLE IF EXISTS "functional_alltypes" CASCADE;

CREATE TABLE "functional_alltypes" (
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" SMALLINT,
"smallint_col" SMALLINT,
"int_col" INTEGER,
"bigint_col" BIGINT,
"float_col" REAL,
"double_col" DOUBLE PRECISION,
"date_string_col" TEXT,
"string_col" TEXT,
"timestamp_col" TIMESTAMP WITHOUT TIME ZONE,
"year" INTEGER,
"month" INTEGER
) WITH (
connector = 'posix_fs',
match_pattern = 'functional_alltypes.csv',
posix_fs.root = '/data',
) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ',' );

DROP TABLE IF EXISTS tzone CASCADE;
DROP TABLE IF EXISTS "tzone" CASCADE;

CREATE TABLE tzone (
ts TIMESTAMP WITH TIME ZONE,
key TEXT,
value DOUBLE PRECISION
CREATE TABLE "tzone" (
"ts" TIMESTAMP WITH TIME ZONE,
"key" TEXT,
"value" DOUBLE PRECISION
);

INSERT INTO tzone
INSERT INTO "tzone"
SELECT
CAST('2017-05-28 11:01:31.000400' AS TIMESTAMP WITH TIME ZONE) +
t * INTERVAL '1 day 1 second' AS ts,
CHR(97 + t) AS key,
t + t / 10.0 AS value
FROM generate_series(0, 9) AS t;

DROP TABLE IF EXISTS array_types CASCADE;

CREATE TABLE IF NOT EXISTS array_types (
x BIGINT[],
y TEXT[],
z DOUBLE PRECISION[],
grouper TEXT,
scalar_column DOUBLE PRECISION,
multi_dim BIGINT[][]
t * INTERVAL '1 day 1 second' AS "ts",
CHR(97 + t) AS "key",
t + t / 10.0 AS "value"
FROM generate_series(0, 9) AS "t";

DROP TABLE IF EXISTS "array_types" CASCADE;

CREATE TABLE IF NOT EXISTS "array_types" (
"x" BIGINT[],
"y" TEXT[],
"z" DOUBLE PRECISION[],
"grouper" TEXT,
"scalar_column" DOUBLE PRECISION,
"multi_dim" BIGINT[][]
);

INSERT INTO array_types VALUES
INSERT INTO "array_types" VALUES
(ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0, ARRAY[ARRAY[NULL::BIGINT, NULL, NULL], ARRAY[1, 2, 3]]),
(ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0, ARRAY[]::BIGINT[][]),
(ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0, ARRAY[NULL, ARRAY[]::BIGINT[], NULL]),
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::DOUBLE PRECISION[], 'b', 4.0, ARRAY[ARRAY[1], ARRAY[2], ARRAY[NULL::BIGINT], ARRAY[3]]),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0, NULL),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0, ARRAY[ARRAY[1, 2, 3]]);

DROP TABLE IF EXISTS json_t CASCADE;
DROP TABLE IF EXISTS "json_t" CASCADE;

CREATE TABLE IF NOT EXISTS json_t (js JSONB);
CREATE TABLE IF NOT EXISTS "json_t" ("js" JSONB);

INSERT INTO json_t VALUES
INSERT INTO "json_t" VALUES
('{"a": [1,2,3,4], "b": 1}'),
('{"a":null,"b":2}'),
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');

DROP TABLE IF EXISTS win CASCADE;
CREATE TABLE win (g TEXT, x BIGINT, y BIGINT);
INSERT INTO win VALUES
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');

DROP TABLE IF EXISTS "win" CASCADE;
CREATE TABLE "win" ("g" TEXT, "x" BIGINT, "y" BIGINT);
INSERT INTO "win" VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS "topk";

CREATE TABLE "topk" ("x" BIGINT);
INSERT INTO "topk" VALUES (1), (1), (NULL);
43 changes: 27 additions & 16 deletions ci/schema/snowflake.sql
@@ -1,4 +1,4 @@
CREATE OR REPLACE TABLE diamonds (
CREATE OR REPLACE TABLE "diamonds" (
"carat" FLOAT,
"cut" TEXT,
"color" TEXT,
@@ -11,7 +11,7 @@ CREATE OR REPLACE TABLE diamonds (
"z" FLOAT
);

CREATE OR REPLACE TABLE astronauts (
CREATE OR REPLACE TABLE "astronauts" (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
@@ -38,7 +38,7 @@ CREATE OR REPLACE TABLE astronauts (
"total_eva_hrs" FLOAT
);

CREATE OR REPLACE TABLE batting (
CREATE OR REPLACE TABLE "batting" (
"playerID" TEXT,
"yearID" BIGINT,
"stint" BIGINT,
@@ -63,7 +63,7 @@ CREATE OR REPLACE TABLE batting (
"GIDP" BIGINT
);

CREATE OR REPLACE TABLE awards_players (
CREATE OR REPLACE TABLE "awards_players" (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
@@ -72,7 +72,7 @@ CREATE OR REPLACE TABLE awards_players (
"notes" TEXT
);

CREATE OR REPLACE TABLE functional_alltypes (
CREATE OR REPLACE TABLE "functional_alltypes" (
"id" INTEGER,
"bool_col" BOOLEAN,
"tinyint_col" SMALLINT,
@@ -88,7 +88,7 @@ CREATE OR REPLACE TABLE functional_alltypes (
"month" INTEGER
);

CREATE OR REPLACE TABLE array_types (
CREATE OR REPLACE TABLE "array_types" (
"x" ARRAY,
"y" ARRAY,
"z" ARRAY,
@@ -97,24 +97,24 @@ CREATE OR REPLACE TABLE array_types (
"multi_dim" ARRAY
);

INSERT INTO array_types ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
INSERT INTO "array_types" ("x", "y", "z", "grouper", "scalar_column", "multi_dim")
SELECT [1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0, [[], [1, 2, 3], NULL] UNION
SELECT [4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0, [] UNION
SELECT [6, NULL], ['f', NULL], [6.0, NULL], 'a', 3.0, [NULL, [], NULL] UNION
SELECT [NULL, 1, NULL], [NULL, 'a', NULL], [], 'b', 4.0, [[1], [2], [], [3, 4, 5]] UNION
SELECT [2, NULL, 3], ['b', NULL, 'c'], NULL, 'b', 5.0, NULL UNION
SELECT [4, NULL, NULL, 5], ['d', NULL, NULL, 'e'], [4.0, NULL, NULL, 5.0], 'c', 6.0, [[1, 2, 3]];

CREATE OR REPLACE TABLE map ("idx" BIGINT, "kv" OBJECT);
CREATE OR REPLACE TABLE "map" ("idx" BIGINT, "kv" OBJECT);

INSERT INTO map ("idx", "kv")
INSERT INTO "map" ("idx", "kv")
SELECT 1, object_construct('a', 1, 'b', 2, 'c', 3) UNION
SELECT 2, object_construct('d', 4, 'e', 5, 'f', 6);


CREATE OR REPLACE TABLE struct ("abc" OBJECT);
CREATE OR REPLACE TABLE "struct" ("abc" OBJECT);

INSERT INTO struct ("abc")
INSERT INTO "struct" ("abc")
SELECT {'a': 1.0, 'b': 'banana', 'c': 2} UNION
SELECT {'a': 2.0, 'b': 'apple', 'c': 3} UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': 4} UNION
@@ -123,20 +123,31 @@ INSERT INTO struct ("abc")
SELECT NULL UNION
SELECT {'a': 3.0, 'b': 'orange', 'c': NULL};

CREATE OR REPLACE TABLE json_t ("js" VARIANT);
CREATE OR REPLACE TABLE "json_t" ("js" VARIANT);

INSERT INTO json_t ("js")
INSERT INTO "json_t" ("js")
SELECT parse_json('{"a": [1,2,3,4], "b": 1}') UNION
SELECT parse_json('{"a":null,"b":2}') UNION
SELECT parse_json('{"a":"foo", "c":null}') UNION
SELECT parse_json('null') UNION
SELECT parse_json('[42,47,55]') UNION
SELECT parse_json('[]');
SELECT parse_json('[]') UNION
SELECT parse_json('"a"') UNION
SELECT parse_json('""') UNION
SELECT parse_json('"b"') UNION
SELECT NULL UNION
SELECT parse_json('true') UNION
SELECT parse_json('false') UNION
SELECT parse_json('42') UNION
SELECT parse_json('37.37');

CREATE OR REPLACE TABLE win ("g" TEXT, "x" BIGINT NOT NULL, "y" BIGINT);
INSERT INTO win VALUES
CREATE OR REPLACE TABLE "win" ("g" TEXT, "x" BIGINT NOT NULL, "y" BIGINT);
INSERT INTO "win" VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

CREATE OR REPLACE TABLE "topk" ("x" BIGINT);
INSERT INTO "topk" VALUES (1), (1), (NULL);
14 changes: 13 additions & 1 deletion ci/schema/sqlite.sql
@@ -109,7 +109,15 @@ INSERT INTO json_t VALUES
('{"a":"foo", "c":null}'),
('null'),
('[42,47,55]'),
('[]');
('[]'),
('"a"'),
('""'),
('"b"'),
(NULL),
('true'),
('false'),
('42'),
('37.37');

DROP TABLE IF EXISTS win;
CREATE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
@@ -119,3 +127,7 @@ INSERT INTO win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS topk;
CREATE TABLE "topk" ("x" BIGINT);
INSERT INTO "topk" VALUES (1), (1), (NULL);
14 changes: 13 additions & 1 deletion ci/schema/trino.sql
@@ -168,7 +168,15 @@ INSERT INTO memory.default.json_t VALUES
(JSON '{"a":"foo", "c":null}'),
(JSON 'null'),
(JSON '[42,47,55]'),
(JSON '[]');
(JSON '[]'),
(JSON '"a"'),
(JSON '""'),
(JSON '"b"'),
(NULL),
(JSON 'true'),
(JSON 'false'),
(JSON '42'),
(JSON '37.37');

DROP TABLE IF EXISTS win;
CREATE TABLE win (g VARCHAR, x BIGINT, y BIGINT);
@@ -178,3 +186,7 @@ INSERT INTO win VALUES
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS topk;
CREATE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);
1 change: 1 addition & 0 deletions codecov.yml
@@ -1,4 +1,5 @@
codecov:
branch: main
notify:
# a high number to try to delay codecov reporting until most of the test
# runs have finished. Should match with comment.after_n_builds below.
35 changes: 19 additions & 16 deletions compose.yaml
@@ -1,6 +1,6 @@
services:
clickhouse:
image: clickhouse/clickhouse-server:23.12.3.40-alpine
image: clickhouse/clickhouse-server:24.3.2.23-alpine
ports:
- 8123:8123 # http port
- 9000:9000 # native protocol port
@@ -28,7 +28,7 @@ services:
- CMD
- mariadb-admin
- ping
image: mariadb:11.2.2
image: mariadb:11.3.2
ports:
- 3306:3306
networks:
@@ -77,7 +77,7 @@ services:
- mssql

hive-metastore-db:
image: postgres:16.1-alpine
image: postgres:16.2-alpine
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin
@@ -94,7 +94,7 @@ services:
- trino

minio:
image: bitnami/minio:2024.1.31
image: bitnami/minio:2024.4.28
environment:
MINIO_ROOT_USER: accesskey
MINIO_ROOT_PASSWORD: secretkey
@@ -156,7 +156,7 @@ services:
test:
- CMD-SHELL
- trino --output-format null --execute 'show schemas in hive; show schemas in memory'
image: trinodb/trino:438
image: trinodb/trino:445
ports:
- 8080:8080
networks:
@@ -167,7 +167,7 @@ services:
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

druid-postgres:
image: postgres:16.1-alpine
image: postgres:16.2-alpine
container_name: druid-postgres
environment:
POSTGRES_PASSWORD: FoolishPassword
@@ -199,7 +199,7 @@ services:
- druid

druid-coordinator:
image: apache/druid:26.0.0
image: apache/druid:29.0.1
hostname: coordinator
container_name: coordinator
volumes:
@@ -224,7 +224,7 @@ services:
- druid

druid-broker:
image: apache/druid:26.0.0
image: apache/druid:29.0.1
hostname: broker
container_name: broker
volumes:
@@ -252,7 +252,7 @@ services:
- druid

druid-historical:
image: apache/druid:26.0.0
image: apache/druid:29.0.1
hostname: historical
container_name: historical
volumes:
@@ -279,7 +279,7 @@ services:
- druid

druid-middlemanager:
image: apache/druid:26.0.0
image: apache/druid:29.0.1
hostname: middlemanager
container_name: middlemanager
volumes:
@@ -307,7 +307,7 @@ services:
- druid

druid:
image: apache/druid:26.0.0
image: apache/druid:29.0.1
hostname: router
container_name: router
volumes:
@@ -362,7 +362,7 @@ services:
- oracle:/opt/oracle/data

exasol:
image: exasol/docker-db:7.1.25
image: exasol/docker-db:7.1.26
privileged: true
ports:
- 8563:8563
@@ -379,7 +379,7 @@ services:
- exasol:/data

flink-jobmanager:
image: flink:1.18.1
image: flink:${FLINK_VERSION}
environment:
FLINK_PROPERTIES: |
jobmanager.rpc.address: flink-jobmanager
@@ -390,8 +390,11 @@ services:
- flink

flink:
build: ./docker/flink
image: ibis-flink
image: ibis-flink-${FLINK_VERSION}
build:
context: ./docker/flink
args:
FLINK_VERSION: ${FLINK_VERSION}
environment:
FLINK_PROPERTIES: |
jobmanager.rpc.address: flink-jobmanager
@@ -539,7 +542,7 @@ services:
- impala

risingwave:
image: ghcr.io/risingwavelabs/risingwave:nightly-20240122
image: ghcr.io/risingwavelabs/risingwave:nightly-20240204
command: "standalone --meta-opts=\" \
--advertise-addr 0.0.0.0:5690 \
--backend mem \
101 changes: 101 additions & 0 deletions conda/environment-arm64-flink.yml
@@ -0,0 +1,101 @@
name: ibis-dev-flink
channels:
- conda-forge
dependencies:
# runtime dependencies
- python =3.10
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
- datafusion >=0.6
- db-dtypes >=0.3.0,<2
- deltalake
- duckdb-engine <1,>=0.1.8
- filelock >=3.7.0,<4
- fsspec >=2022.1.0
- gcsfs
- geopandas >=0.6
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- psycopg2 >=2.8.4
- pyarrow =11.0.0
- pyarrow-tests
- pyarrow-hotfix >=0.4
- pydata-google-auth
- pydruid >=0.6.5
- pymysql >=1
- pyspark >=3
- python-dateutil >=2.8.2
- python-duckdb >=0.8.1
- python-graphviz >=0.16
- pytz >=2022.7
- regex >=2021.7.6
- requests >=2
- rich >=12.4.4
- shapely>=2,<3
- snowflake-connector-python >=3.0.2
- sqlglot >=22.5,<23.1
- toolz >=0.11
- trino-python-client >=0.321
# geospatial
- leafmap >=0.29.6
# streaming
- kafka-python
- openjdk <21
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
- pytest-httpserver >=1.0.5,<2
- pytest-mock >=3.6.1,<4
- pytest-randomly >=3.10.1,<4
- pytest-repeat >=0.9.1,<0.10
- pytest-snapshot >=0.9.0,<1
- pytest-timeout >=2.3.1,<3
- pytest-xdist >=2.3.0,<4
- requests >=2,<3
# docs
- quarto >=1.4
- altair >=5.0.1
- distributed >=2022.9.1
- ipykernel >=6.25.1
- itables >=1.6.3
- jupyter-cache
- nbclient >=0.8.0
- plotly >=5.16.1
- plotnine >=0.12.2
- py-cpuinfo >=9
- quartodoc >=0.6.1
- seaborn
# dev utilities
- codespell >=2.2.6
- ipython
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
- tqdm >=4.66.1
- just
# needed for apache-flink >=1.18.0
- py4j =0.10.9.7
- pip
- pip:
- apache-flink >=1.19.0
98 changes: 98 additions & 0 deletions conda/environment-arm64.yml
@@ -0,0 +1,98 @@
name: ibis-dev-arm64
channels:
- conda-forge
dependencies:
# runtime dependencies
- python >=3.10
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
- datafusion >=0.6
- db-dtypes >=0.3.0,<2
- deltalake
- duckdb-engine <1,>=0.1.8
- filelock >=3.7.0,<4
- fsspec >=2022.1.0
- gcsfs
- geopandas >=0.6
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- psycopg2 >=2.8.4
- pyarrow >=10.0.1
- pyarrow-tests
- pyarrow-hotfix >=0.4
- pydata-google-auth
- pydruid >=0.6.5
- pymysql >=1
- pyodbc >=4.0.39
- pyspark >=3
- python-dateutil >=2.8.2
- python-duckdb >=0.8.1
- python-graphviz >=0.16
- pytz >=2022.7
- regex >=2021.7.6
- requests >=2
- rich >=12.4.4
- shapely>=2,<3
- snowflake-connector-python >=3.0.2
- sqlglot >=22.5,<23.1
- toolz >=0.11
- trino-python-client >=0.321
- openjdk <21
# geospatial
- leafmap >=0.29.6,<0.31
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
- pytest-httpserver >=1.0.5,<2
- pytest-mock >=3.6.1,<4
- pytest-randomly >=3.10.1,<4
- pytest-repeat >=0.9.1,<0.10
- pytest-snapshot >=0.9.0,<1
- pytest-timeout >=2.3.1,<3
- pytest-xdist >=2.3.0,<4
- requests >=2,<3
# docs
- quarto >=1.4
- altair >=5.0.1
- distributed >=2022.9.1
- ipykernel >=6.25.1
- itables >=1.6.3
- jupyter-cache
- nbclient >=0.8.0
- plotly >=5.16.1
- plotnine >=0.12.2
- py-cpuinfo >=9
- quartodoc >=0.6.1
- seaborn
# dev utilities
- codespell >=2.2.6
- ipython
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
- tqdm >=4.66.1
- just
- pip
- pip:
  - lonboard==0.4.0
102 changes: 102 additions & 0 deletions conda/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
name: ibis-dev
channels:
- conda-forge
dependencies:
# runtime dependencies
- apache-flink
- atpublic >=2.3
- bidict >=0.22.1
- black >=22.1.0,<25
- clickhouse-connect >=0.5.23
- dask >=2022.9.1
- datafusion >=0.6
- db-dtypes >=0.3.0,<2
- deltalake
- duckdb-engine >=0.1.8,<1
- filelock >=3.7.0,<4
- fsspec >=2022.1.0
- gcsfs
- geopandas >=0.6
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- pip
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- psycopg2 >=2.8.4
- pyarrow >=10.0.1
- pyarrow-hotfix >=0.4
- pydata-google-auth
- pydruid >=0.6.5
- pymysql >=1
- pyodbc >=4.0.39
- pyspark >=3
- python >=3.10
- python-dateutil >=2.8.2
- python-duckdb >=0.8.1
- python-graphviz >=0.16
- pytz >=2022.7
- regex >=2021.7.6
- requests >=2
- rich >=12.4.4
- shapely >=2,<3
- snowflake-connector-python >=3.0.2
- sqlglot >=22.5,<23.1
- toolz >=0.11
- trino-python-client >=0.321
- openjdk <21
# geospatial
- leafmap >=0.29.6,<0.31
# streaming
- kafka-python
# test dependencies
- filelock >=3.7.0,<4
- hypothesis >=6.58.0,<7
- pytest >=8.0.2,<9
- pytest-benchmark >=3.4.1,<5
- pytest-clarity >=1.0.1,<2
- pytest-cov >=3.0.0,<5
- pytest-httpserver >=1.0.5,<2
- pytest-mock >=3.6.1,<4
- pytest-randomly >=3.10.1,<4
- pytest-repeat >=0.9.1,<0.10
- pytest-snapshot >=0.9.0,<1
- pytest-xdist >=2.3.0,<4
- pytest-timeout >=2.3.1,<3
- requests >=2,<3
# docs
- quarto >=1.4
- altair >=5.0.1
- distributed >=2022.9.1
- ipykernel >=6.25.1
- itables >=1.6.3
- jupyter-cache
- nbclient >=0.8.0
- plotly >=5.16.1
- plotnine >=0.12.2
- py-cpuinfo >=9
- quartodoc >=0.6.1
- seaborn
# dev utilities
- codespell >=2.2.6
- go-shfmt
- ipython
- poetry-plugin-export
- pre-commit
- prettier
- pydeps >=1.12.7
- pyinstrument
- ruff >=0.1.8
- taplo
- tqdm >=4.66.1
- just
- pip
- pip:
  - lonboard==0.4.0
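For anyone reproducing this setup locally, a minimal sketch of creating the development environment from the file above (the environment name `ibis-dev` comes from its `name:` field; substituting `mamba` for `conda` is a common but untested variation):

```bash
# create the dev environment defined in conda/environment.yml
conda env create -f conda/environment.yml

# activate it; the name comes from the file's `name:` field
conda activate ibis-dev
```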
2 changes: 1 addition & 1 deletion docker/druid/environment
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ druid_extensions_loadList=["postgresql-metadata-storage", "druid-multi-stage-que
druid_zk_service_host=zookeeper

druid_worker_capacity=6
druid_generic_useDefaultValueForNull=true
druid_generic_useDefaultValueForNull=false

druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
Expand Down
18 changes: 16 additions & 2 deletions docker/flink/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
FROM flink:1.18.1
ARG FLINK_VERSION=1.19.0
FROM flink:${FLINK_VERSION}

# ibis-flink requires PyFlink dependency
RUN wget -nv -P $FLINK_HOME/lib/ https://repo1.maven.org/maven2/org/apache/flink/flink-python/1.18.1/flink-python-1.18.1.jar
ARG FLINK_VERSION=1.19.0
RUN wget -nv -P $FLINK_HOME/lib/ https://repo1.maven.org/maven2/org/apache/flink/flink-python/${FLINK_VERSION}/flink-python-${FLINK_VERSION}.jar

# install python3 and pip3
RUN apt-get update -y && \
    apt-get install -y python3 python3-pip python3-dev openjdk-11-jdk-headless && \
    rm -rf /var/lib/apt/lists/*
RUN ln -s /usr/bin/python3 /usr/bin/python

# install PyFlink
ARG BUILDARCH
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-${BUILDARCH:-amd64}
RUN pip3 install apache-flink==${FLINK_VERSION}
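With the version factored into the `FLINK_VERSION` build argument, the base image, the `flink-python` jar, and the PyFlink wheel stay in lockstep. A sketch of building the image; the `ibis-flink` tag and the `docker/flink` context path are illustrative assumptions, and overriding the version presumes a matching `flink-python` jar exists on Maven Central:

```bash
# build with the default Flink version baked into the Dockerfile (1.19.0)
docker build -t ibis-flink docker/flink

# or pin the release explicitly at build time via the ARG above
docker build --build-arg FLINK_VERSION=1.19.0 -t ibis-flink:1.19.0 docker/flink
```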
2 changes: 2 additions & 0 deletions docker/mysql/startup.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis';
CREATE SCHEMA IF NOT EXISTS test_schema;
GRANT CREATE, DROP ON *.* TO 'ibis'@'%';
GRANT CREATE, SELECT, DROP ON `test_schema`.* TO 'ibis'@'%';
FLUSH PRIVILEGES;
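A quick way to smoke-test the new grants; this sketch assumes the container publishes MySQL on localhost:3306 and uses the `ibis`/`ibis` credentials created above:

```bash
# exercise the CREATE/SELECT/DROP grants on test_schema as the ibis user
mysql -h 127.0.0.1 -P 3306 -u ibis -pibis \
  -e "CREATE TABLE test_schema.smoke (x INT); SELECT * FROM test_schema.smoke; DROP TABLE test_schema.smoke;"
```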
14 changes: 14 additions & 0 deletions docker/postgres/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,16 @@
FROM postgis/postgis:15-3.3-alpine AS pgvector-builder
RUN apk add --no-cache git
RUN apk add --no-cache build-base
RUN apk add --no-cache clang15
RUN apk add --no-cache llvm15-dev llvm15
WORKDIR /tmp
RUN git clone --branch v0.6.2 https://github.com/pgvector/pgvector.git
WORKDIR /tmp/pgvector
RUN make
RUN make install

FROM postgis/postgis:15-3.3-alpine
RUN apk add --no-cache postgresql15-plpython3
COPY --from=pgvector-builder /usr/local/lib/postgresql/bitcode/vector.index.bc /usr/local/lib/postgresql/bitcode/vector.index.bc
COPY --from=pgvector-builder /usr/local/lib/postgresql/vector.so /usr/local/lib/postgresql/vector.so
COPY --from=pgvector-builder /usr/local/share/postgresql/extension /usr/local/share/postgresql/extension
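The builder stage only compiles pgvector v0.6.2 and copies its artifacts into the final image; the extension still has to be created in each database that uses it. A sketch, assuming the stock `postgres` superuser and database:

```bash
# run against the started container, e.g. via docker exec or a published port
psql -U postgres -d postgres -c "CREATE EXTENSION IF NOT EXISTS vector;"
```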
2 changes: 0 additions & 2 deletions docker/trino/catalog/hive.properties
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
connector.name=hive

hive.allow-drop-table=true
hive.allow-rename-table=true
hive.ignore-absent-partitions=true
hive.metastore.thrift.delete-files-on-drop=true
hive.metastore.uri=thrift://hive-metastore:9083
Expand Down
1 change: 0 additions & 1 deletion docs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ diamonds.json
*.ndjson
reference/
objects.json
*support_matrix.csv

# generated notebooks and files
*.ipynb
Expand Down
4 changes: 3 additions & 1 deletion docs/404.qmd
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Page not found
---
title: Page not found
---

The page you requested cannot be found (perhaps it was moved or renamed).

Expand Down
3 changes: 0 additions & 3 deletions docs/_callouts/experimental_backend.qmd

This file was deleted.

1 change: 1 addition & 0 deletions docs/_code/setup_penguins.qmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
```{python}
import ibis # <1>
import ibis.selectors as s # <1>
from ibis import _
ibis.options.interactive = True # <2>
Expand Down
19 changes: 19 additions & 0 deletions docs/_freeze/posts/duckdb-for-rag/index/execute-results/html.json

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions docs/_freeze/posts/hamilton-ibis/execute-results/html.json

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions docs/_freeze/posts/hamilton-ibis/index/execute-results/html.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions docs/_freeze/posts/into-snowflake/index/execute-results/html.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions docs/_freeze/posts/lms-for-data/index/execute-results/html.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions docs/_freeze/posts/unix-backend/index/execute-results/html.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions docs/_freeze/posts/wow-analysis/index/execute-results/html.json

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.

95 changes: 86 additions & 9 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ code-annotations: hover
execute:
warning: false
error: false
cache: true

filters:
- interlinks
Expand Down Expand Up @@ -42,6 +43,12 @@ website:
search:
location: navbar
type: overlay
algolia:
index-name: prod_ibis
application-id: HS77W8GWM1
search-only-api-key: 8ca4fcd24da322db857322ae4f79d6f3
analytics-events: true
show-logo: true

# options
reader-mode: false
Expand Down Expand Up @@ -88,7 +95,8 @@ website:
- sidebar:reference
right:
- posts.qmd
- release_notes.md
- presentations.qmd
- release_notes.qmd
- sidebar:contribute

sidebar:
Expand All @@ -100,7 +108,9 @@ website:
contents:
- install.qmd
- auto: tutorials/*.qmd
- auto: tutorials/data-platforms
- auto: tutorials/browser
- auto: tutorials/cloud-data-platforms
- auto: tutorials/open-source-software
- id: concepts
title: "Concepts"
style: "docked"
Expand Down Expand Up @@ -167,7 +177,6 @@ website:

- section: Configuration
contents:
- reference/ContextAdjustment.qmd
- reference/Interactive.qmd
- reference/Options.qmd
- reference/Repr.qmd
Expand Down Expand Up @@ -367,15 +376,85 @@ quartodoc:
name: Temporal expressions
desc: Dates, times, timestamps and intervals.
contents:
- TimestampValue
- DateValue
- TimeValue
- IntervalValue
- name: TimestampValue
members:
- add
- radd
- sub
- between
- bucket
- date
- day
- day_of_week
- day_of_year
- delta
- epoch_seconds
- hour
- microsecond
- millisecond
- minute
- month
- quarter
- second
- strftime
- time
- truncate
- week_of_year
- year
- name: DateValue
members:
- add
- radd
- sub
- day
- day_of_week
- day_of_year
- epoch_seconds
- month
- quarter
- strftime
- truncate
- week_of_year
- year
- name: TimeValue
members:
- add
- radd
- sub
- between
- hour
- microsecond
- millisecond
- minute
- second
- strftime
- time
- truncate
- name: IntervalValue
dynamic: true
members:
- to_unit
- negate
- years
- quarters
- months
- weeks
- days
- hours
- minutes
- seconds
- milliseconds
- microseconds
- nanoseconds
- DayOfWeek
- name: now
package: ibis
dynamic: true
signature_name: full
- name: today
package: ibis
dynamic: true
signature_name: full
- name: date
package: ibis
dynamic: true
Expand All @@ -392,7 +471,6 @@ quartodoc:
package: ibis
dynamic: true
signature_name: full

- kind: page
path: expression-collections
package: ibis
Expand Down Expand Up @@ -547,7 +625,6 @@ quartodoc:
desc: "Ibis configuration"
package: ibis.config
contents:
- ContextAdjustment
- Interactive
- Options
- Repr
Expand Down
29 changes: 10 additions & 19 deletions docs/_tabsets/install.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ backends = [
{"name": "Polars", "module": "polars"},
{"name": "PostgreSQL", "module": "postgres"},
{"name": "PySpark", "module": "pyspark"},
{"name": "RisingWave", "module": "risingwave"},
{"name": "Snowflake", "module": "snowflake"},
{"name": "SQLite", "module": "sqlite"},
{"name": "Trino", "module": "trino"},
Expand All @@ -36,6 +37,9 @@ installers = [
{"name": "pixi", "line": "Add the `ibis-{extra}` package:", "cmd": "pixi add ibis-{extra}"},
]
with open("./_callouts/pypi_warning.qmd") as f:
pypi_warning = f.read()
for installer in installers:
installer_name = installer["name"]
cmd = installer["cmd"]
Expand All @@ -53,23 +57,10 @@ for installer in installers:
print(f"## {backend_name}")
print()
if backend_name == "Flink":
if installer_name == "pip":
print("Install alongside the `apache-flink` package:")
print()
print(f"```bash\npip install ibis-framework apache-flink\n```")
else:
print(
dedent(
"""\
::: {.callout-important}
## PyFlink is not available on conda-forge; please
use `pip` to install the PyFlink backend instead.
:::"""
)
)
continue
if backend_name == "Flink" and installer_name == "pip":
print("Install alongside the `apache-flink` package:")
print()
print(f"```bash\npip install ibis-framework apache-flink\n```")
else:
extra = backend.get("extra", mod)
Expand All @@ -82,8 +73,8 @@ for installer in installers:
print(f"Connect using [`ibis.{mod}.connect`](./backends/{backend_name.lower()}.qmd#ibis.{mod}.connect).")
print()
if installer_name == "pip":
print("{{< include /_callouts/pypi_warning.qmd >}}")
if installer_name == "pip":
print(pypi_warning)
print()
print(":::")
Expand Down
9 changes: 9 additions & 0 deletions docs/_tabsets/repl_warning.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
::: {.callout-warning collapse="true"}
## Ibis in the browser is experimental.

This page is known to crash on iOS.

Firefox on mobile may also fail to work, though it won't crash the page.

Please [open an issue on GitHub](https://github.com/ibis-project/ibis/issues/new/choose) if you encounter problems.
:::
Loading