19 changes: 9 additions & 10 deletions dev/poetry2setup.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import argparse
import re
import subprocess
import sys
from pathlib import Path

import black
import tomli
from poetry.core.factory import Factory
from poetry.core.masonry.builders.sdist import SdistBuilder

Expand All @@ -23,16 +22,16 @@ def main(args: argparse.Namespace) -> None:
code = sdist_builder.build_setup().decode("UTF-8")

# pull out black config
config = tomli.loads(input_dir.joinpath("pyproject.toml").read_text())
black_config = config["tool"]["black"]
black_config["string_normalization"] = black_config.pop(
"skip_string_normalization", False
out = subprocess.run(
["black", "--quiet", "-"],
input=code.encode("UTF-8"),
stdout=subprocess.PIPE,
)
black_config.pop("exclude", None)
out = black.format_file_contents(
code, fast=False, mode=black.Mode(**black_config)
print(
DOUBLE_PIPE_REGEX.sub("|", out.stdout.decode("UTF-8")),
file=args.output_file,
end="",
)
print(DOUBLE_PIPE_REGEX.sub("|", out), file=args.output_file, end="")


if __name__ == "__main__":
Expand Down
120 changes: 120 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
version: "3.4"
services:
clickhouse:
image: yandex/clickhouse-server:22-alpine
ports:
- 8123:8123
- 9000:9000
networks:
- clickhouse
impala:
depends_on:
- impala-postgres
- kudu
environment:
PGPASSWORD: postgres
healthcheck:
interval: 30s
retries: 20
test:
- CMD-SHELL
- nc -z 127.0.0.1 21050 && nc -z 127.0.0.1 50070
timeout: 10s
hostname: localhost
image: ibisproject/impala:latest
ports:
- 9020:9020
- 50070:50070
- 50075:50075
- 8020:8020
- 8042:8042
- 9083:9083
- 21000:21000
- 21050:21050
- 25000:25000
- 25010:25010
- 25020:25020
networks:
- impala
impala-postgres:
user: postgres
hostname: postgres
environment:
POSTGRES_PASSWORD: postgres
healthcheck:
interval: 10s
retries: 3
test:
- CMD
- pg_isready
timeout: 5s
image: postgres:13.6-alpine
networks:
- impala
kudu:
cap_add:
- SYS_TIME
depends_on:
- kudu-tserver
environment:
KUDU_MASTER: "true"
image: ibisproject/kudu:latest
ports:
- 7051:7051
- 8051:8051
networks:
- impala
kudu-tserver:
cap_add:
- SYS_TIME
environment:
KUDU_MASTER: "false"
image: ibisproject/kudu:latest
ports:
- 7050:7050
- 8050:8050
networks:
- impala
mysql:
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
MYSQL_DATABASE: ibis_testing
MYSQL_PASSWORD: ibis
MYSQL_USER: ibis
healthcheck:
interval: 10s
retries: 3
test:
- CMD
- mysqladmin
- ping
timeout: 5s
image: mariadb:10.8
ports:
- 3306:3306
networks:
- mysql
postgres:
user: postgres
environment:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: ibis_testing
POSTGRES_USER: postgres
build: ./docker/postgres
healthcheck:
interval: 10s
retries: 3
test:
- CMD
- pg_isready
timeout: 5s
ports:
- 5432:5432
networks:
- postgres

networks:
impala:
mysql:
clickhouse:
postgres:
2 changes: 2 additions & 0 deletions docker/postgres/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FROM postgis/postgis:14-3.2-alpine
RUN apk add postgresql14-plpython3 postgresql14-jit
198 changes: 0 additions & 198 deletions docs/Makefile

This file was deleted.

34 changes: 34 additions & 0 deletions docs/SUMMARY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
* [Home](index.md)
* Tutorial
* [Introduction to Ibis](tutorial/01-Introduction-to-Ibis.ipynb)
* [Aggregating and Joining](tutorial/02-Aggregates-Joins.ipynb)
* [Lazy Mode and Logging](tutorial/03-Expressions-Lazy-Mode-Logging.ipynb)
* [More Value Expressions](tutorial/04-More-Value-Expressions.ipynb)
* [Creating and Inserting External Data](tutorial/05-IO-Create-Insert-External-Data.ipynb)
* [Complex Filtering](tutorial/06-ComplexFiltering.ipynb)
* [Analytics Tools](tutorial/07-Analytics-Tools.ipynb)
* [Geospatial Analysis](tutorial/08-Geospatial-Analysis.ipynb)
* [Ibis for SQL Programmers](ibis-for-sql-programmers.ipynb)
* [User Guide](user_guide/)
* [Execution Backends](backends/)
* [Contribute](contribute/)
* Community
* [Blog](blog/)
* [About](about/)
* [Ask a question (StackOverflow)](https://stackoverflow.com/questions/tagged/ibis)
* [Chat (Gitter)](https://gitter.im/ibis-dev/Lobby)
* community/*.md
* [Release Notes](release_notes.md)
* API Reference
* [Expressions](api/expressions/index.md)
* [Top Level](api/expressions/top_level.md)
* [Tables](api/expressions/tables.md)
* [Generic Values](api/expressions/generic.md)
* [Numeric + Boolean](api/expressions/numeric.md)
* [Strings](api/expressions/strings.md)
* [Timestamps + Dates + Times](api/expressions/timestamps.md)
* [Collections](api/expressions/collections.md)
* [Geospatial](api/expressions/geospatial.md)
* [Data Types](api/datatypes.md)
* [Backend Interfaces](api/backends/)
* [Configuration](api/config.md)
6 changes: 3 additions & 3 deletions docs/web/about/license.md → docs/about/license.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Ibis is distributed under the Apache License, Version 2.0.
```text
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
Expand Down Expand Up @@ -191,13 +191,13 @@ Ibis is distributed under the Apache License, Version 2.0.
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Copyright 2022 Ibis Developers
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
File renamed without changes.
24 changes: 24 additions & 0 deletions docs/about/team.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Team

## Contributors

{{ config.extra.project_name }} is developed and maintained by a [community of
volunteer contributors]({{ config.repo_url }}graphs/contributors).

{% for group in config.extra.team %}

## {{ group.name }}

{% for person in group.members %}

- https://github.com/{{ person }}
{% endfor %}

{% endfor %}

{{ config.extra.project_name }} aims to be a welcoming, friendly, diverse and
inclusive community. Everybody is welcome, regardless of gender, sexual
orientation, gender identity, and expression, disability, physical appearance,
body size, race, or religion. We do not tolerate harassment of community
members in any form. In particular, people from underrepresented groups are
encouraged to join the community.
7 changes: 7 additions & 0 deletions docs/api/backends/base.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Backend Base Classes

<!-- prettier-ignore-start -->
::: ibis.backends.base.BaseBackend
selection:
inherited_members: true
<!-- prettier-ignore-end -->
9 changes: 9 additions & 0 deletions docs/api/backends/pandas.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Pandas-like Backend Base Classes

These base classes underlie the pandas-based backends.

<!-- prettier-ignore-start -->
::: ibis.backends.pandas.BasePandasBackend
selection:
inherited_members: true
<!-- prettier-ignore-end -->
7 changes: 7 additions & 0 deletions docs/api/backends/sql.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SQL Backend Base Classes

<!-- prettier-ignore-start -->
::: ibis.backends.base.sql.BaseSQLBackend
selection:
inherited_members: true
<!-- prettier-ignore-end -->
7 changes: 7 additions & 0 deletions docs/api/backends/sqlalchemy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SQLAlchemy Backend Base Classes

<!-- prettier-ignore-start -->
::: ibis.backends.base.sql.alchemy.BaseAlchemyBackend
selection:
inherited_members: true
<!-- prettier-ignore-end -->
3 changes: 3 additions & 0 deletions docs/api/config.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Configuration Options

::: ibis.config
11 changes: 11 additions & 0 deletions docs/api/datatypes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Data Types

This module contains classes for handling the different storage types that
occur in databases.

<!-- prettier-ignore-start -->
All data type constructors take a `nullable: bool` parameter whose default
value is [`True`][True].

::: ibis.expr.datatypes
<!-- prettier-ignore-end -->
14 changes: 14 additions & 0 deletions docs/api/expressions/collections.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Complex Type Expressions

These APIs are available on complex data types such as arrays, maps, and
structs.

::: ibis.expr.types.arrays
::: ibis.expr.types.maps

<!-- prettier-ignore-start -->
::: ibis.expr.types.structs
selection:
filters:
- "!^Destruct.*"
<!-- prettier-ignore-end -->
11 changes: 11 additions & 0 deletions docs/api/expressions/generic.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Generic Expression APIs

These expressions are available on scalars and columns of any element type.

<!-- prettier-ignore-start -->
::: ibis.expr.types.generic
selection:
filters:
- "!^literal"
- "!^null"
<!-- prettier-ignore-end -->
5 changes: 5 additions & 0 deletions docs/api/expressions/geospatial.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Geospatial Expressions

Ibis supports the following geospatial expression APIs:

::: ibis.expr.types.geospatial
5 changes: 5 additions & 0 deletions docs/api/expressions/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Base Expression Types

These APIs are shared by both table and column expressions.

::: ibis.expr.types.core.Expr
6 changes: 6 additions & 0 deletions docs/api/expressions/numeric.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Numeric Expressions

These APIs are available on numeric and boolean expressions.

::: ibis.expr.types.numeric
::: ibis.expr.types.logical
5 changes: 5 additions & 0 deletions docs/api/expressions/strings.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# String Expressions

All string operations are valid for both scalars and columns.

::: ibis.expr.types.strings
6 changes: 6 additions & 0 deletions docs/api/expressions/tables.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Table Expressions

Table expressions form the basis for most Ibis expressions.

::: ibis.expr.types.relations
::: ibis.expr.types.groupby
5 changes: 5 additions & 0 deletions docs/api/expressions/timestamps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Temporal Expression APIs

All timestamp operations are valid for both scalars and columns.

::: ibis.expr.types.temporal
36 changes: 36 additions & 0 deletions docs/api/expressions/top_level.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Top-level APIs

These methods and objects are available directly in the `ibis` module.

## `NA`

`NA` is the null scalar.

::: ibis.array
::: ibis.case
::: ibis.coalesce
::: ibis.cumulative_window
::: ibis.date
::: ibis.desc
::: ibis.greatest
::: ibis.ifelse
::: ibis.interval
::: ibis.least
::: ibis.literal
::: ibis.map
::: ibis.negate
::: ibis.now
::: ibis.null
::: ibis.param
::: ibis.random
::: ibis.range_window
::: ibis.row_number
::: ibis.schema
::: ibis.struct
::: ibis.table
::: ibis.time
::: ibis.timestamp
::: ibis.trailing_range_window
::: ibis.trailing_window
::: ibis.where
::: ibis.window
8 changes: 8 additions & 0 deletions docs/backends/ClickHouse.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
backend_name: ClickHouse
backend_url: https://clickhouse.yandex/
backend_module: clickhouse
backend_param_style: connection parameters
---

{% include 'backends/template.md' %}
9 changes: 9 additions & 0 deletions docs/backends/Dask.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
backend_name: Dask
backend_url: https://dask.org
backend_module: dask
backend_param_style: a dictionary of paths
is_experimental: true
---

{% include 'backends/template.md' %}
10 changes: 10 additions & 0 deletions docs/backends/Datafusion.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
backend_name: Datafusion
backend_url: https://arrow.apache.org/datafusion/
backend_module: datafusion
backend_param_style: a dictionary of paths
is_experimental: true
version_added: "2.1"
---

{% include 'backends/template.md' %}
10 changes: 10 additions & 0 deletions docs/backends/DuckDB.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
backend_name: DuckDB
backend_url: https://duckdb.org/
backend_module: duckdb
backend_param_style: a path to a DuckDB database
backend_connection_example: ibis.duckdb.connect("path/to/my.duckdb")
development_only: false
---

{% include 'backends/template.md' %}
1,386 changes: 1,386 additions & 0 deletions docs/backends/Impala.md

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions docs/backends/MySQL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
backend_name: MySQL
backend_url: https://www.mysql.com/
backend_module: mysql
backend_param_style: a SQLAlchemy-style URI
---

{% include 'backends/template.md' %}
138 changes: 138 additions & 0 deletions docs/backends/Pandas.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
---
backend_name: Pandas
backend_url: https://pandas.pydata.org/
backend_module: pandas
intro: Ibis's pandas backend is available in core Ibis.
backend_param_style: a dictionary of paths
do_connect_base: BasePandasBackend
is_core: true
---

{% include 'backends/template.md' %}

## User Defined functions (UDF)

Ibis supports defining three kinds of user-defined functions for operations on
expressions targeting the pandas backend: **element-wise**, **reduction**, and
**analytic**.

### Elementwise Functions

An **element-wise** function is a function that takes N rows as input and
produces N rows of output. `log`, `exp`, and `floor` are examples of
element-wise functions.

Here's how to define an element-wise function:

```python
import ibis.expr.datatypes as dt
from ibis.backends.pandas import udf

@udf.elementwise(input_type=[dt.int64], output_type=dt.double)
def add_one(x):
return x + 1.0
```

### Reduction Functions

A **reduction** is a function that takes N rows as input and produces 1 row
as output. `sum`, `mean` and `count` are examples of reductions. In
the context of a `GROUP BY`, reductions produce 1 row of output _per
group_.

Here's how to define a reduction function:

```python
import ibis.expr.datatypes as dt
from ibis.backends.pandas import udf

@udf.reduction(input_type=[dt.double], output_type=dt.double)
def double_mean(series):
return 2 * series.mean()
```

### Analytic Functions

An **analytic** function is like an **element-wise** function in that it takes
N rows as input and produces N rows of output. The key difference is that
analytic functions can be applied _per group_ using window functions. Z-score
is an example of an analytic function.

Here's how to define an analytic function:

```python
import ibis.expr.datatypes as dt
from ibis.backends.pandas import udf

@udf.analytic(input_type=[dt.double], output_type=dt.double)
def zscore(series):
return (series - series.mean()) / series.std()
```

### Details of Pandas UDFs

- Element-wise functions provide support
for applying your UDF to any combination of scalar values and columns.
- Reductions provide support for
whole column aggregations, grouped aggregations, and application of your
function over a window.
- Analytic functions work in both grouped and non-grouped
settings.
- The objects you receive as input arguments are either `pandas.Series` or
Python/NumPy scalars.

!!! warning "Keyword arguments must be given a default"

Any keyword arguments must be given a default value or the function **will
not work**.

A common Python convention is to set the default value to `None` and
handle setting it to something not `None` in the body of the function.

Using `add_one` from above as an example, the following call will receive a
`pandas.Series` for the `x` argument:

```python
import ibis
import pandas as pd
df = pd.DataFrame({'a': [1, 2, 3]})
con = ibis.pandas.connect({'df': df})
t = con.table('df')
expr = add_one(t.a)
expr
```

And this will receive the `int` 1:

```python
expr = add_one(1)
expr
```

Since the pandas backend passes around `**kwargs` you can accept `**kwargs`
in your function:

```python
import ibis.expr.datatypes as dt
from ibis.backends.pandas import udf

@udf.elementwise([dt.int64], dt.double)
def add_two(x, **kwargs): # do stuff with kwargs
return x + 2.0
```

Or you can leave them out as we did in the example above. You can also
optionally accept specific keyword arguments.

For example:

```python
import ibis.expr.datatypes as dt
from ibis.backends.pandas import udf

@udf.elementwise([dt.int64], dt.double)
def add_two_with_none(x, y=None):
if y is None:
y = 2.0
return x + y
```
8 changes: 8 additions & 0 deletions docs/backends/PostgreSQL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
backend_name: PostgreSQL
backend_url: https://www.postgresql.org/
backend_module: postgres
backend_param_style: a SQLAlchemy-style URI
---

{% include 'backends/template.md' %}
8 changes: 8 additions & 0 deletions docs/backends/PySpark.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
backend_name: PySpark
backend_url: https://spark.apache.org/docs/latest/api/python/
backend_module: pyspark
backend_param_style: PySpark things
---

{% include 'backends/template.md' %}
42 changes: 42 additions & 0 deletions docs/backends/SQLite.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
backend_name: SQLite
backend_url: https://www.sqlite.org/
backend_module: sqlite
backend_param_style: a path to a SQLite database
exclude_backend_api: true
---

{% include 'backends/template.md' %}

## Backend API

<!-- prettier-ignore-start -->
::: ibis.backends.sqlite.Backend
rendering:
heading_level: 3
selection:
inherited_members: true
members:
- add_operation
- attach
- compile
- connect
- create_database
- create_table
- create_view
- database
- drop_table
- drop_view
- execute
- exists_database
- exists_table
- explain
- insert
- list_databases
- list_tables
- load_data
- raw_sql
- schema
- table
- verify
<!-- prettier-ignore-end -->
36 changes: 36 additions & 0 deletions docs/backends/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Backends

## String Generating Backends

The first category of backends translates Ibis expressions into string queries.

The compiler turns each expression into a string query and passes that query to the
database through a driver API for execution.

- [Apache Impala](Impala.md)
- [ClickHouse](ClickHouse.md)
- [Google BigQuery](https://github.com/ibis-project/ibis-bigquery/)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)

## Expression Generating Backends

The next category of backends translates Ibis expressions into another
system's expressions, for example, SQLAlchemy.

Instead of generating strings for each expression these backends produce
another kind of expression and typically have high-level APIs for execution.

- [Dask](Dask.md)
- [Datafusion](Datafusion.md)
- [MySQL](MySQL.md)
- [PostgreSQL](PostgreSQL.md)
- [PySpark](PySpark.md)
- [SQLite](SQLite.md)

## Direct Execution Backends

The pandas backend is the only direct execution backend. A full description
of the implementation can be found in the module docstring of the pandas
backend located in `ibis/backends/pandas/core.py`.

- [Pandas](Pandas.md)
10 changes: 10 additions & 0 deletions docs/backends/support_matrix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
hide:
- toc
---

# Operation Support Matrix

Backends are shown in descending order of the number of supported operations.

{{ read_csv("docs/backends/support_matrix.csv") }}
75 changes: 75 additions & 0 deletions docs/backends/template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# [{{ backend_name }}]({{ backend_url }})

{% if is_experimental %}
!!! experimental "{% if version_added %}New in v{{ version_added }}{% else %}Experimental{% endif %}"

The {{ backend_name }} backend is experimental and is subject to backwards incompatible changes.

{% endif %}

{% if intro %}{{ intro }}{% endif %}

{% if not development_only %}

## Install

Install ibis and dependencies for the {{ backend_name }} backend:

=== "pip"

```sh
pip install 'ibis-framework{% if not is_core %}[{{ backend_module }}]{% endif %}'
```

{% for mgr in ["conda", "mamba"] %}
=== "{{ mgr }}"

```sh
{{ mgr }} install -c conda-forge ibis-{% if is_core %}framework{% else %}{{ backend_module }}{% endif %}
```

{% endfor %}

{% else %}
!!! info "The {{ backend_name }} backend isn't released yet!"

[Set up a development environment](../contribute/01_environment.md) to use this backend.

{% endif %}

## Connect

### API

Create a client by passing in {{ backend_param_style }} to `ibis.{{ backend_module }}.connect`.

<!-- prettier-ignore-start -->
See [`ibis.backends.{{ backend_module }}.Backend.do_connect`][ibis.backends.{{ backend_module }}.Backend.do_connect]
for connection parameter information.
<!-- prettier-ignore-end -->

<!-- prettier-ignore-start -->
!!! info "`ibis.{{ backend_module }}.connect` is a thin wrapper around [`ibis.backends.{{ backend_module }}.Backend.do_connect`][ibis.backends.{{ backend_module }}.Backend.do_connect]."
<!-- prettier-ignore-end -->

### Connection Parameters

<!-- prettier-ignore-start -->
::: ibis.backends.{{ backend_module }}.Backend.do_connect
rendering:
heading_level: 4
<!-- prettier-ignore-end -->

{% if not exclude_backend_api %}

## Backend API

<!-- prettier-ignore-start -->
::: ibis.backends.{{ backend_module }}.Backend
rendering:
heading_level: 3
selection:
inherited_members: true
<!-- prettier-ignore-end -->

{% endif %}
108 changes: 108 additions & 0 deletions docs/blog/Ibis-version-3.0.0-release.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Ibis v3.0.0

#### by: Marlene Mhangami

The latest version of Ibis, version 3.0.0, has just been released! This post highlights some of the new features, breaking changes, and performance improvements that come with the new release. 3.0.0 is a major release and includes more changes than those listed in this post. A full list of the changes can be found in the project release notes [here](https://ibis-project.org/docs/dev/release_notes/).

## New Features

Aligned to the roadmap and in response to the community’s requests, Ibis 3.0.0 introduces many new features and functionality.

1. Now query an Ibis table using inline SQL
2. _NEW_ DuckDB backend
3. Explore the _NEW_ backend support matrix tool
4. Improved support for arrays and tuples in ClickHouse
5. Suffixes now supported in join API expressions
6. APIs for creating timestamps and dates from component fields
7. Pretty printing in IPython notebooks

Refer to the sections below for more detail on each new feature.

### Inline SQL

The most exciting feature of this release is inline SQL! Many data scientists or developers may be familiar with both Python and SQL. However, there may be some queries or transformations that they feel more comfortable doing in SQL instead of Python. In the updated version of Ibis, users can query an Ibis table using SQL! The new `.sql` method allows users to mix SQL strings with Ibis expressions as well as query Ibis table expressions in SQL strings.

This functionality currently works for the following backends:

1. PostgreSQL
2. DuckDB
3. PySpark
4. MySQL

If you're interested in adding .sql support for other backends please [open an issue](https://github.com/ibis-project/ibis/issues?page=2&q=is%3Aissue+is%3Aclosed+milestone%3A3.0.0).

### DuckDB Backend

Ibis now supports DuckDB as a backend. DuckDB is a high-performance SQL OLAP database management system. It is designed to be fast, reliable and easy to use and can be embedded. Many Ibis use cases start from getting tables from a single-node backend so directly supporting DuckDB offers a lot of value. As mentioned earlier, the DuckDB backend allows for the new .sql method on tables for mixing sql and Ibis expressions.

### Backend Support Matrix

As the number of backends Ibis supports grows, it can be challenging for users to decide which one best fits their needs. One way to make a more informed decision is for users to find the backend that supports the operations they intend to use. The 3.0.0 release comes with a backend support matrix that allows users to do just that. A screenshot of part of the matrix can be seen below and the full version can be found [here](https://ibis-project.org/docs/dev/backends/support_matrix/).

In addition to this, users can now call `ibis.${backend}.has_operation` to find out if a specific operation is supported by a backend.

![backend support matrix](matrix.png)

### Support of arrays and tuples for ClickHouse

The 3.0.0 release includes a slew of important improvements for the ClickHouse backend. Most prominently ibis now supports ClickHouse arrays and tuples.
Some of the related operations that have been implemented are:

- ArrayIndex
- ArrayConcat
- ArrayRepeat
- ArraySlice

Other operations now supported for the ClickHouse backend are string concat, string slicing, table union, trim, pad, string predicates (LIKE and ILIKE), and all remaining joins.

### Suffixes now supported in join API expressions

In previous versions Ibis' join API did not accept suffixes as a parameter, leaving backends to either use some default value or raise an error at execution time when column names overlapped. In 3.0.0 suffixes are now directly supported in the join API itself. Along with the removal of materialize, ibis now automatically adds a default suffix to any overlapping column names.

### Creating timestamp from component fields

It is now possible to create dates directly from component fields using the new `ibis.date(y, m, d)` function. A user can pass in a year, month, and day, and the result is a date expression. That is, we can assert, for example, that `ibis.date(2022, 2, 4).type() == dt.date`.

### Pretty print tables in ipython notebooks

For users that use jupyter notebooks, `repr_html` has been added for expressions to enable pretty printing tables in the notebook. This is currently only available for interactive mode (currently delegating to pandas implementation) and should help notebooks become more readable. An example of what this looks like can be seen below.

![pretty print repr](repr.png)

## Breaking Changes

3.0.0 is a major release and according to the project's use of semantic versioning, breaking changes are on the table. The full list of these changes can be found [here](https://ibis-project.org/docs/dev/release_notes/).

1. Python 3.8 is now the minimum supported version
2. Removal of `.materialize()`

Refer to the sections below for more detail on these changes.

### The minimum supported Python version is now Python 3.8

Ibis currently follows [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html), a community policy standard that recommends Python and Numpy versions to support. NEP 29 suggests that all projects across the Scientific Python ecosystem adopt a common “time window-based” policy for support of Python and NumPy versions. Standardizing a recommendation for project support of minimum Python and NumPy versions will improve downstream project planning. As part of the 3.0.0 release, support for Python 3.7 has been dropped and the project has now adopted support for version 3.8 and higher.

### Removal of .materialize()

This release sees the removal of the `.materialize()` method from TableExpr. In the past, the materialize method has caused a lot of confusion. Doing simple things like `t.join(s, t.foo == s.foo).select(["unambiguous_column"])` raised an exception because of it. It turns out that .materialize() isn't necessary and therefore has been removed. This is a breaking change for some code that uses materialize. The materialize method still exists, but is now a pass-through and triggers a warning.

There are also some breaking changes introduced here in the case of overlapping column names. If there are any overlapping column names, a suffix will be attached to both the left and right tables. So, in the case of `s.asof_join(t, "time")` the resulting schema will have both a `time_x` and a `time_y` column.

## Performance Improvements

The following changes to the Ibis codebase have resulted in performance improvements.

1. Speeding up `__str__` and `__hash__` for datatypes
2. Creating a fast path for simple column selection (pandas/dask backends)
3. Global equality cache
4. Removing full tree repr from rule validator error message
5. Speed up attribute access
6. Using assign instead of concat in projections when possible (pandas/dask backends)

Additionally, all TPC-H suite queries can be represented in Ibis. All queries are ready-to-run, using the default substitution parameters as specified by the TPC-H spec. Queries have been added [here](https://github.com/ibis-project/tpc-queries).

## Conclusion

In summary, the 3.0.0 release includes a number of new features including the ability to query an Ibis table using inline SQL, a DuckDB backend, a backend support matrix tool, support for arrays and tuples in ClickHouse, suffixes in joins, timestamps from component fields, and prettier tables in IPython. Some breaking changes to take note of are the removal of `.materialize()` and the switch to Python 3.8 as the minimum supported version. A wide range of changes to the code has also led to significant speedups in 3.0.0.

Ibis is a community-led, open source project. If you’d like to contribute to the project, check out the contribution guide [here](https://ibis-project.org/docs/dev/contribute/01_environment/). If you run into a problem and would like to submit an issue, you can do so through Ibis’ [GitHub repository](https://github.com/ibis-project/ibis). Finally, Ibis relies on community support to grow and to become successful! You can help promote Ibis by following and sharing the project on [Twitter](https://twitter.com/IbisData), [starring the repo](https://github.com/ibis-project/ibis) or [contributing](https://ibis-project.org/docs/dev/) to the code. Ibis continues to improve with every release. Keep an eye on the blog for updates on the next one!
Binary file added docs/blog/matrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/blog/repr.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 3 additions & 3 deletions docs/web/community/coc.md → docs/community/coc.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# Code of Conduct

{{ config.project_name | title }} is governed by the
{{ config.extra.project_name | title }} is governed by the
[NumFOCUS code of conduct](https://numfocus.org/code-of-conduct):

!!! quote

Be kind to others. Do not insult or put down others. Behave professionally.
Remember that harassment and sexist, racist, or exclusionary jokes are not
appropriate for {{ config.project_name | upper }}.
appropriate for {{ config.extra.project_name | upper }}.

All communication should be appropriate for a professional audience including
people of many different backgrounds. Sexual language and imagery is not
appropriate.

{{ config.project_name | title }} is dedicated to providing a harassment-free
{{ config.extra.project_name | title }} is dedicated to providing a harassment-free
community for everyone, regardless of gender, sexual orientation, gender
identity, and expression, disability, physical appearance, body size, race,
or religion. We do not tolerate harassment of community members in any form.
Expand Down
75 changes: 58 additions & 17 deletions docs/web/community/ecosystem.md → docs/community/ecosystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@

## [pandas](https://github.com/pandas-dev/pandas)

[pandas](https://pandas.pydata.org) is a Python package that provides fast,
flexible, and expressive data structures designed to make working with "relational" or
"labeled" data both easy and intuitive. It aims to be the fundamental high-level
building block for doing practical, real world data analysis in Python. Additionally,
it has the broader goal of becoming the most powerful and flexible open source data
analysis / manipulation tool available in any language. It is already well on its way
[pandas](https://pandas.pydata.org) is a Python package that provides fast,
flexible, and expressive data structures designed to make working with "relational" or
"labeled" data both easy and intuitive. It aims to be the fundamental high-level
building block for doing practical, real world data analysis in Python. Additionally,
it has the broader goal of becoming the most powerful and flexible open source data
analysis / manipulation tool available in any language. It is already well on its way
towards this goal.

## [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy)

[SQLAlchemy](https://www.sqlalchemy.org/) is the Python SQL toolkit and
Object Relational Mapper that gives application developers the full power and
flexibility of SQL. SQLAlchemy provides a full suite of well known enterprise-level
persistence patterns, designed for efficient and high-performing database access,
[SQLAlchemy](https://www.sqlalchemy.org/) is the Python SQL toolkit and
Object Relational Mapper that gives application developers the full power and
flexibility of SQL. SQLAlchemy provides a full suite of well known enterprise-level
persistence patterns, designed for efficient and high-performing database access,
adapted into a simple and Pythonic domain language.

## [sql_to_ibis](https://github.com/zbrookle/sql_to_ibis)

[sql_to_ibis](https://github.com/zbrookle/sql_to_ibis) is a Python package that
translates SQL syntax into ibis expressions. This allows users to use one unified SQL
[sql_to_ibis](https://github.com/zbrookle/sql_to_ibis) is a Python package that
translates SQL syntax into ibis expressions. This allows users to use one unified SQL
dialect to target many different backends, even those that don't traditionally
support SQL.
support SQL.

A good use case would be ease of migration between databases or backends. Suppose you
were moving from SQLite to MySQL or from PostgreSQL to BigQuery. These
frameworks all have very subtle differences in SQL dialects, but with sql_to_ibis,
were moving from SQLite to MySQL or from PostgreSQL to BigQuery. These
frameworks all have very subtle differences in SQL dialects, but with sql_to_ibis,
these differences are automatically translated in Ibis.

Another good use case is pandas, which has no SQL support at all for querying a
dataframe. With sql_to_ibis this is made possible.

Expand All @@ -49,10 +49,51 @@ sql_to_ibis.query(
"select column1, cast(column2 as integer) + 1 as my_col2 from my_table"
).execute()
```

This would output a dataframe that looks like:

| column1 | my_col2 |
|---------|---------|
| ------- | ------- |
| 1 | 5 |
| 2 | 6 |
| 3 | 7 |

## Ibis on [Fugue](https://github.com/fugue-project/fugue)

[Fugue](https://github.com/fugue-project/fugue) is a low-code abstraction layer that lets users express
their workflows in SQL or Python end-to-end. The design philosophies of Fugue and Ibis are closely aligned, and
Fugue sits at a higher level of abstraction than Ibis, so the integration is intuitive. Through Fugue, Ibis
is also able to run on all the backends Fugue supports: Pandas, Spark, Dask and DuckDB. The value Fugue
adds to Ibis is the seamless integration of SQL semantics and scientific computing plus non-standard SQL
operations. A detailed tutorial can be found
[here](https://fugue-tutorials.readthedocs.io/tutorials/integrations/ibis.html).

Here is an example of a distributed inference pipeline:

```python
import pandas as pd
import fugue_ibis
from fugue import FugueWorkflow

# schema: *,pred:double
def predict(df: pd.DataFrame) -> pd.DataFrame:
model = load_model("somefile")
return df.assign(pred=model.predict(df))

def distributed_predict(file1, df2, dest):
dag = FugueWorkflow()
a = dag.load(file1).as_ibis()
b = dag.df(df2).as_ibis()
# ibis operations (you can do more here)
joined = a.inner_join(b, a.key==b.key)[a, b.f2]
filtered = joined[joined.f1>0]
# back to fugue, apply predict distributedly and save
filtered.as_fugue().transform(predict).save(dest)
return dag

# test locally
distributed_predict(small_file, pandas_df2, temp_dest).run()

# run on spark when you have a SparkSession: session
distributed_predict(large_file, spark_df2, dest).run(session)
```
142 changes: 142 additions & 0 deletions docs/contribute/01_environment.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
---
hide:
- toc
---

# Setting Up a Development Environment

## Required Dependencies

- [`git`](https://git-scm.com/)

!!! note "Python 3.10 is supported on a best-effort basis"

As of 2022-02-17 there is support for Python 3.10 when using `nix` for development.

`conda-forge` is still in [the process of migrating packages to Python
3.10](https://conda-forge.org/status/#python310).

=== "Nix"

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :----------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS** | {{ config.extra.support_levels.bug.icon }}[^2] | {{ config.extra.support_levels.bug.icon }} | {{ config.extra.support_levels.bug.icon }} |
| **Windows** | {{ config.extra.support_levels.unsupported.icon }}[^3] | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} |

1. [Install `nix`](https://nixos.org/download.html)
1. Install `gh`:

=== "`nix-shell`"

```sh
nix-shell -p gh
```

=== "`nix-env`"

```sh
nix-env -iA gh
```

1. Fork and clone the ibis repository:

```sh
gh repo fork --clone --remote ibis-project/ibis
```

1. Set up the public `ibis` Cachix cache to pull pre-built dependencies:

```sh
nix-shell -p cachix --run 'cachix use ibis'
```

1. Run `nix-shell` in the checkout directory:

```sh
cd ibis
nix-shell
```

This may take a while because artifacts are downloaded from the cache.

=== "Conda"

!!! info "Some optional dependencies for Windows are not available through `conda`/`mamba`"

1. `python-duckdb` and `duckdb-engine`. Required for the DuckDB backend.
1. `clickhouse-cityhash`. Required for compression support in the ClickHouse backend.

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :--------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }}[^2] |
| **macOS** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} |

{% set managers = {"conda": {"name": "Miniconda", "url": "https://docs.conda.io/en/latest/miniconda.html"}, "mamba": {"name": "Mamba", "url": "https://github.com/mamba-org/mamba"}} %}
{% for manager, params in managers.items() %}

=== "`{{ manager }}`"

1. Install [{{ params["name"] }}]({{ params["url"] }})

1. Install `gh`

```sh
{{ manager }} install -c conda-forge gh
```

1. Fork and clone the ibis repository:

```sh
gh repo fork --clone --remote ibis-project/ibis
```

1. Create a Conda environment from a lock file in the repo:

{% set platforms = {"Linux": "linux", "MacOS": "osx", "Windows": "win"} %}
{% for os, platform in platforms.items() %}
=== "{{ os }}"

```sh
cd ibis
{{ manager }} create -n ibis-dev --file=conda-lock/{{ platform }}-64-3.9.lock
```
{% endfor %}

1. Activate the environment

```sh
{{ manager }} activate ibis-dev
```

1. Install your local copy of `ibis` into the Conda environment.

```sh
cd ibis
pip install -e .
```

1. If you want to run the backend test suite you'll need to install `docker-compose`:

```sh
{{ manager }} install docker-compose -c conda-forge
```

{% endfor %}

Once you've set up an environment, try building the documentation:

```sh
mkdocs serve
```

{% for data in config.extra.support_levels.values() %}
[^{{ loop.index }}]: {{ data.description }}
{% endfor %}
208 changes: 208 additions & 0 deletions docs/contribute/02_workflow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# Working on the Codebase

## Find an issue to work on

All contributions are welcome! Code, docs, and constructive feedback are all
great contributions to the project.

If you don't have a particular issue in mind head over to the GitHub issue
tracker for Ibis and look for open issues with the label [`good first issue`](https://github.com/ibis-project/ibis/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22).

Feel free to help with other issues that aren't labeled as such, but they may be more challenging.

Once you find an issue you want to work on, write a comment with the text
`/take` on the issue. GitHub will then assign the issue to you.

This lets people know you're working on the issue. If you find an issue that
has an assignee, comment on the issue and ask whether the assignee is still
working on the issue.

## Make a branch

The first thing you want to do is make a branch. Let's call it `useful-bugfix`:

```sh
git checkout -b useful-bugfix
```

## Make the desired change

Let's say you've made a change to `ibis/expr/types.py` to fix a bug reported in issue #424242 (not actually an issue).

Running `git status` should give output similar to this:

```sh
On branch useful-bugfix
Your branch is up to date with 'origin/useful-bugfix'.

Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git restore <file>..." to discard changes in working directory)
modified: ibis/expr/types.py

no changes added to commit (use "git add" and/or "git commit -a")
```

## Run the test suite

Next, you'll want to run a subset of the test suite.

### Required Dependencies

!!! warning "You need a development environment before running tests"

Make sure you've set up a [development environment](01_environment.md)
before proceeding

Run the test suite:

```sh
pytest -m core
```

!!! tip "Each backend has a `pytest` marker"

You can run the tests for a specific backend using

```sh
pytest -m $the_backend_name
```

For example, to run SQLite tests:

```sh
pytest -m sqlite
```

## Commit your changes

### Required Dependencies

- `git`
- [`cz`](https://commitizen-tools.github.io/commitizen/)

!!! tip

`cz` is already installed in your environment if you followed the [setup
instructions](01_environment.md)

Next, you'll want to commit your changes.

Ibis's commit message structure follows the [`semantic-release`
conventions](https://github.com/semantic-release/semantic-release).

!!! warning

It isn't necessary to use `cz commit` to make commits, but it is necessary
to follow the instructions outlined in [this
table](https://github.com/semantic-release/semantic-release#commit-message-format).

Stage your changes and run `cz commit`:

```sh
git add .
cz commit
```

You should see a series of prompts about actions to take next:

1. Select the type of change you're committing. In this case, we're committing a bug fix, so we'll select fix:

```console
? Select the type of change you are committing (Use arrow keys)
» fix: A bug fix. Correlates with PATCH in SemVer
feat: A new feature. Correlates with MINOR in SemVer
docs: Documentation only changes
style: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc)
refactor: A code change that neither fixes a bug nor adds a feature
perf: A code change that improves performance
test: Adding missing or correcting existing tests
build: Changes that affect the build system or external dependencies (example scopes: pip, docker, npm)
ci: Changes to our CI configuration files and scripts (example scopes: GitLabCI)
```

Generally you don't need to think too hard about what category to select, but note that:

- `feat` will cause a minor version bump
- `fix` will cause a patch version bump
- everything else will **not** cause a version bump, **unless it's a breaking
change** (continue reading these instructions for more info on that)

2. Next, you're asked what the scope of this change is:

```console
? What is the scope of this change? (class or file name): (press [enter] to skip)
```

This is optional, but if there's a clear component or single file that is
modified you should enter it here. In our case, let's assume the fix addressed a type
inference problem, so we'd type in `type-inference` at this prompt.

3. You'll then be asked to type in a short description of the change which will be the commit message title:

```console
? Write a short and imperative summary of the code changes: (lower case and no period)
fix a type inference issue where floats were incorrectly cast to ints
```

Let's say there was a problem with spurious casting of float to integers, so
we type in the message above. That number on the left (here `(69)`) is the
length of description you've typed in.

4. Next you'll be asked for a longer description, which is entirely optional
**unless the change is a breaking change**, or you feel like writing a bit of prose:

```console
? Provide additional contextual information about the code changes: (press [enter] to skip)
A bug was triggered by some incorrect code that caused floats to be incorrectly cast to integers.
```

For non breaking changes, this isn't strictly necessary but it can be very
helpful when a change is large, obscure, or complex. For this example let's just reiterate
most of what the commit title says.

5. Next you're asked about breaking changes:

```console
? Is this a BREAKING CHANGE? Correlates with MAJOR in SemVer (y/N)
```

If you answer `y`, then you'll get an additional prompt asking you to
describe the breaking changes. This description will ultimately make its way
into the user-facing release notes. If there aren't any breaking changes, press enter.
Let's say this bug fix does **not** introduce a breaking change.

6. Finally, you're asked whether this change affects any open issues (ignore
the bit about breaking changes) and if yes then to reference them:

```console
? Footer. Information about Breaking Changes and reference issues that this commit closes: (press [enter] to skip)
fixes #424242
```

Here we typed `fixes #424242` to indicate that we fixed issue #424242.

Whew! Seems like a lot, but it's rather quick once you get used to it. After
that you should have a commit that looks roughly like this, ready to be automatically rolled into the next release:

```console
commit 4049adbd66b0df48e37ca105da0b9139101a1318 (HEAD -> useful-bugfix)
Author: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue Dec 21 10:30:50 2021 -0500

fix(type-inference): fix a type inference issue where floats were incorrectly cast to ints

A bug was triggered by some incorrect code that caused floats to be incorrectly cast to integers.

fixes #424242
```

### Push your changes

Now that you've got a commit, you're ready to push your changes and make a pull request!

```sh
gh pr create
```

Follow the prompts, and `gh` will print a link to your PR upon successful submission.
33 changes: 33 additions & 0 deletions docs/contribute/03_style.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Style and Formatting

## Code Style

The following tools are run in both CI and `pre-commit` checks to ensure codebase hygiene:

| Tool | Purpose |
| ---------------------------------------------------------------------: | :-------------------------------------------------- |
| [`black`](https://github.com/psf/black) | Formatting Python code |
| [`isort`](https://github.com/PyCQA/isort) | Formatting and sorting `import` statements |
| [`absolufy-imports`](https://github.com/MarcoGorelli/absolufy-imports) | Automatically convert relative imports to absolute. |
| [`flake8`](https://flake8.pycqa.org/en/latest/) | Linting Python code |
| [`nix-linter`](https://github.com/Synthetica9/nix-linter) | Linting nix files |
| [`nixpkgs-fmt`](https://github.com/nix-community/nixpkgs-fmt) | Formatting nix files |
| [`shellcheck`](https://github.com/koalaman/shellcheck) | Linting shell scripts |
| [`shfmt`](https://github.com/mvdan/sh) | Formatting shell scripts |
| [`pyupgrade`](https://github.com/asottile/pyupgrade) | Ensuring the latest available Python syntax is used |

!!! tip

If you use `nix-shell`, all of these are set up for you and ready to use; you don't
need to install any of these tools.

We use [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) as our
standard format for docstrings.

## Commit philosophy

We aim to make our individual commits small and tightly focused on the feature
they are implementing or bug being fixed. If you find yourself making
functional changes to different areas of the codebase, we prefer you break up
your changes into separate Pull Requests. In general, a philosophy of one
GitHub issue per Pull Request is a good rule of thumb.
102 changes: 102 additions & 0 deletions docs/contribute/04_backend_tests.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Working with the Backend Test Suite

!!! danger "Before you start"

This section assumes you have a working [development environment](01_environment.md).

!!! info "You may be able to skip this section"

If you haven't made changes to the core of ibis (e.g., `ibis/expr`)
or any specific backends (`ibis/backends`) this material isn't necessary to
follow to make a pull request.

## Motivation

One of the primary challenges when developing against the ibis codebase is testing
backends that require non-trivial setup.

Moreover, many of the backends that ibis works with have very different
deployment models:

- **In-process** systems like SQLite
- **Client-server** systems like PostgreSQL and MySQL
- Systems that **run the gamut** of deployment models like ClickHouse
- Systems that run **on-premises**, like Impala

This section of the docs describes how to work with the backend test suite.

## Backend Testing with Compose

Here is the list of backends that can be tested using `docker-compose`.

| Backend | Docker Compose Services |
| ---------- | ----------------------- |
| ClickHouse | `clickhouse` |
| PostgreSQL | `postgres` |
| Impala | `impala`, `kudu` |
| MySQL | `mysql` |

### Testing a Compose Service

!!! check "Check your current directory"

Make sure you're inside of your clone of the ibis GitHub repository

Let's fire up a PostgreSQL server and run tests against it.

#### Start the `postgres` Service

Open a new shell and run

```sh
docker-compose up --build postgres
```

Test the connection in the original shell using

```sh
export PGPASSWORD=postgres
psql -t -A -h localhost -U postgres -d ibis_testing -c "select 'success'"
```

You should see this output:

```console
success
```

!!! warning "PostgreSQL doesn't start up instantly"

It takes a few seconds for postgres to start, so if the previous
command fails wait a few seconds and try again

Congrats, you now have a PostgreSQL server running and are ready to run tests!

#### Load Data

The backend needs to be populated with test data:

1. Download the data

```sh
python ci/datamgr.py download
```

2. In the original terminal, run

```sh
python ci/datamgr.py load postgres
```

You should see a bit of logging, and the command should complete shortly thereafter.

#### Run the test suite

You're now ready to run the test suite for the postgres backend:
```sh
pytest -m postgres
```
Please [file an issue](https://github.com/ibis-project/ibis/issues/new) if the
test suite fails for any reason.
99 changes: 99 additions & 0 deletions docs/contribute/05_maintainers_guide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Maintaining the Codebase

Ibis maintainers are expected to handle the following tasks as they arise:

- Reviewing and merging pull requests
- Triaging new issues

## Dependencies

A number of tasks that are typically associated with maintenance are partially or fully automated.

| Dependency Type | Management Tool |
| --------------------------: | :----------------------------------------------------------------------------------------------------------------- |
| Python library dependencies | [WhiteSource Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/) |
| GitHub Actions | [WhiteSource Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/) |
| Nix dependencies | [A GitHub Action](https://github.com/ibis-project/ibis/actions/workflows/update-deps.yml) run at a regular cadence |

Dependencies are managed using [`poetry`](https://python-poetry.org).

Occasionally you may need to lock poetry dependencies, which can be
done by running

```sh
poetry lock --no-update
```

### Automatic Dependency Updates

[WhiteSource
Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/)
will run at some cadence (outside of traditional business hours) and submit PRs
that update dependencies.

These upgrades use a conservative update strategy, which is currently to
increase the upper bound of a dependency's version range.

The PRs it generates will regenerate a number of other files so that in most
cases contributors do not have to remember to generate and commit these files.

### Adding or Changing Dependencies

1. Edit `pyproject.toml` as needed.
2. Run `poetry lock --no-update`
3. Regenerate `setup.py`:

!!! failure "Do not manually edit `setup.py`"

`setup.py` is [automatically
generated](https://github.com/ibis-project/ibis/blob/master/dev/poetry2setup.py)
from `pyproject.toml`

=== "Nix"

```sh
./dev/poetry2setup -o setup.py
```

=== "Without Nix"

Run the following command

```sh
PYTHONHASHSEED=42 python ./dev/poetry2setup.py -o setup.py
```

!!! question "Why do we need to set `PYTHONHASHSEED`?"

Dependencies' [`extras`](https://python-poetry.org/docs/pyproject/#extras) are stored
in-memory using a `frozenset`, the elements of which are arbitrarily ordered.

As of 2022-02-24 this is [fixed in the default
branch](https://github.com/python-poetry/poetry-core/pull/280) of
[`poetry-core`](https://github.com/python-poetry/poetry-core) but isn't yet released.

Updates of minor and patch versions of dependencies are handled automatically by
[`renovate`](https://github.com/renovatebot/renovate).

## Merging PRs

PRs can be merged using the [`gh` command line tool](https://github.com/cli/cli)
or with the GitHub web UI.

## Release

Ibis is released on [PyPI](https://pypi.org/project/ibis-framework/) and [Conda Forge](https://github.com/conda-forge/ibis-framework-feedstock).

=== "PyPI"

Releases to PyPI are handled automatically using [semantic
release](https://egghead.io/lessons/javascript-automating-releases-with-semantic-release).

To trigger a release use the [Release GitHub Action](https://github.com/ibis-project/ibis/actions/workflows/release.yml).

=== "`conda-forge`"

The conda-forge package is maintained as a [conda-forge feedstock](https://github.com/conda-forge/ibis-framework-feedstock).

After a release to PyPI, the conda-forge bot automatically updates the ibis
package.
21 changes: 21 additions & 0 deletions docs/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import ibis

# Open the example SQLite database and bind the two tables used below.
con = ibis.sqlite.connect("crunchbase.db")

c = con.table("companies")
i = con.table("investments")

# Grouping key: the investor name, with missing names bucketed together.
investor = ibis.coalesce(i.investor_name, "NO INVESTOR")

# Permalink of companies whose status is "ipo" or "acquired", NA otherwise;
# counted distinctly below to get the number of such companies per investor.
exited = c.status.isin(("ipo", "acquired")).ifelse(c.permalink, ibis.NA)

# Join companies to their investments, aggregate per investor, derive the
# acquisition/IPO rate, and sort descending on the key passed to `ibis.desc`.
expr = (
    c.left_join(i, c.permalink == i.company_permalink)
    .group_by(investor_name=investor)
    .aggregate(
        num_investments=c.permalink.nunique(),
        acq_ipos=exited.nunique(),
    )
    .mutate(acq_rate=lambda row: row.acq_ipos / row.num_investments)
    .sort_by(ibis.desc(2))
)
20 changes: 20 additions & 0 deletions docs/example.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Per-investor investment counts and acquisition/IPO rate.
SELECT
  *,
  -- Cast before dividing: both operands are integer counts, and dividing
  -- first would truncate the rate to a whole number.
  CAST(acq_ipos AS FLOAT) / num_investments AS acq_rate
FROM (
  SELECT
    COALESCE(i.investor_name, 'NO INVESTOR') AS investor_name,
    COUNT(DISTINCT c.permalink) AS num_investments,
    COUNT(
      DISTINCT
        CASE
          WHEN c.status IN ('ipo', 'acquired') THEN c.permalink
          ELSE NULL
        END
    ) AS acq_ipos
  FROM companies AS c
  LEFT JOIN investments AS i
    ON c.permalink = i.company_permalink
  GROUP BY 1
) AS t
-- Sort in the outermost query: ordering applied inside a derived table is
-- not guaranteed to carry over to the final result.
ORDER BY 2 DESC
2,912 changes: 2,912 additions & 0 deletions docs/ibis-for-sql-programmers.ipynb

Large diffs are not rendered by default.

215 changes: 215 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
---
hide:
- toc
---

# :ibis-logo: Ibis

## Expressive analytics in Python at any scale.

<script
src="https://asciinema.org/a/yp5Ww4XKyjJsUCXkEz5or9rPq.js"
data-autoplay="true"
data-preload="true"
data-loop="true"
data-i="4"
data-rows="20"
id="asciicast-yp5Ww4XKyjJsUCXkEz5or9rPq"
async>
</script>

## Installation

=== "pip"

```sh
pip install ibis-framework
```

{% for mgr in ["conda", "mamba"] %}
=== "{{ mgr }}"

```sh
{{ mgr }} install -c conda-forge ibis-framework
```

{% endfor %}

Try it out!

```sh
python -c 'import ibis; print(ibis.__version__)'
```

## Features

### SQL Coverage

#### Anything you can write in a `SELECT` statement you can write in Ibis.

=== "Group By"

##### SQL

```sql
SELECT f, sum(a + b) AS d
FROM t
GROUP BY f
```

##### Ibis

```python
t.group_by("f").aggregate(d=(t.a + t.b).sum())
```

=== "Join"

##### SQL

```sql
SELECT exp(t.a) AS d
FROM t
LEFT SEMI JOIN s
ON t.x = s.y
```

##### Ibis

```python
t.semi_join(s, t.x == s.y).select([lambda t: t.a.exp().name("d")])
```

=== "Window Functions"

##### SQL

```sql
SELECT *, avg(x) OVER (PARTITION BY y) as z
FROM t
```

##### Ibis

```python
t.group_by("y").mutate(z=t.x.avg())
```

!!! tip "Coming from SQL?"

Check out [Ibis for SQL Programmers](ibis-for-sql-programmers)

### Abstract Over SQL Dialects

#### No more rewrites when scaling up or down.

=== "SQLite"

```python
con = ibis.sqlite.connect("my_sqlite.db")
```

=== "PostgreSQL"

```python
con = ibis.postgres.connect(user="me", host="my_computer", port=9090)
```

=== "BigQuery"

```python
con = ibis.bigquery.connect(project_id="my_project_id", dataset_id="my_dataset_id")
```

```python
t = con.table("t")
t.group_by("y").mutate(z=t.x.avg())
```

### Ecosystem

#### Ibis builds on top of and works with existing Python tools.

```python
expr = t.semi_join(s, t.x == s.y).select([lambda t: t.a.exp().name("d")]).head(2)
df = expr.execute() # a pandas DataFrame!
```

## Example

Let's compute the number of citizens per square kilometer in Asia:

```python
>>> import ibis
>>> db = ibis.sqlite.connect("geography.db")
>>> countries = db.table("countries")
>>> asian_countries = countries.filter(countries.continent == "AS")
>>> density_in_asia = asian_countries.population.sum() / asian_countries.area_km2.sum()
>>> density_in_asia.execute()
130.7019141926602
```

!!! tip "Learn more!"

Learn more about Ibis in [our tutorial](tutorial/01-Introduction-to-Ibis).

## Comparison to other tools

=== "SQL"

!!! tip "Coming from SQL?"

Check out [Ibis for SQL Programmers](ibis-for-sql-programmers)!

Ibis gives you the benefit of a programming language. You don't need to
sacrifice maintainability to get to those insights!

=== "Ibis"

``` py title="docs/example.py" linenums="1"
--8<-- "docs/example.py"
```

=== "SQL"

``` sql title="docs/example.sql" linenums="1"
--8<-- "docs/example.sql"
```

=== "SQLAlchemy"

Ibis aims to be more concise and composable than
[SQLAlchemy](https://www.sqlalchemy.org/) when writing interactive
analytics code.

!!! success "Ibis :heart:'s SQLAlchemy"

Ibis generates SQLAlchemy expressions for some of our backends
including the [PostgreSQL](./backends/PostgreSQL.md) and
[SQLite](./backends/SQLite.md) backends!

=== "Ibis"

``` py title="docs/example.py" linenums="1"
--8<-- "docs/example.py"
```

=== "SQLAlchemy"

``` py title="docs/sqlalchemy_example.py" linenums="1"
--8<-- "docs/sqlalchemy_example.py"
```

<div class="download-button" markdown>
[:fontawesome-solid-cloud-arrow-down: Download the example data](https://storage.googleapis.com/ibis-testing-data/crunchbase.db){ .md-button .md-button--primary }
</div>

## What's Next?

!!! question "Need a specific backend?"

Take a look at the [backends](./backends/index.md) documentation!

!!! tip "Interested in contributing?"

Get started by [setting up a development environment](./contribute/01_environment.md)!
16 changes: 16 additions & 0 deletions docs/javascripts/mathjax.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Global MathJax configuration object, assigned on `window`.
window.MathJax = {
  options: {
    // NOTE(review): presumably restricts processing to elements carrying the
    // "arithmatex" class while ignoring everything else; confirm against the
    // MathJax options documentation.
    ignoreHtmlClass: ".*|",
    processHtmlClass: "arithmatex",
  },
  tex: {
    // Display math is delimited by \[ ... \], inline math by \( ... \).
    displayMath: [["\\[", "\\]"]],
    inlineMath: [["\\(", "\\)"]],
    processEnvironments: true,
    processEscapes: true,
  },
};

// Ask MathJax to typeset each time `document$` emits.
// NOTE(review): `document$` is not defined in this file; presumably it is the
// page-load observable provided by the documentation theme.
document$.subscribe(function () {
  MathJax.typesetPromise();
});
242 changes: 0 additions & 242 deletions docs/make.bat

This file was deleted.

82 changes: 0 additions & 82 deletions docs/mkdocs.yml

This file was deleted.

17 changes: 17 additions & 0 deletions docs/overrides/main.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% extends "base.html" %}

{# extrahead: adds a google-site-verification meta tag and the GoatCounter counting script. #}
{% block extrahead %}
<meta name="google-site-verification" content="IVqzkYiD5E35oD4kkVOcTYCTfqWKU1f6zOHCnLIPkUU">
<script data-goatcounter="https://ibis.goatcounter.com/count"
async src="https://gc.zgo.at/count.js"></script>
{% endblock %}

{# content: when the page defines nb_url, renders a "Download Notebook" link to it, then renders the parent template's content via super(). #}
{% block content %}
{% if page.nb_url %}
<a href="{{ page.nb_url }}" title="Download Notebook" class="md-content__button md-icon">
{% include ".icons/material/download.svg" %}
</a>
{% endif %}

{{ super() }}
{% endblock content %}
194 changes: 191 additions & 3 deletions docs/web/release_notes.md → docs/release_notes.md

Large diffs are not rendered by default.

Binary file removed docs/source/_static/favicon.ico
Binary file not shown.
146 changes: 0 additions & 146 deletions docs/source/_static/favicon.svg

This file was deleted.

135 changes: 0 additions & 135 deletions docs/source/_static/logo-wide.svg

This file was deleted.

15 changes: 0 additions & 15 deletions docs/source/_templates/layout.html

This file was deleted.

529 changes: 0 additions & 529 deletions docs/source/api.rst

This file was deleted.

51 changes: 0 additions & 51 deletions docs/source/backends/clickhouse.rst

This file was deleted.

4 changes: 0 additions & 4 deletions docs/source/backends/dask.rst

This file was deleted.

56 changes: 0 additions & 56 deletions docs/source/backends/datafusion.rst

This file was deleted.

1,414 changes: 0 additions & 1,414 deletions docs/source/backends/impala.rst

This file was deleted.

75 changes: 0 additions & 75 deletions docs/source/backends/index.rst

This file was deleted.

56 changes: 0 additions & 56 deletions docs/source/backends/mysql.rst

This file was deleted.

154 changes: 0 additions & 154 deletions docs/source/backends/pandas.rst

This file was deleted.

58 changes: 0 additions & 58 deletions docs/source/backends/postgres.rst

This file was deleted.

45 changes: 0 additions & 45 deletions docs/source/backends/pyspark.rst

This file was deleted.

52 changes: 0 additions & 52 deletions docs/source/backends/sqlite.rst

This file was deleted.

Loading