| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| version: "3.4" | ||
| services: | ||
| clickhouse: | ||
| image: yandex/clickhouse-server:22-alpine | ||
| ports: | ||
| - 8123:8123 | ||
| - 9000:9000 | ||
| networks: | ||
| - clickhouse | ||
| impala: | ||
| depends_on: | ||
| - impala-postgres | ||
| - kudu | ||
| environment: | ||
| PGPASSWORD: postgres | ||
| healthcheck: | ||
| interval: 30s | ||
| retries: 20 | ||
| test: | ||
| - CMD-SHELL | ||
| - nc -z 127.0.0.1 21050 && nc -z 127.0.0.1 50070 | ||
| timeout: 10s | ||
| hostname: localhost | ||
| image: ibisproject/impala:latest | ||
| ports: | ||
| - 9020:9020 | ||
| - 50070:50070 | ||
| - 50075:50075 | ||
| - 8020:8020 | ||
| - 8042:8042 | ||
| - 9083:9083 | ||
| - 21000:21000 | ||
| - 21050:21050 | ||
| - 25000:25000 | ||
| - 25010:25010 | ||
| - 25020:25020 | ||
| networks: | ||
| - impala | ||
| impala-postgres: | ||
| user: postgres | ||
| hostname: postgres | ||
| environment: | ||
| POSTGRES_PASSWORD: postgres | ||
| healthcheck: | ||
| interval: 10s | ||
| retries: 3 | ||
| test: | ||
| - CMD | ||
| - pg_isready | ||
| timeout: 5s | ||
| image: postgres:13.6-alpine | ||
| networks: | ||
| - impala | ||
| kudu: | ||
| cap_add: | ||
| - SYS_TIME | ||
| depends_on: | ||
| - kudu-tserver | ||
| environment: | ||
| KUDU_MASTER: "true" | ||
| image: ibisproject/kudu:latest | ||
| ports: | ||
| - 7051:7051 | ||
| - 8051:8051 | ||
| networks: | ||
| - impala | ||
| kudu-tserver: | ||
| cap_add: | ||
| - SYS_TIME | ||
| environment: | ||
| KUDU_MASTER: "false" | ||
| image: ibisproject/kudu:latest | ||
| ports: | ||
| - 7050:7050 | ||
| - 8050:8050 | ||
| networks: | ||
| - impala | ||
| mysql: | ||
| environment: | ||
| MYSQL_ALLOW_EMPTY_PASSWORD: "true" | ||
| MYSQL_DATABASE: ibis_testing | ||
| MYSQL_PASSWORD: ibis | ||
| MYSQL_USER: ibis | ||
| healthcheck: | ||
| interval: 10s | ||
| retries: 3 | ||
| test: | ||
| - CMD | ||
| - mysqladmin | ||
| - ping | ||
| timeout: 5s | ||
| image: mariadb:10.8 | ||
| ports: | ||
| - 3306:3306 | ||
| networks: | ||
| - mysql | ||
| postgres: | ||
| user: postgres | ||
| environment: | ||
| POSTGRES_PASSWORD: postgres | ||
| POSTGRES_DB: ibis_testing | ||
| POSTGRES_USER: postgres | ||
| build: ./docker/postgres | ||
| healthcheck: | ||
| interval: 10s | ||
| retries: 3 | ||
| test: | ||
| - CMD | ||
| - pg_isready | ||
| timeout: 5s | ||
| ports: | ||
| - 5432:5432 | ||
| networks: | ||
| - postgres | ||
|
|
||
| networks: | ||
| impala: | ||
| mysql: | ||
| clickhouse: | ||
| postgres: |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| FROM postgis/postgis:14-3.2-alpine | ||
| RUN apk add postgresql14-plpython3 postgresql14-jit |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| * [Home](index.md) | ||
| * Tutorial | ||
| * [Introduction to Ibis](tutorial/01-Introduction-to-Ibis.ipynb) | ||
| * [Aggregating and Joining](tutorial/02-Aggregates-Joins.ipynb) | ||
| * [Lazy Mode and Logging](tutorial/03-Expressions-Lazy-Mode-Logging.ipynb) | ||
| * [More Value Expressions](tutorial/04-More-Value-Expressions.ipynb) | ||
| * [Creating and Inserting External Data](tutorial/05-IO-Create-Insert-External-Data.ipynb) | ||
| * [Complex Filtering](tutorial/06-ComplexFiltering.ipynb) | ||
| * [Analytics Tools](tutorial/07-Analytics-Tools.ipynb) | ||
| * [Geospatial Analysis](tutorial/08-Geospatial-Analysis.ipynb) | ||
| * [Ibis for SQL Programmers](ibis-for-sql-programmers.ipynb) | ||
| * [User Guide](user_guide/) | ||
| * [Execution Backends](backends/) | ||
| * [Contribute](contribute/) | ||
| * Community | ||
| * [Blog](blog/) | ||
| * [About](about/) | ||
| * [Ask a question (StackOverflow)](https://stackoverflow.com/questions/tagged/ibis) | ||
| * [Chat (Gitter)](https://gitter.im/ibis-dev/Lobby) | ||
| * community/*.md | ||
| * [Release Notes](release_notes.md) | ||
| * API Reference | ||
| * [Expressions](api/expressions/index.md) | ||
| * [Top Level](api/expressions/top_level.md) | ||
| * [Tables](api/expressions/tables.md) | ||
| * [Generic Values](api/expressions/generic.md) | ||
| * [Numeric + Boolean](api/expressions/numeric.md) | ||
| * [Strings](api/expressions/strings.md) | ||
| * [Timestamps + Dates + Times](api/expressions/timestamps.md) | ||
| * [Collections](api/expressions/collections.md) | ||
| * [Geospatial](api/expressions/geospatial.md) | ||
| * [Data Types](api/datatypes.md) | ||
| * [Backend Interfaces](api/backends/) | ||
| * [Configuration](api/config.md) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| # Team | ||
|
|
||
| ## Contributors | ||
|
|
||
| {{ config.extra.project_name }} is developed and maintained by a [community of | ||
| volunteer contributors]({{ config.repo_url }}graphs/contributors). | ||
|
|
||
| {% for group in config.extra.team %} | ||
|
|
||
| ## {{ group.name }} | ||
|
|
||
| {% for person in group.members %} | ||
|
|
||
| - https://github.com/{{ person }} | ||
| {% endfor %} | ||
|
|
||
| {% endfor %} | ||
|
|
||
| {{ config.extra.project_name }} aims to be a welcoming, friendly, diverse and | ||
| inclusive community. Everybody is welcome, regardless of gender, sexual | ||
| orientation, gender identity and expression, disability, physical appearance, | ||
| body size, race, or religion. We do not tolerate harassment of community | ||
| members in any form. In particular, people from underrepresented groups are | ||
| encouraged to join the community. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # Backend Base Classes | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.base.BaseBackend | ||
| selection: | ||
| inherited_members: true | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| # Pandas-like Backend Base Classes | ||
|
|
||
| These base classes underlie the pandas-based backends. | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.pandas.BasePandasBackend | ||
| selection: | ||
| inherited_members: true | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # SQL Backend Base Classes | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.base.sql.BaseSQLBackend | ||
| selection: | ||
| inherited_members: true | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # SQLAlchemy Backend Base Classes | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.base.sql.alchemy.BaseAlchemyBackend | ||
| selection: | ||
| inherited_members: true | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # Configuration Options | ||
|
|
||
| ::: ibis.config |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # Data Types | ||
|
|
||
| This module contains classes for handling the different storage types that | ||
| occur in databases. | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| All data type constructors take a `nullable: bool` parameter whose default | ||
| value is [`True`][True]. | ||
|
|
||
| ::: ibis.expr.datatypes | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| # Complex Type Expressions | ||
|
|
||
| These APIs are available on complex data types such as arrays, maps, and | ||
| structs. | ||
|
|
||
| ::: ibis.expr.types.arrays | ||
| ::: ibis.expr.types.maps | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.expr.types.structs | ||
| selection: | ||
| filters: | ||
| - "!^Destruct.*" | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # Generic Expression APIs | ||
|
|
||
| These expressions are available on scalars and columns of any element type. | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.expr.types.generic | ||
| selection: | ||
| filters: | ||
| - "!^literal" | ||
| - "!^null" | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Geospatial Expressions | ||
|
|
||
| Ibis supports the following geospatial expression APIs | ||
|
|
||
| ::: ibis.expr.types.geospatial |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Base Expression Types | ||
|
|
||
| These APIs are shared by both table and column expressions. | ||
|
|
||
| ::: ibis.expr.types.core.Expr |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| # Numeric Expressions | ||
|
|
||
| These APIs are available on numeric and boolean expressions. | ||
|
|
||
| ::: ibis.expr.types.numeric | ||
| ::: ibis.expr.types.logical |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # String Expressions | ||
|
|
||
| All string operations are valid for both scalars and columns. | ||
|
|
||
| ::: ibis.expr.types.strings |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| # Table Expressions | ||
|
|
||
| Table expressions form the basis for most Ibis expressions. | ||
|
|
||
| ::: ibis.expr.types.relations | ||
| ::: ibis.expr.types.groupby |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Temporal Expression APIs | ||
|
|
||
| All timestamp operations are valid for both scalars and columns. | ||
|
|
||
| ::: ibis.expr.types.temporal |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| # Top-level APIs | ||
|
|
||
| These methods and objects are available directly in the `ibis` module. | ||
|
|
||
| ## `NA` | ||
|
|
||
| `NA` is the null scalar. | ||
|
|
||
| ::: ibis.array | ||
| ::: ibis.case | ||
| ::: ibis.coalesce | ||
| ::: ibis.cumulative_window | ||
| ::: ibis.date | ||
| ::: ibis.desc | ||
| ::: ibis.greatest | ||
| ::: ibis.ifelse | ||
| ::: ibis.interval | ||
| ::: ibis.least | ||
| ::: ibis.literal | ||
| ::: ibis.map | ||
| ::: ibis.negate | ||
| ::: ibis.now | ||
| ::: ibis.null | ||
| ::: ibis.param | ||
| ::: ibis.random | ||
| ::: ibis.range_window | ||
| ::: ibis.row_number | ||
| ::: ibis.schema | ||
| ::: ibis.struct | ||
| ::: ibis.table | ||
| ::: ibis.time | ||
| ::: ibis.timestamp | ||
| ::: ibis.trailing_range_window | ||
| ::: ibis.trailing_window | ||
| ::: ibis.where | ||
| ::: ibis.window |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| --- | ||
| backend_name: ClickHouse | ||
| backend_url: https://clickhouse.yandex/ | ||
| backend_module: clickhouse | ||
| backend_param_style: connection parameters | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| --- | ||
| backend_name: Dask | ||
| backend_url: https://dask.org | ||
| backend_module: dask | ||
| backend_param_style: a dictionary of paths | ||
| is_experimental: true | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| --- | ||
| backend_name: Datafusion | ||
| backend_url: https://arrow.apache.org/datafusion/ | ||
| backend_module: datafusion | ||
| backend_param_style: a dictionary of paths | ||
| is_experimental: true | ||
| version_added: "2.1" | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| --- | ||
| backend_name: DuckDB | ||
| backend_url: https://duckdb.org/ | ||
| backend_module: duckdb | ||
| backend_param_style: a path to a DuckDB database | ||
| backend_connection_example: ibis.duckdb.connect("path/to/my.duckdb") | ||
| development_only: false | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| --- | ||
| backend_name: MySQL | ||
| backend_url: https://www.mysql.com/ | ||
| backend_module: mysql | ||
| backend_param_style: a SQLAlchemy-style URI | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| --- | ||
| backend_name: Pandas | ||
| backend_url: https://pandas.pydata.org/ | ||
| backend_module: pandas | ||
| intro: Ibis's pandas backend is available in core Ibis. | ||
| backend_param_style: a dictionary of paths | ||
| do_connect_base: BasePandasBackend | ||
| is_core: true | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} | ||
|
|
||
| ## User Defined Functions (UDF) | ||
|
|
||
| Ibis supports defining three kinds of user-defined functions for operations on | ||
| expressions targeting the pandas backend: **element-wise**, **reduction**, and | ||
| **analytic**. | ||
|
|
||
| ### Elementwise Functions | ||
|
|
||
| An **element-wise** function is a function that takes N rows as input and | ||
| produces N rows of output. `log`, `exp`, and `floor` are examples of | ||
| element-wise functions. | ||
|
|
||
| Here's how to define an element-wise function: | ||
|
|
||
| ```python | ||
| import ibis.expr.datatypes as dt | ||
| from ibis.backends.pandas import udf | ||
|
|
||
| @udf.elementwise(input_type=[dt.int64], output_type=dt.double) | ||
| def add_one(x): | ||
| return x + 1.0 | ||
| ``` | ||
|
|
||
| ### Reduction Functions | ||
|
|
||
| A **reduction** is a function that takes N rows as input and produces 1 row | ||
| as output. `sum`, `mean` and `count` are examples of reductions. In | ||
| the context of a `GROUP BY`, reductions produce 1 row of output _per | ||
| group_. | ||
|
|
||
| Here's how to define a reduction function: | ||
|
|
||
| ```python | ||
| import ibis.expr.datatypes as dt | ||
| from ibis.backends.pandas import udf | ||
|
|
||
| @udf.reduction(input_type=[dt.double], output_type=dt.double) | ||
| def double_mean(series): | ||
| return 2 * series.mean() | ||
| ``` | ||
|
|
||
| ### Analytic Functions | ||
|
|
||
| An **analytic** function is like an **element-wise** function in that it takes | ||
| N rows as input and produces N rows of output. The key difference is that | ||
| analytic functions can be applied _per group_ using window functions. Z-score | ||
| is an example of an analytic function. | ||
|
|
||
| Here's how to define an analytic function: | ||
|
|
||
| ```python | ||
| import ibis.expr.datatypes as dt | ||
| from ibis.backends.pandas import udf | ||
|
|
||
| @udf.analytic(input_type=[dt.double], output_type=dt.double) | ||
| def zscore(series): | ||
| return (series - series.mean()) / series.std() | ||
| ``` | ||
|
|
||
| ### Details of Pandas UDFs | ||
|
|
||
| - Element-wise functions provide support | ||
| for applying your UDF to any combination of scalar values and columns. | ||
| - Reductions provide support for | ||
| whole column aggregations, grouped aggregations, and application of your | ||
| function over a window. | ||
| - Analytic functions work in both grouped and non-grouped | ||
| settings. | ||
| - The objects you receive as input arguments are either `pandas.Series` or | ||
| Python/NumPy scalars. | ||
|
|
||
| !!! warning "Keyword arguments must be given a default" | ||
|
|
||
| Any keyword arguments must be given a default value or the function **will | ||
| not work**. | ||
|
|
||
| A common Python convention is to set the default value to `None` and | ||
| handle setting it to something not `None` in the body of the function. | ||
|
|
||
| Using `add_one` from above as an example, the following call will receive a | ||
| `pandas.Series` for the `x` argument: | ||
|
|
||
| ```python | ||
| import ibis | ||
| import pandas as pd | ||
| df = pd.DataFrame({'a': [1, 2, 3]}) | ||
| con = ibis.pandas.connect({'df': df}) | ||
| t = con.table('df') | ||
| expr = add_one(t.a) | ||
| expr | ||
| ``` | ||
|
|
||
| And this will receive the `int` 1: | ||
|
|
||
| ```python | ||
| expr = add_one(1) | ||
| expr | ||
| ``` | ||
|
|
||
| Since the pandas backend passes around `**kwargs` you can accept `**kwargs` | ||
| in your function: | ||
|
|
||
| ```python | ||
| import ibis.expr.datatypes as dt | ||
| from ibis.backends.pandas import udf | ||
|
|
||
| @udf.elementwise([dt.int64], dt.double) | ||
| def add_two(x, **kwargs): # do stuff with kwargs | ||
| return x + 2.0 | ||
| ``` | ||
|
|
||
| Or you can leave them out as we did in the example above. You can also | ||
| optionally accept specific keyword arguments. | ||
|
|
||
| For example: | ||
|
|
||
| ```python | ||
| import ibis.expr.datatypes as dt | ||
| from ibis.backends.pandas import udf | ||
|
|
||
| @udf.elementwise([dt.int64], dt.double) | ||
| def add_two_with_none(x, y=None): | ||
| if y is None: | ||
| y = 2.0 | ||
| return x + y | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| --- | ||
| backend_name: PostgreSQL | ||
| backend_url: https://www.postgresql.org/ | ||
| backend_module: postgres | ||
| backend_param_style: a SQLAlchemy-style URI | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| --- | ||
| backend_name: PySpark | ||
| backend_url: https://spark.apache.org/docs/latest/api/python/ | ||
| backend_module: pyspark | ||
| backend_param_style: PySpark things | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| --- | ||
| backend_name: SQLite | ||
| backend_url: https://www.sqlite.org/ | ||
| backend_module: sqlite | ||
| backend_param_style: a path to a SQLite database | ||
| exclude_backend_api: true | ||
| --- | ||
|
|
||
| {% include 'backends/template.md' %} | ||
|
|
||
| ## Backend API | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.sqlite.Backend | ||
| rendering: | ||
| heading_level: 3 | ||
| selection: | ||
| inherited_members: true | ||
| members: | ||
| - add_operation | ||
| - attach | ||
| - compile | ||
| - connect | ||
| - create_database | ||
| - create_table | ||
| - create_view | ||
| - database | ||
| - drop_table | ||
| - drop_view | ||
| - execute | ||
| - exists_database | ||
| - exists_table | ||
| - explain | ||
| - insert | ||
| - list_databases | ||
| - list_tables | ||
| - load_data | ||
| - raw_sql | ||
| - schema | ||
| - table | ||
| - verify | ||
| <!-- prettier-ignore-end --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| # Backends | ||
|
|
||
| ## String Generating Backends | ||
|
|
||
| The first category of backends translates Ibis expressions into string queries. | ||
|
|
||
| The compiler turns each expression into a string query and passes that query to the | ||
| database through a driver API for execution. | ||
|
|
||
| - [Apache Impala](Impala.md) | ||
| - [ClickHouse](ClickHouse.md) | ||
| - [Google BigQuery](https://github.com/ibis-project/ibis-bigquery/) | ||
| - [HeavyAI](https://github.com/heavyai/ibis-heavyai) | ||
|
|
||
| ## Expression Generating Backends | ||
|
|
||
| The next category of backends translates ibis expressions into another | ||
| system's expressions, for example, SQLAlchemy. | ||
|
|
||
| Instead of generating strings for each expression these backends produce | ||
| another kind of expression and typically have high-level APIs for execution. | ||
|
|
||
| - [Dask](Dask.md) | ||
| - [Datafusion](Datafusion.md) | ||
| - [MySQL](MySQL.md) | ||
| - [PostgreSQL](PostgreSQL.md) | ||
| - [PySpark](PySpark.md) | ||
| - [SQLite](SQLite.md) | ||
|
|
||
| ## Direct Execution Backends | ||
|
|
||
| The pandas backend is the only direct execution backend. A full description | ||
| of the implementation can be found in the module docstring of the pandas | ||
| backend located in `ibis/backends/pandas/core.py`. | ||
|
|
||
| - [Pandas](Pandas.md) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| --- | ||
| hide: | ||
| - toc | ||
| --- | ||
|
|
||
| # Operation Support Matrix | ||
|
|
||
| Backends are shown in descending order of the number of supported operations. | ||
|
|
||
| {{ read_csv("docs/backends/support_matrix.csv") }} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| # [{{ backend_name }}]({{ backend_url }}) | ||
|
|
||
| {% if is_experimental %} | ||
| !!! experimental "{% if version_added %}New in v{{ version_added }}{% else %}Experimental{% endif %}" | ||
|
|
||
| The {{ backend_name }} backend is experimental and is subject to backwards incompatible changes. | ||
|
|
||
| {% endif %} | ||
|
|
||
| {% if intro %}{{ intro }}{% endif %} | ||
|
|
||
| {% if not development_only %} | ||
|
|
||
| ## Install | ||
|
|
||
| Install ibis and dependencies for the {{ backend_name }} backend: | ||
|
|
||
| === "pip" | ||
|
|
||
| ```sh | ||
| pip install 'ibis-framework{% if not is_core %}[{{ backend_module }}]{% endif %}' | ||
| ``` | ||
|
|
||
| {% for mgr in ["conda", "mamba"] %} | ||
| === "{{ mgr }}" | ||
|
|
||
| ```sh | ||
| {{ mgr }} install -c conda-forge ibis-{% if is_core %}framework{% else %}{{ backend_module }}{% endif %} | ||
| ``` | ||
|
|
||
| {% endfor %} | ||
|
|
||
| {% else %} | ||
| !!! info "The {{ backend_name }} backend isn't released yet!" | ||
|
|
||
| [Set up a development environment](../contribute/01_environment.md) to use this backend. | ||
|
|
||
| {% endif %} | ||
|
|
||
| ## Connect | ||
|
|
||
| ### API | ||
|
|
||
| Create a client by passing in {{ backend_param_style }} to `ibis.{{ backend_module }}.connect`. | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| See [`ibis.backends.{{ backend_module }}.Backend.do_connect`][ibis.backends.{{ backend_module }}.Backend.do_connect] | ||
| for connection parameter information. | ||
| <!-- prettier-ignore-end --> | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| !!! info "`ibis.{{ backend_module }}.connect` is a thin wrapper around [`ibis.backends.{{ backend_module }}.Backend.do_connect`][ibis.backends.{{ backend_module }}.Backend.do_connect]." | ||
| <!-- prettier-ignore-end --> | ||
|
|
||
| ### Connection Parameters | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.{{ backend_module }}.Backend.do_connect | ||
| rendering: | ||
| heading_level: 4 | ||
| <!-- prettier-ignore-end --> | ||
|
|
||
| {% if not exclude_backend_api %} | ||
|
|
||
| ## Backend API | ||
|
|
||
| <!-- prettier-ignore-start --> | ||
| ::: ibis.backends.{{ backend_module }}.Backend | ||
| rendering: | ||
| heading_level: 3 | ||
| selection: | ||
| inherited_members: true | ||
| <!-- prettier-ignore-end --> | ||
|
|
||
| {% endif %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| # Ibis v3.0.0 | ||
|
|
||
| #### by: Marlene Mhangami | ||
|
|
||
| The latest version of Ibis, version 3.0.0, has just been released! This post highlights some of the new features, breaking changes, and performance improvements that come with the new release. 3.0.0 is a major release and includes more changes than those listed in this post. A full list of the changes can be found in the project release notes [here](https://ibis-project.org/docs/dev/release_notes/). | ||
|
|
||
| ## New Features | ||
|
|
||
| Aligned to the roadmap and in response to the community’s requests, Ibis 3.0.0 introduces many new features and functionality. | ||
|
|
||
| 1. Now query an Ibis table using inline SQL | ||
| 2. _NEW_ DuckDB backend | ||
| 3. Explore the _NEW_ backend support matrix tool | ||
| 4. Improved support for arrays and tuples in ClickHouse | ||
| 5. Suffixes now supported in join API expressions | ||
| 6. APIs for creating timestamps and dates from component fields | ||
| 7. Pretty printing in IPython/Jupyter notebooks | ||
|
|
||
| Refer to the sections below for more detail on each new feature. | ||
|
|
||
| ### Inline SQL | ||
|
|
||
| The most exciting feature of this release is inline SQL! Many data scientists or developers may be familiar with both Python and SQL. However, there may be some queries or transformations that they feel more comfortable doing in SQL instead of Python. In the updated version of Ibis, users can query an Ibis table using SQL! The new `.sql` method allows users to mix SQL strings with Ibis expressions as well as query Ibis table expressions in SQL strings. | ||
|
|
||
| This functionality currently works for the following backends: | ||
|
|
||
| 1. PostgreSQL | ||
| 2. DuckDB | ||
| 3. PySpark | ||
| 4. MySQL | ||
|
|
||
| If you're interested in adding `.sql` support for other backends, please [open an issue](https://github.com/ibis-project/ibis/issues/new/choose). | ||
|
|
||
| ### DuckDB Backend | ||
|
|
||
| Ibis now supports DuckDB as a backend. DuckDB is a high-performance SQL OLAP database management system. It is designed to be fast, reliable and easy to use and can be embedded. Many Ibis use cases start from getting tables from a single-node backend so directly supporting DuckDB offers a lot of value. As mentioned earlier, the DuckDB backend allows for the new .sql method on tables for mixing sql and Ibis expressions. | ||
|
|
||
| ### Backend Support Matrix | ||
|
|
||
| As the number of backends Ibis supports grows, it can be challenging for users to decide which one best fits their needs. One way to make a more informed decision is for users to find the backend that supports the operations they intend to use. The 3.0.0 release comes with a backend support matrix that allows users to do just that. A screenshot of part of the matrix can be seen below and the full version can be found [here](https://ibis-project.org/docs/dev/backends/support_matrix/). | ||
|
|
||
| In addition to this, users can now call `ibis.${backend}.has_operation` to find out if a specific operation is supported by a backend. | ||
|
|
||
|  | ||
|
|
||
| ### Support of arrays and tuples for ClickHouse | ||
|
|
||
| The 3.0.0 release includes a slew of important improvements for the ClickHouse backend. Most prominently ibis now supports ClickHouse arrays and tuples. | ||
| Some of the related operations that have been implemented are: | ||
|
|
||
| - ArrayIndex | ||
| - ArrayConcat | ||
| - ArrayRepeat | ||
| - ArraySlice | ||
|
|
||
| Additional operations now supported by the ClickHouse backend include string concat, string slicing, table union, trim, pad, string predicates (LIKE and ILIKE), and all remaining joins. | ||
|
|
||
| ### Suffixes now supported in join API expressions | ||
|
|
||
| In previous versions Ibis' join API did not accept suffixes as a parameter, leaving backends to either use some default value or raise an error at execution time when column names overlapped. In 3.0.0 suffixes are now directly supported in the join API itself. Along with the removal of materialize, ibis now automatically adds a default suffix to any overlapping column names. | ||
|
|
||
| ### Creating timestamp from component fields | ||
|
|
||
| It is now possible to create timestamps and dates directly from component fields. For dates, this is done using the new method `ibis.date(y, m, d)`: a user passes in a year, month and day, and the result is a date expression. That is, we can assert, for example, that `ibis.date(2022, 2, 4).type() == dt.date`. | ||
|
|
||
| ### Pretty print tables in ipython notebooks | ||
|
|
||
| For users of Jupyter notebooks, `_repr_html_` has been added to expressions to enable pretty printing of tables in the notebook. This is currently only available in interactive mode (delegating to the pandas implementation) and should help notebooks become more readable. An example of what this looks like can be seen below. | ||
|
|
||
|  | ||
|
|
||
| ## Breaking Changes | ||
|
|
||
| 3.0.0 is a major release and according to the project's use of semantic versioning, breaking changes are on the table. The full list of these changes can be found [here](https://ibis-project.org/docs/dev/release_notes/). | ||
|
|
||
| 1. Python 3.8 is now the minimum supported version | ||
| 2. Removal of `.materialize()` | ||
|
|
||
| Refer to the sections below for more detail on these changes. | ||
|
|
||
| ### The minimum supported Python version is now Python 3.8 | ||
|
|
||
| Ibis currently follows [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html), a community policy standard that recommends Python and NumPy versions to support. NEP 29 suggests that all projects across the Scientific Python ecosystem adopt a common “time window-based” policy for support of Python and NumPy versions. Standardizing a recommendation for project support of minimum Python and NumPy versions will improve downstream project planning. As part of the 3.0.0 release, support for Python 3.7 has been dropped and the project has now adopted support for version 3.8 and higher. | ||
|
|
||
| ### Removal of .materialize() | ||
|
|
||
| This release sees the removal of the `.materialize()` method from TableExpr. In the past, the materialize method has caused a lot of confusion. Doing simple things like `t.join(s, t.foo == s.foo).select(["unambiguous_column"])` raised an exception because of it. It turns out that .materialize() isn't necessary and therefore has been removed. This is a breaking change for some code that uses materialize. The materialize method still exists, but is now a pass-through and triggers a warning. | ||
|
|
||
| There are also some breaking changes introduced here in the case of overlapping column names. If there are any overlapping column names, a suffix will be attached to both the left and right tables. So, in the case of `s.asof_join(t, "time")` the resulting schema will have both a `time_x` and a `time_y` column. | ||
|
|
||
| ## Performance Improvements | ||
|
|
||
| The following changes to the Ibis codebase have resulted in performance improvements. | ||
|
|
||
| 1. Speeding up `__str__` and `__hash__` for datatypes | ||
| 2. Creating a fast path for simple column selection (pandas/dask backends) | ||
| 3. Global equality cache | ||
| 4. Removing full tree repr from rule validator error message | ||
| 5. Speed up attribute access | ||
| 6. Using assign instead of concat in projections when possible (pandas/dask backends) | ||
|
|
||
| Additionally, all TPC-H suite queries can be represented in Ibis. All queries are ready-to-run, using the default substitution parameters as specified by the TPC-H spec. Queries have been added [here](https://github.com/ibis-project/tpc-queries). | ||
|
|
||
| ## Conclusion | ||
|
|
||
| In summary, the 3.0.0 release includes a number of new features, including the ability to query an Ibis table using inline SQL, a DuckDB backend, a backend support matrix tool, support for arrays and tuples in ClickHouse, suffixes in joins, timestamps and dates from component fields, and prettier tables in IPython notebooks. Some breaking changes to take note of are the removal of `.materialize()` and the switch to Python 3.8 as the minimum supported version. A wide range of changes to the code has also led to significant speedups in 3.0.0. | ||
|
|
||
| Ibis is a community-led, open source project. If you’d like to contribute to the project, check out the contribution guide [here](https://ibis-project.org/docs/dev/contribute/01_environment/). If you run into a problem and would like to submit an issue you can do so through Ibis’ [GitHub repository](https://github.com/ibis-project/ibis). Finally, Ibis relies on community support to grow and to become successful! You can help promote Ibis by following and sharing the project on [Twitter](https://twitter.com/IbisData), [starring the repo](https://github.com/ibis-project/ibis) or [contributing](https://ibis-project.org/docs/dev/) to the code. Ibis continues to improve with every release. Keep an eye on the blog for updates on the next one! |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| --- | ||
| hide: | ||
| - toc | ||
| --- | ||
|
|
||
| # Setting Up a Development Environment | ||
|
|
||
| ## Required Dependencies | ||
|
|
||
| - [`git`](https://git-scm.com/) | ||
|
|
||
| !!! note "Python 3.10 is supported on a best-effort basis" | ||
|
|
||
| As of 2022-02-17 there is support for Python 3.10 when using `nix` for development. | ||
|
|
||
| `conda-forge` is still in [the process of migrating packages to Python | ||
| 3.10](https://conda-forge.org/status/#python310). | ||
|
|
||
| === "Nix" | ||
|
|
||
| #### Support Matrix | ||
|
|
||
| | Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | | ||
| | -----------------------------------------: | :----------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: | | ||
| | **Operating System** :material-arrow-down: | | | | | ||
| | **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | | ||
| | **macOS** | {{ config.extra.support_levels.bug.icon }}[^2] | {{ config.extra.support_levels.bug.icon }} | {{ config.extra.support_levels.bug.icon }} | | ||
| | **Windows** | {{ config.extra.support_levels.unsupported.icon }}[^3] | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} | | ||
|
|
||
| 1. [Install `nix`](https://nixos.org/download.html) | ||
| 1. Install `gh`: | ||
|
|
||
| === "`nix-shell`" | ||
|
|
||
| ```sh | ||
| nix-shell -p gh | ||
| ``` | ||
|
|
||
| === "`nix-env`" | ||
|
|
||
| ```sh | ||
| nix-env -iA gh | ||
| ``` | ||
|
|
||
| 1. Fork and clone the ibis repository: | ||
|
|
||
| ```sh | ||
| gh repo fork --clone --remote ibis-project/ibis | ||
| ``` | ||
|
|
||
| 1. Set up the public `ibis` Cachix cache to pull pre-built dependencies: | ||
|
|
||
| ```sh | ||
| nix-shell -p cachix --run 'cachix use ibis' | ||
| ``` | ||
|
|
||
| 1. Run `nix-shell` in the checkout directory: | ||
|
|
||
| ```sh | ||
| cd ibis | ||
| nix-shell | ||
| ``` | ||
|
|
||
| This may take a while due to artifact download from the cache. | ||
|
|
||
| === "Conda" | ||
|
|
||
| !!! info "Some optional dependencies for Windows are not available through `conda`/`mamba`" | ||
|
|
||
| 1. `python-duckdb` and `duckdb-engine`. Required for the DuckDB backend. | ||
| 1. `clickhouse-cityhash`. Required for compression support in the ClickHouse backend. | ||
|
|
||
| #### Support Matrix | ||
|
|
||
| | Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | | ||
| | -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :--------------------------------------------: | | ||
| | **Operating System** :material-arrow-down: | | | | | ||
| | **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }}[^2] | | ||
| | **macOS** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} | | ||
| | **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.bug.icon }} | | ||
|
|
||
| {% set managers = {"conda": {"name": "Miniconda", "url": "https://docs.conda.io/en/latest/miniconda.html"}, "mamba": {"name": "Mamba", "url": "https://github.com/mamba-org/mamba"}} %} | ||
| {% for manager, params in managers.items() %} | ||
|
|
||
| === "`{{ manager }}`" | ||
|
|
||
| 1. Install [{{ params["name"] }}]({{ params["url"] }}) | ||
|
|
||
| 1. Install `gh` | ||
|
|
||
| ```sh | ||
| {{ manager }} install -c conda-forge gh | ||
| ``` | ||
|
|
||
| 1. Fork and clone the ibis repository: | ||
|
|
||
| ```sh | ||
| gh repo fork --clone --remote ibis-project/ibis | ||
| ``` | ||
|
|
||
| 1. Create a Conda environment from a lock file in the repo: | ||
|
|
||
| {% set platforms = {"Linux": "linux", "MacOS": "osx", "Windows": "win"} %} | ||
| {% for os, platform in platforms.items() %} | ||
| === "{{ os }}" | ||
|
|
||
| ```sh | ||
| cd ibis | ||
| {{ manager }} create -n ibis-dev --file=conda-lock/{{ platform }}-64-3.9.lock | ||
| ``` | ||
| {% endfor %} | ||
|
|
||
| 1. Activate the environment | ||
|
|
||
| ```sh | ||
| {{ manager }} activate ibis-dev | ||
| ``` | ||
|
|
||
| 1. Install your local copy of `ibis` into the Conda environment. | ||
|
|
||
| ```sh | ||
| cd ibis | ||
| pip install -e . | ||
| ``` | ||
|
|
||
| 1. If you want to run the backend test suite you'll need to install `docker-compose`: | ||
|
|
||
| ```sh | ||
| {{ manager }} install docker-compose -c conda-forge | ||
| ``` | ||
|
|
||
| {% endfor %} | ||
|
|
||
| Once you've set up an environment, try building the documentation: | ||
|
|
||
| ```sh | ||
| mkdocs serve | ||
| ``` | ||
|
|
||
| {% for data in config.extra.support_levels.values() %} | ||
| [^{{ loop.index }}]: {{ data.description }} | ||
| {% endfor %} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,208 @@ | ||
| # Working on the Codebase | ||
|
|
||
| ## Find an issue to work on | ||
|
|
||
| All contributions are welcome! Code, docs, and constructive feedback are all | ||
| great contributions to the project. | ||
|
|
||
| If you don't have a particular issue in mind head over to the GitHub issue | ||
| tracker for Ibis and look for open issues with the label [`good first issue`](https://github.com/ibis-project/ibis/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). | ||
|
|
||
| Feel free to help with other issues that aren't labeled as such, but they may be more challenging. | ||
|
|
||
| Once you find an issue you want to work on, write a comment with the text | ||
| `/take` on the issue. GitHub will then assign the issue to you. | ||
|
|
||
| This lets people know you're working on the issue. If you find an issue that | ||
| has an assignee, comment on the issue and ask whether the assignee is still | ||
| working on the issue. | ||
|
|
||
| ## Make a branch | ||
|
|
||
| The first thing you want to do is make a branch. Let's call it `useful-bugfix`: | ||
|
|
||
| ```sh | ||
| git checkout -b useful-bugfix | ||
| ``` | ||
|
|
||
| ## Make the desired change | ||
|
|
||
| Let's say you've made a change to `ibis/expr/types.py` to fix a bug reported in issue #424242 (not actually an issue). | ||
|
|
||
| Running `git status` should give output similar to this: | ||
|
|
||
| ```sh | ||
| On branch useful-bugfix | ||
| Your branch is up to date with 'origin/useful-bugfix'. | ||
|
|
||
| Changes not staged for commit: | ||
| (use "git add <file>..." to update what will be committed) | ||
| (use "git restore <file>..." to discard changes in working directory) | ||
| modified: ibis/expr/types.py | ||
|
|
||
| no changes added to commit (use "git add" and/or "git commit -a") | ||
| ``` | ||
|
|
||
| ## Run the test suite | ||
|
|
||
| Next, you'll want to run a subset of the test suite. | ||
|
|
||
| ### Required Dependencies | ||
|
|
||
| !!! warning "You need a development environment before running tests" | ||
|
|
||
| Make sure you've set up a [development environment](01_environment.md) | ||
| before proceeding | ||
|
|
||
| Run the test suite: | ||
|
|
||
| ```sh | ||
| pytest -m core | ||
| ``` | ||
|
|
||
| !!! tip "Each backend has a `pytest` marker" | ||
|
|
||
| You can run the tests for a specific backend using | ||
|
|
||
| ```sh | ||
| pytest -m $the_backend_name | ||
| ``` | ||
|
|
||
| For example, to run SQLite tests: | ||
|
|
||
| ```sh | ||
| pytest -m sqlite | ||
| ``` | ||
|
|
||
| ## Commit your changes | ||
|
|
||
| ### Required Dependencies | ||
|
|
||
| - `git` | ||
| - [`cz`](https://commitizen-tools.github.io/commitizen/) | ||
|
|
||
| !!! tip | ||
|
|
||
| `cz` is already installed in your environment if you followed the [setup | ||
| instructions](01_environment.md) | ||
|
|
||
| Next, you'll want to commit your changes. | ||
|
|
||
| Ibis's commit message structure follows the [`semantic-release` | ||
| conventions](https://github.com/semantic-release/semantic-release). | ||
|
|
||
| !!! warning | ||
|
|
||
| It isn't necessary to use `cz commit` to make commits, but it is necessary | ||
| to follow the instructions outlined in [this | ||
| table](https://github.com/semantic-release/semantic-release#commit-message-format). | ||
|
|
||
| Stage your changes and run `cz commit`: | ||
|
|
||
| ```sh | ||
| git add . | ||
| cz commit | ||
| ``` | ||
|
|
||
| You should see a series of prompts about actions to take next: | ||
|
|
||
| 1. Select the type of change you're committing. In this case, we're committing a bug fix, so we'll select fix: | ||
|
|
||
| ```console | ||
| ? Select the type of change you are committing (Use arrow keys) | ||
| » fix: A bug fix. Correlates with PATCH in SemVer | ||
| feat: A new feature. Correlates with MINOR in SemVer | ||
| docs: Documentation only changes | ||
| style: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc) | ||
| refactor: A code change that neither fixes a bug nor adds a feature | ||
| perf: A code change that improves performance | ||
| test: Adding missing or correcting existing tests | ||
| build: Changes that affect the build system or external dependencies (example scopes: pip, docker, npm) | ||
| ci: Changes to our CI configuration files and scripts (example scopes: GitLabCI) | ||
| ``` | ||
|
|
||
| Generally you don't need to think too hard about what category to select, but note that: | ||
|
|
||
| - `feat` will cause a minor version bump | ||
| - `fix` will cause a patch version bump | ||
| - everything else will **not** cause a version bump, **unless it's a breaking | ||
| change** (continue reading these instructions for more info on that) | ||
|
|
||
| 2. Next, you're asked what the scope of this change is: | ||
|
|
||
| ```console | ||
| ? What is the scope of this change? (class or file name): (press [enter] to skip) | ||
| ``` | ||
|
|
||
| This is optional, but if there's a clear component or single file that is | ||
| modified you should put it. In our case, let's assume the bug fixed a type | ||
| inference problem, so we'd type in `type-inference` at this prompt. | ||
|
|
||
| 3. You'll then be asked to type in a short description of the change which will be the commit message title: | ||
|
|
||
| ```console | ||
| ? Write a short and imperative summary of the code changes: (lower case and no period) | ||
| fix a type inference issue where floats were incorrectly cast to ints | ||
| ``` | ||
|
|
||
| Let's say there was a problem with spurious casting of float to integers, so | ||
| we type in the message above. That number on the left (here `(69)`) is the | ||
| length of description you've typed in. | ||
|
|
||
| 4. Next you'll be asked for a longer description, which is entirely optional | ||
| **unless the change is a breaking change**, or you feel like writing a bit of prose: | ||
|
|
||
| ```console | ||
| ? Provide additional contextual information about the code changes: (press [enter] to skip) | ||
| A bug was triggered by some incorrect code that caused floats to be incorrectly cast to integers. | ||
| ``` | ||
|
|
||
| For non breaking changes, this isn't strictly necessary but it can be very | ||
| helpful when a change is large, obscure, or complex. For this example let's just reiterate | ||
| most of what the commit title says. | ||
|
|
||
| 5. Next you're asked about breaking changes: | ||
|
|
||
| ```console | ||
| ? Is this a BREAKING CHANGE? Correlates with MAJOR in SemVer (y/N) | ||
| ``` | ||
|
|
||
| If you answer `y`, then you'll get an additional prompt asking you to | ||
| describe the breaking changes. This description will ultimately make its way | ||
| into the user-facing release notes. If there aren't any breaking changes, press enter. | ||
| Let's say this bug fix does **not** introduce a breaking change. | ||
|
|
||
| 6. Finally, you're asked whether this change affects any open issues (ignore | ||
| the bit about breaking changes) and if yes then to reference them: | ||
|
|
||
| ```console | ||
| ? Footer. Information about Breaking Changes and reference issues that this commit closes: (press [enter] to skip) | ||
| fixes #424242 | ||
| ``` | ||
|
|
||
| Here we typed `fixes #424242` to indicate that we fixed issue #424242. | ||
|
|
||
| Whew! Seems like a lot, but it's rather quick once you get used to it. After | ||
| that you should have a commit that looks roughly like this, ready to be automatically rolled into the next release: | ||
|
|
||
| ```console | ||
| commit 4049adbd66b0df48e37ca105da0b9139101a1318 (HEAD -> useful-bugfix) | ||
| Author: Phillip Cloud <417981+cpcloud@users.noreply.github.com> | ||
| Date: Tue Dec 21 10:30:50 2021 -0500 | ||
|
|
||
| fix(type-inference): fix a type inference issue where floats were incorrectly cast to ints | ||
|
|
||
| A bug was triggered by some incorrect code that caused floats to be incorrectly cast to integers. | ||
|
|
||
| fixes #424242 | ||
| ``` | ||
|
|
||
| ### Push your changes | ||
|
|
||
| Now that you've got a commit, you're ready to push your changes and make a pull request! | ||
|
|
||
| ```sh | ||
| gh pr create | ||
| ``` | ||
|
|
||
| Follow the prompts, and `gh` will print a link to your PR upon successful submission. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| # Style and Formatting | ||
|
|
||
| ## Code Style | ||
|
|
||
| The following tools are run in both CI and `pre-commit` checks to ensure codebase hygiene: | ||
|
|
||
| | Tool | Purpose | | ||
| | ---------------------------------------------------------------------: | :-------------------------------------------------- | | ||
| | [`black`](https://github.com/psf/black) | Formatting Python code | | ||
| | [`isort`](https://github.com/PyCQA/isort) | Formatting and sorting `import` statements | | ||
| | [`absolufy-imports`](https://github.com/MarcoGorelli/absolufy-imports) | Automatically convert relative imports to absolute. | | ||
| | [`flake8`](https://flake8.pycqa.org/en/latest/) | Linting Python code | | ||
| | [`nix-linter`](https://github.com/Synthetica9/nix-linter) | Linting nix files | | ||
| | [`nixpkgs-fmt`](https://github.com/nix-community/nixpkgs-fmt) | Formatting nix files | | ||
| | [`shellcheck`](https://github.com/koalaman/shellcheck) | Linting shell scripts | | ||
| | [`shfmt`](https://github.com/mvdan/sh) | Formatting shell scripts | | ||
| | [`pyupgrade`](https://github.com/asottile/pyupgrade) | Ensuring the latest available Python syntax is used | | ||
|
|
||
| !!! tip | ||
|
|
||
| If you use `nix-shell`, all of these are set up for you and ready to use; you don't | ||
| need to install any of these tools. | ||
|
|
||
| We use [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) as our | ||
| standard format for docstrings. | ||
|
|
||
| ## Commit philosophy | ||
|
|
||
| We aim to make our individual commits small and tightly focused on the feature | ||
| they are implementing or bug being fixed. If you find yourself making | ||
| functional changes to different areas of the codebase, we prefer you break up | ||
| your changes into separate Pull Requests. In general, a philosophy of one | ||
| GitHub Issue per Pull Request is a good rule of thumb. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| # Working with the Backend Test Suite | ||
|
|
||
| !!! danger "Before you start" | ||
|
|
||
| This section assumes you have a working [development environment](01_environment.md). | ||
|
|
||
| !!! info "You may be able to skip this section" | ||
|
|
||
| If you haven't made changes to the core of ibis (e.g., `ibis/expr`) | ||
| or any specific backends (`ibis/backends`) this material isn't necessary to | ||
| follow to make a pull request. | ||
|
|
||
| ## Motivation | ||
|
|
||
| One of the primary challenges when developing against the ibis codebase is testing | ||
| backends that require non-trivial setup. | ||
|
|
||
| Moreover, many of the backends that ibis works with have very different | ||
| deployment models: | ||
|
|
||
| - **In-process** systems like SQLite | ||
| - **Client-server** systems like PostgreSQL and MySQL | ||
| - Systems that **run the gamut** of deployment models like ClickHouse | ||
| - Systems that run **on-premises**, like Impala | ||
|
|
||
| This section of the docs describes how to work with the backend test suite. | ||
|
|
||
| ## Backend Testing with Compose | ||
|
|
||
| Here is the list of backends that can be tested using `docker-compose`. | ||
|
|
||
| | Backend | Docker Compose Services | | ||
| | ---------- | ----------------------- | | ||
| | ClickHouse | `clickhouse` | | ||
| | PostgreSQL | `postgres` | | ||
| | Impala | `impala`, `kudu` | | ||
| | MySQL | `mysql` | | ||
|
|
||
| ### Testing a Compose Service | ||
|
|
||
| !!! check "Check your current directory" | ||
|
|
||
| Make sure you're inside of your clone of the ibis GitHub repository | ||
|
|
||
| Let's fire up a PostgreSQL server and run tests against it. | ||
|
|
||
| #### Start the `postgres` Service | ||
|
|
||
| Open a new shell and run | ||
|
|
||
| ```sh | ||
| docker-compose up --build postgres | ||
| ``` | ||
|
|
||
| Test the connection in the original shell using | ||
|
|
||
| ```sh | ||
| export PGPASSWORD=postgres | ||
| psql -t -A -h localhost -U postgres -d ibis_testing -c "select 'success'" | ||
| ``` | ||
|
|
||
| You should see this output: | ||
|
|
||
| ```console | ||
| success | ||
| ``` | ||
|
|
||
| !!! warning "PostgreSQL doesn't start up instantly" | ||
|
|
||
| It takes a few seconds for postgres to start, so if the previous | ||
| command fails wait a few seconds and try again | ||
|
|
||
| Congrats, you now have a PostgreSQL server running and are ready to run tests! | ||
|
|
||
| #### Load Data | ||
|
|
||
| The backend needs to be populated with test data: | ||
|
|
||
| 1. Download the data | ||
|
|
||
| ```sh | ||
| python ci/datamgr.py download | ||
| ``` | ||
|
|
||
| 2. In the original terminal, run | ||
|
|
||
| ```sh | ||
| python ci/datamgr.py load postgres | ||
| ``` | ||
|
|
||
| You should see a bit of logging, and the command should complete shortly thereafter. | ||
|
|
||
| #### Run the test suite | ||
|
|
||
| You're now ready to run the test suite for the postgres backend: | ||
| ```sh | ||
| pytest -m postgres | ||
| ``` | ||
| Please [file an issue](https://github.com/ibis-project/ibis/issues/new) if the | ||
| test suite fails for any reason. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| # Maintaining the Codebase | ||
|
|
||
| Ibis maintainers are expected to handle the following tasks as they arise: | ||
|
|
||
| - Reviewing and merging pull requests | ||
| - Triaging new issues | ||
|
|
||
| ## Dependencies | ||
|
|
||
| A number of tasks that are typically associated with maintenance are partially or fully automated. | ||
|
|
||
| | Dependency Type | Management Tool | | ||
| | --------------------------: | :----------------------------------------------------------------------------------------------------------------- | | ||
| | Python library dependencies | [WhiteSource Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/) | | ||
| | GitHub Actions | [WhiteSource Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/) | | ||
| | Nix dependencies | [A GitHub Action](https://github.com/ibis-project/ibis/actions/workflows/update-deps.yml) run at a regular cadence | | ||
|
|
||
| Dependencies are managed using [`poetry`](https://python-poetry.org). | ||
|
|
||
| Occasionally you may need to lock poetry dependencies, which can be | ||
| done by running | ||
|
|
||
| ```sh | ||
| poetry lock --no-update | ||
| ``` | ||
|
|
||
| ### Automatic Dependency Updates | ||
|
|
||
| [WhiteSource | ||
| Renovate](https://www.whitesourcesoftware.com/free-developer-tools/renovate/) | ||
| will run at some cadence (outside of traditional business hours) and submit PRs | ||
| that update dependencies. | ||
|
|
||
| These upgrades use a conservative update strategy, which is currently to | ||
| increase the upper bound of a dependency's version range. | ||
|
|
||
| The PRs it generates will regenerate a number of other files so that in most | ||
| cases contributors do not have to remember to generate and commit these files. | ||
|
|
||
| ### Adding or Changing Dependencies | ||
|
|
||
| 1. Edit `pyproject.toml` as needed. | ||
| 2. Run `poetry lock --no-update` | ||
| 3. Regenerate `setup.py`: | ||
|
|
||
| !!! failure "Do not manually edit `setup.py`" | ||
|
|
||
| `setup.py` is [automatically | ||
| generated](https://github.com/ibis-project/ibis/blob/master/dev/poetry2setup.py) | ||
| from `pyproject.toml` | ||
|
|
||
| === "Nix" | ||
|
|
||
| ```sh | ||
| ./dev/poetry2setup -o setup.py | ||
| ``` | ||
|
|
||
| === "Without Nix" | ||
|
|
||
| Run the following command | ||
|
|
||
| ```sh | ||
| PYTHONHASHSEED=42 python ./dev/poetry2setup.py -o setup.py | ||
| ``` | ||
|
|
||
| !!! question "Why do we need to set `PYTHONHASHSEED`?" | ||
|
|
||
| Dependencies' [`extras`](https://python-poetry.org/docs/pyproject/#extras) are stored | ||
| in-memory using a `frozenset`, the elements of which are arbitrarily ordered. | ||
|
|
||
| As of 2022-02-24 this is [fixed in the default | ||
| branch](https://github.com/python-poetry/poetry-core/pull/280) of | ||
| [`poetry-core`](https://github.com/python-poetry/poetry-core) but isn't yet released. | ||
|
|
||
| Updates of minor and patch versions of dependencies are handled automatically by | ||
| [`renovate`](https://github.com/renovatebot/renovate). | ||
|
|
||
| ## Merging PRs | ||
|
|
||
| PRs can be merged using the [`gh` command line tool](https://github.com/cli/cli) | ||
| or with the GitHub web UI. | ||
|
|
||
| ## Release | ||
|
|
||
| Ibis is released on [PyPI](https://pypi.org/project/ibis-framework/) and [Conda Forge](https://github.com/conda-forge/ibis-framework-feedstock). | ||
|
|
||
| === "PyPI" | ||
|
|
||
| Releases to PyPI are handled automatically using [semantic | ||
| release](https://egghead.io/lessons/javascript-automating-releases-with-semantic-release). | ||
|
|
||
| To trigger a release use the [Release GitHub Action](https://github.com/ibis-project/ibis/actions/workflows/release.yml). | ||
|
|
||
| === "`conda-forge`" | ||
|
|
||
| The conda-forge package is maintained as a [conda-forge feedstock](https://github.com/conda-forge/ibis-framework-feedstock). | ||
|
|
||
| After a release to PyPI, the conda-forge bot automatically updates the ibis | ||
| package. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| import ibis | ||
|
|
||
| con = ibis.sqlite.connect("crunchbase.db") | ||
|
|
||
| c = con.table("companies") | ||
| i = con.table("investments") | ||
|
|
||
| expr = ( | ||
| c.left_join(i, c.permalink == i.company_permalink) | ||
| .group_by(investor_name=ibis.coalesce(i.investor_name, "NO INVESTOR")) | ||
| .aggregate( | ||
| num_investments=c.permalink.nunique(), | ||
| acq_ipos=( | ||
| c.status.isin(("ipo", "acquired")) | ||
| .ifelse(c.permalink, ibis.NA) | ||
| .nunique() | ||
| ), | ||
| ) | ||
| .mutate(acq_rate=lambda t: t.acq_ipos / t.num_investments) | ||
| .sort_by(ibis.desc(2)) | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| SELECT | ||
| *, | ||
| CAST(acq_ipos / num_investments AS FLOAT) AS acq_rate | ||
| FROM ( | ||
| SELECT | ||
| COALESCE(i.investor_name, 'NO INVESTOR') AS investor_name, | ||
| COUNT(DISTINCT c.permalink) AS num_investments, | ||
| COUNT( | ||
| DISTINCT | ||
| CASE | ||
| WHEN c.status IN ('ipo', 'acquired') THEN c.permalink | ||
| ELSE NULL | ||
| END | ||
| ) AS acq_ipos | ||
| FROM companies AS c | ||
| LEFT JOIN investments AS i | ||
| ON c.permalink = i.company_permalink | ||
| GROUP BY 1 | ||
| ORDER BY 2 DESC | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,215 @@ | ||
| --- | ||
| hide: | ||
| - toc | ||
| --- | ||
|
|
||
| # :ibis-logo: Ibis | ||
|
|
||
| ## Expressive analytics in Python at any scale. | ||
|
|
||
| <script | ||
| src="https://asciinema.org/a/yp5Ww4XKyjJsUCXkEz5or9rPq.js" | ||
| data-autoplay="true" | ||
| data-preload="true" | ||
| data-loop="true" | ||
| data-i="4" | ||
| data-rows="20" | ||
| id="asciicast-yp5Ww4XKyjJsUCXkEz5or9rPq" | ||
| async> | ||
| </script> | ||
|
|
||
| ## Installation | ||
|
|
||
| === "pip" | ||
|
|
||
| ```sh | ||
| pip install ibis-framework | ||
| ``` | ||
|
|
||
| {% for mgr in ["conda", "mamba"] %} | ||
| === "{{ mgr }}" | ||
|
|
||
| ```sh | ||
| {{ mgr }} install -c conda-forge ibis-framework | ||
| ``` | ||
|
|
||
| {% endfor %} | ||
|
|
||
| Try it out! | ||
|
|
||
| ```sh | ||
| python -c 'import ibis; print(ibis.__version__)' | ||
| ``` | ||
|
|
||
| ## Features | ||
|
|
||
| ### SQL Coverage | ||
|
|
||
| #### Anything you can write in a `SELECT` statement you can write in Ibis. | ||
|
|
||
| === "Group By" | ||
|
|
||
| ##### SQL | ||
|
|
||
| ```sql | ||
| SELECT f, sum(a + b) AS d | ||
| FROM t | ||
| GROUP BY f | ||
| ``` | ||
|
|
||
| ##### Ibis | ||
|
|
||
| ```python | ||
| t.group_by("f").aggregate(d=(t.a + t.b).sum()) | ||
| ``` | ||
|
|
||
| === "Join" | ||
|
|
||
| ##### SQL | ||
|
|
||
| ```sql | ||
| SELECT exp(t.a) AS d | ||
| FROM t | ||
| LEFT SEMI JOIN s | ||
| ON t.x = s.y | ||
| ``` | ||
|
|
||
| ##### Ibis | ||
|
|
||
| ```python | ||
| t.semi_join(s, t.x == s.y).select([lambda t: t.a.exp().name("d")]) | ||
| ``` | ||
|
|
||
| === "Window Functions" | ||
|
|
||
| ##### SQL | ||
|
|
||
| ```sql | ||
| SELECT *, avg(x) OVER (PARTITION BY y) as z | ||
| FROM t | ||
| ``` | ||
|
|
||
| ##### Ibis | ||
|
|
||
| ```python | ||
| t.group_by("y").mutate(z=t.x.avg()) | ||
| ``` | ||
|
|
||
| !!! tip "Coming from SQL?" | ||
|
|
||
| Check out [Ibis for SQL Programmers](ibis-for-sql-programmers) | ||
|
|
||
| ### Abstract Over SQL Dialects | ||
|
|
||
| #### No more rewrites when scaling up or down. | ||
|
|
||
| === "SQLite" | ||
|
|
||
| ```python | ||
| con = ibis.sqlite.connect("my_sqlite.db") | ||
| ``` | ||
|
|
||
| === "PostgreSQL" | ||
|
|
||
| ```python | ||
| con = ibis.postgres.connect(user="me", host="my_computer", port=9090) | ||
| ``` | ||
|
|
||
| === "BigQuery" | ||
|
|
||
| ```python | ||
| con = ibis.bigquery.connect(project_id="my_project_id", dataset_id="my_dataset_id") | ||
| ``` | ||
|
|
||
| ```python | ||
| t = con.table("t") | ||
| t.group_by("y").mutate(z=t.x.avg()) | ||
| ``` | ||
|
|
||
| ### Ecosystem | ||
|
|
||
| #### Ibis builds on top of and works with existing Python tools. | ||
|
|
||
| ```python | ||
| expr = t.semi_join(s, t.x == t.y).select([lambda t: t.a.exp().name("d")]).head(2) | ||
| df = expr.execute() # a pandas DataFrame! | ||
| ``` | ||
|
|
||
| ## Example | ||
|
|
||
| Let's compute the number of citizens per square kilometer in Asia: | ||
|
|
||
| ```python | ||
| >>> import ibis | ||
| >>> db = ibis.sqlite.connect("geography.db") | ||
| >>> countries = db.table("countries") | ||
| >>> asian_countries = countries.filter(countries.continent == "AS") | ||
| >>> density_in_asia = asian_countries.population.sum() / asian_countries.area_km2.sum() | ||
| >>> density_in_asia.execute() | ||
| 130.7019141926602 | ||
| ``` | ||
|
|
||
| !!! tip "Learn more!" | ||
|
|
||
| Learn more about Ibis in [our tutorial](tutorial/01-Introduction-to-Ibis). | ||
|
|
||
| ## Comparison to other tools | ||
|
|
||
| === "SQL" | ||
|
|
||
| !!! tip "Coming from SQL?" | ||
|
|
||
| Check out [Ibis for SQL Programmers](ibis-for-sql-programmers)! | ||
|
|
||
| Ibis gives you the benefit of a programming language. You don't need to | ||
| sacrifice maintainability to get to those insights! | ||
|
|
||
| === "Ibis" | ||
|
|
||
| ``` py title="docs/example.py" linenums="1" | ||
| --8<-- "docs/example.py" | ||
| ``` | ||
|
|
||
| === "SQL" | ||
|
|
||
| ``` sql title="docs/example.sql" linenums="1" | ||
| --8<-- "docs/example.sql" | ||
| ``` | ||
|
|
||
| === "SQLAlchemy" | ||
|
|
||
| Ibis aims to be more concise and composable than | ||
| [SQLAlchemy](https://www.sqlalchemy.org/) when writing interactive | ||
| analytics code. | ||
|
|
||
| !!! success "Ibis :heart:'s SQLAlchemy" | ||
|
|
||
| Ibis generates SQLAlchemy expressions for some of our backends | ||
| including the [PostgreSQL](./backends/PostgreSQL.md) and | ||
| [SQLite](./backends/SQLite.md) backends! | ||
|
|
||
| === "Ibis" | ||
|
|
||
| ``` py title="docs/example.py" linenums="1" | ||
| --8<-- "docs/example.py" | ||
| ``` | ||
|
|
||
| === "SQLAlchemy" | ||
|
|
||
| ``` py title="docs/sqlalchemy_example.py" linenums="1" | ||
| --8<-- "docs/sqlalchemy_example.py" | ||
| ``` | ||
|
|
||
| <div class="download-button" markdown> | ||
| [:fontawesome-solid-cloud-arrow-down: Download the example data](https://storage.googleapis.com/ibis-testing-data/crunchbase.db){ .md-button .md-button--primary } | ||
| </div> | ||
|
|
||
| ## What's Next? | ||
|
|
||
| !!! question "Need a specific backend?" | ||
|
|
||
| Take a look at the [backends](./backends/index.md) documentation! | ||
|
|
||
| !!! tip "Interested in contributing?" | ||
|
|
||
| Get started by [setting up a development environment](./contribute/01_environment.md)! |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| window.MathJax = { | ||
| tex: { | ||
| inlineMath: [["\\(", "\\)"]], | ||
| displayMath: [["\\[", "\\]"]], | ||
| processEscapes: true, | ||
| processEnvironments: true, | ||
| }, | ||
| options: { | ||
| ignoreHtmlClass: ".*|", | ||
| processHtmlClass: "arithmatex", | ||
| }, | ||
| }; | ||
|
|
||
| document$.subscribe(() => { | ||
| MathJax.typesetPromise(); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| {% extends "base.html" %} | ||
|
|
||
| {% block extrahead %} | ||
| <meta name="google-site-verification" content="IVqzkYiD5E35oD4kkVOcTYCTfqWKU1f6zOHCnLIPkUU"> | ||
| <script data-goatcounter="https://ibis.goatcounter.com/count" | ||
| async src="https://gc.zgo.at/count.js"></script> | ||
| {% endblock %} | ||
|
|
||
| {% block content %} | ||
| {% if page.nb_url %} | ||
| <a href="{{ page.nb_url }}" title="Download Notebook" class="md-content__button md-icon"> | ||
| {% include ".icons/material/download.svg" %} | ||
| </a> | ||
| {% endif %} | ||
|
|
||
| {{ super() }} | ||
| {% endblock content %} |