| @@ -0,0 +1,88 @@ | ||
| { | ||
| "cells": [ | ||
| { | ||
| "cell_type": "markdown", | ||
| "metadata": {}, | ||
| "source": [ | ||
| "# Creation of the geography database\n", | ||
| "\n", | ||
| "This notbook creates the SQLite `geography.db` database, used in the Ibis tutorials.\n", | ||
| "\n", | ||
| "The source of the `countries` table has been obtained from [GeoNames](https://www.geonames.org/countries/).\n", | ||
| "\n", | ||
| "The data for the `gdp` data has been obtained from the [World Bank website](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD)." | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 1, | ||
| "metadata": {}, | ||
| "outputs": [], | ||
| "source": [ | ||
| "import json\n", | ||
| "import sqlite3\n", | ||
| "\n", | ||
| "\n", | ||
| "with open('geography.json') as f:\n", | ||
| " data = json.load(f)\n", | ||
| "\n", | ||
| "conn = sqlite3.connect('geography.db')\n", | ||
| "cursor = conn.cursor()\n", | ||
| "\n", | ||
| "cursor.execute('''\n", | ||
| "CREATE TABLE countries (\n", | ||
| " iso_alpha2 TEXT,\n", | ||
| " iso_alpha3 TEXT,\n", | ||
| " iso_numeric INT,\n", | ||
| " fips TEXT,\n", | ||
| " name TEXT,\n", | ||
| " capital TEXT,\n", | ||
| " area_km2 REAL,\n", | ||
| " population INT,\n", | ||
| " continent TEXT);\n", | ||
| "''')\n", | ||
| "cursor.executemany('INSERT INTO countries VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',\n", | ||
| " data['countries'])\n", | ||
| "\n", | ||
| "cursor.execute('''\n", | ||
| "CREATE TABLE gdp (\n", | ||
| " country_code TEXT,\n", | ||
| " year INT,\n", | ||
| " value REAL);\n", | ||
| "''')\n", | ||
| "cursor.executemany('INSERT INTO gdp VALUES (?, ?, ?)',\n", | ||
| " data['gdp'])\n", | ||
| "\n", | ||
| "conn.commit()" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": {}, | ||
| "outputs": [], | ||
| "source": [] | ||
| } | ||
| ], | ||
| "metadata": { | ||
| "kernelspec": { | ||
| "display_name": "Python 3", | ||
| "language": "python", | ||
| "name": "python3" | ||
| }, | ||
| "language_info": { | ||
| "codemirror_mode": { | ||
| "name": "ipython", | ||
| "version": 3 | ||
| }, | ||
| "file_extension": ".py", | ||
| "mimetype": "text/x-python", | ||
| "name": "python", | ||
| "nbconvert_exporter": "python", | ||
| "pygments_lexer": "ipython3", | ||
| "version": "3.7.6" | ||
| } | ||
| }, | ||
| "nbformat": 4, | ||
| "nbformat_minor": 4 | ||
| } |
| @@ -0,0 +1,18 @@ | ||
| .. _tutorial: | ||
|
|
||
| Tutorial | ||
| ======== | ||
|
|
||
| Here we show Jupyter notebooks that take you through various tasks using Ibis. | ||
|
|
||
| .. toctree:: | ||
| :maxdepth: 1 | ||
|
|
||
| 01-Introduction-to-Ibis.ipynb | ||
| 02-Aggregates-Joins.ipynb | ||
| 03-Expressions-Lazy-Mode-Logging.ipynb | ||
| 04-More-Value-Expressions.ipynb | ||
| 05-IO-Create-Insert-External-Data.ipynb | ||
| 06-Advanced-Topics-TopK-SelfJoins.ipynb | ||
| 07-Advanced-Topics-ComplexFiltering.ipynb | ||
| 08-More-Analytics-Helpers.ipynb |
| @@ -0,0 +1,22 @@ | ||
| .. _userguide: | ||
|
|
||
| ********** | ||
| User guide | ||
| ********** | ||
|
|
||
| The user guide covers Ibis by topic. | ||
|
|
||
| If you are new to Ibis, you can learn the basics in the :ref:`tutorial`. | ||
|
|
||
| If you are looking for information about a particular class or method, | ||
| that information is available in the :ref:`api`. | ||
|
|
||
| .. toctree:: | ||
| :maxdepth: 1 | ||
|
|
||
| configuration | ||
| sql | ||
| udf | ||
| geospatial_analysis | ||
| design | ||
| extending/index |
| @@ -0,0 +1,21 @@ | ||
| .. _udf: | ||
|
|
||
| User Defined Functions | ||
| ====================== | ||
|
|
||
| Ibis provides a mechanism for writing custom scalar and aggregate functions, | ||
| with varying levels of support for different backends. UDFs/UDAFs are a complex | ||
| topic. | ||
|
|
||
| This section of the documentation will discuss some of the backend-specific | ||
| details of user defined functions. | ||
|
|
||
| .. warning:: | ||
|
|
||
| The UDF API is provisional and subject to change. | ||
|
|
||
| The following backends provide UDF support: | ||
|
|
||
| - :ref:`udf.impala` | ||
| - :ref:`udf.pandas` | ||
| - :ref:`udf.bigquery` |
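| | ||
| As a rough illustration (a sketch only; the exact import path and decorator names | ||
| may differ between backends and Ibis versions), an element-wise UDF for the pandas | ||
| backend can look like this: | ||
| | ||
| .. code-block:: python | ||
| | ||
|     import ibis.expr.datatypes as dt | ||
|     from ibis.backends.pandas.udf import udf  # assumed import path | ||
| | ||
|     @udf.elementwise(input_type=[dt.double], output_type=dt.double) | ||
|     def add_one(x): | ||
|         return x + 1.0 | ||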
| @@ -0,0 +1,23 @@ | ||
| # Ibis: Python Data Analysis Productivity Framework | ||
|
|
||
| Ibis is a toolbox that bridges the gap between local Python environments (such as | ||
| pandas and scikit-learn) and remote storage and execution systems such as Hadoop | ||
| components (HDFS, Impala, Hive, Spark) and SQL databases (Postgres, | ||
| etc.). Its goal is to simplify analytical workflows and make you more | ||
| productive. | ||
|
|
||
| We have a handful of specific priority focus areas: | ||
|
|
||
| - Enable data analysts to translate local, single-node data idioms to scalable | ||
| computation representations (e.g. SQL or Spark) | ||
| - Integration with pandas and other Python data ecosystem components | ||
| - Provide high-level analytics APIs and workflow tools to enhance productivity | ||
| and streamline common or tedious tasks | ||
| - Integration with community standard data formats (e.g. Parquet and Avro) | ||
| - Abstract away database-specific SQL differences | ||
|
|
||
| As the [Apache Arrow](http://arrow.apache.org/) project develops, we will | ||
| look to use Arrow to enable computational code written in Python to be executed | ||
| natively within other systems like Apache Spark and Apache Impala. | ||
|
|
||
| Source code is on GitHub: <https://github.com/ibis-project/ibis>. |
| @@ -0,0 +1,47 @@ | ||
| # Team | ||
|
|
||
| ## Contributors | ||
|
|
||
| {{ ibis.project_name }} is developed and maintained by a | ||
| [community of volunteer contributors](https://github.com/{{ ibis.github_repo_url }}/graphs/contributors). | ||
|
|
||
|
|
||
| {% for group in team %} | ||
|
|
||
| ## {{ group.name }} | ||
|
|
||
| <div class="row maintainers"> | ||
| {% for row in group.members | batch(6, "") %} | ||
| <div class="card-group maintainers"> | ||
| {% for person in row %} | ||
| {% if person %} | ||
| <div class="card"> | ||
| <img class="card-img-top" alt="" src="{{ person.avatar_url }}"/> | ||
| <div class="card-body"> | ||
| <h6 class="card-title"> | ||
| {% if person.blog %} | ||
| <a href="{{ person.blog }}"> | ||
| {{ person.name or person.login }} | ||
| </a> | ||
| {% else %} | ||
| {{ person.name or person.login }} | ||
| {% endif %} | ||
| </h6> | ||
| <p class="card-text small"><a href="{{ person.html_url }}">{{ person.login }}</a></p> | ||
| </div> | ||
| </div> | ||
| {% else %} | ||
| <div class="card border-0"></div> | ||
| {% endif %} | ||
| {% endfor %} | ||
| </div> | ||
| {% endfor %} | ||
| </div> | ||
|
|
||
| {% endfor %} | ||
|
|
||
| {{ ibis.project_name }} aims to be a welcoming, friendly, diverse and inclusive community. | ||
| Everybody is welcome, regardless of gender, sexual orientation, gender identity | ||
| and expression, disability, physical appearance, body size, race, or religion. | ||
| We do not tolerate harassment of community members in any form. | ||
| In particular, people from underrepresented groups are encouraged to join the community. |
| @@ -0,0 +1,20 @@ | ||
| # Code of Conduct | ||
|
|
||
| Ibis is governed by the | ||
| [NumFOCUS code of conduct](https://numfocus.org/code-of-conduct), | ||
| which, in short, is: | ||
|
|
||
| Be kind to others. Do not insult or put down others. Behave professionally. | ||
| Remember that harassment and sexist, racist, or exclusionary jokes are not | ||
| appropriate for {{ ibis.project_name }}. | ||
|
|
||
| All communication should be appropriate for a professional audience, | ||
| including people of many different backgrounds. Sexual language and | ||
| imagery are not appropriate. | ||
|
|
||
| {{ ibis.project_name }} is dedicated to providing a harassment-free community for everyone, | ||
| regardless of gender, sexual orientation, gender identity and expression, | ||
| disability, physical appearance, body size, race, or religion. We do not | ||
| tolerate harassment of community members in any form. | ||
|
|
||
| Thank you for helping make this a welcoming, friendly community for all. |
| @@ -0,0 +1,60 @@ | ||
| # Ecosystem | ||
|
|
||
| ## [pandas](https://github.com/pandas-dev/pandas) | ||
|
|
||
| [pandas](https://pandas.pydata.org) is a Python package that provides fast, | ||
| flexible, and expressive data structures designed to make working with "relational" or | ||
| "labeled" data both easy and intuitive. It aims to be the fundamental high-level | ||
| building block for doing practical, real world data analysis in Python. Additionally, | ||
| it has the broader goal of becoming the most powerful and flexible open source data | ||
| analysis / manipulation tool available in any language. It is already well on its way | ||
| towards this goal. | ||
|
|
||
| ## [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy) | ||
|
|
||
| [SQLAlchemy](https://www.sqlalchemy.org/) is the Python SQL toolkit and | ||
| Object Relational Mapper that gives application developers the full power and | ||
| flexibility of SQL. SQLAlchemy provides a full suite of well known enterprise-level | ||
| persistence patterns, designed for efficient and high-performing database access, | ||
| adapted into a simple and Pythonic domain language. | ||
|
|
||
| ## [sql_to_ibis](https://github.com/zbrookle/sql_to_ibis) | ||
|
|
||
| [sql_to_ibis](https://github.com/zbrookle/sql_to_ibis) is a Python package that | ||
| translates SQL syntax into ibis expressions. This allows users to use one unified SQL | ||
| dialect to target many different backends, even those that don't traditionally | ||
| support SQL. | ||
|
|
||
| A good use case would be ease of migration between databases or backends. Suppose you | ||
| were moving from SQLite to MySQL, or from PostgreSQL to BigQuery. These | ||
| systems all have subtle differences in their SQL dialects, but with sql_to_ibis, | ||
| these differences are handled automatically by Ibis. | ||
|
|
||
| Another good use case is pandas, which has no SQL interface for querying a | ||
| DataFrame. With sql_to_ibis this becomes possible. | ||
|
|
||
| For example, | ||
|
|
||
| ```python | ||
| import ibis.backends.pandas | ||
| import pandas | ||
| import sql_to_ibis | ||
|
|
||
| df = pandas.DataFrame({"column1": [1, 2, 3], "column2": ["4", "5", "6"]}) | ||
| ibis_table = ibis.backends.pandas.from_dataframe( | ||
| df, name="my_table", client=ibis.backends.pandas.PandasClient({}) | ||
| ) | ||
| sql_to_ibis.register_temp_table(ibis_table, "my_table") | ||
| sql_to_ibis.query( | ||
| "select column1, cast(column2 as integer) + 1 as my_col2 from my_table" | ||
| ).execute() | ||
| ``` | ||
| This would output a dataframe that looks like: | ||
|
|
||
| ``` | ||
| | column1 | my_col2 | | ||
| |---------|---------| | ||
| | 1 | 5 | | ||
| | 2 | 6 | | ||
| | 3 | 7 | | ||
| ``` |
| @@ -0,0 +1,68 @@ | ||
| pysuerga: | ||
| extensions: | ||
| - pysuerga.contrib.team | ||
| markdown_extensions: | ||
| - toc | ||
| - tables | ||
| - fenced_code | ||
| - codehilite | ||
|
|
||
| ibis: | ||
| project_name: Ibis | ||
| github_repo_url: ibis-project/ibis | ||
|
|
||
| layout: | ||
| title: "Ibis: Python data analysis productivity framework" | ||
| favicon: /static/img/favicon.ico | ||
| stylesheets: | ||
| - /static/css/ibis.css | ||
| - /static/css/codehilite.css | ||
| logo: /static/img/logo_ibis.svg | ||
| header_text: Ibis | ||
| navbar: | ||
| - name: "About us" | ||
| sections: | ||
| - name: "About Ibis" | ||
| target: /about/index.html | ||
| - name: "Team" | ||
| target: /about/team.html | ||
| - name: "Roadmap" | ||
| target: /about/roadmap.html | ||
| - name: "License" | ||
| target: /about/license.html | ||
| - name: "Getting started" | ||
| target: /getting_started.html | ||
| - name: "Documentation" | ||
| target: /docs/ | ||
| - name: "Community" | ||
| sections: | ||
| - name: "Ask a question (StackOverflow)" | ||
| target: https://stackoverflow.com/questions/tagged/ibis | ||
| - name: "Chat (Gitter)" | ||
| target: https://gitter.im/ibis-dev/Lobby | ||
| - name: "Code of conduct" | ||
| target: /community/coc.html | ||
| - name: "Ecosystem" | ||
| target: /community/ecosystem.html | ||
| - name: "Contribute" | ||
| target: /contribute.html | ||
| social_media: | ||
| - font_awesome: twitter | ||
| url: https://twitter.com/IbisData | ||
| - font_awesome: github | ||
| url: https://github.com/ibis-project/ibis/ | ||
| footer_note: "© Copyright 2020, Ibis developers" | ||
| google_analytics: "" | ||
|
|
||
| team: | ||
| - name: "Active maintainers" | ||
| kind: github | ||
| members: | ||
| - jreback | ||
| - datapythonista | ||
| - name: "Former maintainers" | ||
| kind: github | ||
| members: | ||
| - wesm | ||
| - cpcloud | ||
| - kszucs |
| @@ -0,0 +1,132 @@ | ||
| # Contributing to Ibis | ||
|
|
||
| ## Set up a development environment | ||
|
|
||
| 1. Create a fork of the [Ibis repository](https://github.com/ibis-project/ibis), and clone it. | ||
|
|
||
| :::sh | ||
| git clone https://github.com/<your-github-username>/ibis | ||
|
|
||
|
|
||
| 2. [Download](https://docs.conda.io/en/latest/miniconda.html) and install Miniconda | ||
| 3. Create a Conda environment suitable for ibis development: | ||
|
|
||
| :::sh | ||
| cd ibis | ||
| conda env create | ||
|
|
||
|
|
||
| 4. Activate the environment | ||
|
|
||
| :::sh | ||
| conda activate ibis-dev | ||
|
|
||
| 5. Install your local copy of Ibis into the Conda environment. In the root of the project run: | ||
|
|
||
| :::sh | ||
| pip install -e . | ||
|
|
||
|
|
||
| ## Find an issue to work on | ||
|
|
||
| If you are working with Ibis and find a bug, or if you are reading the documentation and see something | ||
| that is wrong or could be clearer, you can work on that. | ||
|
|
||
| Sometimes you may want to contribute to Ibis but don't have anything specific in mind. In that case, | ||
| you can check the GitHub issue tracker for Ibis and look for issues with the label | ||
| [good first issue](https://github.com/ibis-project/ibis/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). | ||
| Feel free to also help with issues that don't have the label, but those may be more challenging | ||
| and require knowledge of Ibis internals. | ||
|
|
||
| Once you have found an issue you want to work on, write a comment with the text `/take`, and GitHub will | ||
| assign the issue to you. This way, nobody else will work on it at the same time. If you find an | ||
| issue that someone else is assigned to, please contact the assignee to check whether they are still working | ||
| on it. | ||
|
|
||
|
|
||
| ## Working with backends | ||
|
|
||
| Ibis comes with several backends. If you want to work with a specific backend, you will have to install | ||
| the dependencies for the backend with `conda install -n ibis-dev -c conda-forge --file="ci/deps/<backend>.yml"`. | ||
|
|
||
| If you don't have a database for the backend you want to work on, you can check the configuration of the | ||
| continuous integration, where Docker images are used for the different backends. This is defined in | ||
| `.github/workflows/main.yml`. | ||
|
|
||
| ## Run the test suite | ||
|
|
||
| To run the Ibis test suite, use the following command: | ||
|
|
||
| ```sh | ||
| PYTEST_BACKENDS="sqlite pandas" python -m pytest ibis/tests | ||
| ``` | ||
|
|
||
| You can replace `sqlite pandas` with the backend or backends (space-separated) that | ||
| you want to test. | ||
|
|
||
|
|
||
| ## Style and formatting | ||
|
|
||
| We use [flake8](http://flake8.pycqa.org/en/latest/), | ||
| [black](https://github.com/psf/black) and | ||
| [isort](https://github.com/pre-commit/mirrors-isort) to ensure our code | ||
| is formatted and linted properly. If you have properly set up your development | ||
| environment by running ``make develop``, the pre-commit hooks should check | ||
| that your proposed changes continue to conform to our style guide. | ||
|
|
||
| We use [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) as | ||
| our standard format for docstrings. | ||
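| | ||
| For illustration, a minimal numpydoc-style docstring (a generic example, not taken | ||
| from the Ibis codebase) looks like this: | ||
| | ||
| ```python | ||
| def add_one(x): | ||
|     """Add one to ``x``. | ||
| | ||
|     Parameters | ||
|     ---------- | ||
|     x : int | ||
|         The value to increment. | ||
| | ||
|     Returns | ||
|     ------- | ||
|     int | ||
|         ``x`` plus one. | ||
|     """ | ||
|     return x + 1 | ||
| ``` | ||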
|
|
||
|
|
||
| ## Commit philosophy | ||
|
|
||
| We aim to make our individual commits small and tightly focused on the feature | ||
| they are implementing. If you find yourself making functional changes to | ||
| different areas of the codebase, we prefer you break up your changes into | ||
| separate Pull Requests. In general, a philosophy of one Github Issue per | ||
| Pull Request is a good rule of thumb, though that isn't always possible. | ||
|
|
||
| We avoid merge commits (and in fact they are disabled in the Github repository) | ||
| so you may be asked to rebase your changes on top of the latest commits to | ||
| master if there have been changes since you last updated a Pull Request. | ||
| Rebasing your changes is usually as simple as running | ||
| ``git pull upstream master --rebase`` and then force-pushing to your branch: | ||
| ``git push origin <branch-name> -f``. | ||
|
|
||
|
|
||
| ## Commit/PR messages | ||
|
|
||
| Well-structured commit messages allow us to generate comprehensive release notes | ||
| and make it very easy to understand what a commit/PR contributes to our | ||
| codebase. Commit messages and PR titles should be prefixed with a standard | ||
| code that states what kind of change it is. They fall broadly into 3 categories: | ||
| ``FEAT (feature)``, ``BUG (bug)``, and ``SUPP (support)``. The ``SUPP`` | ||
| category has some more fine-grained aliases that you can use, such as ``BLD`` | ||
| (build), ``CI`` (continuous integration), ``DOC`` (documentation), ``TST`` | ||
| (testing), and ``RLS`` (releases). | ||
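| | ||
| For example, a hypothetical PR title following this convention could be | ||
| ``DOC: fix typos in the contributing guide``. | ||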
|
|
||
|
|
||
| ## Maintainer's guide | ||
|
|
||
| Maintainers generally perform two roles, merging PRs and making official | ||
| releases. | ||
|
|
||
|
|
||
| ### Merging PRs | ||
|
|
||
| We have a CLI script that will merge Pull Requests automatically once they have | ||
| been reviewed and approved. See the help message in ``dev/merge-pr.py`` for | ||
| full details. If you have two-factor authentication turned on in Github, you | ||
| will have to generate an application-specific password by following this | ||
| [guide](https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). | ||
| You will then use that generated password on the command line for the ``-P`` | ||
| argument. | ||
|
|
||
| Access the [Ibis "Merging PRs" wiki](https://github.com/ibis-project/ibis/wiki/Merging-PRs) page | ||
| for more information. | ||
|
|
||
|
|
||
| ### Releasing | ||
|
|
||
| Access the [Ibis "Releasing" wiki](https://github.com/ibis-project/ibis/wiki/Releasing-Ibis) page | ||
| for more information. |
| @@ -0,0 +1,39 @@ | ||
| # Getting started | ||
|
|
||
| ## Installation instructions | ||
|
|
||
| The following steps provide the easiest and recommended way to set up your | ||
| environment to use {{ ibis.project_name }}. Other installation options can be found on | ||
| the [advanced installation page]({{ base_url }}/docs/getting_started/install.html). | ||
|
|
||
| 1. Download [Anaconda](https://www.anaconda.com/distribution/) for your operating system and | ||
| the latest Python version, run the installer, and follow the steps. Detailed instructions | ||
| on how to install Anaconda can be found in the | ||
| [Anaconda documentation](https://docs.anaconda.com/anaconda/install/). | ||
|
|
||
| 2. In the Anaconda prompt (or terminal in Linux or MacOS), install {{ ibis.project_name }}: | ||
|
|
||
| :::sh | ||
| conda install -c conda-forge ibis-framework | ||
|
|
||
| 3. In the Anaconda prompt (or terminal in Linux or MacOS), start JupyterLab: | ||
|
|
||
| <img class="img-fluid" alt="" src="{{ base_url }}/static/img/install/anaconda_prompt.png"/> | ||
|
|
||
| 4. In JupyterLab, create a new (Python 3) notebook: | ||
|
|
||
| <img class="img-fluid" alt="" src="{{ base_url }}/static/img/install/jupyterlab_home.png"/> | ||
|
|
||
| 5. In the first cell of the notebook, you can import {{ ibis.project_name }} and check the version with: | ||
|
|
||
| :::python | ||
| import ibis | ||
| ibis.__version__ | ||
|
|
||
| 6. Now you are ready to use {{ ibis.project_name }}, and you can write your code in the next cells. | ||
|
|
||
| ## Tutorials | ||
|
|
||
| You can learn more about {{ ibis.project_name }} in the | ||
| [tutorials](https://ibis-project.org/docs/tutorial/index.html), | ||
| and more about JupyterLab in the [JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html). |
| @@ -0,0 +1,80 @@ | ||
| <div class="row"> | ||
| <div class="col"> | ||
| <section class="jumbotron text-center home-jumbotron"> | ||
| <p> | ||
| Write your analytics code once, run it everywhere. | ||
| </p> | ||
| </section> | ||
| </div> | ||
| </div> | ||
|
|
||
| ## Main features | ||
|
|
||
| Ibis provides a standard way to write analytics code that can then be run on | ||
| multiple engines. | ||
|
|
||
| - **Full coverage of SQL features**: You can code in Ibis anything you can implement in a SQL SELECT | ||
| - **Transparent to SQL implementation differences**: Write standard code that translates to any SQL syntax | ||
| - **High performance execution**: Execute at the speed of your backend, not your local computer | ||
| - **Integration with community data formats and tools** (e.g. pandas, Parquet, Avro...) | ||
|
|
||
| ## Supported engines | ||
|
|
||
| - Standard DBMS: [PostgreSQL](/docs/backends/postgres.html), [MySQL](/docs/backends/mysql.html), [SQLite](/docs/backends/sqlite.html) | ||
| - Analytical DBMS: [OmniSciDB](/docs/backends/omnisci.html), [ClickHouse](/docs/backends/clickhouse.html) | ||
| - Distributed platforms: [Impala](/docs/backends/impala.html), [Spark](/docs/backends/spark.html), [BigQuery](/docs/backends/bigquery.html) | ||
| - In memory execution: [pandas](/docs/backends/pandas.html) | ||
|
|
||
| ## Example | ||
|
|
||
| The following example is all the code you need to connect to a database containing a | ||
| countries table, and compute the number of citizens per square kilometer in Asia: | ||
|
|
||
| ```python | ||
| >>> import ibis | ||
| >>> db = ibis.sqlite.connect('geography.db') | ||
| >>> countries = db.table('countries') | ||
| >>> asian_countries = countries.filter(countries['continent'] == 'AS') | ||
| >>> density_in_asia = asian_countries['population'].sum() / asian_countries['area_km2'].sum() | ||
| >>> density_in_asia.execute() | ||
| 130.7019141926602 | ||
| ``` | ||
|
|
||
| Learn more about Ibis in [our tutorial](/docs/tutorial/). | ||
|
|
||
| ## Comparison to other tools | ||
|
|
||
| ### Why not use [pandas](https://pandas.pydata.org/)? | ||
|
|
||
| pandas is great for many use cases. But pandas loads the data into the | ||
| memory of the local host, and performs the computations on it. | ||
|
|
||
| Ibis, instead, leaves the data in its storage system, and performs the computations | ||
| there. This means that even if your data is distributed, or it requires | ||
| GPU-accelerated speed, Ibis code will be able to benefit from your storage | ||
| capabilities. | ||
|
|
||
| ### Why not use SQL? | ||
|
|
||
| SQL is widely used and very convenient when writing simple queries. But as | ||
| the complexity of operations grows, SQL can become very difficult to deal with. | ||
|
|
||
| With Ibis, you can take full advantage of software engineering techniques to | ||
| keep your code readable and maintainable, while writing very complex analytics | ||
| code. | ||
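| | ||
| For instance (a sketch building on the example above, not a prescribed pattern), | ||
| complex logic can be wrapped in ordinary Python functions and reused across backends: | ||
| | ||
| ```python | ||
| def population_density(countries, continent): | ||
|     """People per square kilometer for a given continent.""" | ||
|     filtered = countries.filter(countries['continent'] == continent) | ||
|     return filtered['population'].sum() / filtered['area_km2'].sum() | ||
| | ||
| # Reuse the same logic for any continent, with any supported backend | ||
| population_density(countries, 'EU').execute() | ||
| ``` | ||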
|
|
||
| ### Why not use [SQLAlchemy](https://www.sqlalchemy.org/)? | ||
|
|
||
| SQLAlchemy is very convenient as an ORM (Object Relational Mapper), providing | ||
| a Python interface to SQL databases. Ibis uses SQLAlchemy internally, but aims | ||
| to provide a friendlier syntax for analytics code. Ibis is also not limited | ||
| to SQL databases: it can connect to distributed platforms and in-memory | ||
| representations as well. | ||
|
|
||
| ### Why not use [Dask](https://dask.org/)? | ||
|
|
||
| Dask provides advanced parallelism, and can distribute pandas jobs. Ibis can | ||
| process data in a similar way, but across a number of different backends. For | ||
| example, given a Spark cluster, Ibis allows you to perform analytics on it | ||
| with a familiar Python syntax. Ibis plans to add support for a Dask backend | ||
| in the future. | ||
| @@ -0,0 +1,69 @@ | ||
| .codehilite .hll { background-color: #ffffcc } | ||
| .codehilite { background: #f8f8f8; } | ||
| .codehilite .c { color: #408080; font-style: italic } /* Comment */ | ||
| .codehilite .err { border: 1px solid #FF0000 } /* Error */ | ||
| .codehilite .k { color: #008000; font-weight: bold } /* Keyword */ | ||
| .codehilite .o { color: #666666 } /* Operator */ | ||
| .codehilite .ch { color: #408080; font-style: italic } /* Comment.Hashbang */ | ||
| .codehilite .cm { color: #408080; font-style: italic } /* Comment.Multiline */ | ||
| .codehilite .cp { color: #BC7A00 } /* Comment.Preproc */ | ||
| .codehilite .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */ | ||
| .codehilite .c1 { color: #408080; font-style: italic } /* Comment.Single */ | ||
| .codehilite .cs { color: #408080; font-style: italic } /* Comment.Special */ | ||
| .codehilite .gd { color: #A00000 } /* Generic.Deleted */ | ||
| .codehilite .ge { font-style: italic } /* Generic.Emph */ | ||
| .codehilite .gr { color: #FF0000 } /* Generic.Error */ | ||
| .codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */ | ||
| .codehilite .gi { color: #00A000 } /* Generic.Inserted */ | ||
| .codehilite .go { color: #888888 } /* Generic.Output */ | ||
| .codehilite .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ | ||
| .codehilite .gs { font-weight: bold } /* Generic.Strong */ | ||
| .codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ | ||
| .codehilite .gt { color: #0044DD } /* Generic.Traceback */ | ||
| .codehilite .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ | ||
| .codehilite .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ | ||
| .codehilite .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ | ||
| .codehilite .kp { color: #008000 } /* Keyword.Pseudo */ | ||
| .codehilite .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ | ||
| .codehilite .kt { color: #B00040 } /* Keyword.Type */ | ||
| .codehilite .m { color: #666666 } /* Literal.Number */ | ||
| .codehilite .s { color: #BA2121 } /* Literal.String */ | ||
| .codehilite .na { color: #7D9029 } /* Name.Attribute */ | ||
| .codehilite .nb { color: #008000 } /* Name.Builtin */ | ||
| .codehilite .nc { color: #0000FF; font-weight: bold } /* Name.Class */ | ||
| .codehilite .no { color: #880000 } /* Name.Constant */ | ||
| .codehilite .nd { color: #AA22FF } /* Name.Decorator */ | ||
| .codehilite .ni { color: #999999; font-weight: bold } /* Name.Entity */ | ||
| .codehilite .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ | ||
| .codehilite .nf { color: #0000FF } /* Name.Function */ | ||
| .codehilite .nl { color: #A0A000 } /* Name.Label */ | ||
| .codehilite .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ | ||
| .codehilite .nt { color: #008000; font-weight: bold } /* Name.Tag */ | ||
| .codehilite .nv { color: #19177C } /* Name.Variable */ | ||
| .codehilite .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ | ||
| .codehilite .w { color: #bbbbbb } /* Text.Whitespace */ | ||
| .codehilite .mb { color: #666666 } /* Literal.Number.Bin */ | ||
| .codehilite .mf { color: #666666 } /* Literal.Number.Float */ | ||
| .codehilite .mh { color: #666666 } /* Literal.Number.Hex */ | ||
| .codehilite .mi { color: #666666 } /* Literal.Number.Integer */ | ||
| .codehilite .mo { color: #666666 } /* Literal.Number.Oct */ | ||
| .codehilite .sa { color: #BA2121 } /* Literal.String.Affix */ | ||
| .codehilite .sb { color: #BA2121 } /* Literal.String.Backtick */ | ||
| .codehilite .sc { color: #BA2121 } /* Literal.String.Char */ | ||
| .codehilite .dl { color: #BA2121 } /* Literal.String.Delimiter */ | ||
| .codehilite .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ | ||
| .codehilite .s2 { color: #BA2121 } /* Literal.String.Double */ | ||
| .codehilite .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ | ||
| .codehilite .sh { color: #BA2121 } /* Literal.String.Heredoc */ | ||
| .codehilite .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ | ||
| .codehilite .sx { color: #008000 } /* Literal.String.Other */ | ||
| .codehilite .sr { color: #BB6688 } /* Literal.String.Regex */ | ||
| .codehilite .s1 { color: #BA2121 } /* Literal.String.Single */ | ||
| .codehilite .ss { color: #19177C } /* Literal.String.Symbol */ | ||
| .codehilite .bp { color: #008000 } /* Name.Builtin.Pseudo */ | ||
| .codehilite .fm { color: #0000FF } /* Name.Function.Magic */ | ||
| .codehilite .vc { color: #19177C } /* Name.Variable.Class */ | ||
| .codehilite .vg { color: #19177C } /* Name.Variable.Global */ | ||
| .codehilite .vi { color: #19177C } /* Name.Variable.Instance */ | ||
| .codehilite .vm { color: #19177C } /* Name.Variable.Magic */ | ||
| .codehilite .il { color: #666666 } /* Literal.Number.Integer.Long */ |
| @@ -0,0 +1,104 @@ | ||
| body { | ||
| padding-top: 5em; | ||
| color: #444; | ||
| } | ||
| h1 { | ||
| font-size: 2.4rem; | ||
| font-weight: 700; | ||
| color: #2980B9; | ||
| } | ||
| h2 { | ||
| font-size: 1.45rem; | ||
| font-weight: 700; | ||
| color: black; | ||
| } | ||
| h3 { | ||
| font-size: 1.1rem; | ||
| font-weight: 600; | ||
| color: #444; | ||
| } | ||
| h3 a { | ||
| color: #446; | ||
| } | ||
| h4 { | ||
| font-size: 1rem; | ||
| font-weight: 500; | ||
| color: #444; | ||
| } | ||
| a { | ||
| color: #130654; | ||
| } | ||
| pre { | ||
| white-space: pre; | ||
| padding: 10px; | ||
| background-color: #fafafa; | ||
| color: #222; | ||
| line-height: 1.2em; | ||
| border: 1px solid #c9c9c9; | ||
| margin: 1.5em 0; | ||
| box-shadow: 1px 1px 1px #d8d8d8 | ||
| } | ||
| blockquote, blockquote p { | ||
| color: #888; | ||
| } | ||
| .header-title { | ||
| font-size: 1.75rem; | ||
| font-weight: bold; | ||
| vertical-align: middle; | ||
| } | ||
| .blue { | ||
| color: #150458; | ||
| } | ||
| .pink { | ||
| color: #e70488; | ||
| } | ||
| .fab { | ||
| font-size: 1.2rem; | ||
| color: #666; | ||
| } | ||
| .fab:hover { | ||
| color: #130654; | ||
| } | ||
| a.navbar-brand img { | ||
| max-height: 50px; | ||
| height: 3rem; | ||
| margin-right: 0.75rem; | ||
| } | ||
| div.card { | ||
| margin: 0 0 .2em .2em !important; | ||
| } | ||
| div.card .card-title { | ||
| font-weight: 500; | ||
| color: #130654; | ||
| } | ||
| .book { | ||
| padding: 0 20%; | ||
| } | ||
| .bg-dark { | ||
| background-color: #2980B9 !important; | ||
| } | ||
| .navbar-dark .navbar-nav .nav-link { | ||
| color: rgba(255, 255, 255, .9); | ||
| } | ||
| .navbar-dark .navbar-nav .nav-link:hover { | ||
| color: white; | ||
| } | ||
| table.logo td { | ||
| text-align: center; | ||
| } | ||
| table.logo img { | ||
| height: 4rem; | ||
| } | ||
| .home-jumbotron { | ||
| background-image: url(/static/img/ibis_sky.png); | ||
| background-size: contain; | ||
| background-repeat: no-repeat; | ||
| background-color: #d8e8ff; | ||
| background-position: center; | ||
| min-height: 9rem; | ||
| } | ||
| .home-jumbotron p { | ||
| position: absolute; | ||
| bottom: 2rem; | ||
| text-shadow: 1px 1px #ffffff; | ||
| } |
| @@ -0,0 +1,59 @@ | ||
| # This file should have all the dependencies for development, excluding those specific to the backends. | ||
| name: ibis-dev | ||
| channels: | ||
| - conda-forge | ||
| dependencies: | ||
| # Ibis hard dependencies | ||
| - multipledispatch>=0.6.0 | ||
| - numpy>=1.19 | ||
| - pandas>=0.25 # XXX pymapd does not support pandas 1.0 | ||
| - pytz>=2020.1 | ||
| - regex>=2020.7 | ||
| - toolz>=0.10 | ||
|
|
||
| # Ibis soft dependencies | ||
| # TODO This section is probably not very accurate right now (some dependencies should probably be in the backends files) | ||
| - sqlalchemy>=1.3 | ||
| - graphviz>=2.38 | ||
| - openjdk=8 | ||
| - pytables>=3.6 | ||
| - python-graphviz>=0.14 | ||
| - python-hdfs>=2.0.16 # XXX this version can probably be increased | ||
|
|
||
| # Dev tools | ||
| - asv>=0.4.2 | ||
| - black=19.10b0 | ||
| - click>=7.1 # few scripts in ci/ | ||
| - conda-build # feedstock | ||
| - cmake>=3.17 | ||
| - flake8>=3.8 | ||
| - isort>=5.3 | ||
| - jinja2>=2.11 # feedstock | ||
| - mypy>=0.782 | ||
| - plumbum>=1.6 # few scripts in ci/ and dev/ | ||
| - pre-commit>=2.6 | ||
| - pydocstyle>=4.0 | ||
| - pygit2>=1.2 # dev/genrelease.py | ||
| - pytest>=5.4 | ||
| - pytest-cov>=2.10 | ||
| - pytest-mock>=3.1 | ||
| - ruamel.yaml>=0.16 # feedstock | ||
| - libiconv>=1.15 # bug in repo2docker, see https://github.com/jupyter/repo2docker/issues/758 | ||
| - xorg-libxpm>=3.5 | ||
| - xorg-libxrender>=0.9 | ||
|
|
||
| # Docs | ||
| - ipython>=7.17 | ||
| - jupyter>=1.0 | ||
| - matplotlib>=2 # XXX test if this can be bumped | ||
| - nbconvert | ||
| - nbsphinx>=0.7 | ||
| - nomkl | ||
| - pyarrow>=0.12 # must pin again otherwise strange things happen | ||
| - semantic_version=2.6 # https://github.com/ibis-project/ibis/issues/2027 | ||
| - sphinx>=2.0.1 | ||
| - sphinx-releases | ||
| - sphinx_rtd_theme>=0.5 | ||
| - pip | ||
| - pip: | ||
| - pysuerga |
| @@ -1,144 +1,84 @@ | ||
| """Initialize Ibis module.""" | ||
| import warnings | ||
| from contextlib import suppress | ||
|
|
||
| import ibis.config_init # noqa: F401 | ||
| import ibis.expr.api as api # noqa: F401 | ||
| import ibis.expr.types as ir # noqa: F401 | ||
| import ibis.util as util # noqa: F401 | ||
|
|
||
| # pandas backend is mandatory | ||
| from ibis.backends import pandas # noqa: F401 | ||
| from ibis.common.exceptions import IbisError # noqa: F401 | ||
| from ibis.config import options # noqa: F401 | ||
| from ibis.expr.api import * # noqa: F401,F403 | ||
|
|
||
| from ._version import get_versions # noqa: E402 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[csv] | ||
| from ibis.backends import csv # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[parquet] | ||
| from ibis.backends import parquet # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[hdf5] | ||
| from ibis.backends import hdf5 # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[impala] | ||
| from ibis.backends import impala # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[sqlite] | ||
| from ibis.backends import sqlite # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[postgres] | ||
| from ibis.backends import postgres # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[mysql] | ||
| from ibis.backends import mysql # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[clickhouse] | ||
| from ibis.backends import clickhouse # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[bigquery] | ||
| from ibis.backends import bigquery # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[omniscidb] | ||
| from ibis.backends import omniscidb # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| # pip install ibis-framework[spark] | ||
| from ibis.backends import spark # noqa: F401 | ||
|
|
||
| with suppress(ImportError): | ||
| from ibis.backends import pyspark # noqa: F401 | ||
|
|
||
|
|
||
| __version__ = get_versions()['version'] | ||
| del get_versions | ||
|
|
||
|
|
||
| def __getattr__(name): | ||
| if name in ('HDFS', 'WebHDFS', 'hdfs_connect'): | ||
| warnings.warn( | ||
| f'`ibis.{name}` has been deprecated and will be removed in a ' | ||
| f'future version, use `ibis.impala.{name}` instead', | ||
| FutureWarning, | ||
| stacklevel=2, | ||
| ) | ||
| if 'impala' in globals(): | ||
| return getattr(impala, name) | ||
| else: | ||
| raise AttributeError( | ||
| f'`ibis.{name}` requires impala backend to be installed' | ||
| ) | ||
| raise AttributeError(f"module 'ibis' has no attribute {name!r}") |
| @@ -0,0 +1,194 @@ | ||
| from operator import add, mul, sub | ||
|
|
||
| import ibis.backends.base_sqlalchemy.compiler as comp | ||
| import ibis.common.exceptions as com | ||
| import ibis.expr.datatypes as dt | ||
| import ibis.expr.operations as ops | ||
| import ibis.expr.types as ir | ||
| from ibis.backends.base_sql import ( | ||
| binary_infix_ops, | ||
| operation_registry, | ||
| quote_identifier, | ||
| ) | ||
|
|
||
|
|
||
| def build_ast(expr, context): | ||
| assert context is not None, 'context is None' | ||
| builder = BaseQueryBuilder(expr, context=context) | ||
| return builder.get_result() | ||
|
|
||
|
|
||
| def _get_query(expr, context): | ||
| assert context is not None, 'context is None' | ||
| ast = build_ast(expr, context) | ||
| query = ast.queries[0] | ||
|
|
||
| return query | ||
|
|
||
|
|
||
| def to_sql(expr, context=None): | ||
| if context is None: | ||
| context = BaseDialect.make_context() | ||
| assert context is not None, 'context is None' | ||
| query = _get_query(expr, context) | ||
| return query.compile() | ||
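| # Example usage (a sketch; assumes ``expr`` is an Ibis expression built for a | ||
| # backend that uses this base compiler): | ||
| # | ||
| #     sql_string = to_sql(expr)  # returns the generated SQL as a string | ||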
|
|
||
|
|
||
| # ---------------------------------------------------------------------- | ||
| # Select compilation | ||
|
|
||
|
|
||
| class BaseSelectBuilder(comp.SelectBuilder): | ||
| @property | ||
| def _select_class(self): | ||
| return BaseSelect | ||
|
|
||
|
|
||
| class BaseQueryBuilder(comp.QueryBuilder): | ||
|
|
||
| select_builder = BaseSelectBuilder | ||
|
|
||
|
|
||
| class BaseContext(comp.QueryContext): | ||
| def _to_sql(self, expr, ctx): | ||
| return to_sql(expr, ctx) | ||
|
|
||
|
|
||
| class BaseSelect(comp.Select): | ||
|
|
||
| """ | ||
| A SELECT statement which, after execution, might yield back to the user a | ||
| table, array/list, or scalar value, depending on the expression that | ||
| generated it | ||
| """ | ||
|
|
||
| @property | ||
| def translator(self): | ||
| return BaseExprTranslator | ||
|
|
||
| @property | ||
| def table_set_formatter(self): | ||
| return BaseTableSetFormatter | ||
|
|
||
|
|
||
| class BaseTableSetFormatter(comp.TableSetFormatter): | ||
|
|
||
| _join_names = { | ||
| ops.InnerJoin: 'INNER JOIN', | ||
| ops.LeftJoin: 'LEFT OUTER JOIN', | ||
| ops.RightJoin: 'RIGHT OUTER JOIN', | ||
| ops.OuterJoin: 'FULL OUTER JOIN', | ||
| ops.LeftAntiJoin: 'LEFT ANTI JOIN', | ||
| ops.LeftSemiJoin: 'LEFT SEMI JOIN', | ||
| ops.CrossJoin: 'CROSS JOIN', | ||
| } | ||
|
|
||
| def _get_join_type(self, op): | ||
| jname = self._join_names[type(op)] | ||
|
|
||
| return jname | ||
|
|
||
| def _quote_identifier(self, name): | ||
| return quote_identifier(name) | ||
|
|
||
|
|
||
| _map_interval_to_microseconds = dict( | ||
| W=604800000000, | ||
| D=86400000000, | ||
| h=3600000000, | ||
| m=60000000, | ||
| s=1000000, | ||
| ms=1000, | ||
| us=1, | ||
| ns=0.001, | ||
| ) | ||
|
|
||
|
|
||
| _map_interval_op_to_op = { | ||
| # Literal Intervals have two args, i.e. | ||
| # Literal(1, Interval(value_type=int8, unit='D', nullable=True)) | ||
| # Parse both args and multiply 1 * _map_interval_to_microseconds['D'] | ||
| ops.Literal: mul, | ||
| ops.IntervalMultiply: mul, | ||
| ops.IntervalAdd: add, | ||
| ops.IntervalSubtract: sub, | ||
| } | ||
|
|
||
|
|
||
| def _replace_interval_with_scalar(expr): | ||
| """ | ||
| Good old Depth-First Search to identify the Interval and IntervalValue | ||
| components of the expression and return a comparable scalar expression. | ||
| Parameters | ||
| ---------- | ||
| expr : float or expression of intervals | ||
| For example, ``ibis.interval(days=1) + ibis.interval(hours=5)`` | ||
| Returns | ||
| ------- | ||
| preceding : float or ir.FloatingScalar, depending upon the expr | ||
| """ | ||
| try: | ||
| expr_op = expr.op() | ||
| except AttributeError: | ||
| expr_op = None | ||
|
|
||
| if not isinstance(expr, (dt.Interval, ir.IntervalValue)): | ||
| # Literal expressions have op method but native types do not. | ||
| if isinstance(expr_op, ops.Literal): | ||
| return expr_op.value | ||
| else: | ||
| return expr | ||
| elif isinstance(expr, dt.Interval): | ||
| try: | ||
| microseconds = _map_interval_to_microseconds[expr.unit] | ||
| return microseconds | ||
| except KeyError: | ||
| raise ValueError( | ||
| "Expected preceding values of week(), " | ||
| + "day(), hour(), minute(), second(), millisecond(), " | ||
| + "microseconds(), nanoseconds(); got {}".format(expr) | ||
| ) | ||
| elif expr_op.args and isinstance(expr, ir.IntervalValue): | ||
| if len(expr_op.args) > 2: | ||
| raise com.NotImplementedError( | ||
| "'preceding' argument cannot be parsed." | ||
| ) | ||
| left_arg = _replace_interval_with_scalar(expr_op.args[0]) | ||
| right_arg = _replace_interval_with_scalar(expr_op.args[1]) | ||
| method = _map_interval_op_to_op[type(expr_op)] | ||
| return method(left_arg, right_arg) | ||
|
|
||
|
|
||
| _operation_registry = {**operation_registry, **binary_infix_ops} | ||
|
|
||
|
|
||
| # TODO move the name method to comp.ExprTranslator and use that instead | ||
| class BaseExprTranslator(comp.ExprTranslator): | ||
| """Base expression translator.""" | ||
|
|
||
| _registry = _operation_registry | ||
| context_class = BaseContext | ||
|
|
||
| @staticmethod | ||
| def _name_expr(formatted_expr, quoted_name): | ||
| return '{} AS {}'.format(formatted_expr, quoted_name) | ||
|
|
||
| def name(self, translated, name, force=True): | ||
| """Return expression with its identifier.""" | ||
| return self._name_expr(translated, quote_identifier(name, force=force)) | ||
|
|
||
|
|
||
| class BaseDialect(comp.Dialect): | ||
| translator = BaseExprTranslator | ||
|
|
||
|
|
||
| compiles = BaseExprTranslator.compiles | ||
| rewrites = BaseExprTranslator.rewrites | ||
|
|
||
|
|
||
| @rewrites(ops.FloorDivide) | ||
| def _floor_divide(expr): | ||
| left, right = expr.op().args | ||
| return left.div(right).floor() |