66 changes: 66 additions & 0 deletions docs/blog/rendered/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Linear regression model for predicting cab fares using PyTorch.
Adapted from https://gist.github.com/pdet/e8d38734232c08e6c15aba79b4eb8368#file-taxi_prediction_example-py.
"""

import pyarrow as pa
import torch
import tqdm
from torch import nn


class LinearRegression(nn.Module):
    """A minimal linear model: a single affine layer ``y = W @ x + b``.

    Used here to map trip distances to predicted fares.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # One fully connected layer holds all of the model's parameters.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, inputs):
        """Apply the affine transform to a batch of inputs."""
        return self.linear(inputs)


class PredictCabFare:
    """Fit and apply a one-feature linear model of cab fare vs. trip distance.

    ``data`` is expected to hold tensor-like columns ``trip_distance`` and
    ``fare_amount`` (assumed — confirm against the caller). The instance is
    callable on a pyarrow ``ChunkedArray`` of distances and returns a pyarrow
    array of predicted fares.
    """

    def __init__(self, data, learning_rate: float = 0.01, epochs: int = 100) -> None:
        self.data = data
        # One input feature (distance), one output value (fare).
        self.model = LinearRegression(1, 1)
        self.learning_rate = learning_rate
        self.epochs = epochs

    def train(self):
        """Fit the model on the stored data with plain SGD and MSE loss."""
        x = self.data["trip_distance"].reshape(-1, 1)
        y = self.data["fare_amount"].reshape(-1, 1)

        loss_fn = nn.MSELoss()
        opt = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)

        for _ in tqdm.trange(self.epochs):
            # Clear stale gradients, then one forward/backward/step cycle.
            opt.zero_grad()
            loss = loss_fn(self.model(x), y)
            loss.backward()
            opt.step()

    def predict(self, input):
        """Run the model on ``input`` without tracking gradients."""
        with torch.no_grad():
            return self.model(input)

    def __call__(self, input: pa.ChunkedArray):
        # torch warns about undefined behavior on non-writable numpy arrays,
        # hence the .copy(); [:, None] adds the single feature dimension.
        column = input.to_numpy().copy()
        features = torch.from_numpy(column)[:, None]
        predicted = self.predict(features).ravel()
        return pa.array(predicted.numpy())
583 changes: 583 additions & 0 deletions docs/blog/rendered/torch.ipynb

Large diffs are not rendered by default.

35 changes: 15 additions & 20 deletions docs/community/contribute/01_environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ hide:

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :--------------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: |
| **Operating System** :material-arrow-down: | | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| Python Version :material-arrow-right: | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :----------------------------------------------: | :----------------------------------------------: | :----------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **Windows** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |

{% set managers = {"conda": {"name": "Miniconda", "url": "https://docs.conda.io/en/latest/miniconda.html"}, "mamba": {"name": "Mamba", "url": "https://github.com/mamba-org/mamba"}} %}
{% for manager, params in managers.items() %}
Expand Down Expand Up @@ -51,6 +51,7 @@ hide:
=== "{{ os }}"

```sh
# Create a dev environment for {{platform}}
cd ibis
{{ manager }} create -n ibis-dev --file=conda-lock/{{ platform }}-3.10.lock
```
Expand All @@ -69,12 +70,6 @@ hide:
pip install -e .
```

1. If you want to run the backend test suite you'll need to install `docker-compose`:

```sh
{{ manager }} install docker-compose -c conda-forge
```

{% endfor %}

=== "pip"
Expand Down Expand Up @@ -118,13 +113,13 @@ hide:

#### Support Matrix

| Python Version :material-arrow-right: | Python 3.8 | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :----------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: |
| **Operating System** :material-arrow-down: | | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }}[^1] | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.unknown.icon }}[^2] | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} |
| **Windows** | {{ config.extra.support_levels.unsupported.icon }}[^3] | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} |
| Python Version :material-arrow-right: | Python 3.9 | Python 3.10 | Python 3.11 |
| -----------------------------------------: | :------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------: |
| **Operating System** :material-arrow-down: | | | |
| **Linux** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (x86_64)** | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} | {{ config.extra.support_levels.supported.icon }} |
| **macOS (aarch64)** | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} | {{ config.extra.support_levels.unknown.icon }} |
| **Windows** | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} | {{ config.extra.support_levels.unsupported.icon }} |

1. [Install `nix`](https://nixos.org/download.html)
1. Install `gh`:
Expand Down
46 changes: 46 additions & 0 deletions docs/concept/backends.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Backends

A backend is where execution of Ibis table expressions occurs after compiling into some intermediate representation. A backend is often a database and the intermediate representation is often SQL, but several types of backends exist. See the [backends page](/backends/) for specific documentation on each.

## String generating backends

The first category of backends translates Ibis table expressions into query strings.

The compiler turns each table expression into a query string and passes that query
to the database through a driver API for execution.

- [Apache Impala](/backends/impala/)
- [ClickHouse](/backends/clickhouse/)
- [Google BigQuery](/backends/bigquery/)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)

## Expression generating backends

The next category of backends translates Ibis table expressions into another
system's table expression objects, for example, SQLAlchemy.

Instead of generating a query string for each table expression, these backends
produce another kind of table expression object and typically have high-level APIs
for execution.

- [Apache Arrow Datafusion](/backends/datafusion/)
- [Apache Druid](/backends/druid/)
- [Apache PySpark](/backends/pyspark/)
- [Dask](/backends/dask/)
- [DuckDB](/backends/duckdb/)
- [MS SQL Server](/backends/mssql/)
- [MySQL](/backends/mysql/)
- [Oracle](/backends/oracle/)
- [Polars](/backends/polars/)
- [PostgreSQL](/backends/postgresql/)
- [SQLite](/backends/sqlite/)
- [Snowflake](/backends/snowflake/)
- [Trino](/backends/trino/)

## Direct execution backends

The pandas backend is the only direct execution backend. A full description
of the implementation can be found in the module docstring of the pandas
backend located in `ibis/backends/pandas/core.py`.

- [pandas](/backends/pandas/)
28 changes: 14 additions & 14 deletions docs/user_guide/design.md → docs/concept/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
1. The SQL string that is generated by the compiler is sent to the database and
executed (this step is skipped for the pandas backend)
1. The database returns some data that is then turned into a pandas DataFrame
by ibis
by Ibis

## Expressions

The main user-facing component of ibis is expressions. The base class of all
expressions in ibis is the [ibis.expr.types.Expr][] class.
The main user-facing component of Ibis is expressions. The base class of all
expressions in Ibis is the [ibis.expr.types.Expr][] class.

Expressions provide the user-facing API, most of which is defined in
`ibis/expr/api.py`.
Expand All @@ -32,7 +32,7 @@ Expressions provide the user facing API, most of which is defined in

Ibis's type system consists of a set of rules for specifying the types of
inputs to `ibis.expr.types.Node` subclasses. Upon construction of a `Node`
subclass, ibis performs validation of every input to the node based on the rule
subclass, Ibis performs validation of every input to the node based on the rule
that was used to declare the input.

Rules are defined in `ibis.expr.rules`
Expand All @@ -55,7 +55,7 @@ Examples of expression types include
### The `ibis.expr.types.Node` Class
<!-- prettier-ignore-end -->

`Node` subclasses make up the core set of operations of ibis. Each node
`Node` subclasses make up the core set of operations of Ibis. Each node
corresponds to a particular operation.

Most nodes are defined in the `ibis.expr.operations` module.
Expand Down Expand Up @@ -93,7 +93,7 @@ or column named `base` that defaults to nothing if not provided. The `base`
argument is `None` by default so that the expression will behave as the
underlying database does.

Similar objects are instantiated when you use ibis APIs:
Similar objects are instantiated when you use Ibis APIs:

```python
import ibis
Expand All @@ -118,16 +118,16 @@ type. An example of this is the `greatest` function, which takes the maximum
of all of its arguments. Another example is `CASE` statements, whose `THEN`
expressions determine the output type of the expression.

This allows ibis to provide **only** the APIs that make sense for a particular
This allows Ibis to provide **only** the APIs that make sense for a particular
type, even when an operation yields a different output type depending on its
input. Concretely, this means that you cannot perform operations that don't
make sense, like computing the average of a string column.

## Compilation

The next major component of ibis is the compilers.
The next major component of Ibis is the compilers.

The first few versions of ibis directly generated strings, but the compiler
The first few versions of Ibis directly generated strings, but the compiler
infrastructure was generalized to support compilation of
[SQLAlchemy](https://docs.sqlalchemy.org/en/latest/core/tutorial.html) based
expressions.
Expand All @@ -153,23 +153,23 @@ that will perform this translation.

!!! note "Ibis can target other systems besides SQL"

While ibis was designed with an explicit goal of first-class SQL support,
ibis can target other systems such as pandas.
While Ibis was designed with an explicit goal of first-class SQL support,
Ibis can target other systems such as pandas.

## Execution

Presumably we want to _do_ something with our compiled expressions. This is
where execution comes in.

This is the least complex part of Ibis, mostly only requiring Ibis to correctly
This is least complex part of Ibis, mostly only requiring Ibis to correctly
handle whatever the database hands back.

By and large, the execution of compiled SQL is handled by the database to which
SQL is sent from ibis.
SQL is sent from Ibis.

However, once the data arrives from the database we need to convert that
data to a pandas DataFrame.

The Query class, with its `ibis.sql.client.Query._fetch` method, provides a way
for ibis `ibis.sql.client.SQLClient` objects to do any additional processing
for Ibis `ibis.sql.client.SQLClient` objects to do any additional processing
necessary after the database returns results to the client.
17 changes: 6 additions & 11 deletions docs/why_ibis.md → docs/concept/why_ibis.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
---
hide:
- footer
---

# Why try Ibis?
# Why Ibis?

Ibis is a dataframe interface to execution engines with support for [15+
backends](./backends/index.md). Ibis doesn't replace your existing execution
backends](/backends/). Ibis doesn't replace your existing execution
engine, it _extends_ it with powerful abstractions and intuitive syntax.

Ibis works with what you already have, so why not check out our [getting started
guide](./getting_started.md)?
guide](/tutorial/getting_started/)?

# How does Ibis compare to...

Expand Down Expand Up @@ -39,7 +34,7 @@ we can summarize some key points:
- Ibis lets you use SQL when you want to (for our SQL-based backends)

If your SQL-fu is strong, we might not convince you to leave it all behind, but
check out our [Ibis for SQL Programmers guide](./ibis-for-sql-programmers.ipynb)
check out our [Ibis for SQL users guide](/tutorial/ibis-for-sql-users/)
and see if it whets your appetite.

## `pandas`
Expand All @@ -58,7 +53,7 @@ Ibis to quickly and easily switch to a different backend that supports
out-of-core execution.

Ibis syntax is similar to `pandas` syntax, but it isn't a drop-in replacement.
Check out our [Ibis for Pandas Users guide](./ibis-for-pandas-users.ipynb) if
Check out our [Ibis for pandas Users guide](/tutorial/ibis-for-pandas-users/) if
you'd like to give Ibis a try!

## `sqlalchemy` and `sqlglot`
Expand All @@ -77,4 +72,4 @@ using SQLGlot.

If you are looking for a dataframe API to construct and execute your analytics
queries against a large collection of powerful execution engines, then allow us to
point you at the [Ibis Getting Started guide](./getting_started.md).
point you at the [Ibis Getting Started guide](/tutorial/getting_started/).
9 changes: 0 additions & 9 deletions docs/docs/index.md

This file was deleted.

21 changes: 0 additions & 21 deletions docs/example.py

This file was deleted.

20 changes: 0 additions & 20 deletions docs/example.sql

This file was deleted.

70 changes: 70 additions & 0 deletions docs/example_streamlit_app/example_streamlit_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import requests
import streamlit as st

from ibis import _
from ibis.streamlit import IbisConnection

# Page-wide Streamlit configuration and app title (":bacon:" is an emoji shortcode).
st.set_page_config(page_title="Yummy Data", layout="wide")
st.title("Yummy Data :bacon:")


@st.cache_data
def get_emoji():
    """Download and return the emoji name -> character mapping (cached by Streamlit)."""
    url = "https://raw.githubusercontent.com/omnidan/node-emoji/master/lib/emoji.json"
    response = requests.get(url)
    # Fail loudly on HTTP errors rather than caching a bad payload.
    response.raise_for_status()
    return response.json()


options = [1, 5, 10, 25, 50, 100]


@st.cache_data
def query():
    """Return the most common lowercased ingredients, with emoji, as a DataFrame.

    Reads the ``recipes`` table through the module-level connection ``con`` and
    uses the module-level ``emojis`` mapping; the result is indexed by ingredient.
    """
    ranked = (
        con.tables.recipes.relabel("snake_case")
        .mutate(ner=_.ner.map(lambda n: n.lower()).unnest())
        .ner.topk(max(options))
        .relabel(dict(ner="ingredient"))
        .to_pandas()
    )
    # "-" is the placeholder for ingredients with no matching emoji.
    with_emoji = ranked.assign(
        emoji=lambda df: df.ingredient.map(
            lambda emoji: f"{emojis.get(emoji, '-')}"
        )
    )
    return with_emoji.set_index("ingredient")


# Fetch the emoji mapping once; cached across reruns by st.cache_data.
emojis = get_emoji()

# Open the Streamlit connection named "ch" through Ibis's connection adapter.
con = st.experimental_connection("ch", type=IbisConnection)

# n is the number of top ingredients requested (default: options[1] == 5).
if n := st.radio("Ingredients", options, index=1, horizontal=True):
    # Two-column layout: chunked tables on the left, the full top-n on the right.
    table, whole = st.columns((2, 1))
    idx = options.index(n)
    k = 0
    base = query()
    # Walk the option thresholds up to n, rendering each cumulative slice
    # [k, m) of the ranked ingredients under a "first"/"next"/"last" heading.
    for m in options[: idx + 1]:
        df = base.iloc[k:m]
        if not k:
            word = "first"
        elif m < n:
            word = "next"
        else:
            word = "last"

        # Distinct emojis present in this slice, skipping the "-" placeholder.
        uniq_emojis = " ".join(df.emoji[df.emoji != "-"].unique())
        table.header(f"{word.title()} {m - k:d}")
        table.subheader(uniq_emojis)

        table.dataframe(df, use_container_width=True)
        k = m

    # Right column: the entire top-n slice and its distinct emojis.
    b = base.iloc[:n]
    uniq_emojis = " ".join(b.emoji[b.emoji != "-"].unique())
    whole.header(f"Top {n:d}")
    whole.subheader(uniq_emojis)
    whole.dataframe(b, use_container_width=True)
3 changes: 3 additions & 0 deletions docs/example_streamlit_app/requirements.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# How to Chain Expressions with Underscore
# Chain expressions with the underscore API

Expressions can easily be chained using the deferred expression API, also known as the Underscore (`_`) API.

Expand All @@ -15,14 +15,14 @@ from ibis import _
import pandas as pd
```

Let's create two in-memory tables using [`ibis.memtable`](memtable-join.md), an API introduced in 3.2:
Let's create two in-memory tables using [`ibis.memtable`](memtable_join.md), an API introduced in 3.2:

```python
t1 = ibis.memtable(pd.DataFrame({'x': range(5), 'y': list('ab')*2 + list('e')}))
t2 = ibis.memtable(pd.DataFrame({'x': range(10), 'z': list(reversed(list('ab')*2 + list('e')))*2}))
```

## Creating ColumnExpressions
## Creating column expressions

We can use `_` to create new column expressions without explicit reference to the previous table expression:

Expand All @@ -39,7 +39,7 @@ zmax = _.z.max()
zct = _.z.count()
```

## Chaining Ibis Expressions
## Chaining Ibis expressions

We can also use it to chain Ibis expressions in one Python expression:

Expand All @@ -59,7 +59,7 @@ join = (
# _ is the filtered result, and re-create xmod in t2 using modf:
.join(t2, _.xmod == modf(t2))
# _ is the second join result:
.join(t1, _.xmod == modf(t1), suffixes=('', '_x'))
.join(t1, _.xmod == modf(t1))
# _ is the third join result:
.select(_.x, _.y, _.z)
# Finally, _ is the selection result:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Configuring Ibis
# Configure Ibis

Ibis configuration happens through the `ibis.options` attribute. Attributes can
be get and set like class attributes.
Expand Down
4 changes: 2 additions & 2 deletions docs/how_to/duckdb_register.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# How to Use `register` to load external data files with the DuckDB backend
# Load external data files with the DuckDB backend

<!-- prettier-ignore-start -->
Here we use the [`register`][ibis.backends.duckdb.Backend.register] method to load external data files and join them.
Here we use the `register` method to load external data files and join them.
<!-- prettier-ignore-end -->

We're going to download one month of [NYC Taxi
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/how_to/ffill_bfill_w_window.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# How to `ffill` and `bfill` using Window Functions
# Forward and backward fill data using window functions

If you have gaps in your data and need to fill them in using a simple forward fill
(given an order, null values are replaced by the value preceeding) or backward fill
(given an order, null values are replaced by the value preceding) or backward fill
(given an order, null values are replaced by the value following), then you can do this in Ibis:

=== "`ffill`"
Expand Down
39 changes: 19 additions & 20 deletions docs/how_to/memtable-join.md → docs/how_to/memtable_join.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# How to join an in-memory DataFrame to a TableExpression
# Join an in-memory DataFrame to a TableExpression

You might have an in-memory DataFrame that you want to join to a TableExpression.
For example, you might have a file on your local machine that you don't want to upload to
Expand Down Expand Up @@ -74,7 +74,7 @@ Converting a pandas DataFrame to a MemTable is as simple as feeding it to `ibis.
Out[10]:
PandasInMemoryTable
data:
DataFrameProxy:
PandasDataFrameProxy:
event_id event_name
0 0 e0
1 1 e1
Expand All @@ -87,24 +87,23 @@ and joining is the same as joining any two TableExpressions:
```python
In [11]: # Join as you would two table expressions
...: measures.join(
...: mem_events
...: ,measures['event_id'] == mem_events['event_id']
...: ,suffixes=('', '__x')
...: ).execute()
...: mem_events,
...: measures['event_id'] == mem_events['event_id']
...: ).to_pandas()
Out[11]:
event_id measured_on measurement event_id__x event_name
0 0 2021-06-01 NaN 0 e0
1 0 2021-06-02 5.0 0 e0
2 1 2021-06-03 NaN 1 e1
3 1 2021-06-04 NaN 1 e1
4 1 2021-05-05 42.0 1 e1
5 2 2021-05-06 42.0 2 e2
6 2 2021-05-07 NaN 2 e2
7 2 2021-05-08 11.0 2 e2
8 2 2021-05-09 NaN 2 e2
9 2 2021-05-10 NaN 2 e2
10 3 2021-07-11 NaN 3 e3
11 3 2021-07-12 NaN 3 e3
event_id measured_on measurement event_name
0 0 2021-06-01 NaN e0
1 0 2021-06-02 5.0 e0
2 1 2021-06-03 NaN e1
3 1 2021-06-04 NaN e1
4 1 2021-05-05 42.0 e1
5 2 2021-05-06 42.0 e2
6 2 2021-05-07 NaN e2
7 2 2021-05-08 11.0 e2
8 2 2021-05-09 NaN e2
9 2 2021-05-10 NaN e2
10 3 2021-07-11 NaN e3
11 3 2021-07-12 NaN e3
```

Note that the return result of the `join` is a TableExpression and that `execute` returns a pandas DataFrame.
Note that the return result of the `join` is a TableExpression and that `to_pandas` returns a pandas DataFrame.
4 changes: 2 additions & 2 deletions docs/user_guide/self_joins.md → docs/how_to/self_joins.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Self Joins
# Perform self joins

If you’re a relational data guru, you may have wondered how it’s possible to
join tables with themselves, because join clauses involve column references
Expand Down Expand Up @@ -104,7 +104,7 @@ distinct object within Ibis. To do this, use the `view` function:
>>> results = (current.join(prior, ((current.region == prior.region) &
... (current.year == (prior.year - 1))))
... [current.region, current.year, yoy_change])
>>> df = results.execute()
>>> df = results.to_pandas()
```

```python
Expand Down
8 changes: 4 additions & 4 deletions docs/how_to/sessionize.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# How to Sessionize a Log of Events
# Sessionize a log of events

Suppose you have entities (users, objects, actions, etc) that have event logs through polling or event triggers.

Expand All @@ -11,7 +11,7 @@ Sessionization can also be useful on longer time scales, for instance to reconst

This guide on sessionization is inspired by [_The Expressions API in Polars is Amazing_](https://www.pola.rs/posts/the-expressions-api-in-polars-is-amazing/), a blog post in the [Polars](https://www.pola.rs/) community demonstrating the strength of Polars expressions.

## Sessionizing Logs on a Cadence
## Sessionizing logs on a cadence

For this example, we use an activity log from the online game "World of Warcraft" with more than 10 million records for 37,354 unique players [made available](https://www.kaggle.com/datasets/mylesoneill/warcraft-avatar-history?select=wowah_data.csv) under the CC0 / Public Domain license. A copy of the data can be found at `https://storage.googleapis.com/ibis-tutorial-data/wowah_data/wowah_data_raw.parquet` (75 MB) under the parquet format to reduce load times. You can use `ibis.read_parquet` to quickly get it into a table expression via the default `DuckDB` backend.

Expand Down Expand Up @@ -74,6 +74,6 @@ sessionized = (
)
```

Calling `ibis.show_sql(sessionized)` displays the SQL query and can be used to confirm that this Ibis expression does not rely on any join operations.
Calling `ibis.show_sql(sessionized)` displays the SQL query and can be used to confirm that this Ibis table expression does not rely on any join operations.

Calling `sessionized.execute()` should complete in less than a minute, depending on the speed of the internet connection to download the data and the number of CPU cores available to parallelize the processing of this nested query.
Calling `sessionized.to_pandas()` should complete in less than a minute, depending on the speed of the internet connection to download the data and the number of CPU cores available to parallelize the processing of this nested query.
19 changes: 19 additions & 0 deletions docs/how_to/streamlit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Write a Streamlit app with Ibis

Streamlit + Ibis = :heart:

Ibis supports the [streamlit `experimental_connection` interface](https://blog.streamlit.io/introducing-st-experimental_connection/), making it easier than ever to combine the powers of both tools!

Check out the example application below that shows the top N ingredients from a corpus of recipes using [the ClickHouse backend](/backends/clickhouse/)!

<div class="streamlit-app">
<iframe class="streamlit-app-inner" src="https://ibis-example.streamlit.app/?embedded=true"></iframe>
</div>

And here's the source code for the application:

??? example "Source code"

```python title="example_streamlit_app.py"
--8<-- "docs/example_streamlit_app/example_streamlit_app.py"
```
2 changes: 1 addition & 1 deletion docs/how_to/topk.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# How to Compute the Top K Records
# Compute the top K records

<!-- prettier-ignore-start -->
Here we use the [`topk`][ibis.expr.types.Column.topk] method to compute the top
Expand Down
3,191 changes: 0 additions & 3,191 deletions docs/ibis-for-pandas-users.ipynb

This file was deleted.

10 changes: 5 additions & 5 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ hide:
---

<div class="install-tutorial-button" markdown>
[Getting Started](./getting_started.md){ .md-button .md-button--primary }
[Install](./install.md){ .md-button }
[Getting Started](/tutorial/getting_started/){ .md-button .md-button--primary }
[Install](/install/){ .md-button }
</div>

---
Expand All @@ -35,7 +35,7 @@ ORDER BY t1.year DESC
```

```py title="Execute on multiple backends"
>>> con.execute(q)
>>> con.to_pandas(q)

year mean(avg_rating)
0 2021 2.586362
Expand All @@ -49,8 +49,8 @@ ORDER BY t1.year DESC

## Features

- **Consistent syntax across backends**: Enjoy a uniform Python API, whether using [DuckDB](https://duckdb.org), [PostgreSQL](https://postgresql.org), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), [BigQuery](https://cloud.google.com/bigquery/), or [any other supported backend](./backends/index.md).
- **Consistent syntax across backends**: Enjoy a uniform Python API, whether using [DuckDB](https://duckdb.org), [PostgreSQL](https://postgresql.org), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), [BigQuery](https://cloud.google.com/bigquery/), or [any other supported backend](/backends/).
- **Performant**: Execute queries as fast as the database engine itself.
- **Interactive**: Explore data in a notebook or REPL.
- **Extensible**: Add new operations, optimizations, and custom APIs.
- **Free and open-source**: licensed under Apache 2.0, [available on Github](https://github.com/ibis-project/ibis/blob/master/README.md)
- **Free and open-source**: licensed under Apache 2.0, [available on Github](https://github.com/ibis-project/ibis/blob/master/README.md).
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Pandas-like Backend Base Classes
# pandas-like Backend Base Classes

These base classes underlie the pandas-based backends.

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

These APIs are available on arrays, maps and structs.

<!-- prettier-ignore-start -->
::: ibis.expr.types.arrays.ArrayValue
options:
show_source: true
::: ibis.expr.types.structs.StructValue
options:
show_source: true
::: ibis.expr.types.maps.MapValue
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

These expressions are available on scalars and columns of any element type.

<!-- prettier-ignore-start -->
::: ibis.expr.types.generic.Value
options:
show_source: true
::: ibis.expr.types.generic.Column
options:
show_source: true
::: ibis.expr.types.generic.Scalar
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,11 @@

Ibis supports the following geospatial expression APIs

<!-- prettier-ignore-start -->
::: ibis.expr.types.geospatial.GeoSpatialValue
options:
show_source: true
::: ibis.expr.types.geospatial.GeoSpatialColumn
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@

These APIs are shared by both table and column expressions.

<!-- prettier-ignore-start -->
::: ibis.expr.types.core.Expr
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@

These APIs are available on numeric and boolean expressions.

<!-- prettier-ignore-start -->
::: ibis.expr.types.numeric.NumericValue
options:
show_source: true
::: ibis.expr.types.numeric.NumericColumn
options:
show_source: true

::: ibis.expr.types.numeric.IntegerValue
options:
show_source: true
::: ibis.expr.types.numeric.IntegerColumn
options:
show_source: true

::: ibis.expr.types.numeric.FloatingValue
options:
show_source: true

::: ibis.expr.types.numeric.DecimalValue
options:
show_source: true

::: ibis.expr.types.logical.BooleanValue
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@

All string operations are valid for both scalars and columns.

<!-- prettier-ignore-start -->
::: ibis.expr.types.strings.StringValue
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,11 @@

Table expressions form the basis for most Ibis expressions.

<!-- prettier-ignore-start -->
::: ibis.expr.types.relations.Table
options:
show_source: true
::: ibis.expr.types.groupby.GroupedTable
options:
show_source: true
<!-- prettier-ignore-end -->
173 changes: 173 additions & 0 deletions docs/reference/expressions/timestamps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Temporal Expression APIs

All temporal operations are valid for both scalars and columns.

<!-- prettier-ignore-start -->
::: ibis.expr.types.temporal.TemporalValue
options:
show_source: true


::: ibis.expr.types.temporal.TimestampValue
options:
show_source: true
members: false
<!-- prettier-ignore-end -->

### Functions

<!-- prettier-ignore-start -->
::: ibis.expr.types.temporal.TimestampValue.add
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.between
options:
heading_level: 4
::: ibis.expr.types.temporal.TimestampValue.date
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day_of_week
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day_of_year
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.epoch_seconds
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.hour
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.millisecond
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.minute
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.month
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.second
options:
heading_level: 4
::: ibis.expr.types.temporal.TemporalValue.strftime
options:
heading_level: 4
::: ibis.expr.types.temporal.TimestampValue.sub
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.time
options:
heading_level: 4
::: ibis.expr.types.temporal.TimestampValue.truncate
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.quarter
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.week_of_year
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.year
options:
heading_level: 4


::: ibis.expr.types.temporal.DateValue
options:
show_source: true
members: false
<!-- prettier-ignore-end -->

### Functions

<!-- prettier-ignore-start -->
::: ibis.expr.types.temporal.DateValue.add
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day_of_week
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.day_of_year
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.epoch_seconds
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.month
options:
heading_level: 4
::: ibis.expr.types.temporal.TemporalValue.strftime
options:
heading_level: 4
::: ibis.expr.types.temporal.DateValue.sub
options:
heading_level: 4
::: ibis.expr.types.temporal.DateValue.truncate
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.quarter
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.week_of_year
options:
heading_level: 4
::: ibis.expr.types.temporal._DateComponentMixin.year
options:
heading_level: 4


::: ibis.expr.types.temporal.TimeValue
options:
show_source: true
members: false
<!-- prettier-ignore-end -->

### Functions

<!-- prettier-ignore-start -->
::: ibis.expr.types.temporal.TimeValue.add
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.between
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.hour
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.microsecond
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.millisecond
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.minute
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.second
options:
heading_level: 4
::: ibis.expr.types.temporal.TemporalValue.strftime
options:
heading_level: 4
::: ibis.expr.types.temporal.TimeValue.sub
options:
heading_level: 4
::: ibis.expr.types.temporal._TimeComponentMixin.time
options:
heading_level: 4
::: ibis.expr.types.temporal.TimeValue.truncate
options:
heading_level: 4


::: ibis.expr.types.temporal.IntervalValue
options:
show_source: true
<!-- prettier-ignore-end -->
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,28 @@

These methods and objects are available directly in the `ibis` module.

## `NA`

`NA` is the null scalar.

<!-- prettier-ignore-start -->
::: ibis.and_
::: ibis.array
::: ibis.asc
::: ibis.case
::: ibis.coalesce
::: ibis.expr.types.Value.coalesce
::: ibis.cumulative_window
::: ibis.date
::: ibis.deferred
::: ibis.desc
::: ibis.difference
::: ibis.get_backend
::: ibis.greatest
::: ibis.expr.types.Value.greatest
::: ibis.ifelse
::: ibis.intersect
::: ibis.interval
::: ibis.least
::: ibis.expr.types.Value.least
::: ibis.literal
::: ibis.map
::: ibis.negate
::: ibis.memtable
::: ibis.expr.types.numeric.NumericValue.negate
::: ibis.NA
::: ibis.now
::: ibis.null
::: ibis.or_
Expand All @@ -34,6 +33,8 @@ These methods and objects are available directly in the `ibis` module.
::: ibis.random
::: ibis.range_window
::: ibis.read_csv
::: ibis.read_delta
::: ibis.read_json
::: ibis.read_parquet
::: ibis.row_number
::: ibis.schema
Expand All @@ -47,3 +48,4 @@ These methods and objects are available directly in the `ibis` module.
::: ibis.union
::: ibis.where
::: ibis.window
<!-- prettier-ignore-end -->
File renamed without changes.
File renamed without changes.
441 changes: 409 additions & 32 deletions docs/release_notes.md

Large diffs are not rendered by default.

25 changes: 0 additions & 25 deletions docs/sqlalchemy_example.py

This file was deleted.

7 changes: 7 additions & 0 deletions docs/stylesheets/code_select.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* Make interactive-session prompts and outputs unselectable so copied
   snippets paste cleanly. Covers both pycon and console highlighting. */
.language-pycon .gp,
.language-pycon .go,
.language-console .gp,
.language-console .go { /* Generic.Prompt, Generic.Output */
  user-select: none;
}
2 changes: 1 addition & 1 deletion docs/stylesheets/extra.css
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
display: table;
}

#streamlit-app {
.streamlit-app-inner {
height: 1000px;
width: 100%;
border: none;
Expand Down
13 changes: 13 additions & 0 deletions docs/supported_python_versions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Supported Python Versions

Ibis follows [NEP29](https://numpy.org/neps/nep-0029-deprecation_policy.html)
with respect to supported Python versions.

This policy has been in place [since Ibis version 3.0.0](https://github.com/ibis-project/ibis/blob/5015677d78909473014a61725d371b4bf772cdff/docs/blog/Ibis-version-3.0.0-release.md?plain=1#L83).

The [support
table](https://numpy.org/neps/nep-0029-deprecation_policy.html#support-table)
shows the schedule for dropping support for Python versions.

The next major release of Ibis that occurs on or after the NEP29 drop date
removes support for the specified Python version.
35 changes: 15 additions & 20 deletions docs/getting_started.md → docs/tutorial/getting_started.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
---
hide:
- navigation
- footer
---

# Getting Started with `ibis`
# Getting started with `ibis`

This is a quick tour of some basic commands and usage patterns, just to get your flippers wet.

## Install `ibis`

This quick-start guide uses the DuckDB backend. You can check out the [Install
page](./install.md) for information on how to install other backends.
page](/install/) for information on how to install other backends.

```shell title="Install Ibis using pip"
$ pip install 'ibis-framework[duckdb]'
Expand Down Expand Up @@ -66,11 +60,11 @@ AlchemyTable: penguins
```

Ibis is lazily evaluated, so instead of seeing the data, we see the schema of
the table, instead. To peek at the data, we can call `head` and then `execute`
to get the first few rows of the table as a Pandas DataFrame.
the table, instead. To peek at the data, we can call `head` and then `to_pandas`
to get the first few rows of the table as a pandas DataFrame.

```python
>>> penguins.head().execute()
>>> penguins.head().to_pandas()
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 male 2007
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 female 2007
Expand All @@ -79,9 +73,9 @@ to get the first few rows of the table as a Pandas DataFrame.
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 female 2007
```

`execute` takes the existing lazy expression and evaluates it. If we leave it
off, you'll see the Ibis representation of the expression that `execute` will
evaluate (when you're ready!).
`to_pandas` takes the existing lazy table expression and evaluates it. If we
leave it off, you'll see the Ibis representation of the table expression that
`to_pandas` will evaluate (when you're ready!).

```python
>>> penguins.head()
Expand All @@ -98,19 +92,20 @@ r0 := AlchemyTable: penguins
Limit[r0, n=5]
```

!!! note "Results in Pandas DataFrame"
!!! note "Results in pandas DataFrame"

Ibis returns results as a Pandas DataFrame by default. It isn't using Pandas to
Ibis returns results as a pandas DataFrame using `to_pandas`, but isn't using pandas to
perform any of the computation. The query is executed by the backend (DuckDB in
this case). Only when the query is executed does Ibis then pull back the results
this case). Only when `to_pandas` is called does Ibis then pull back the results
and convert them into a DataFrame.

## Interactive Mode
## Interactive mode

For the rest of this intro, we'll turn on interactive mode, which partially
executes queries to give users a preview of the results. There is a small
difference in the way the output is formatted, but otherwise this is the same
as calling `execute()` on the expression with a limit of 10 results returned.
as calling `to_pandas` on the table expression with a limit of 10 result rows
returned.

```python
>>> ibis.options.interactive = True
Expand Down Expand Up @@ -377,7 +372,7 @@ You can also use a `selector` alongside a column name.
└───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴───────┘
```

You can read more about [`selectors`](./api/selectors.md) in the docs!
You can read more about [`selectors`](/reference/selectors/) in the docs!

### order_by

Expand Down
821 changes: 821 additions & 0 deletions docs/tutorial/ibis-for-dplyr-users.ipynb

Large diffs are not rendered by default.

1,436 changes: 1,436 additions & 0 deletions docs/tutorial/ibis-for-pandas-users.ipynb

Large diffs are not rendered by default.

1,333 changes: 174 additions & 1,159 deletions docs/ibis-for-sql-programmers.ipynb → docs/tutorial/ibis-for-sql-users.ipynb

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions docs/tutorial/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Ibis tutorials

Welcome to the Ibis tutorials!

- **Learning Ibis for the first time?**: Check out the [Ibis getting started tutorial](./getting_started/)!
- **Coming from SQL?**: Take a look at [Ibis for SQL users](./ibis-for-sql-users/)!
- **Coming from pandas?**: Check out [Ibis for pandas users](./ibis-for-pandas-users/)!
- **Coming from R?**: See [Ibis for dplyr users](./ibis-for-dplyr-users/)!
- **Want to see some more examples?**: We've got [a repository of examples](https://github.com/ibis-project/ibis-examples) for that!
36 changes: 27 additions & 9 deletions flake.lock
16 changes: 7 additions & 9 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

backendDevDeps = with pkgs; [
# impala UDFs
clang_12
clang_15
cmake
ninja
# snowflake
Expand All @@ -53,18 +53,16 @@
# duckdb
duckdb
# mysql
mariadb-client
mycli
# pyspark
openjdk17_headless
# postgres
postgresql
# sqlite
# postgres client
pgcli
# sqlite with readline
sqlite-interactive
];
shellHook = ''
${pkgs.rsync}/bin/rsync \
--chmod=Du+rwx,Fu+rw --archive --delete \
"${pkgs.ibisTestingData}/" "$PWD/ci/ibis-testing-data"
ln -sf "${pkgs.ibisTestingData}" "$PWD/ci/ibis-testing-data"
# necessary for mkdocs
export PYTHONPATH=''${PWD}''${PYTHONPATH:+:}''${PYTHONPATH}
Expand Down Expand Up @@ -122,7 +120,7 @@
packages = {
inherit (pkgs) ibis38 ibis39 ibis310 ibis311;

default = pkgs.ibis311;
default = pkgs.ibis310;

inherit (pkgs) update-lock-files gen-all-extras gen-examples check-poetry-version;
};
Expand Down
52 changes: 51 additions & 1 deletion gen_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,57 @@
}

# Untemplated redirects
REDIRECTS = {}
REDIRECTS = {
    # Backend page names were lowercased; redirect the old capitalized URLs.
    "/backends/Pandas/": "/backends/pandas/",
    # Tutorial pages moved under /tutorial/.
    "/getting_started/": "/tutorial/getting_started/",
    "/ibis-for-sql-programmers/": "/tutorial/ibis-for-sql-users/",
    "/ibis-for-pandas-users/": "/tutorial/ibis-for-pandas-users/",
    "/ibis-for-dplyr-users/": "/tutorial/ibis-for-dplyr-users/",
    # /why_ibis and /user_guide pages were split between /concept and /how_to.
    "/why_ibis/": "/concept/why_ibis/",
    "/user_guide/design/": "/concept/design/",
    "/user_guide/self_joins/": "/how_to/self_joins/",
    "/user_guide/configuration/": "/how_to/configuration/",
    "/user_guide/extending/": "/how_to/extending/",
    # More lowercased backend page names.
    "/backends/BigQuery/": "/backends/bigquery/",
    "/backends/Clickhouse/": "/backends/clickhouse/",
    "/backends/Dask/": "/backends/dask/",
    "/backends/Datafusion/": "/backends/datafusion/",
    "/backends/Druid/": "/backends/druid/",
    "/backends/DuckDB/": "/backends/duckdb/",
    "/backends/Impala/": "/backends/impala/",
    "/backends/MSSQL/": "/backends/mssql/",
    "/backends/MySQL/": "/backends/mysql/",
    "/backends/Oracle/": "/backends/oracle/",
    "/backends/Polars/": "/backends/polars/",
    "/backends/PostgreSQL/": "/backends/postgresql/",
    "/backends/PySpark/": "/backends/pyspark/",
    "/backends/SQLite/": "/backends/sqlite/",
    "/backends/Snowflake/": "/backends/snowflake/",
    "/backends/Trino/": "/backends/trino/",
    "/backends/support_matrix": "/backends/_support_matrix/",
    # Hyphenated how-to page names became underscored.
    "/how_to/chain-expressions/": "/how_to/chain_expressions/",
    "/how_to/memtable-join/": "/how_to/memtable_join/",
    "/docs/": "/",
    # The /api tree was renamed to /reference.
    "/api/": "/reference/",
    "/api/expressions/": "/reference/expressions/",
    "/api/expressions/top_level/": "/reference/expressions/top_level/",
    "/api/expressions/tables/": "/reference/expressions/tables/",
    "/api/expressions/generic/": "/reference/expressions/generic/",
    "/api/expressions/numeric/": "/reference/expressions/numeric/",
    "/api/expressions/strings/": "/reference/expressions/strings/",
    "/api/expressions/timestamps/": "/reference/expressions/timestamps/",
    "/api/expressions/collections/": "/reference/expressions/collections/",
    "/api/expressions/geospatial/": "/reference/expressions/geospatial/",
    "/api/selectors/": "/reference/selectors/",
    "/api/datatypes/": "/reference/datatypes/",
    "/api/schemas/": "/reference/schemas/",
    "/api/config/": "/reference/config/",
    "/api/backends/": "/reference/backends/",
    "/api/backends/base/": "/reference/backends/base/",
    "/api/backends/pandas/": "/reference/backends/pandas/",
    "/api/backends/sql/": "/reference/backends/sql/",
    "/api/backends/sqlalchemy/": "/reference/backends/sqlalchemy/",
}

# Fill in templates
REDIRECTS.update(
Expand Down
13 changes: 9 additions & 4 deletions ibis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Initialize Ibis module."""
from __future__ import annotations

__version__ = "5.1.0"
__version__ = "6.0.0"

from ibis import examples, util
from ibis.backends.base import BaseBackend
Expand All @@ -10,11 +10,13 @@
from ibis.expr import api
from ibis.expr import types as ir
from ibis.expr.api import * # noqa: F403
from ibis.expr.operations import udf

__all__ = [ # noqa: PLE0604
'api',
'examples',
'ir',
'udf',
'util',
'BaseBackend',
'IbisError',
Expand Down Expand Up @@ -54,7 +56,7 @@ def __getattr__(name: str) -> BaseBackend:
msg = f"module 'ibis' has no attribute '{name}'. "
if name in _KNOWN_BACKENDS:
msg += f"""If you are trying to access the '{name}' backend,
try installing it first with `pip install ibis-{name}`"""
try installing it first with `pip install 'ibis-framework[{name}]'`"""
raise AttributeError(msg)

if len(entry_points) > 1:
Expand Down Expand Up @@ -109,7 +111,10 @@ def connect(*args, **kwargs):
proxy.name = name
proxy._from_url = backend._from_url
proxy._to_sql = backend._to_sql
if hasattr(backend, "_sqlglot_dialect"):
proxy._sqlglot_dialect = backend._sqlglot_dialect
if (dialect := getattr(backend, "_sqlglot_dialect", None)) is not None:
proxy._sqlglot_dialect = dialect
# Add any additional methods that should be exposed at the top level
for name in getattr(backend, "_top_level_methods", ()):
setattr(proxy, name, getattr(backend, name))

return proxy
184 changes: 150 additions & 34 deletions ibis/backends/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import re
import sys
import urllib.parse
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Expand All @@ -29,12 +28,23 @@
from ibis.common.caching import RefCountedCache

if TYPE_CHECKING:
from pathlib import Path

import pandas as pd
import pyarrow as pa
import torch

__all__ = ('BaseBackend', 'Database', 'connect')


_IBIS_TO_SQLGLOT_DIALECT = {
"mssql": "tsql",
"impala": "hive",
"pyspark": "spark",
"polars": "postgres",
}


class Database:
"""Generic Database class."""

Expand Down Expand Up @@ -259,13 +269,17 @@ def to_pyarrow(
A pyarrow table holding the results of the executed expression.
"""
pa = self._import_pyarrow()
self._run_pre_execute_hooks(expr)
try:
# Can't construct an array from record batches
# so construct at one column table (if applicable)
# then return the column _from_ the table
table = pa.Table.from_batches(
self.to_pyarrow_batches(expr, params=params, limit=limit, **kwargs)
)
with self.to_pyarrow_batches(
expr, params=params, limit=limit, **kwargs
) as reader:
table = pa.Table.from_batches(reader)
except pa.lib.ArrowInvalid:
raise
except ValueError:
# The pyarrow batches iterator is empty so pass in an empty
# iterator and a pyarrow schema
Expand Down Expand Up @@ -322,6 +336,44 @@ def to_pyarrow_batches(
"""
raise NotImplementedError

@util.experimental
def to_torch(
    self,
    expr: ir.Expr,
    *,
    params: Mapping[ir.Scalar, Any] | None = None,
    limit: int | str | None = None,
    **kwargs: Any,
) -> dict[str, torch.Tensor]:
    """Execute an expression and return results as a dictionary of torch tensors.

    Parameters
    ----------
    expr
        Ibis expression to execute.
    params
        Parameters to substitute into the expression.
    limit
        An integer to effect a specific row limit. A value of `None` means no limit.
    kwargs
        Keyword arguments passed into the backend's `to_torch` implementation.

    Returns
    -------
    dict[str, torch.Tensor]
        A dictionary of torch tensors, keyed by column name.
    """
    # Imported locally: torch is an optional dependency of ibis.
    import torch

    # Execute via pyarrow, then convert each column to a tensor.
    t = self.to_pyarrow(expr, params=params, limit=limit, **kwargs)
    # without .copy() the arrays are read-only and thus writing to them is
    # undefined behavior; we can't ignore this warning from torch because
    # we're going out of ibis and downstream code can do whatever it wants
    # with the data
    return {
        name: torch.from_numpy(t[name].to_numpy().copy()) for name in t.schema.names
    }

def read_parquet(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
Expand Down Expand Up @@ -400,11 +452,10 @@ def to_parquet(
self._import_pyarrow()
import pyarrow.parquet as pq

batch_reader = expr.to_pyarrow_batches(params=params)

with pq.ParquetWriter(path, batch_reader.schema) as writer:
for batch in batch_reader:
writer.write_batch(batch)
with expr.to_pyarrow_batches(params=params) as batch_reader:
with pq.ParquetWriter(path, batch_reader.schema) as writer:
for batch in batch_reader:
writer.write_batch(batch)

@util.experimental
def to_csv(
Expand Down Expand Up @@ -436,11 +487,48 @@ def to_csv(
self._import_pyarrow()
import pyarrow.csv as pcsv

batch_reader = expr.to_pyarrow_batches(params=params)
with expr.to_pyarrow_batches(params=params) as batch_reader:
with pcsv.CSVWriter(path, batch_reader.schema) as writer:
for batch in batch_reader:
writer.write_batch(batch)

@util.experimental
def to_delta(
self,
expr: ir.Table,
path: str | Path,
*,
params: Mapping[ir.Scalar, Any] | None = None,
**kwargs: Any,
) -> None:
"""Write the results of executing the given expression to a Delta Lake table.
This method is eager and will execute the associated expression
immediately.
Parameters
----------
expr
The ibis expression to execute and persist to Delta Lake table.
path
The data source. A string or Path to the Delta Lake table.
params
Mapping of scalar parameter expressions to value.
kwargs
Additional keyword arguments passed to deltalake.writer.write_deltalake method
"""
try:
from deltalake.writer import write_deltalake
except ImportError:
raise ImportError(
"The deltalake extra is required to use the "
"to_delta method. You can install it using pip:\n\n"
"pip install 'ibis-framework[deltalake]'\n"
)

with pcsv.CSVWriter(path, batch_reader.schema) as writer:
for batch in batch_reader:
writer.write_batch(batch)
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)


class BaseBackend(abc.ABC, _FileIOHandler):
Expand All @@ -450,10 +538,12 @@ class BaseBackend(abc.ABC, _FileIOHandler):
required methods.
"""

database_class = Database
table_class: type[ops.DatabaseTable] = ops.DatabaseTable
name: ClassVar[str]

supports_temporary_tables = False
supports_python_udfs = False
supports_in_memory_tables = True

def __init__(self, *args, **kwargs):
self._con_args: tuple[Any] = args
self._con_kwargs: dict[str, Any] = kwargs
Expand All @@ -467,12 +557,7 @@ def __init__(self, *args, **kwargs):
)

def __getstate__(self):
return dict(
database_class=self.database_class,
table_class=self.table_class,
_con_args=self._con_args,
_con_kwargs=self._con_kwargs,
)
return dict(_con_args=self._con_args, _con_kwargs=self._con_kwargs)

def __rich_repr__(self):
yield "name", self.name
Expand All @@ -498,7 +583,7 @@ def db_identity(self) -> str:
Hashable
Database identity
"""
parts = [self.table_class.__name__]
parts = [self.__class__]
parts.extend(self._con_args)
parts.extend(f'{k}={v}' for k, v in self._con_kwargs.items())
return '_'.join(map(str, parts))
Expand Down Expand Up @@ -561,7 +646,7 @@ def database(self, name: str | None = None) -> Database:
Database
A database object for the specified database.
"""
return self.database_class(name=name or self.current_database, client=self)
return Database(name=name or self.current_database, client=self)

@property
@abc.abstractmethod
Expand All @@ -577,7 +662,7 @@ def current_database(self) -> str | None:
"""

@abc.abstractmethod
def list_databases(self, like: str = None) -> list[str]:
def list_databases(self, like: str | None = None) -> list[str]:
"""List existing databases in the current connection.
Parameters
Expand Down Expand Up @@ -709,6 +794,25 @@ def register_options(cls) -> None:
except ValueError as e:
raise exc.BackendConfigurationNotRegistered(backend_name) from e

def _register_udfs(self, expr: ir.Expr) -> None:
"""Register UDFs contained in `expr` with the backend."""
if self.supports_python_udfs:
raise NotImplementedError(self.name)

def _register_in_memory_tables(self, expr: ir.Expr):
if self.supports_in_memory_tables:
raise NotImplementedError(self.name)

def _run_pre_execute_hooks(self, expr: ir.Expr) -> None:
"""Backend-specific hooks to run before an expression is executed."""
self._define_udf_translation_rules(expr)
self._register_udfs(expr)
self._register_in_memory_tables(expr)

def _define_udf_translation_rules(self, expr):
if self.supports_in_memory_tables:
raise NotImplementedError(self.name)

def compile(
self,
expr: ir.Expr,
Expand All @@ -733,15 +837,7 @@ def add_operation(self, operation: ops.Node) -> Callable:
Operations are defined in `ibis.expr.operations`, and a translation
function receives the translator object and an expression as
parameters, and returns a value depending on the backend. For example,
in SQL backends, a NullLiteral operation could be translated to the
string `"NULL"`.
Examples
--------
>>> @ibis.sqlite.add_operation(ibis.expr.operations.NullLiteral)
... def _null_literal(translator, expression):
... return 'NULL'
parameters, and returns a value depending on the backend.
"""
if not hasattr(self, 'compiler'):
raise RuntimeError('Only SQL-based backends support `add_operation`')
Expand Down Expand Up @@ -773,7 +869,7 @@ def create_database(self, name: str, force: bool = False) -> None:
def create_table(
self,
name: str,
obj: pd.DataFrame | ir.Table | None = None,
obj: pd.DataFrame | pa.Table | ir.Table | None = None,
*,
schema: ibis.Schema | None = None,
database: str | None = None,
Expand Down Expand Up @@ -940,6 +1036,26 @@ def _load_into_cache(self, name, expr):
def _clean_up_cached_table(self, op):
raise NotImplementedError(self.name)

def _transpile_sql(self, query: str, *, dialect: str | None = None) -> str:
    """Transpile `query` from `dialect` into this backend's SQL dialect.

    Parameters
    ----------
    query
        SQL query string.
    dialect
        Name of the dialect `query` is written in. If `None`, the query
        is returned unchanged.

    Returns
    -------
    str
        The query, transpiled when the input dialect differs from this
        backend's dialect.

    Raises
    ------
    NotImplementedError
        If this backend explicitly declares no sqlglot dialect
        (``_sqlglot_dialect`` set to ``None``).
    """
    # only transpile if dialect was passed
    if dialect is None:
        return query

    # Imported locally: sqlglot is only needed when actually transpiling.
    import sqlglot as sg

    # only transpile if the backend dialect doesn't match the input dialect
    name = self.name
    if (output_dialect := getattr(self, "_sqlglot_dialect", name)) is None:
        raise NotImplementedError(f"No known sqlglot dialect for backend {name}")

    if dialect != output_dialect:
        # Map ibis backend names to sqlglot dialect names where they differ
        # (e.g. mssql -> tsql) before handing off to sqlglot.
        (query,) = sg.transpile(
            query,
            read=_IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect),
            write=output_dialect,
        )
    return query


@functools.lru_cache(maxsize=None)
def _get_backend_names() -> frozenset[str]:
Expand Down
14 changes: 8 additions & 6 deletions ibis/backends/base/df/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
When there are no time contexts associate with the cached result, getting
and setting values in Scope would be as simple as get and set in a normal
dictonary. With time contexts, we need the following logic for getting
dictionary. With time contexts, we need the following logic for getting
and setting items in scope:
Before setting the value op in scope we need to perform the following
Expand Down Expand Up @@ -37,12 +37,14 @@
from __future__ import annotations

from collections import namedtuple
from typing import Any, Iterable, Tuple
from typing import TYPE_CHECKING, Any, Iterable, Tuple

import pandas as pd

from ibis.backends.base.df.timecontext import TimeContextRelation, compare_timecontext
from ibis.expr.operations import Node

if TYPE_CHECKING:
from ibis.expr.operations import Node

TimeContext = Tuple[pd.Timestamp, pd.Timestamp]

Expand All @@ -52,7 +54,7 @@
class Scope:
def __init__(
self,
param: dict[Node, Any] = None,
param: dict[Node, Any] | None = None,
timecontext: TimeContext | None = None,
):
"""Create a new scope.
Expand Down Expand Up @@ -133,7 +135,7 @@ def get_value(self, op: Node, timecontext: TimeContext | None = None) -> Any:
if timecontext is None:
return self._items[op].value
else:
# For op with timecontext, ther are some ops cannot use cached
# For op with timecontext, there are some ops that cannot use cached
# result with a different (larger) timecontext to get the
# correct result.
# For example, a groupby followed by count, if we use a larger or
Expand All @@ -142,7 +144,7 @@ def get_value(self, op: Node, timecontext: TimeContext | None = None) -> Any:
# depending on other rows in result Dataframe, cannot use cached
# result with different time context to optimize calculation.
# These are time context sensitive operations. Since these cases
# are rare in acutal use case, we just enable optimization for
# are rare in actual use cases, we just enable optimization for
# all nodes for now.
cached_timecontext = self._items[op].timecontext
if cached_timecontext:
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/base/df/timecontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
require extra data outside of the global time context that user defines.
For example, in asof_join, we need to look back extra `tolerance` days
for the right table to get the data for joining. Similarly for window
operation with preceeding and following.
operations with preceding and following.
Algorithm to calculate context adjustment are defined in this module
and could be used by multiple backends.
"""
Expand Down
81 changes: 29 additions & 52 deletions ibis/backends/base/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
import pandas as pd
import pyarrow as pa

__all__ = [
'BaseSQLBackend',
]
__all__ = ['BaseSQLBackend']


class BaseSQLBackend(BaseBackend):
"""Base backend class for backends that compile to SQL."""

compiler = Compiler
table_class = ops.DatabaseTable
table_expr_class = ir.Table

@property
def _sqlglot_dialect(self) -> str:
return self.name

def _from_url(self, url: str, **kwargs: Any) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Expand Down Expand Up @@ -89,14 +89,16 @@ def table(self, name: str, database: str | None = None) -> ir.Table:
)
qualified_name = self._fully_qualified_name(name, database)
schema = self.get_schema(qualified_name)
node = self.table_class(qualified_name, schema, self)
return self.table_expr_class(node)
node = ops.DatabaseTable(name, schema, self, namespace=database)
return node.to_expr()

def _fully_qualified_name(self, name, database):
# XXX
return name

def sql(self, query: str, schema: sch.Schema | None = None) -> ir.Table:
def sql(
self, query: str, schema: sch.Schema | None = None, dialect: str | None = None
) -> ir.Table:
"""Convert a SQL query to an Ibis table expression.
Parameters
Expand All @@ -106,12 +108,16 @@ def sql(self, query: str, schema: sch.Schema | None = None) -> ir.Table:
schema
The expected schema for this query. If not provided, will be
inferred automatically if possible.
dialect
Optional string indicating the dialect of `query`. The default
value of `None` will use the backend's native dialect.
Returns
-------
Table
Table expression
"""
query = self._transpile_sql(query, dialect=dialect)
if schema is None:
schema = self._get_schema_using_query(query)
else:
Expand All @@ -131,11 +137,7 @@ def raw_sql(self, query: str):
query
DDL or DML statement
"""
cursor = self.con.execute(query)
if cursor:
return cursor
cursor.release()
return None
return self.con.execute(query)

@contextlib.contextmanager
def _safe_raw_sql(self, *args, **kwargs):
Expand All @@ -148,7 +150,7 @@ def _cursor_batches(
limit: int | str | None = None,
chunk_size: int = 1_000_000,
) -> Iterable[list]:
self._register_in_memory_tables(expr)
self._run_pre_execute_hooks(expr)
query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params)
sql = query_ast.compile()

Expand Down Expand Up @@ -202,6 +204,15 @@ def to_pyarrow_batches(

return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batches)

def _register_udfs(self, expr: ir.Expr) -> None:
"""Return an iterator of DDL strings, once for each UDFs contained within `expr`."""
if self.supports_python_udfs:
raise NotImplementedError(self.name)

def _define_udf_translation_rules(self, expr: ir.Expr) -> None:
if self.supports_python_udfs:
raise NotImplementedError(self.name)

def execute(
self,
expr: ir.Expr,
Expand Down Expand Up @@ -239,16 +250,14 @@ def execute(
# `external_tables` in clickhouse, but better to deprecate that
# feature than all this magic.
# we don't want to pass `timecontext` to `raw_sql`
self._run_pre_execute_hooks(expr)

kwargs.pop('timecontext', None)
query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params)
sql = query_ast.compile()
self._log(sql)

schema = self.ast_schema(query_ast, **kwargs)

# register all in memory tables if the backend supports cheap access
# to them
self._register_in_memory_tables(expr)
schema = expr.as_table().schema()

with self._safe_raw_sql(sql, **kwargs) as cursor:
result = self.fetch_from_cursor(cursor, schema)
Expand All @@ -270,39 +279,6 @@ def _register_in_memory_tables(self, expr: ir.Expr) -> None:
def fetch_from_cursor(self, cursor, schema):
"""Fetch data from cursor."""

def ast_schema(self, query_ast, **kwargs) -> sch.Schema:
"""Return the schema of the expression.
Parameters
----------
query_ast
The AST of the query
kwargs
Backend specific parameters
Returns
-------
Schema
An ibis schema
Raises
------
ValueError
if `self.expr` doesn't have a schema.
"""
dml = getattr(query_ast, 'dml', query_ast)
op = getattr(dml, 'parent_op', getattr(dml, 'table_set', None))

if isinstance(op, ops.TableNode):
return op.schema
elif isinstance(op, ops.Value):
return sch.schema({op.name: op.output_dtype})
else:
raise ValueError(
'Expression with type {} does not have a '
'schema'.format(type(self.expr))
)

def _log(self, sql: str) -> None:
"""Log the SQL, usually to the standard output.
Expand Down Expand Up @@ -338,6 +314,7 @@ def compile(
The output of compilation. The type of this value depends on the
backend.
"""
self._define_udf_translation_rules(expr)
return self.compiler.to_ast_ensure_limit(expr, limit, params=params).compile()

def _to_sql(self, expr: ir.Expr, **kwargs) -> str:
Expand Down
352 changes: 243 additions & 109 deletions ibis/backends/base/sql/alchemy/__init__.py

Large diffs are not rendered by default.

36 changes: 0 additions & 36 deletions ibis/backends/base/sql/alchemy/database.py

This file was deleted.

418 changes: 175 additions & 243 deletions ibis/backends/base/sql/alchemy/datatypes.py

Large diffs are not rendered by default.

14 changes: 6 additions & 8 deletions ibis/backends/base/sql/alchemy/geospatial.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from __future__ import annotations

try:
import geoalchemy2
import geoalchemy2.shape # noqa: F401
import geopandas # noqa: F401
except ImportError:
geospatial_supported = False
else:
geospatial_supported = True
from importlib.util import find_spec as _find_spec

geospatial_supported = (
_find_spec("geoalchemy2") is not None and _find_spec("geopandas") is not None
)
__all__ = ["geospatial_supported"]
13 changes: 6 additions & 7 deletions ibis/backends/base/sql/alchemy/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import ibis.expr.analysis as an
import ibis.expr.operations as ops
from ibis.backends.base.sql.alchemy.database import AlchemyTable
from ibis.backends.base.sql.alchemy.translator import (
AlchemyContext,
AlchemyExprTranslator,
Expand Down Expand Up @@ -92,8 +91,8 @@ def _format_table(self, op):

translator = ctx.compiler.translator_class(ref_op, ctx)

if isinstance(ref_op, AlchemyTable):
result = ref_op.sqla_table
if isinstance(ref_op, ops.DatabaseTable):
result = ref_op.source._get_sqla_table(ref_op.name, schema=ref_op.namespace)
elif isinstance(ref_op, ops.UnboundTable):
# use SQLAlchemy's TableClause for unbound tables
result = sa.Table(
Expand Down Expand Up @@ -160,8 +159,8 @@ def _format_in_memory_table(self, op, ref_op, translator):
raw_rows = (
sa.select(
*(
translator.translate(ops.Literal(val, dtype=type_))
for val, type_ in zip(row, op.schema.types)
translator.translate(ops.Literal(val, dtype=type_)).label(name)
for val, (name, type_) in zip(row, op.schema.items())
)
)
for row in op.data.to_frame().itertuples(index=False)
Expand Down Expand Up @@ -269,8 +268,8 @@ def _add_select(self, table_set):
return result

if unnest_children:
# get all the unnests plus the current froms of the result selection
# and build up the cross join
# get all the unnests plus the current FROM clauses of the result
# selection and build up the cross join
table_set = functools.reduce(
functools.partial(sa.sql.FromClause.join, onclause=sa.true()),
toolz.unique(toolz.concatv(unnest_children, result.get_final_froms())),
Expand Down
45 changes: 26 additions & 19 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.backends.base.sql.alchemy.database import AlchemyTable


class substr(GenericFunction):
Expand All @@ -23,6 +22,10 @@ class substr(GenericFunction):
inherit_cache = True


class try_cast(GenericFunction):
pass


def variance_reduction(func_name, suffix=None):
suffix = suffix or {'sample': '_samp', 'pop': '_pop'}

Expand Down Expand Up @@ -79,8 +82,6 @@ def get_sqla_table(ctx, table):
while sa_table is None and ctx_level.parent is not ctx_level:
ctx_level = ctx_level.parent
sa_table = ctx_level.get_ref(table)
elif isinstance(table, AlchemyTable):
sa_table = table.sqla_table
else:
sa_table = ctx.get_compiled_expr(table)

Expand Down Expand Up @@ -223,9 +224,7 @@ def _literal(_, op):
if value is None:
return sa.null()

if dtype.is_set():
return list(map(sa.literal, value))
elif dtype.is_array():
if dtype.is_array():
value = list(value)

return sa.literal(value)
Expand Down Expand Up @@ -334,16 +333,18 @@ def _translate_window_boundary(boundary):


def _window_function(t, window):
if isinstance(window.func, ops.CumulativeOp):
func = _cumulative_to_window(t, window.func, window.frame).op()
func = window.func.__window_op__

if isinstance(func, ops.CumulativeOp):
func = _cumulative_to_window(t, func, window.frame).op()
return t.translate(func)

reduction = t.translate(window.func)
reduction = t.translate(func)

# Some analytic functions need to have the expression of interest in
# the ORDER BY part of the window clause
if isinstance(window.func, t._require_order_by) and not window.frame.order_by:
order_by = t.translate(window.func.arg) # .args[0])
if isinstance(func, t._require_order_by) and not window.frame.order_by:
order_by = t.translate(func.arg) # .args[0])
else:
order_by = [t.translate(arg) for arg in window.frame.order_by]

Expand All @@ -361,7 +362,7 @@ def _window_function(t, window):
else:
raise NotImplementedError(type(window.frame))

if t._forbids_frame_clause and isinstance(window.func, t._forbids_frame_clause):
if t._forbids_frame_clause and isinstance(func, t._forbids_frame_clause):
# some functions on some backends don't support frame clauses
additional_params = {}
else:
Expand All @@ -373,7 +374,7 @@ def _window_function(t, window):
reduction, partition_by=partition_by, order_by=order_by, **additional_params
)

if isinstance(window.func, (ops.RowNumber, ops.DenseRank, ops.MinRank, ops.NTile)):
if isinstance(func, (ops.RowNumber, ops.DenseRank, ops.MinRank, ops.NTile)):
return result - 1
else:
return result
Expand Down Expand Up @@ -444,13 +445,19 @@ def _substring(t, op):

def _gen_string_find(func):
def string_find(t, op):
if op.start is not None:
raise NotImplementedError("`start` not yet implemented")

if op.end is not None:
raise NotImplementedError("`end` not yet implemented")

return func(t.translate(op.arg), t.translate(op.substr)) - 1
arg = t.translate(op.arg)
sub_string = t.translate(op.substr)

if (op_start := op.start) is not None:
start = t.translate(op_start)
arg = sa.func.substr(arg, start + 1)
pos = func(arg, sub_string)
return sa.case((pos > 0, pos - 1 + start), else_=-1)

return func(arg, sub_string) - 1

return string_find

Expand Down Expand Up @@ -533,7 +540,6 @@ class array_filter(FunctionElement):
ops.Negate: _negate,
ops.Round: _round,
ops.Literal: _literal,
ops.NullLiteral: lambda *_: sa.null(),
ops.SimpleCase: _simple_case,
ops.SearchedCase: _searched_case,
ops.TableColumn: _table_column,
Expand All @@ -546,7 +552,8 @@ class array_filter(FunctionElement):
# string
ops.Capitalize: unary(
lambda arg: sa.func.concat(
sa.func.upper(sa.func.substr(arg, 1, 1)), sa.func.substr(arg, 2)
sa.func.upper(sa.func.substr(arg, 1, 1)),
sa.func.lower(sa.func.substr(arg, 2)),
)
),
ops.LPad: fixed_arity(sa.func.lpad, 3),
Expand Down
18 changes: 13 additions & 5 deletions ibis/backends/base/sql/alchemy/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
import operator

import sqlalchemy as sa
from sqlalchemy.engine.default import DefaultDialect

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.base.sql.alchemy import to_sqla_type
from ibis.backends.base.sql.alchemy.datatypes import _DEFAULT_DIALECT
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
from ibis.backends.base.sql.alchemy.registry import (
fixed_arity,
sqlalchemy_operation_registry,
)
from ibis.backends.base.sql.compiler import ExprTranslator, QueryContext

_DEFAULT_DIALECT = DefaultDialect()


class AlchemyContext(QueryContext):
def collapse(self, queries):
Expand All @@ -40,6 +42,7 @@ class AlchemyExprTranslator(ExprTranslator):
_registry = sqlalchemy_operation_registry
_rewrites = ExprTranslator._rewrites.copy()

type_mapper = AlchemyType
context_class = AlchemyContext

_bool_aggs_need_cast_to_int32 = True
Expand Down Expand Up @@ -69,6 +72,14 @@ def integer_to_timestamp(self, arg, tz: str | None = None):

supports_unnest_in_select = True

@classmethod
def get_sqla_type(cls, ibis_type):
return cls.type_mapper.from_ibis(ibis_type)

@classmethod
def get_ibis_type(cls, sqla_type, nullable=True):
return cls.type_mapper.to_ibis(sqla_type, nullable=nullable)

@functools.cached_property
def dialect(self) -> sa.engine.interfaces.Dialect:
if (name := self._dialect_name) == "default":
Expand All @@ -87,9 +98,6 @@ def name(self, translated, name, force=False):
sa.sql.quoted_name(name, quote=force or self._quote_column_names)
)

def get_sqla_type(self, data_type):
return to_sqla_type(self.dialect, data_type)

def _maybe_cast_bool(self, op, arg):
if (
self._bool_aggs_need_cast_to_int32
Expand Down
11 changes: 8 additions & 3 deletions ibis/backends/base/sql/compiler/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from io import StringIO
from typing import Iterable

import sqlglot as sg
import toolz

import ibis.common.exceptions as com
Expand Down Expand Up @@ -101,13 +102,17 @@ def _format_table(self, op):
if isinstance(ref_op, ops.InMemoryTable):
result = self._format_in_memory_table(ref_op)
elif isinstance(ref_op, ops.PhysicalTable):
name = ref_op.name
# TODO(kszucs): add a mandatory `name` field to the base
# PhyisicalTable instead of the child classes, this should prevent
# this error scenario
if name is None:
if (name := ref_op.name) is None:
raise com.RelationError(f'Table did not have a name: {op!r}')
result = self._quote_identifier(name)

result = sg.table(
name,
db=getattr(ref_op, "namespace", None),
quoted=self.parent.translator_class._quote_identifiers,
).sql(dialect=self.parent.translator_class._dialect_name)
else:
# A subquery
if ctx.is_extracted(ref_op):
Expand Down
17 changes: 13 additions & 4 deletions ibis/backends/base/sql/compiler/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ class ExprTranslator:
ops.CumeDist,
ops.NTile,
)
_unsupported_reductions = (
ops.ApproxMedian,
ops.GroupConcat,
ops.ApproxCountDistinct,
)
_dialect_name = "hive"
_quote_identifiers = None

def __init__(self, node, context, named=False, permit_subquery=False):
self.node = node
Expand Down Expand Up @@ -323,22 +330,24 @@ def _bucket(op):

@rewrites(ops.Any)
def _any_expand(op):
return ops.Max(op.arg)
return ops.Max(op.arg, where=op.where)


@rewrites(ops.NotAny)
def _notany_expand(op):
return ops.Equals(ops.Max(op.arg), ops.Literal(0, dtype=op.arg.output_dtype))
zero = ops.Literal(0, dtype=op.arg.output_dtype)
return ops.Min(ops.Equals(op.arg, zero), where=op.where)


@rewrites(ops.All)
def _all_expand(op):
return ops.Min(op.arg)
return ops.Min(op.arg, where=op.where)


@rewrites(ops.NotAll)
def _notall_expand(op):
return ops.Equals(ops.Min(op.arg), ops.Literal(0, dtype=op.arg.output_dtype))
zero = ops.Literal(0, dtype=op.arg.output_dtype)
return ops.Max(ops.Equals(op.arg, zero), where=op.where)


@rewrites(ops.Cast)
Expand Down
48 changes: 21 additions & 27 deletions ibis/backends/base/sql/ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import re

import sqlglot as sg

import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.backends.base.sql.compiler import DDL, DML
Expand Down Expand Up @@ -95,15 +97,11 @@ def _serdeproperties(props):

class _BaseQualifiedSQLStatement:
def _get_scoped_name(self, obj_name, database):
if database:
return f'{database}.`{obj_name}`'
elif not is_fully_qualified(obj_name):
if _is_quoted(obj_name):
return obj_name
else:
return f'`{obj_name}`'
else:
if is_fully_qualified(obj_name):
return obj_name
if _is_quoted(obj_name):
obj_name = obj_name[1:-1]
return sg.table(obj_name, db=database, quoted=True).sql(dialect="hive")


class BaseDDL(DDL, _BaseQualifiedSQLStatement):
Expand Down Expand Up @@ -431,27 +429,23 @@ def compile(self):


class RenameTable(AlterTable):
def __init__(self, old_name, new_name, old_database=None, new_database=None):
# if either database is None, the name is assumed to be fully scoped
self.old_name = old_name
self.old_database = old_database
self.new_name = new_name
self.new_database = new_database

new_qualified_name = new_name
if new_database is not None:
new_qualified_name = self._get_scoped_name(new_name, new_database)

old_qualified_name = old_name
if old_database is not None:
old_qualified_name = self._get_scoped_name(old_name, old_database)

self.old_qualified_name = old_qualified_name
self.new_qualified_name = new_qualified_name
def __init__(
self,
old_name: str,
new_name: str,
old_database: str | None = None,
new_database: str | None = None,
dialect: str = "hive",
):
self._old = sg.table(old_name, db=old_database, quoted=True).sql(
dialect=dialect
)
self._new = sg.table(new_name, db=new_database, quoted=True).sql(
dialect=dialect
)

def compile(self):
cmd = f'{self.old_qualified_name} RENAME TO {self.new_qualified_name}'
return self._wrap_command(cmd)
return self._wrap_command(f"{self._old} RENAME TO {self._new}")


__all__ = (
Expand Down
6 changes: 0 additions & 6 deletions ibis/backends/base/sql/registry/literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,7 @@ def literal(translator, op):
typeclass = 'timestamp'
elif dtype.is_interval():
typeclass = 'interval'
elif dtype.is_set():
typeclass = 'set'
else:
raise NotImplementedError(f'Unsupported type: {dtype!r}')

return literal_formatters[typeclass](translator, op)


def null_literal(translator, expr):
return 'NULL'
18 changes: 2 additions & 16 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
timestamp,
window,
)
from ibis.backends.base.sql.registry.literal import literal, null_literal
from ibis.backends.base.sql.registry.literal import literal


def alias(translator, op):
Expand Down Expand Up @@ -189,19 +189,6 @@ def round(translator, op):
return f'round({arg_formatted})'


# XXX this is not added to operation_registry, but looks like impala is
# using it in the tests, and it works, even if it's not imported anywhere
def hash(translator, op):
how = op.how

arg_formatted = translator.translate(op.arg)

if how == 'fnv':
return f'fnv_hash({arg_formatted})'
else:
raise NotImplementedError(how)


def concat(translator, op):
joined_args = ', '.join(map(translator.translate, op.arg))
return f"concat({joined_args})"
Expand Down Expand Up @@ -272,7 +259,6 @@ def count_star(translator, op):
ops.Round: round,
ops.Sign: sign,
ops.Sqrt: unary('sqrt'),
ops.Hash: hash,
ops.HashBytes: hashbytes,
ops.RandomScalar: lambda *_: 'rand(utc_to_unix_micros(utc_timestamp()))',
ops.Log: log,
Expand Down Expand Up @@ -352,13 +338,13 @@ def count_star(translator, op):
ops.ExtractHour: timestamp.extract_field('hour'),
ops.ExtractMinute: timestamp.extract_field('minute'),
ops.ExtractSecond: timestamp.extract_field('second'),
ops.ExtractMicrosecond: timestamp.extract_field('microsecond'),
ops.ExtractMillisecond: timestamp.extract_field('millisecond'),
ops.TimestampTruncate: timestamp.truncate,
ops.DateTruncate: timestamp.truncate,
ops.IntervalFromInteger: timestamp.interval_from_integer,
# Other operations
ops.Literal: literal,
ops.NullLiteral: null_literal,
ops.Cast: cast,
ops.Coalesce: varargs('coalesce'),
ops.Greatest: varargs('greatest'),
Expand Down
9 changes: 6 additions & 3 deletions ibis/backends/base/sql/registry/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def truncate(translator, op):

arg_formatted = translator.translate(arg)
try:
unit = base_unit_names[unit]
unit = base_unit_names[unit.short]
except KeyError:
raise com.UnsupportedOperationError(
f'{unit!r} unit is not supported in timestamp truncate'
Expand Down Expand Up @@ -81,7 +81,7 @@ def _from_unixtime(translator, expr):

def timestamp_from_unix(translator, op):
val, unit = op.args
val = util.convert_unit(val, unit, 's').to_expr().cast("int32").op()
val = util.convert_unit(val, unit.short, 's').to_expr().cast("int32").op()
arg = _from_unixtime(translator, val)
return f'CAST({arg} AS timestamp)'

Expand All @@ -93,7 +93,10 @@ def day_of_week_index(t, op):
def strftime(t, op):
import sqlglot as sg

reverse_hive_mapping = {v: k for k, v in sg.dialects.hive.Hive.time_mapping.items()}
hive_dialect = sg.dialects.hive.Hive
if (time_mapping := getattr(hive_dialect, "TIME_MAPPING", None)) is None:
time_mapping = hive_dialect.time_mapping
reverse_hive_mapping = {v: k for k, v in time_mapping.items()}
format_str = sg.time.format_time(op.format_str.value, reverse_hive_mapping)
targ = t.translate(ops.Cast(op.arg, to=dt.string))
return f"from_unixtime(unix_timestamp({targ}), {format_str!r})"
Expand Down
28 changes: 13 additions & 15 deletions ibis/backends/base/sql/registry/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def interval_boundary_to_integer(boundary):

value = boundary.value
try:
multiplier = _map_interval_to_microseconds[value.output_dtype.unit]
multiplier = _map_interval_to_microseconds[value.output_dtype.unit.short]
except KeyError:
raise com.IbisInputError(
f"Unsupported interval unit: {value.output_dtype.unit}"
Expand Down Expand Up @@ -122,26 +122,24 @@ def format_window_frame(translator, func, frame):


def window(translator, op):
_unsupported_reductions = (
ops.ApproxMedian,
ops.GroupConcat,
ops.ApproxCountDistinct,
)
_unsupported_reductions = translator._unsupported_reductions

if isinstance(op.func, _unsupported_reductions):
func = op.func.__window_op__

if isinstance(func, _unsupported_reductions):
raise com.UnsupportedOperationError(
f'{type(op.func)} is not supported in window functions'
f'{type(func)} is not supported in window functions'
)

if isinstance(op.func, ops.CumulativeOp):
arg = cumulative_to_window(translator, op.func, op.frame)
if isinstance(func, ops.CumulativeOp):
arg = cumulative_to_window(translator, func, op.frame)
return translator.translate(arg)

# Some analytic functions need to have the expression of interest in
# the ORDER BY part of the window clause
frame = op.frame
if isinstance(op.func, translator._require_order_by) and not frame.order_by:
frame = frame.copy(order_by=(op.func.arg,))
if isinstance(func, translator._require_order_by) and not frame.order_by:
frame = frame.copy(order_by=(func.arg,))

# Time ranges need to be converted to microseconds.
if isinstance(frame, ops.RangeWindowFrame):
Expand All @@ -153,12 +151,12 @@ def window(translator, op):
'Rows with max lookback is not implemented for SQL-based backends.'
)

window_formatted = format_window_frame(translator, op.func, frame)
window_formatted = format_window_frame(translator, func, frame)

arg_formatted = translator.translate(op.func)
arg_formatted = translator.translate(func.__window_op__)
result = f'{arg_formatted} {window_formatted}'

if isinstance(op.func, ops.RankBase):
if isinstance(func, ops.RankBase):
return f'({result} - 1)'
else:
return result
Expand Down
Loading