52 changes: 50 additions & 2 deletions docker-compose.yml
@@ -1,7 +1,7 @@
version: "3.4"
services:
clickhouse:
image: clickhouse/clickhouse-server:23.6.1.1524-alpine
image: clickhouse/clickhouse-server:23.7.1.2470-alpine
ports:
- 8123:8123
- 9000:9000
@@ -16,6 +16,7 @@ services:
- clickhouse:/var/lib/clickhouse/user_files/ibis
networks:
- clickhouse

impala:
depends_on:
- impala-postgres
@@ -45,6 +46,7 @@ services:
- 25020:25020
networks:
- impala

impala-postgres:
user: postgres
hostname: postgres
@@ -60,6 +62,7 @@ services:
image: postgres:13.11-alpine
networks:
- impala

kudu:
cap_add:
- SYS_TIME
@@ -80,6 +83,7 @@ services:
- CMD-SHELL
- kudu cluster ksck kudu:7051
timeout: 10s

kudu-tserver:
cap_add:
- SYS_TIME
@@ -98,6 +102,7 @@ services:
- CMD-SHELL
- kudu cluster ksck kudu:7051
timeout: 10s

mysql:
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
@@ -119,6 +124,8 @@ services:
- mysql
volumes:
- mysql:/data
- $PWD/docker/mysql:/docker-entrypoint-initdb.d:ro

postgres:
user: postgres
environment:
@@ -158,6 +165,7 @@ services:
- mssql:/data
networks:
- mssql

trino-postgres:
user: postgres
environment:
@@ -179,24 +187,64 @@
- trino
volumes:
- trino-postgres:/data

hive-metastore-mariadb:
image: mariadb:10.11.4
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
MYSQL_USER: admin
MYSQL_PASSWORD: admin
MYSQL_DATABASE: metastore_db
healthcheck:
interval: 1s
retries: 30
test:
- CMD
- mariadb-admin
- ping
timeout: 5s
networks:
- trino

hive-metastore-minio:
image: minio/minio:latest
environment:
- MINIO_ACCESS_KEY=accesskey
- MINIO_SECRET_KEY=secretkey
entrypoint: sh
command: -c 'mkdir -p /data/warehouse && /opt/bin/minio server /data'
networks:
- trino

hive-metastore:
build: ./docker/trino
image: ibis-hive-metastore
depends_on:
- hive-metastore-mariadb
- hive-metastore-minio
networks:
- trino

trino:
depends_on:
- trino-postgres
- hive-metastore
healthcheck:
interval: 5s
retries: 10
test:
- CMD-SHELL
- trino --execute 'SELECT 1 AS one'
timeout: 30s
image: trinodb/trino:420
image: trinodb/trino:422
ports:
- 8080:8080
networks:
- trino
volumes:
- $PWD/docker/trino/catalog/postgresql.properties:/etc/trino/catalog/postgresql.properties:ro
- $PWD/docker/trino/catalog/memory.properties:/etc/trino/catalog/memory.properties:ro
- $PWD/docker/trino/catalog/hive.properties:/etc/trino/catalog/hive.properties:ro
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

druid-postgres:
3 changes: 3 additions & 0 deletions docker/mysql/startup.sql
@@ -0,0 +1,3 @@
CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis';
GRANT CREATE, DROP ON *.* TO 'ibis'@'%';
FLUSH PRIVILEGES;
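
For context, the compose change above mounts `docker/mysql` at `/docker-entrypoint-initdb.d`, so this script runs once when the MySQL data directory is first initialized. A minimal sketch of exercising the new grants from Python (host, port, and database name are assumptions for a local setup, not taken from this diff):

```python
import ibis

con = ibis.mysql.connect(
    user="ibis",
    password="ibis",
    host="localhost",  # assumed
    port=3306,  # assumed
    database="ibis_testing",  # hypothetical database name
)

# the GRANT above allows CREATE and DROP on any schema
con.create_table("grant_check", schema=ibis.schema({"a": "int64"}))
con.drop_table("grant_check")
```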
31 changes: 31 additions & 0 deletions docker/trino/Dockerfile
@@ -0,0 +1,31 @@
FROM openjdk:8u342-jre

WORKDIR /opt

ENV HADOOP_VERSION=3.2.0
ENV METASTORE_VERSION=3.0.0

ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION}
ENV HIVE_HOME=/opt/apache-hive-metastore-${METASTORE_VERSION}-bin

RUN apt-get update \
&& apt-get install --assume-yes procps telnet \
&& apt-get clean \
&& curl -L https://apache.org/dist/hive/hive-standalone-metastore-${METASTORE_VERSION}/hive-standalone-metastore-${METASTORE_VERSION}-bin.tar.gz | tar zxf - && \
curl -L https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | tar zxf - && \
curl -L https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.19.tar.gz | tar zxf - && \
cp mysql-connector-java-8.0.19/mysql-connector-java-8.0.19.jar ${HIVE_HOME}/lib/ && \
rm -rf mysql-connector-java-8.0.19

COPY ./metastore-site.xml ${HIVE_HOME}/conf
COPY ./entrypoint.sh /entrypoint.sh

RUN groupadd -r hive --gid=1000 && \
useradd -r -g hive --uid=1000 -d ${HIVE_HOME} hive && \
chown hive:hive -R ${HIVE_HOME} && \
chown hive:hive /entrypoint.sh && chmod +x /entrypoint.sh

USER hive
EXPOSE 9083

ENTRYPOINT ["sh", "-c", "/entrypoint.sh"]
15 changes: 15 additions & 0 deletions docker/trino/catalog/hive.properties
@@ -0,0 +1,15 @@
connector.name=hive

hive.allow-drop-table=true
hive.ignore-absent-partitions=true
hive.metastore.thrift.delete-files-on-drop=true
hive.metastore.uri=thrift://hive-metastore:9083
hive.metastore.username=admin
hive.non-managed-table-writes-enabled=true
hive.s3.aws-access-key=accesskey
hive.s3.aws-secret-key=secretkey
hive.s3.endpoint=http://hive-metastore-minio:9000
hive.s3.path-style-access=true
hive.s3select-pushdown.enabled=true
hive.storage-format=PARQUET
hive.timestamp-precision=MICROSECONDS
30 changes: 30 additions & 0 deletions docker/trino/entrypoint.sh
@@ -0,0 +1,30 @@
#!/bin/sh

export HADOOP_HOME=/opt/hadoop-3.2.0
export HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.375.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.2.0.jar
export JAVA_HOME=/usr/local/openjdk-8

# Make sure mariadb is ready
MAX_TRIES=8
CURRENT_TRY=1
SLEEP_BETWEEN_TRY=4
MARIADB_HOSTNAME="hive-metastore-mariadb"
until [ "$(telnet "$MARIADB_HOSTNAME" 3306 | sed -n 2p)" = "Connected to ${MARIADB_HOSTNAME}." ] || [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; do
echo "Waiting for mariadb server..."
sleep "$SLEEP_BETWEEN_TRY"
CURRENT_TRY=$((CURRENT_TRY + 1))
done

if [ "$CURRENT_TRY" -gt "$MAX_TRIES" ]; then
echo "WARNING: Timeout when waiting for mariadb."
fi

# Check if schema exists
/opt/apache-hive-metastore-3.0.0-bin/bin/schematool -dbType mysql -info
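# "$?" in the check below still holds the exit status of this schematool call;
# a status of 1 is treated as "schema not initialized"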

if [ $? -eq 1 ]; then
echo "Getting schema info failed. Probably not initialized. Initializing..."
/opt/apache-hive-metastore-3.0.0-bin/bin/schematool -initSchema -dbType mysql
fi

/opt/apache-hive-metastore-3.0.0-bin/bin/start-metastore
56 changes: 56 additions & 0 deletions docker/trino/metastore-site.xml
@@ -0,0 +1,56 @@
<configuration>
<property>
<name>metastore.thrift.uris</name>
<value>thrift://0.0.0.0:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>metastore.task.threads.always</name>
<value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask</value>
</property>
<property>
<name>metastore.expression.proxy</name>
<value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
</property>
<property>
<name>metastore.warehouse.dir</name>
<value>s3a://warehouse/</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hive-metastore-mariadb:3306/metastore_db</value>
</property>

<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>admin</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>admin</value>
</property>

<property>
<name>fs.s3a.access.key</name>
<value>accesskey</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>secretkey</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>http://hive-metastore-minio:9000</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>

</configuration>
3 changes: 2 additions & 1 deletion docs/SUMMARY.md
@@ -36,9 +36,10 @@
* [Schemas](reference/schemas.md)
* [Backend interfaces](reference/backends/)
* [Configuration](reference/config.md)
* [Supported Python versions](supported_python_versions.md)
* [Versioning Policy](versioning.md)
* [Release notes](release_notes.md)
* Blog
* [Ibis v6.0.0](blog/rendered/ibis-version-6.0.0-release.ipynb)
* [Ibis on :fire:: Supercharge Your Workflow with DuckDB and PyTorch](blog/rendered/torch.ipynb)
* [Campaign finance analysis with Ibis](blog/rendered/campaign-finance.ipynb)
* [Ibis sneak peek: writing to files](blog/ibis-to-file.md)
22 changes: 13 additions & 9 deletions docs/backends/app/backend_info_app.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import datetime
import tempfile
from pathlib import Path
@@ -21,7 +23,7 @@
ibis.options.verbose_log = lambda sql: sql_queries.append(sql)


@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def support_matrix_df():
resp = requests.get("https://ibis-project.org/backends/raw_support_matrix.csv")
resp.raise_for_status()
@@ -39,7 +41,7 @@ def support_matrix_df():
)


@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def backends_info_df():
return pd.DataFrame(
{
@@ -69,7 +71,7 @@ def backends_info_df():
support_matrix_table = ibis.memtable(support_matrix_df())


@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def get_all_backend_categories():
return (
backend_info_table.select(category=_.categories.unnest())
@@ -80,7 +82,7 @@ def get_all_backend_categories():
)


@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def get_all_operation_categories():
return (
support_matrix_table.select(_.operation_category)
@@ -90,7 +92,7 @@ def get_all_operation_categories():
)


@st.experimental_memo(ttl=ONE_HOUR_IN_SECONDS)
@st.cache_data(ttl=ONE_HOUR_IN_SECONDS)
def get_backend_names(categories: Optional[List[str]] = None):
backend_expr = backend_info_table.mutate(category=_.categories.unnest())
if categories:
@@ -105,13 +107,15 @@ def get_selected_backend_name():
selected_categories_names = st.sidebar.multiselect(
'Backend category',
options=backend_categories,
default=None,
default=backend_categories,
)
if not selected_categories_names:
return get_backend_names()
return get_backend_names(selected_categories_names)


def get_backend_subset(subset):
return st.sidebar.multiselect('Backend name', options=subset, default=subset)


def get_selected_operation_categories():
all_ops_categories = get_all_operation_categories()

@@ -128,7 +132,7 @@ def get_selected_operation_categories():
return selected_ops_categories


current_backend_names = get_selected_backend_name()
current_backend_names = get_backend_subset(get_selected_backend_name())
sort_by_coverage = st.sidebar.checkbox('Sort by API Coverage', value=False)
current_ops_categories = get_selected_operation_categories()

3 changes: 3 additions & 0 deletions docs/backends/bigquery.md
@@ -2,12 +2,15 @@
backend_name: Google BigQuery
backend_url: https://cloud.google.com/bigquery
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# BigQuery

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the BigQuery backend:
3 changes: 3 additions & 0 deletions docs/backends/clickhouse.md
@@ -3,12 +3,15 @@ backend_name: ClickHouse
backend_url: https://clickhouse.yandex/
backend_module: clickhouse
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: native
---

# ClickHouse

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the ClickHouse backend:
1 change: 1 addition & 0 deletions docs/backends/dask.md
@@ -3,6 +3,7 @@ backend_name: Dask
backend_url: https://dask.org
backend_module: dask
backend_param_style: a dictionary of paths
memtable_impl: native
---

{% include 'backends/template.md' %}
3 changes: 3 additions & 0 deletions docs/backends/datafusion.md
@@ -5,12 +5,15 @@ backend_module: datafusion
version_added: "2.1"
exports: ["PyArrow", "Parquet", "Delta Lake", "CSV", "Pandas"]
imports: ["CSV", "Parquet", "Delta Lake"]
memtable_impl: not_implemented
---

# DataFusion

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Apache Datafusion backend:
3 changes: 3 additions & 0 deletions docs/backends/druid.md
@@ -3,6 +3,7 @@ backend_name: Druid
backend_url: https://druid.apache.org/
backend_module: druid
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# Druid
@@ -13,6 +14,8 @@ exports: ["PyArrow", "Parquet", "CSV", "Pandas"]

The Druid backend is experimental and is subject to backwards incompatible changes.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Druid backend:
3 changes: 3 additions & 0 deletions docs/backends/duckdb.md
@@ -14,6 +14,7 @@ imports:
"SQLite",
"Postgres",
]
memtable_impl: native
---

# DuckDB
@@ -28,6 +29,8 @@ imports:
See [this issue](https://github.com/ibis-project/ibis/issues/4503) for
more details.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the DuckDB backend:
1 change: 1 addition & 0 deletions docs/backends/impala.md
@@ -8,6 +8,7 @@ intro: |
without requiring you to switch back and forth between Python code and the
Impala shell.
exclude_backend_api: true
memtable_impl: fallback
---

{% include 'backends/template.md' %}
18 changes: 18 additions & 0 deletions docs/backends/memtable-template.md
@@ -0,0 +1,18 @@
{% set memtable_badges = {"native": "56ae57", "fallback": "goldenrod", "not_implemented": "ff6961"} %}

## `ibis.memtable` Support ![memtable](https://img.shields.io/badge/{{ memtable_impl }}-{{ memtable_badges[memtable_impl] }})

{% if memtable_impl == "not_implemented" %}

The {{ backend_name }} backend does not currently support in-memory tables.

Please [file an issue](https://github.com/ibis-project/ibis/issues/new/choose)
if you'd like the {{ backend_name }} backend to support in-memory tables.

{% else %}

The {{ backend_name }} backend supports `memtable`s {% if memtable_impl == "fallback" %} by constructing a string with the contents of the in-memory object. **This will be very inefficient for medium to large in-memory tables**. Please [file an issue](https://github.com/ibis-project/ibis/issues/new/choose) if you observe performance issues when using in-memory tables. {% elif memtable_impl == "native" %} by natively executing queries against the underlying storage (e.g., pyarrow Tables or pandas DataFrames).

{% endif %}

{% endif %}
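
As a concrete companion to the template above, a minimal sketch of `memtable` usage from the user's side (the column values are made up):

```python
import ibis

# dict-of-columns input; pandas DataFrames and pyarrow Tables also work
t = ibis.memtable({"a": [1, 2, 3], "b": ["x", "y", "z"]})
```

Whether this executes natively or through a generated string is exactly what the badge above indicates for each backend.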
3 changes: 3 additions & 0 deletions docs/backends/mssql.md
@@ -5,12 +5,15 @@ backend_module: mssql
backend_param_style: connection parameters
version_added: "4.0"
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# MSSQL

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the MSSQL backend:
3 changes: 3 additions & 0 deletions docs/backends/mysql.md
@@ -4,12 +4,15 @@ backend_url: https://www.mysql.com/
backend_module: mysql
backend_param_style: a SQLAlchemy-style URI
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# MySQL

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the MySQL backend:
21 changes: 21 additions & 0 deletions docs/backends/oracle.md
@@ -7,6 +7,7 @@ backend_connection_example: ibis.connect("oracle://user:pass@host:port/service_n
is_experimental: true
version_added: "6.0"
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# Oracle
@@ -17,6 +18,8 @@ exports: ["PyArrow", "Parquet", "CSV", "Pandas"]

The Oracle backend is experimental and is subject to backwards incompatible changes.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Oracle backend:
@@ -71,3 +74,21 @@ passing a properly formatted Oracle connection URL to `ibis.connect`
```python
con = ibis.connect(f"oracle://{user}:{password}@{host}:{port}/{database}")
```

## Connecting to older Oracle databases

`ibis` uses the `python-oracledb` "thin client" to connect to Oracle databases.
Because early versions of Oracle did not perform case-sensitive checks in
passwords, some DBAs disable case sensitivity to avoid requiring users to update
their passwords. If case-sensitive passwords are disabled, then Ibis will not be
able to connect to the database.

To check whether case sensitivity is enforced, you can run

```sql
show parameter sec_case_sensitive_logon;
```
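
The same check can be run as a plain query from any SQL client (a sketch, not part of the original docs; it requires read access to `v$parameter`):

```sql
SELECT value FROM v$parameter WHERE name = 'sec_case_sensitive_logon';
```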

If the returned value is `FALSE`, then Ibis will _not_ connect.

For more information, see this [issue](https://github.com/oracle/python-oracledb/issues/26).
1 change: 1 addition & 0 deletions docs/backends/pandas.md
@@ -6,6 +6,7 @@ intro: Ibis's pandas backend is available in core Ibis.
backend_param_style: a dictionary of paths
do_connect_base: BasePandasBackend
is_core: true
memtable_impl: native
---

{% include 'backends/template.md' %}
3 changes: 3 additions & 0 deletions docs/backends/polars.md
@@ -6,6 +6,7 @@ is_experimental: true
version_added: "4.0"
exports: ["PyArrow", "Parquet", "Delta Lake", "CSV", "Pandas"]
imports: ["CSV", "Parquet", "Delta Lake", "Pandas"]
memtable_impl: native
---

# Polars
@@ -16,6 +17,8 @@ imports: ["CSV", "Parquet", "Delta Lake", "Pandas"]

The Polars backend is experimental and is subject to backwards incompatible changes.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Polars backend:
3 changes: 3 additions & 0 deletions docs/backends/postgresql.md
@@ -4,12 +4,15 @@ backend_url: https://www.postgresql.org/
backend_module: postgres
backend_param_style: a SQLAlchemy-style URI
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# PostgreSQL

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Postgres backend:
7 changes: 5 additions & 2 deletions docs/backends/pyspark.md
@@ -3,14 +3,17 @@ backend_name: PySpark
backend_url: https://spark.apache.org/docs/latest/api/python/
backend_module: pyspark
backend_param_style: PySpark things
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
imports: ["CSV", "Parquet"]
exports: ["PyArrow", "Parquet", "Delta Lake", "Pandas"]
imports: ["CSV", "Parquet", "Delta Lake"]
memtable_impl: native
---

# PySpark

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the PySpark backend:
3 changes: 3 additions & 0 deletions docs/backends/snowflake.md
@@ -2,6 +2,7 @@
backend_name: Snowflake
backend_url: https://snowflake.com/
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: native
---

# Snowflake
Expand All @@ -12,6 +13,8 @@ exports: ["PyArrow", "Parquet", "CSV", "Pandas"]

The Snowflake backend is experimental and is subject to backwards incompatible changes.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Snowflake backend:
3 changes: 3 additions & 0 deletions docs/backends/sqlite.md
@@ -3,12 +3,15 @@ backend_name: SQLite
backend_url: https://www.sqlite.org/
backend_module: sqlite
imports: ["CSV", "Parquet", "JSON", "PyArrow", "Pandas", "SQLite", "Postgres"]
memtable_impl: fallback
---

# SQLite

{% include 'backends/badges.md' %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the SQLite backend:
Expand Down
2 changes: 2 additions & 0 deletions docs/backends/template.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

{% if not (development_only | default(False)) %}

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the {{ backend_name }} backend:
3 changes: 3 additions & 0 deletions docs/backends/trino.md
@@ -3,6 +3,7 @@ backend_name: Trino
backend_url: https://trino.io
backend_module: trino
exports: ["PyArrow", "Parquet", "CSV", "Pandas"]
memtable_impl: fallback
---

# Trino
@@ -13,6 +14,8 @@ exports: ["PyArrow", "Parquet", "CSV", "Pandas"]

The Trino backend is experimental and is subject to backwards incompatible changes.

{% include 'backends/memtable-template.md' %}

## Install

Install `ibis` and dependencies for the Trino backend:
1,112 changes: 1,112 additions & 0 deletions docs/blog/rendered/ibis-version-6.0.0-release.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/blog/rendered/model.py
@@ -2,6 +2,7 @@
Adapted from https://gist.github.com/pdet/e8d38734232c08e6c15aba79b4eb8368#file-taxi_prediction_example-py.
"""
from __future__ import annotations

import pyarrow as pa
import torch
2 changes: 1 addition & 1 deletion docs/community/contribute/01_environment.md
@@ -100,7 +100,7 @@ hide:

```sh
pip install 'poetry>=1.3,<1.4'
pip install -r requirements.txt
pip install -r requirements-dev.txt
```

1. Install ibis in development mode
36 changes: 18 additions & 18 deletions docs/concept/backends.md
@@ -1,6 +1,6 @@
# Backends

A backend is where execution of Ibis table expressions occur after compiling into some intermediate representation. A backend is often a database and the intermediate representation often SQL, but several types of backends exist. See the [backends page](/backends/) for specific documentation on each.
A backend is where execution of Ibis table expressions occurs after compiling into some intermediate representation. A backend is often a database, and the intermediate representation is often SQL, but several types of backends exist. See the [backends page](../backends/index.md) for specific documentation on each.

## String generating backends

@@ -9,9 +9,9 @@ The first category of backends translate Ibis table expressions into query stri
The compiler turns each table expression into a query string and passes that query
to the database through a driver API for execution.
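
As a quick illustration (the table name and schema below are made up), `ibis.to_sql` renders the string such a backend would receive:

```python
import ibis

t = ibis.table({"a": "int64"}, name="t")  # hypothetical table
print(ibis.to_sql(t.a.sum(), dialect="duckdb"))  # a SQL string
```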

- [Apache Impala](/backends/impala/)
- [ClickHouse](/backends/clickhouse/)
- [Google BigQuery](/backends/bigquery/)
- [Apache Impala](../backends/impala.md)
- [ClickHouse](../backends/clickhouse.md)
- [Google BigQuery](../backends/bigquery.md)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)

## Expression generating backends
@@ -23,24 +23,24 @@ Instead of generating a query string for each table expression, these backends
produce another kind of table expression object and typically have high-level APIs
for execution.

- [Apache Arrow Datafusion](/backends/datafusion/)
- [Apache Druid](/backends/druid/)
- [Apache PySpark](/backends/pyspark/)
- [Dask](/backends/dask/)
- [DuckDB](/backends/duckdb/)
- [MS SQL Server](/backends/mssql/)
- [MySQL](/backends/mysql/)
- [Oracle](/backends/oracle/)
- [Polars](/backends/polars/)
- [PostgreSQL](/backends/postgresql/)
- [SQLite](/backends/sqlite/)
- [Snowflake](/backends/snowflake/)
- [Trino](/backends/trino/)
- [Apache Arrow Datafusion](../backends/datafusion.md)
- [Apache Druid](../backends/druid.md)
- [Apache PySpark](../backends/pyspark.md)
- [Dask](../backends/dask.md)
- [DuckDB](../backends/duckdb.md)
- [MS SQL Server](../backends/mssql.md)
- [MySQL](../backends/mysql.md)
- [Oracle](../backends/oracle.md)
- [Polars](../backends/polars.md)
- [PostgreSQL](../backends/postgresql.md)
- [SQLite](../backends/sqlite.md)
- [Snowflake](../backends/snowflake.md)
- [Trino](../backends/trino.md)
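
In the same spirit, a minimal sketch (backend choice and data are made up) of inspecting the compiled object before anything executes:

```python
import ibis

con = ibis.duckdb.connect()  # in-memory DuckDB, purely for illustration
t = con.create_table("t", ibis.memtable({"a": [1, 2, 3]}))
print(con.compile(t.a.sum()))  # a compiled query, not yet executed
```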

## Direct execution backends

The pandas backend is the only direct execution backend. A full description
of the implementation can be found in the module docstring of the pandas
backend located in `ibis/backends/pandas/core.py`.

- [pandas](/backends/pandas/)
- [pandas](../backends/pandas.md)
12 changes: 6 additions & 6 deletions docs/concept/why_ibis.md
@@ -1,11 +1,11 @@
# Why Ibis?

Ibis is a dataframe interface to execution engines with support for [15+
backends](/backends/). Ibis doesn't replace your existing execution
Ibis is a dataframe interface to execution engines with support for [17+
backends](../backends/index.md). Ibis doesn't replace your existing execution
engine; it _extends_ it with powerful abstractions and intuitive syntax.

Ibis works with what you already have, so why not check out our [getting started
guide](/tutorial/getting_started/)?
guide](../tutorial/getting_started.md)?

# How does Ibis compare to...

@@ -34,7 +34,7 @@ we can summarize some key points:
- Ibis lets you use SQL when you want to (for our SQL-based backends)

If your SQL-fu is strong, we might not convince you to leave it all behind, but
check out our [Ibis for SQL users guide](/tutorial/ibis-for-sql-users/)
check out our [Ibis for SQL users guide](../tutorial/ibis-for-sql-users.ipynb)
and see if it whets your appetite.

## `pandas`
@@ -53,7 +53,7 @@ Ibis to quickly and easily switch to a different backend that supports
out-of-core execution.

Ibis syntax is similar to `pandas` syntax, but it isn't a drop-in replacement.
Check out our [Ibis for pandas Users guide](/tutorial/ibis-for-pandas-users/) if
Check out our [Ibis for pandas Users guide](../tutorial/ibis-for-pandas-users.ipynb) if
you'd like to give Ibis a try!

## `sqlalchemy` and `sqlglot`
@@ -72,4 +72,4 @@ using SQLGlot.

If you are looking for a dataframe API to construct and execute your analytics
queries against a large collection of powerful execution engines, then allow us
point you at the [Ibis Getting Started guide](/tutorial/getting_started/).
to point you at the [Ibis Getting Started guide](../tutorial/getting_started.md).
2 changes: 2 additions & 0 deletions docs/example_streamlit_app/example_streamlit_app.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import requests
import streamlit as st

2 changes: 1 addition & 1 deletion docs/how_to/streamlit.md
@@ -4,7 +4,7 @@ Streamlit + Ibis = :heart:

Ibis supports the [streamlit `experimental_connection` interface](https://blog.streamlit.io/introducing-st-experimental_connection/), making it easier than ever to combine the powers of both tools!

Check out the example application below that shows the top N ingredients from a corpus of recipes using [the ClickHouse backend](/backends/clickhouse/)!
Check out the example application below that shows the top N ingredients from a corpus of recipes using [the ClickHouse backend](../backends/clickhouse.md)!

<div class="streamlit-app">
<iframe class="streamlit-app-inner" src="https://ibis-example.streamlit.app/?embedded=true"></iframe>
6 changes: 3 additions & 3 deletions docs/index.md
@@ -12,8 +12,8 @@
---

<div class="install-tutorial-button" markdown>
[Getting Started](/tutorial/getting_started/){ .md-button .md-button--primary }
[Install](/install/){ .md-button }
[Getting Started](tutorial/getting_started.md){ .md-button .md-button--primary }
[Install](install.md){ .md-button }
</div>

---
@@ -49,7 +49,7 @@ ORDER BY t1.year DESC

## Features

- **Consistent syntax across backends**: Enjoy a uniform Python API, whether using [DuckDB](https://duckdb.org), [PostgreSQL](https://postgresql.org), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), [BigQuery](https://cloud.google.com/bigquery/), or [any other supported backend](/backends/).
- **Consistent syntax across backends**: Enjoy a uniform Python API, whether using [DuckDB](https://duckdb.org), [PostgreSQL](https://postgresql.org), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), [BigQuery](https://cloud.google.com/bigquery/), or [any other supported backend](backends/index.md).
- **Performant**: Execute queries as fast as the database engine itself.
- **Interactive**: Explore data in a notebook or REPL.
- **Extensible**: Add new operations, optimizations, and custom APIs.
49 changes: 38 additions & 11 deletions docs/install.md
@@ -7,38 +7,65 @@ hide:

# Install Ibis

=== "pip"
## Using `pip`

We suggest starting with the DuckDB backend. It's performant and fully
featured.

```sh
pip install 'ibis-framework[duckdb]'
```

If you would like to use a different backend, all of the available options are
listed below.

{% for backend in sorted(ibis.backends.base._get_backend_names()) %}
{% if backend != "spark" %}
=== "{{ backend }}"

```sh
pip install 'ibis-framework[duckdb]' # (1) (2)
pip install 'ibis-framework[{{ backend }}]'
```

1. We suggest starting with the DuckDB backend. It's performant and fully featured. If you would like to use a different backend, all of the available options are listed below.
{% endif %}
{% endfor %}

Note that the `ibis-framework` package is _not_ the same as the `ibis` package
on PyPI. These two libraries cannot coexist in the same Python environment, as
they are both imported with the `ibis` module name.

2. Note that the `ibis-framework` package is *not* the same as the `ibis` package in PyPI. These two libraries cannot coexist in the same Python environment, as they are both imported with the `ibis` module name.
## Using `conda` or `mamba`

<!-- prettier-ignore-start -->

{% for mgr in ["conda", "mamba"] %}
=== "{{ mgr }}"

The base `ibis-framework` package includes support for the `duckdb`
backend. This is our recommended backend for local execution.

```sh
{{ mgr }} install -c conda-forge ibis-framework
```

{% endfor %}

## Install backend dependencies
If you would like to use a different backend, all of the available options
are listed below.

{% for backend in sorted(ibis.backends.base._get_backend_names()) %}
{% if backend != "spark" %}
=== "{{ backend }}"
=== "{{ backend }}"

```sh
pip install 'ibis-framework[{{ backend }}]'
```
```sh
{{ mgr }} install -c conda-forge ibis-{{ backend }}
```

{% endif %}
{% endfor %}

{% endfor %}

<!-- prettier-ignore-end -->

---

After you've successfully installed Ibis, try going through the tutorial:
1 change: 1 addition & 0 deletions docs/reference/expressions/top_level.md
@@ -13,6 +13,7 @@ These methods and objects are available directly in the `ibis` module.
::: ibis.deferred
::: ibis.desc
::: ibis.difference
::: ibis.dtype
::: ibis.get_backend
::: ibis.expr.types.Value.greatest
::: ibis.ifelse
104 changes: 103 additions & 1 deletion docs/release_notes.md
@@ -1,6 +1,108 @@
Release Notes
---

## [6.1.0](https://github.com/ibis-project/ibis/compare/6.0.0...6.1.0) (2023-08-03)


### Features

* **api:** add `ibis.dtype` top-level API ([867e5f1](https://github.com/ibis-project/ibis/commit/867e5f1e3dc94fc40d075da34e8db63f690bb048))
* **api:** add `table.nunique()` for counting unique table rows ([adcd762](https://github.com/ibis-project/ibis/commit/adcd7628a7c2637e0bde1d924d11f7da5bd1659d))
* **api:** allow mixing literals and columns in `ibis.array` ([3355dd8](https://github.com/ibis-project/ibis/commit/3355dd88268c27642ab6dba7b1091caddd542130))
* **api:** improve efficiency of `__dataframe__` protocol ([15e27da](https://github.com/ibis-project/ibis/commit/15e27da1d0020a58c222ccab95b97d39a2c3e3c7))
* **api:** support boolean literals in join API ([c56376f](https://github.com/ibis-project/ibis/commit/c56376f6b209a1961ab50a7a3de24e9b20475196))
* **arrays:** add `concat` method equivalent to `__add__`/`__radd__` ([0ed0ab1](https://github.com/ibis-project/ibis/commit/0ed0ab14f93621c5d647338c5b2cb882abe36a85))
* **arrays:** add `repeat` method equivalent to `__mul__`/`__rmul__` ([b457c7b](https://github.com/ibis-project/ibis/commit/b457c7b1e02bb7d0e20accc1e145f9f8af3d09cf))
* **backends:** add `current_schema` API ([955a9d0](https://github.com/ibis-project/ibis/commit/955a9d0e55b19db0edec5fb891c22e964b15ff78))
* **bigquery:** fill out `CREATE TABLE` DDL options including support for `overwrite` ([5dac7ec](https://github.com/ibis-project/ibis/commit/5dac7eccd43fb8fbeb146340864837b5a1084a65))
* **datafusion:** add count_distinct, median, approx_median, stddev and var aggregations ([45089c4](https://github.com/ibis-project/ibis/commit/45089c4c9137971a3bc3b1743ca61493b93301a2))
* **datafusion:** add extract url fields functions ([4f5ea98](https://github.com/ibis-project/ibis/commit/4f5ea9896d8a9ca91f631ddd37c1eb1d775e6770))
* **datafusion:** add functions sign, power, nullifzero, log ([ef72e40](https://github.com/ibis-project/ibis/commit/ef72e403200ff0ef6eb0c87ab1d5e8484de4d636))
* **datafusion:** add RegexSearch, StringContains and StringJoin ([4edaab5](https://github.com/ibis-project/ibis/commit/4edaab587d7b5fc6d64f72bbeb121c0cdccb45ce))
* **datafusion:** implement in-memory table ([d4ec5c2](https://github.com/ibis-project/ibis/commit/d4ec5c244e1812041794f9c5abc04421c23ecb9e))
* **flink:** add tests and translation rules for additional operators ([fc2aa5d](https://github.com/ibis-project/ibis/commit/fc2aa5d7bcc63a82dd0069e3daa81b5146e18902))
* **flink:** implement translation rules and tests for over aggregation in Flink backend ([e173cd7](https://github.com/ibis-project/ibis/commit/e173cd799b2fb5a2a19e81b3fa67721d28ca11a3))
* **flink:** implement translation rules for literal expressions in flink compiler ([a8f4880](https://github.com/ibis-project/ibis/commit/a8f4880b44f7c58a41a23a00b8171c54c392f0a1))
* improved error messages when missing backend dependencies ([2fe851b](https://github.com/ibis-project/ibis/commit/2fe851b9dde5e30dc5597d6c64c4eed4a1186aa1))
* make output of `to_sql` a proper `str` subclass ([084bdb9](https://github.com/ibis-project/ibis/commit/084bdb9bcaf79dffa8dfafe950194606cd5f2ffb))
* **pandas:** add ExtractURLField functions ([e369333](https://github.com/ibis-project/ibis/commit/e36933326a8206baf15bf38a11eb2b21655f32ee))
* **polars:** implement `ops.SelfReference` ([983e393](https://github.com/ibis-project/ibis/commit/983e393ad9d2a3cf6d45a21bb664173fbda0784b))
* **pyspark:** read/write delta tables ([d403187](https://github.com/ibis-project/ibis/commit/d40318777f8ecd2d75b745e5a3895bac439caf77))
* refactor ddl for create_database and add create_schema where relevant ([d7a857c](https://github.com/ibis-project/ibis/commit/d7a857ca3eb4359cec800d45d2f47d6685c4ebe8))
* **sqlite:** add scalar python udf support to sqlite ([92f29e6](https://github.com/ibis-project/ibis/commit/92f29e6b820f207b16a306e5a39f91d396fe818a))
* **sqlite:** implement extract url field functions ([cb1956f](https://github.com/ibis-project/ibis/commit/cb1956ff25fb0e9d6d8a0c3dba9ddf512a6c42aa))
* **trino:** implement support for `.sql` table expression method ([479bc60](https://github.com/ibis-project/ibis/commit/479bc602b6c302753d34fa1e6c9ee544b591b61f))
* **trino:** support table properties when creating a table ([b9d65ef](https://github.com/ibis-project/ibis/commit/b9d65efebce5f5b750a447b3cdf00f505eec0111))


### Bug Fixes

* **api:** allow scalar window order keys ([3d3f4f3](https://github.com/ibis-project/ibis/commit/3d3f4f344880c2401f970373889348f6f5ce4150))
* **backends:** make `current_database` implementation and API consistent across all backends ([eeeeee0](https://github.com/ibis-project/ibis/commit/eeeeee057ba4e94a35721747493c18281c169b30))
* **bigquery:** respect the fully qualified table name at the init ([a25f460](https://github.com/ibis-project/ibis/commit/a25f460b06a2c9f47c0ad3ea2fb9e894c5ad3f4e))
* **clickhouse:** check dispatching instead of membership in the registry for `has_operation` ([acb7f3f](https://github.com/ibis-project/ibis/commit/acb7f3ff2d3b7f224307548698e003fac85bdff4))
* **datafusion:** always quote column names to prevent datafusion from normalizing case ([310db2b](https://github.com/ibis-project/ibis/commit/310db2bf9416b73b1e741570350034bdd9d4337f))
* **deps:** update dependency datafusion to v27 ([3a311cd](https://github.com/ibis-project/ibis/commit/3a311cd9d50fc12f527e7ba244169542a2796227))
* **druid:** handle conversion issues from string, binary, and timestamp ([b632063](https://github.com/ibis-project/ibis/commit/b632063dc9b7fcc722dcb4e5bb98b10aa4f7d54e))
* **duckdb:** avoid double escaping backslashes for bind parameters ([8436f57](https://github.com/ibis-project/ibis/commit/8436f573b5819001595a5a8fe6803fbfaf282b2f))
* **duckdb:** cast read_only to string for connection ([27e17d6](https://github.com/ibis-project/ibis/commit/27e17d6200740615b583dca267eb1517abf7ac41))
* **duckdb:** deduplicate results from `list_schemas()` ([172520e](https://github.com/ibis-project/ibis/commit/172520e52b946a3688af0bcfe93d621c160781e9))
* **duckdb:** ensure that current_database returns the correct value ([2039b1e](https://github.com/ibis-project/ibis/commit/2039b1e7cce182dbbc901480037a58488b3730cb))
* **duckdb:** handle conversion from duckdb_engine unsigned int aliases ([e6fd0cc](https://github.com/ibis-project/ibis/commit/e6fd0cc2d726668d4e645274b7637df3f886d08e))
* **duckdb:** map hugeint to decimal to avoid information loss ([4fe91d4](https://github.com/ibis-project/ibis/commit/4fe91d49f00ed76e9b677e5dfbdb1dbeda96a8f7))
* **duckdb:** run pre-execute-hooks in duckdb before file export ([5bdaa1d](https://github.com/ibis-project/ibis/commit/5bdaa1d6339c221c76016a6f04ded1f20b61e017))
* **duckdb:** use regexp_matches to ensure that matching checks containment instead of a full match ([0a0cda6](https://github.com/ibis-project/ibis/commit/0a0cda6f05c624cf4af58ed08209b79c1a7ca877))
* **examples:** remove example datasets that are incompatible with case-insensitive file systems ([4048826](https://github.com/ibis-project/ibis/commit/4048826714efdd41cc233a79603756ab66c813b1))
* **exprs:** ensure that left_semi and semi are equivalent ([bbc1eb7](https://github.com/ibis-project/ibis/commit/bbc1eb7ac2573aa0e762caae1cdc87137da2ac8b))
* forward arguments through `__dataframe__` protocol ([50f3be9](https://github.com/ibis-project/ibis/commit/50f3be972b794e767b00fb25b35b6e5e18f4d4c0))
* **ir:** change "it not a" to "is not a" in errors ([d0d463f](https://github.com/ibis-project/ibis/commit/d0d463febd56d966917788ab2e480d8e11608ad4))
* **memtable:** implement support for translation of empty memtable ([05b02da](https://github.com/ibis-project/ibis/commit/05b02da6f856e2939a3f65437a94306e60015330))
* **mysql:** fix UUID type reflection for sqlalchemy 2.0.18 ([12d4039](https://github.com/ibis-project/ibis/commit/12d4039b619fdbdb30e13dc3724ee8b3b1a6f1cf))
* **mysql:** pass-through kwargs to connect_args ([e3f3e2d](https://github.com/ibis-project/ibis/commit/e3f3e2d9d693aaa224d1baf96bfa706a7f447d09))
* **ops:** ensure that name attribute is always valid for `ops.SelfReference` ([9068aca](https://github.com/ibis-project/ibis/commit/9068aca381169e865b89945e802d0020b9c1e2e3))
* **polars:** ensure that `pivot_longer` works with more than one column ([822c912](https://github.com/ibis-project/ibis/commit/822c912b919a7049a32a3f01cae0abdb2433cb1f))
* **polars:** fix collect implementation ([c1182be](https://github.com/ibis-project/ibis/commit/c1182be6d3ee2bba866a39f0894bd3f2cd0d64ea))
* **postgres:** by default use domain socket ([e44fdfb](https://github.com/ibis-project/ibis/commit/e44fdfb46bea79aed126e24f034bcc00e6adae40))
* **pyspark:** make `has_operation` method a `[@classmethod](https://github.com/classmethod)` ([c1b7dbc](https://github.com/ibis-project/ibis/commit/c1b7dbc02d2ed215870138405aa39d20da90f00d))
* **release:** use @google/semantic-release-replace-plugin@1.2.0 to avoid module loading bug ([673aab3](https://github.com/ibis-project/ibis/commit/673aab3d01783c771f74a8e4650d94acf93baf56))
* **snowflake:** fix broken unnest functionality ([207587c](https://github.com/ibis-project/ibis/commit/207587cc778433ab03cb490e08d32243ef4842a4))
* **snowflake:** reset the schema and database to the original schema after creating them ([54ce26a](https://github.com/ibis-project/ibis/commit/54ce26a4b00d06b1bb6c8fd38dc9edaa1672aac0))
* **snowflake:** reset to original schema when resetting the database ([32ff832](https://github.com/ibis-project/ibis/commit/32ff8329e8905727772956875776f264e8fbe1d6))
* **snowflake:** use `regexp_instr != 0` instead of `REGEXP` keyword ([06e2be4](https://github.com/ibis-project/ibis/commit/06e2be4e2019b6fa714e1fcb34485860ef1ede79))
* **sqlalchemy:** add support for sqlalchemy string subclassed types ([8b33b35](https://github.com/ibis-project/ibis/commit/8b33b352d8e88cfd6aaa6c318e5eb41d5482b362))
* **sql:** handle parsing aliases ([3645cf4](https://github.com/ibis-project/ibis/commit/3645cf4119620e8b01e57c7f9b5965400476f7d1))
* **trino:** handle all remaining common datatype parsing ([b3778c7](https://github.com/ibis-project/ibis/commit/b3778c781c78d5cdaf66a2a7286ef9e57ec519db))
* **trino:** remove filter index warning in Trino dialect ([a2ae7ae](https://github.com/ibis-project/ibis/commit/a2ae7ae328d8f1e468686ba2505e101b42a6df6c))


### Documentation

* add conda/mamba install instructions for specific backends ([c643fca](https://github.com/ibis-project/ibis/commit/c643fcaf57a46a36b5d2bdb24088d18bd386a1bf))
* add docstrings to `DataType.is_*` methods ([ed40fdb](https://github.com/ibis-project/ibis/commit/ed40fdb9e961cdf70784aa665736f203661767c1))
* **backend-matrix:** add ability to select a specific subset of backends ([f663066](https://github.com/ibis-project/ibis/commit/f6630664ef941709bd4b52bc4050fc8f5c70ff42))
* **backends:** document memtable support and performance for each backend ([b321733](https://github.com/ibis-project/ibis/commit/b321733f4ae27d938f4a2e4fc30a2d38a57a2230))
* **blog:** v6.0.0 release blog ([21fc5da](https://github.com/ibis-project/ibis/commit/21fc5daa86b8738469973b027c9ded8228fc2275))
* document versioning policy ([242ea15](https://github.com/ibis-project/ibis/commit/242ea1527ee5bf8ca44b8adcac9e1f26adb7c51a))
* **dot-sql:** add examples of mixing ibis expressions and SQL strings ([5abd30e](https://github.com/ibis-project/ibis/commit/5abd30ebc92b70bf58afedba7f7b071b2840ef91))
* **dplyr:** small fixes to the dplyr getting started guide ([4b57f7f](https://github.com/ibis-project/ibis/commit/4b57f7fc4fdd3e839cdc2e1014955149bd4f14ca))
* expand docstring for `dtype` function ([39b7a24](https://github.com/ibis-project/ibis/commit/39b7a241cb9d6bf5d6f2b0235262726fc5fdb398))
* fix functions names in examples of extract url fields ([872445e](https://github.com/ibis-project/ibis/commit/872445e4b14a2035721287dbe1f7e77b5c25382e))
* fix heading in 6.0.0 blog ([0ad3ce2](https://github.com/ibis-project/ibis/commit/0ad3ce258af4258dbb2f29f3c48895980aba314a))
* **oracle:** add note about old password checks in oracle ([470b90b](https://github.com/ibis-project/ibis/commit/470b90baac1ce395052bf975b23b030427326a5d))
* **postgres:** fix postgres memtable docs ([7423eb9](https://github.com/ibis-project/ibis/commit/7423eb98ec03accf43471d07f4002aca400d2840))
* **release-notes:** fix typo ([a319e3a](https://github.com/ibis-project/ibis/commit/a319e3a7ab61dc9dc6299984b1d1f415d4ed5287))
* **social:** add social media preview cards ([e98a0a6](https://github.com/ibis-project/ibis/commit/e98a0a6e12582bcad672b975bd97e8fe15538890))
* update imports/exports for pyspark backend ([16d73c4](https://github.com/ibis-project/ibis/commit/16d73c4dab95fe4ce005140516a81341747df801))


### Refactors

* **pyarrow:** remove unnecessary calls to combine_chunks ([c026d2d](https://github.com/ibis-project/ibis/commit/c026d2d0768f94e3bc2afff24ed6fe0d6f54df75))
* **pyarrow:** use `schema.empty_table()` instead of manually constructing empty tables ([c099302](https://github.com/ibis-project/ibis/commit/c0993020ae2d141c58a1f33f94378c272f95b421))
* **result-handling:** remove `result_handler` in favor of expression specific methods ([3dc7143](https://github.com/ibis-project/ibis/commit/3dc7143402d9a31c2655d2d09f637d75598afefb))
* **snowflake:** enable multiple statements and clean up duplicated parameter setting code ([75824a6](https://github.com/ibis-project/ibis/commit/75824a6d302afd53988bfb4684e18d59354cd04f))
* **tests:** clean up backend test setup to make non-data-loading steps atomic ([16b4632](https://github.com/ibis-project/ibis/commit/16b4632ba73a2c88be5a31662aba9f094602354e))

## [6.0.0](https://github.com/ibis-project/ibis/compare/5.1.0...6.0.0) (2023-07-05)


@@ -13,7 +115,7 @@ Release Notes
* **snowflake/postgres:** Postgres UDFs now use the new `@udf.scalar.python` API. This should be a low-effort replacement for the existing API.
* **ir:** `ops.NullLiteral` is removed
* **datatypes:** `dt.Interval` has no longer a default unit, `dt.interval` is removed
* **deps:** `snowflake-connector-python`'s lower bound was increased to 3.0.2, the minimum version needed to avoid a high-severity vulernability. Please upgrade `snowflake-connector-python` to at least version 3.0.2.
* **deps:** `snowflake-connector-python`'s lower bound was increased to 3.0.2, the minimum version needed to avoid a high-severity vulnerability. Please upgrade `snowflake-connector-python` to at least version 3.0.2.
* **api:** `Table.difference()`, `Table.intersection()`, and `Table.union()` now require at least one argument.
* **postgres:** Ibis no longer automatically defines `first`/`last` reductions on connection to the postgres backend. Use DDL shown in https://wiki.postgresql.org/wiki/First/last_(aggregate) or one of the `pgxn` implementations instead.
* **api:** `ibis.examples.<example-name>.fetch` no longer forwards arbitrary keyword arguments to `read_csv`/`read_parquet`.
13 changes: 0 additions & 13 deletions docs/supported_python_versions.md

This file was deleted.

4 changes: 2 additions & 2 deletions docs/tutorial/getting_started.md
@@ -5,7 +5,7 @@ This is a quick tour of some basic commands and usage patterns, just to get your
## Install `ibis`

This quick-start guide uses the DuckDB backend. You can check out the [Install
page](/install/) for information on how to install other backends.
page](../install.md) for information on how to install other backends.

```shell title="Install Ibis using pip"
$ pip install 'ibis-framework[duckdb]'
@@ -372,7 +372,7 @@ You can also use a `selector` alongside a column name.
└───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴───────┘
```

You can read more about [`selectors`](/reference/selectors/) in the docs!
You can read more about [`selectors`](../reference/selectors.md) in the docs!

### order_by

19 changes: 13 additions & 6 deletions docs/tutorial/ibis-for-dplyr-users.ipynb

Some generated files are not rendered by default.

8 changes: 4 additions & 4 deletions docs/tutorial/index.md
@@ -2,8 +2,8 @@

Welcome to the Ibis tutorials!

- **Learning Ibis for the first time?:** Check out the [Ibis getting started tutorial](./getting_started/)!
- **Coming from SQL?**: Take a look at [Ibis for SQL users](./ibis-for-sql-users/)!
- **Coming from pandas?**: Check out [Ibis for pandas users](./ibis-for-pandas-users/)!
- **Coming from R?**: See [Ibis for dplyr users](./ibis-for-dplyr-users/)!
- **Learning Ibis for the first time?**: Check out the [Ibis getting started tutorial](./getting_started.md)!
- **Coming from SQL?**: Take a look at [Ibis for SQL users](./ibis-for-sql-users.ipynb)!
- **Coming from pandas?**: Check out [Ibis for pandas users](./ibis-for-pandas-users.ipynb)!
- **Coming from R?**: See [Ibis for dplyr users](./ibis-for-dplyr-users.ipynb)!
- **Want to see some more examples?**: We've got [a repository of examples](https://github.com/ibis-project/ibis-examples) for that!
29 changes: 29 additions & 0 deletions docs/versioning.md
@@ -0,0 +1,29 @@
# Versioning Policy

Ibis follows a [Semantic Versioning](https://semver.org/) scheme
(`MAJOR.MINOR.PATCH`, like `6.1.0`).

- An increase in the `MAJOR` version number will happen when a release contains
breaking changes in the public API. This includes anything documented in the
[reference documentation](./reference/expressions/index.md), excluding any
features explicitly marked as "experimental". Features not part of the public
API (e.g., anything in `ibis.expr.operations`) may change in breaking ways at
any time.

- An increase in the `MINOR` or `PATCH` version number indicates changes to
public APIs that should remain compatible with previous Ibis versions with
the same `MAJOR` version number.

## Supported Python Versions

Ibis follows [NEP29](https://numpy.org/neps/nep-0029-deprecation_policy.html)
with respect to supported Python versions.

This policy has been in place [since Ibis version 3.0.0](https://github.com/ibis-project/ibis/blob/5015677d78909473014a61725d371b4bf772cdff/docs/blog/Ibis-version-3.0.0-release.md?plain=1#L83).

The [support
table](https://numpy.org/neps/nep-0029-deprecation_policy.html#support-table)
shows the schedule for dropping support for Python versions.

The next major release of Ibis that occurs on or after the NEP29 drop date
removes support for the specified Python version.
40 changes: 31 additions & 9 deletions flake.lock
8 changes: 4 additions & 4 deletions flake.nix
@@ -62,7 +62,8 @@
sqlite-interactive
];
shellHook = ''
ln -sf "${pkgs.ibisTestingData}" "$PWD/ci/ibis-testing-data"
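# remove any stale link first: "ln -sf" would otherwise follow an existing
# symlink to a directory and create the new link inside it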
rm -f "$PWD/ci/ibis-testing-data"
ln -s "${pkgs.ibisTestingData}" "$PWD/ci/ibis-testing-data"
# necessary for mkdocs
export PYTHONPATH=''${PWD}''${PYTHONPATH:+:}''${PYTHONPATH}
@@ -118,15 +119,14 @@
in
rec {
packages = {
inherit (pkgs) ibis38 ibis39 ibis310 ibis311;
inherit (pkgs) ibis39 ibis310 ibis311;

default = pkgs.ibis310;

inherit (pkgs) update-lock-files gen-all-extras gen-examples check-poetry-version;
inherit (pkgs) update-lock-files gen-all-extras gen-examples check-poetry-version check-release-notes-spelling;
};

devShells = rec {
ibis38 = mkDevShell pkgs.ibisDevEnv38;
ibis39 = mkDevShell pkgs.ibisDevEnv39;
ibis310 = mkDevShell pkgs.ibisDevEnv310;
ibis311 = mkDevShell pkgs.ibisDevEnv311;
2 changes: 2 additions & 0 deletions gen_redirects.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import pathlib

import mkdocs_gen_files
12 changes: 10 additions & 2 deletions ibis/__init__.py
@@ -1,7 +1,7 @@
"""Initialize Ibis module."""
from __future__ import annotations

__version__ = "6.0.0"
__version__ = "6.1.0"

from ibis import examples, util
from ibis.backends.base import BaseBackend
@@ -72,7 +72,15 @@ def __getattr__(name: str) -> BaseBackend:
import ibis

(entry_point,) = entry_points
module = entry_point.load()
try:
module = entry_point.load()
except ImportError as exc:
raise ImportError(
f"Failed to import the {name} backend due to missing dependencies.\n\n"
f"You can pip or conda install the {name} backend as follows:\n\n"
f' python -m pip install -U "ibis-framework[{name}]" # pip install\n'
f" conda install -c conda-forge ibis-{name} # or conda install"
) from exc
backend = module.Backend()
# The first time a backend is loaded, we register its options, and we set
# it as an attribute of `ibis`, so `__getattr__` is not called again for it
188 changes: 119 additions & 69 deletions ibis/backends/base/__init__.py
@@ -270,35 +270,23 @@ def to_pyarrow(
"""
pa = self._import_pyarrow()
self._run_pre_execute_hooks(expr)
table_expr = expr.as_table()
arrow_schema = table_expr.schema().to_pyarrow()
try:
# Can't construct an array from record batches
# so construct a one column table (if applicable)
# then return the column _from_ the table
with self.to_pyarrow_batches(
expr, params=params, limit=limit, **kwargs
table_expr, params=params, limit=limit, **kwargs
) as reader:
table = pa.Table.from_batches(reader)
table = (
pa.Table.from_batches(reader)
.rename_columns(table_expr.columns)
.cast(arrow_schema)
)
except pa.lib.ArrowInvalid:
raise
except ValueError:
# The pyarrow batches iterator is empty so pass in an empty
# iterator and a pyarrow schema
schema = expr.as_table().schema()
table = pa.Table.from_batches([], schema=schema.to_pyarrow())

if isinstance(expr, ir.Table):
return table
elif isinstance(expr, ir.Column):
# Column will be a ChunkedArray, `combine_chunks` will
# flatten it
if len(table.columns[0]):
return table.columns[0].combine_chunks()
else:
return pa.array(table.columns[0])
elif isinstance(expr, ir.Scalar):
return table.columns[0][0]
else:
raise ValueError
table = arrow_schema.empty_table()

return expr.__pyarrow_result__(table)

@util.experimental
def to_pyarrow_batches(
@@ -531,6 +519,114 @@ def to_delta(
write_deltalake(path, batch_reader, **kwargs)


class CanListDatabases(abc.ABC):
@abc.abstractmethod
def list_databases(self, like: str | None = None) -> list[str]:
"""List existing databases in the current connection.

Parameters
----------
like
A pattern in Python's regex format to filter returned database
names.

Returns
-------
list[str]
The database names that exist in the current connection, that match
the `like` pattern if provided.
"""

@property
@abc.abstractmethod
def current_database(self) -> str:
"""The current database in use."""


class CanCreateDatabase(CanListDatabases):
@abc.abstractmethod
def create_database(self, name: str, force: bool = False) -> None:
"""Create a new database.

Parameters
----------
name
Name of the new database.
force
If `False`, an exception is raised if the database already exists.
"""

@abc.abstractmethod
def drop_database(self, name: str, force: bool = False) -> None:
"""Drop a database with name `name`.

Parameters
----------
name
Database to drop.
force
If `False`, an exception is raised if the database does not exist.
"""


class CanCreateSchema(abc.ABC):
@abc.abstractmethod
def create_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
"""Create a schema named `name` in `database`.

Parameters
----------
name
Name of the schema to create.
database
Name of the database in which to create the schema. If `None`, the
current database is used.
force
If `False`, an exception is raised if the schema exists.
"""

@abc.abstractmethod
def drop_schema(
self, name: str, database: str | None = None, force: bool = False
) -> None:
"""Drop the schema with `name` in `database`.

Parameters
----------
name
Name of the schema to drop.
database
Name of the database to drop the schema from. If `None`, the
current database is used.
force
If `False`, an exception is raised if the schema does not exist.
"""

@abc.abstractmethod
def list_schemas(self, like: str | None = None) -> list[str]:
"""List existing schemas in the current connection.
Parameters
----------
like
A pattern in Python's regex format to filter returned schema
names.
Returns
-------
list[str]
The schema names that exist in the current connection, that match
the `like` pattern if provided.
"""

@property
@abc.abstractmethod
def current_schema(self) -> str:
"""Return the current schema."""


class BaseBackend(abc.ABC, _FileIOHandler):
"""Base backend class.
@@ -648,36 +744,6 @@ def database(self, name: str | None = None) -> Database:
"""
return Database(name=name or self.current_database, client=self)

@property
@abc.abstractmethod
def current_database(self) -> str | None:
"""Return the name of the current database.
Backends that don't support different databases will return None.
Returns
-------
str | None
Name of the current database.
"""

@abc.abstractmethod
def list_databases(self, like: str | None = None) -> list[str]:
"""List existing databases in the current connection.
Parameters
----------
like
A pattern in Python's regex format to filter returned database
names.
Returns
-------
list[str]
The database names that exist in the current connection, that match
the `like` pattern if provided.
"""

@staticmethod
def _filter_with_like(
values: Iterable[str],
@@ -849,22 +915,6 @@ def decorator(translation_function: Callable) -> None:

return decorator

def create_database(self, name: str, force: bool = False) -> None:
"""Create a new database.
Not all backends implement this method.
Parameters
----------
name
Name of the new database.
force
If `False`, an exception is raised if the database already exists.
"""
raise NotImplementedError(
f'Backend "{self.name}" does not implement "create_database"'
)

@abc.abstractmethod
def create_table(
self,
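
Note: the new `CanListDatabases` / `CanCreateDatabase` / `CanCreateSchema` mixins move these methods off `BaseBackend`, so a backend advertises support by inheriting, and callers can feature-test with `isinstance` instead of catching `NotImplementedError`. A usage sketch (the helper function below is hypothetical, not part of this PR):

    from ibis.backends.base import BaseBackend, CanCreateSchema

    def ensure_schema(con: BaseBackend, name: str) -> None:
        # capability discovery via the new ABC mixins
        if isinstance(con, CanCreateSchema):
            con.create_schema(name, force=True)
        else:
            raise NotImplementedError(f"{con.name} backend cannot create schemas")
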
5 changes: 1 addition & 4 deletions ibis/backends/base/sql/__init__.py
@@ -262,10 +262,7 @@ def execute(
with self._safe_raw_sql(sql, **kwargs) as cursor:
result = self.fetch_from_cursor(cursor, schema)

if hasattr(getattr(query_ast, 'dml', query_ast), 'result_handler'):
result = query_ast.dml.result_handler(result)

return result
return expr.__pandas_result__(result)

def _register_in_memory_table(self, _: ops.InMemoryTable) -> None:
raise NotImplementedError(self.name)
33 changes: 22 additions & 11 deletions ibis/backends/base/sql/alchemy/__init__.py
@@ -20,6 +20,7 @@
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends.base import CanCreateSchema
from ibis.backends.base.sql import BaseSQLBackend
from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported
from ibis.backends.base.sql.alchemy.query_builder import AlchemyCompiler
@@ -94,13 +95,23 @@ def _create_table_as(element, compiler, **kw):
return stmt + f"TABLE {name} AS {compiler.process(element.query, **kw)}"


class AlchemyCanCreateSchema(CanCreateSchema):
def list_schemas(self, like: str | None = None) -> list[str]:
return self._filter_with_like(self.inspector.get_schema_names(), like)


class BaseAlchemyBackend(BaseSQLBackend):
"""Backend class for backends that compile to SQLAlchemy expressions."""

compiler = AlchemyCompiler
supports_temporary_tables = True
_temporary_prefix = "TEMPORARY"

def _scalar_query(self, query):
method = "exec_driver_sql" if isinstance(query, str) else "execute"
with self.begin() as con:
return getattr(con, method)(query).scalar()

def _compile_type(self, dtype) -> str:
dialect = self.con.dialect
return sa.types.to_instance(
@@ -185,10 +196,9 @@ def _to_geodataframe(df, schema):
geom_col = None
for name, dtype in schema.items():
if dtype.is_geospatial():
geom_col = geom_col or name
df[name] = df[name].map(
lambda row: None if row is None else shape.to_shape(row)
)
if not geom_col:
geom_col = name
df[name] = df[name].map(shape.to_shape, na_action="ignore")
if geom_col:
df[geom_col] = gpd.array.GeometryArray(df[geom_col].values)
df = gpd.GeoDataFrame(df, geometry=geom_col)
@@ -446,11 +456,6 @@ def schema(self, name: str) -> sch.Schema:
"""
return self.database().schema(name)

@property
def current_database(self) -> str:
"""The name of the current database this client is connected to."""
return self.database_name

def _log(self, sql):
try:
query_str = str(sql)
@@ -471,6 +476,9 @@ def _get_sqla_table(
warnings.filterwarnings(
"ignore", message="Did not recognize type", category=sa.exc.SAWarning
)
warnings.filterwarnings(
"ignore", message="index key", category=sa.exc.SAWarning
)
table = sa.Table(
name,
meta,
@@ -794,12 +802,15 @@ def _create_temp_view(self, view: sa.Table, definition: sa.sql.Selectable) -> None:
if raw_name not in self._temp_views and raw_name in self.list_tables():
raise ValueError(f"{raw_name} already exists as a table or view")
name = self._quote(raw_name)
self._execute_view_creation(name, definition)
self._temp_views.add(raw_name)
self._register_temp_view_cleanup(name, raw_name)

def _execute_view_creation(self, name, definition):
lines, params = self._get_compiled_statement(definition, name)
with self.begin() as con:
for line in lines:
con.exec_driver_sql(line, parameters=params or ())
self._temp_views.add(raw_name)
self._register_temp_view_cleanup(name, raw_name)

@abc.abstractmethod
def _metadata(self, query: str) -> Iterable[tuple[str, dt.DataType]]:
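
Note: the new `_scalar_query` helper picks the connection method by query type. In SQLAlchemy 1.4+, raw strings go through `exec_driver_sql` (no compilation) while constructs go through `execute`; a minimal illustration against an in-memory SQLite engine (not ibis code):

    import sqlalchemy as sa

    engine = sa.create_engine("sqlite://")
    with engine.begin() as con:
        assert con.exec_driver_sql("SELECT 1").scalar() == 1        # raw DBAPI string
        assert con.execute(sa.select(sa.literal(1))).scalar() == 1  # compiled construct
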
2 changes: 2 additions & 0 deletions ibis/backends/base/sql/alchemy/datatypes.py
@@ -304,6 +304,8 @@ def to_ibis(cls, typ: sat.TypeEngine, nullable: bool = True) -> dt.DataType:
elif isinstance(typ, sa.DateTime):
timezone = "UTC" if typ.timezone else None
return dt.Timestamp(timezone, nullable=nullable)
elif isinstance(typ, sat.String):
return dt.String(nullable=nullable)
elif geospatial_supported and isinstance(typ, ga.types._GISType):
name = typ.geometry_type.upper()
try:
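
Note: the new `sat.String` branch acts as a catch-all: `VARCHAR`, `TEXT`, and most dialect-specific string types subclass it, so anything string-like that earlier branches miss now maps to `dt.String`. A quick check of the type hierarchy this relies on (SQLAlchemy 1.4/2.0):

    import sqlalchemy as sa

    assert isinstance(sa.VARCHAR(42), sa.String)
    assert isinstance(sa.TEXT(), sa.String)
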
14 changes: 14 additions & 0 deletions ibis/backends/base/sql/alchemy/query_builder.py
@@ -121,6 +121,13 @@ def _format_table(self, op):
backend._create_temp_view(view=result, definition=definition)
elif isinstance(ref_op, ops.InMemoryTable):
result = self._format_in_memory_table(op, ref_op, translator)
elif isinstance(ref_op, ops.DummyTable):
result = sa.select(
*(
translator.translate(value).label(name)
for name, value in zip(ref_op.schema.names, ref_op.values)
)
)
else:
# A subquery
if ctx.is_extracted(ref_op):
@@ -152,6 +159,13 @@ def _format_in_memory_table(self, op, ref_op, translator):
*columns,
quote=translator._quote_table_names,
)
elif not op.data:
result = sa.select(
*(
translator.translate(ops.Literal(None, dtype=type_)).label(name)
for name, type_ in op.schema.items()
)
).limit(0)
elif self.context.compiler.support_values_syntax_in_select:
rows = list(ref_op.data.to_frame().itertuples(index=False))
result = sa.values(*columns, name=ref_op.name).data(rows)
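
Note: both new branches lean on `sa.select` of labeled literals rather than a FROM clause. A rough standalone analogue of the generated shapes; the NULL cast below stands in for whatever `translator.translate(ops.Literal(None, dtype=...))` would emit:

    import sqlalchemy as sa

    # DummyTable: a SELECT with no FROM
    print(sa.select(sa.literal(1).label("x")))

    # empty InMemoryTable: typed NULLs, zero rows
    print(sa.select(sa.cast(sa.null(), sa.INTEGER).label("x")).limit(0))
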
39 changes: 39 additions & 0 deletions ibis/backends/base/sql/alchemy/registry.py
@@ -6,6 +6,7 @@
from typing import Any

import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import FunctionElement, GenericFunction

import ibis.common.exceptions as com
@@ -493,13 +494,50 @@ def _count_star(t, op):
return sa.func.count(t.translate(ops.Where(where, 1, None)))


def _count_distinct_star(t, op):
schema = op.arg.schema
cols = [sa.column(col, t.get_sqla_type(typ)) for col, typ in schema.items()]

if t._supports_tuple_syntax:
func = lambda *cols: sa.func.count(sa.distinct(sa.tuple_(*cols)))
else:
func = count_distinct

if op.where is None:
return func(*cols)

if t._has_reduction_filter_syntax:
return func(*cols).filter(t.translate(op.where))

if not t._supports_tuple_syntax and len(cols) > 1:
raise com.UnsupportedOperationError(
f"{t._dialect_name} backend doesn't support `COUNT(DISTINCT ...)` with a "
"filter with more than one column"
)

return sa.func.count(t.translate(ops.Where(op.where, sa.distinct(*cols), None)))


def _extract(fmt: str):
def translator(t, op: ops.Node):
return sa.cast(sa.extract(fmt, t.translate(op.arg)), sa.SMALLINT)

return translator


class count_distinct(FunctionElement):
inherit_cache = True


@compiles(count_distinct)
def compile_count_distinct(element, compiler, **kw):
quote_identifier = compiler.preparer.quote_identifier
clauses = ", ".join(
quote_identifier(compiler.process(clause, **kw)) for clause in element.clauses
)
return f"COUNT(DISTINCT {clauses})"


class array_map(FunctionElement):
pass

@@ -522,6 +560,7 @@ class array_filter(FunctionElement):
ops.NotContains: _contains(lambda left, right: left.notin_(right)),
ops.Count: reduction(sa.func.count),
ops.CountStar: _count_star,
ops.CountDistinctStar: _count_distinct_star,
ops.Sum: reduction(sa.func.sum),
ops.Mean: reduction(sa.func.avg),
ops.Min: reduction(sa.func.min),
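
Note: `count_distinct` above uses SQLAlchemy's custom-construct machinery (`FunctionElement` plus an `@compiles` hook) to emit SQL that `sa.func` can't express directly. A stripped-down version of the same pattern, without the identifier quoting used in this diff:

    import sqlalchemy as sa
    from sqlalchemy.ext.compiler import compiles
    from sqlalchemy.sql.functions import FunctionElement

    class my_count_distinct(FunctionElement):
        inherit_cache = True

    @compiles(my_count_distinct)
    def _compile_my_count_distinct(element, compiler, **kw):
        args = ", ".join(compiler.process(c, **kw) for c in element.clauses)
        return f"COUNT(DISTINCT {args})"

    print(my_count_distinct(sa.column("a"), sa.column("b")))  # COUNT(DISTINCT a, b)
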
1 change: 1 addition & 0 deletions ibis/backends/base/sql/alchemy/translator.py
@@ -47,6 +47,7 @@ class AlchemyExprTranslator(ExprTranslator):

_bool_aggs_need_cast_to_int32 = True
_has_reduction_filter_syntax = False
_supports_tuple_syntax = False
_integer_to_timestamp = staticmethod(sa.func.to_timestamp)
_timestamp_type = sa.TIMESTAMP

3 changes: 0 additions & 3 deletions ibis/backends/base/sql/compiler/query_builder.py
@@ -189,7 +189,6 @@ def __init__(
limit=None,
distinct=False,
indent=2,
result_handler=None,
parent_op=None,
):
self.translator_class = translator_class
Expand All @@ -214,8 +213,6 @@ def __init__(

self.indent = indent

self.result_handler = result_handler

def _translate(self, expr, named=False, permit_subquery=False):
translator = self.translator_class(
expr,
44 changes: 6 additions & 38 deletions ibis/backends/base/sql/compiler/select_builder.py
@@ -4,9 +4,6 @@
from collections.abc import Mapping
from typing import NamedTuple

import toolz

import ibis.common.exceptions as com
import ibis.expr.analysis as an
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
@@ -56,7 +53,7 @@ def to_select(
self.context = context
self.translator_class = translator_class

self.op, self.result_handler = self._adapt_operation(node)
self.op = node.to_expr().as_table().op()
assert isinstance(self.op, ops.Node), type(self.op)

self.table_set = None
@@ -75,38 +72,6 @@ def to_select(

return select_query

@staticmethod
def _adapt_operation(node):
# Non-table expressions need to be adapted to some well-formed table
# expression, along with a way to adapt the results to the desired
# arity (whether array-like or scalar, for example)
#
# Canonical case is scalar values or arrays produced by some reductions
# (simple reductions, or distinct, say)
if isinstance(node, ops.TableNode):
return node, toolz.identity

elif isinstance(node, ops.Value):
if node.output_shape.is_scalar():
if an.is_scalar_reduction(node):
table_expr = an.reduction_to_aggregation(node)
return table_expr.op(), _get_scalar(node.name)
else:
return node, _get_scalar(node.name)
elif node.output_shape.is_columnar():
if isinstance(node, ops.TableColumn):
table_expr = node.table.to_expr()[[node.name]]
result_handler = _get_column(node.name)
else:
table_expr = node.to_expr().as_table()
result_handler = _get_column(node.name)

return table_expr.op(), result_handler
else:
raise com.TranslationError(f"Unexpected shape {node.output_shape}")
else:
raise com.TranslationError(f'Do not know how to execute: {type(node)}')

def _build_result_query(self):
self._collect_elements()
self._analyze_subqueries()
@@ -125,7 +90,6 @@ def _build_result_query(self):
limit=self.limit,
order_by=self.order_by,
distinct=self.distinct,
result_handler=self.result_handler,
parent_op=self.op,
)

@@ -165,7 +129,6 @@ def _collect_elements(self):

if isinstance(self.op, ops.TableNode):
self._collect(self.op, toplevel=True)
assert self.table_set is not None
else:
self.select_set = [self.op]

@@ -320,6 +283,11 @@ def _collect_PhysicalTable(self, op, toplevel=False):
self.select_set = [op]
self.table_set = op

def _collect_DummyTable(self, op, toplevel=False):
if toplevel:
self.select_set = list(op.values)
self.table_set = None

def _collect_SelfReference(self, op, toplevel=False):
if toplevel:
self._collect(op.table, toplevel=toplevel)
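
Note: dropping `_adapt_operation` works because `as_table()` now handles the lifting uniformly: a scalar reduction becomes a one-row aggregation and a column becomes a one-column projection. A small demonstration, assuming a recent ibis:

    import ibis

    t = ibis.table({"a": "int64"}, name="t")
    print(t.a.sum().as_table())  # one-column aggregation table
    print(t.a.as_table())        # one-column projection
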
5 changes: 1 addition & 4 deletions ibis/backends/base/sql/ddl.py
@@ -42,10 +42,7 @@ def format_schema(schema):


def _format_schema_element(name, t):
return '{} {}'.format(
quote_identifier(name, force=True),
type_to_sql_string(t),
)
return f'{quote_identifier(name, force=True)} {type_to_sql_string(t)}'


def _format_partition_kv(k, v, type):
16 changes: 4 additions & 12 deletions ibis/backends/base/sql/registry/string.py
@@ -12,16 +12,12 @@ def substring(translator, op):
# Impala is 1-indexed
if length is None or isinstance(length, ops.Literal):
if lvalue := getattr(length, "value", None):
return 'substr({}, {} + 1, {})'.format(
arg_formatted, start_formatted, lvalue
)
return f'substr({arg_formatted}, {start_formatted} + 1, {lvalue})'
else:
return f'substr({arg_formatted}, {start_formatted} + 1)'
else:
length_formatted = translator.translate(length)
return 'substr({}, {} + 1, {})'.format(
arg_formatted, start_formatted, length_formatted
)
return f'substr({arg_formatted}, {start_formatted} + 1, {length_formatted})'


def string_find(translator, op):
@@ -31,13 +27,9 @@ def string_find(translator, op):
if (start := op.start) is not None:
if not isinstance(start, ops.Literal):
start_fmt = translator.translate(start)
return 'locate({}, {}, {} + 1) - 1'.format(
substr_formatted, arg_formatted, start_fmt
)
return f'locate({substr_formatted}, {arg_formatted}, {start_fmt} + 1) - 1'
elif sval := start.value:
return 'locate({}, {}, {}) - 1'.format(
substr_formatted, arg_formatted, sval + 1
)
return f'locate({substr_formatted}, {arg_formatted}, {sval + 1}) - 1'
else:
raise ValueError(f"invalid `start` value: {sval}")
else:
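
Note: the `+ 1` in these templates bridges ibis's 0-based offsets to Impala's 1-based string functions. The arithmetic, in plain Python:

    s = "hello"
    start, length = 2, 2                    # ibis-style, 0-indexed
    assert s[start:start + length] == "ll"
    # the generated SQL adds 1 because substr()/locate() are 1-indexed:
    #   substr('hello', 2 + 1, 2)  -> 'll'
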
4 changes: 1 addition & 3 deletions ibis/backends/base/sql/registry/window.py
@@ -176,9 +176,7 @@ def formatter(translator, op):

default_formatted = translator.translate(default)

return '{}({}, {}, {})'.format(
name, arg_formatted, offset_formatted, default_formatted
)
return f'{name}({arg_formatted}, {offset_formatted}, {default_formatted})'
elif offset is not None:
offset_formatted = translator.translate(offset)
return f'{name}({arg_formatted}, {offset_formatted})'
204 changes: 164 additions & 40 deletions ibis/backends/bigquery/__init__.py
@@ -3,6 +3,7 @@
from __future__ import annotations

import contextlib
import warnings
from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping
from urllib.parse import parse_qs, urlparse

@@ -17,7 +18,7 @@
import ibis.common.exceptions as com
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.backends.base import Database
from ibis.backends.base import CanCreateSchema, CanListDatabases, Database
from ibis.backends.base.sql import BaseSQLBackend
from ibis.backends.bigquery.client import (
BigQueryCursor,
Expand All @@ -27,7 +28,7 @@
schema_from_bigquery_table,
)
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.backends.bigquery.datatypes import BigQuerySchema
from ibis.backends.bigquery.datatypes import BigQuerySchema, BigQueryType
from ibis.formats.pandas import PandasData

with contextlib.suppress(ImportError):
@@ -71,7 +72,7 @@ def _create_client_info_gapic(application_name):
return ClientInfo(user_agent=_create_user_agent(application_name))


class Backend(BaseSQLBackend):
class Backend(BaseSQLBackend, CanCreateSchema, CanListDatabases):
name = "bigquery"
compiler = BigQueryCompiler
supports_in_memory_tables = False
@@ -231,11 +232,50 @@ def project_id(self):
def dataset_id(self):
return self.dataset

def create_schema(
self,
name: str,
database: str | None = None,
force: bool = False,
collate: str | None = None,
**options: Any,
) -> None:
create_stmt = "CREATE SCHEMA"
if force:
create_stmt += " IF NOT EXISTS"

create_stmt += " "
create_stmt += ".".join(filter(None, [database, name]))

if collate is not None:
create_stmt += f" DEFAULT COLLATION {collate}"

options_str = ", ".join(f"{name}={value!r}" for name, value in options.items())
if options_str:
create_stmt += f" OPTIONS({options_str})"
self.raw_sql(create_stmt)

def drop_schema(
self,
name: str,
database: str | None = None,
force: bool = False,
cascade: bool = False,
) -> None:
drop_stmt = "DROP SCHEMA"
if force:
drop_stmt += " IF EXISTS"

drop_stmt += " "
drop_stmt += ".".join(filter(None, [database, name]))
drop_stmt += " CASCADE" if cascade else " RESTRICT"
self.raw_sql(drop_stmt)

def table(self, name: str, database: str | None = None) -> ir.TableExpr:
if database is None:
database = f"{self.data_project}.{self.current_database}"
t = super().table(name, database=database)
database = f"{self.data_project}.{self.current_schema}"
table_id = self._fully_qualified_name(name, database)
t = super().table(table_id)
bq_table = self.client.get_table(table_id)
return rename_partitioned_column(t, bq_table, self.partition_column)
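
Note: `create_schema` above assembles its DDL by plain string concatenation. Mirroring that logic with hypothetical argument values (project, dataset, and option names are placeholders) shows the statement it would emit:

    database, name, collate = "my-project", "analytics", "und:ci"
    options = {"location": "US"}

    stmt = "CREATE SCHEMA IF NOT EXISTS " + ".".join(filter(None, [database, name]))
    stmt += f" DEFAULT COLLATION {collate}"
    options_str = ", ".join(f"{k}={v!r}" for k, v in options.items())
    stmt += f" OPTIONS({options_str})"
    print(stmt)
    # CREATE SCHEMA IF NOT EXISTS my-project.analytics DEFAULT COLLATION und:ci OPTIONS(location='US')
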

@@ -288,6 +328,16 @@ def raw_sql(self, query: str, results=False, params=None):

@property
def current_database(self) -> str:
warnings.warn(
"current_database will return the current *data project* in ibis 7.0.0; "
"use current_schema for the current BigQuery dataset",
category=FutureWarning,
)
# TODO: return self.data_project in ibis 7.0.0
return self.dataset

@property
def current_schema(self) -> str | None:
return self.dataset

def database(self, name=None):
@@ -328,13 +378,10 @@ def execute(self, expr, params=None, limit="default", **kwargs):
sql = query_ast.compile()
self._log(sql)
cursor = self.raw_sql(sql, params=params, **kwargs)
schema = expr.as_table().schema()
result = self.fetch_from_cursor(cursor, schema)

if hasattr(getattr(query_ast, "dml", query_ast), "result_handler"):
result = query_ast.dml.result_handler(result)
result = self.fetch_from_cursor(cursor, expr.as_table().schema())

return result
return expr.__pandas_result__(result)

def fetch_from_cursor(self, cursor, schema):
arrow_t = self._cursor_to_arrow(cursor)
@@ -379,14 +426,7 @@ def to_pyarrow(
sql = query_ast.compile()
cursor = self.raw_sql(sql, params=params, **kwargs)
table = self._cursor_to_arrow(cursor)
if isinstance(expr, ir.Scalar):
assert len(table.columns) == 1, "len(table.columns) != 1"
return table[0][0]
elif isinstance(expr, ir.Column):
assert len(table.columns) == 1, "len(table.columns) != 1"
return table[0]
else:
return table
return expr.__pyarrow_result__(table)

def to_pyarrow_batches(
self,
@@ -419,13 +459,19 @@ def get_schema(self, name, database=None):
table = self.client.get_table(table_ref)
return schema_from_bigquery_table(table)

def list_databases(self, like=None):
def list_schemas(self, like=None):
results = [
dataset.dataset_id
for dataset in self.client.list_datasets(project=self.data_project)
]
return self._filter_with_like(results, like)

@ibis.util.deprecated(
instead="use `list_schemas()`", as_of="6.1.0", removed_in="8.0.0"
)
def list_databases(self, like=None):
return self.list_schemas(like=like)

def list_tables(self, like=None, database=None):
project, dataset = self._parse_project_and_dataset(database)
dataset_ref = bq.DatasetReference(project, dataset)
@@ -448,47 +494,119 @@ def create_table(
database: str | None = None,
temp: bool | None = None,
overwrite: bool = False,
default_collate: str | None = None,
partition_by: str | None = None,
cluster_by: Iterable[str] | None = None,
options: Mapping[str, Any] | None = None,
) -> ir.Table:
"""Create a table in BigQuery.

Parameters
----------
name
Name of the table to create
obj
The data with which to populate the table; optional, but one of `obj`
or `schema` must be specified
schema
The schema of the table to create; optional, but one of `obj` or
`schema` must be specified
database
The BigQuery *dataset* in which to create the table; optional
temp
This parameter is not yet supported in the BigQuery backend
overwrite
If `True`, replace the table if it already exists, otherwise fail if
the table exists
default_collate
Default collation for string columns. See BigQuery's documentation
for more details: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
partition_by
Partition the table by the given expression. See BigQuery's documentation
for more details: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression
cluster_by
List of columns to cluster the table by. See BigQuery's documentation
for more details: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#clustering_column_list
options
BigQuery-specific table options; see the BigQuery documentation for
details: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#table_option_list

Returns
-------
Table
The table that was just created
"""
if obj is None and schema is None:
raise com.IbisError("The schema or obj parameter is required")
if temp is True:
raise NotImplementedError(
"BigQuery backend does not yet support temporary tables"
)
if overwrite is not False:
raise com.IbisError("One of the `schema` or `obj` parameter is required")

if temp:
# TODO: these require a BQ session; figure out how to handle that
raise NotImplementedError(
"BigQuery backend does not yet support overwriting tables"
"Temporary tables in the BigQuery backend are not yet supported"
)

create_stmt = "CREATE"

if overwrite:
create_stmt += " OR REPLACE"

table_ref = self._fully_qualified_name(name, database)

create_stmt += f" TABLE `{table_ref}`"

if isinstance(obj, ir.Table) and schema is not None:
if not schema.equals(obj.schema()):
raise com.IbisTypeError(
"""Provided schema and Ibis table schema are incompatible.
Please align the two schemas, or provide only one of the two arguments."""
"Provided schema and Ibis table schema are incompatible. Please "
"align the two schemas, or provide only one of the two arguments."
)

if schema is not None:
schema_str = ", ".join(
(
f"{name} {BigQueryType.from_ibis(typ)}"
+ " NOT NULL" * (not typ.nullable)
)
for name, typ in schema.items()
)
create_stmt += f" ({schema_str})"

if default_collate is not None:
create_stmt += f" DEFAULT COLLATE {default_collate!r}"

if partition_by is not None:
create_stmt += f" PARTITION BY {partition_by}"

if cluster_by is not None:
create_stmt += f" CLUSTER BY {', '.join(cluster_by)}"

if options:
pairs = ", ".join(f"{k}={v!r}" for k, v in options.items())
create_stmt += f" OPTIONS({pairs})"

if obj is not None:
import pyarrow as pa

project_id, dataset = self._parse_project_and_dataset(database)
if isinstance(obj, (pd.DataFrame, pa.Table)):
table = ibis.memtable(obj, schema=schema)
else:
table = obj
sql_select = self.compile(table)
table_ref = f"`{project_id}`.`{dataset}`.`{name}`"
self.raw_sql(f'CREATE TABLE {table_ref} AS ({sql_select})')
elif schema is not None:
table_id = self._fully_qualified_name(name, database)
table = bq.Table(table_id, schema=BigQuerySchema.from_ibis(schema))
self.client.create_table(table)
return self.table(name, database=database)

create_stmt += f" AS ({self.compile(table)})"

self.raw_sql(create_stmt)

return self.table(table_ref)

def drop_table(
self, name: str, *, database: str | None = None, force: bool = False
) -> None:
table_id = self._fully_qualified_name(name, database)
self.client.delete_table(table_id, not_found_ok=not force)
drop_stmt = "DROP TABLE"
if force:
drop_stmt += " IF EXISTS"
drop_stmt += f" `{table_id}`"
self.raw_sql(drop_stmt)

def create_view(
self,
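
Note: with the expanded signature, a call exercising the new clauses looks roughly like this (project, dataset, and option values are placeholders; compare the `test_create_table_with_options` test added later in this diff):

    import ibis

    con = ibis.bigquery.connect(project_id="my-project", dataset_id="my_dataset")
    t = con.create_table(
        "events",
        schema=ibis.schema(dict(a="int64", b="int64", d="date")),
        overwrite=True,
        partition_by="d",
        cluster_by=["a", "b"],
        options={"description": "created via ibis"},
    )
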
@@ -501,14 +619,20 @@ def create_view(
or_replace = "OR REPLACE " * overwrite
sql_select = self.compile(obj)
table_id = self._fully_qualified_name(name, database)
code = f"CREATE {or_replace}VIEW {table_id} AS {sql_select}"
code = f"CREATE {or_replace}VIEW `{table_id}` AS {sql_select}"
self.raw_sql(code)
return self.table(name, database=database)

def drop_view(
self, name: str, *, database: str | None = None, force: bool = False
) -> None:
self.drop_table(name=name, database=database, force=force)
# default_project, default_dataset = self._parse_project_and_dataset(database)
table_id = self._fully_qualified_name(name, database)
drop_stmt = "DROP VIEW"
if force:
drop_stmt += " IF EXISTS"
drop_stmt += f" `{table_id}`"
self.raw_sql(drop_stmt)


def compile(expr, params=None, **kwargs):
19 changes: 19 additions & 0 deletions ibis/backends/bigquery/compiler.py
@@ -8,10 +8,12 @@
import toolz

import ibis.common.graph as lin
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.backends.base.sql import compiler as sql_compiler
from ibis.backends.bigquery import operations, registry, rewrites
from ibis.backends.bigquery.datatypes import BigQueryType


class BigQueryUDFDefinition(sql_compiler.DDL):
Expand Down Expand Up @@ -127,6 +129,23 @@ def _quote_identifier(self, name):
return name
return f"`{name}`"

def _format_in_memory_table(self, op):
schema = op.schema
names = schema.names
types = schema.types

raw_rows = []
for row in op.data.to_frame().itertuples(index=False):
raw_row = ", ".join(
f"{self._translate(lit)} AS {name}"
for lit, name in zip(
map(ops.Literal, row, types), map(self._quote_identifier, names)
)
)
raw_rows.append(f"STRUCT({raw_row})")
array_type = BigQueryType.from_ibis(dt.Array(op.schema.as_struct()))
return f"UNNEST({array_type}[{', '.join(raw_rows)}])"


class BigQueryCompiler(sql_compiler.Compiler):
translator_class = BigQueryExprTranslator
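
Note: `_format_in_memory_table` inlines a memtable as an array-of-structs literal, so no temporary table is needed. Assuming memtable compilation is enabled for the backend, the compiled query for a small memtable comes out roughly as shown in the comment:

    import ibis

    t = ibis.memtable({"a": [1, 2]})
    print(ibis.bigquery.compile(t))
    # roughly:
    #   SELECT t0.`a`
    #   FROM UNNEST(ARRAY<STRUCT<a INT64>>[STRUCT(1 AS `a`), STRUCT(2 AS `a`)]) t0
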
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/custom_udfs.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.bigquery.compiler import BigQueryExprTranslator
19 changes: 10 additions & 9 deletions ibis/backends/bigquery/registry.py
@@ -124,7 +124,7 @@ def _struct_column(translator, op):


def _array_concat(translator, op):
return "ARRAY_CONCAT({})".format(", ".join(map(translator.translate, op.args)))
return "ARRAY_CONCAT({})".format(", ".join(map(translator.translate, op.arg)))


def _array_column(translator, op):
@@ -457,11 +457,7 @@ def compiles_strftime(translator, op):
fmt_string = translator.translate(format_str)
arg_formatted = translator.translate(arg)
if isinstance(arg_type, dt.Timestamp) and arg_type.timezone is None:
return "FORMAT_{}({}, {})".format(
strftime_format_func_name,
fmt_string,
arg_formatted,
)
return f"FORMAT_{strftime_format_func_name}({fmt_string}, {arg_formatted})"
elif isinstance(arg_type, dt.Timestamp):
return "FORMAT_{}({}, {}, {!r})".format(
strftime_format_func_name,
@@ -470,9 +466,7 @@ def compiles_strftime(translator, op):
arg_type.timezone,
)
else:
return "FORMAT_{}({}, {})".format(
strftime_format_func_name, fmt_string, arg_formatted
)
return f"FORMAT_{strftime_format_func_name}({fmt_string}, {arg_formatted})"


def compiles_string_to_timestamp(translator, op):
@@ -659,6 +653,12 @@ def table_column(translator, op):
return quoted_name


def _count_distinct_star(t, op):
raise com.UnsupportedOperationError(
"BigQuery doesn't support COUNT(DISTINCT ...) with multiple columns"
)


OPERATION_REGISTRY = {
**operation_registry,
# Literal
Expand Down Expand Up @@ -806,6 +806,7 @@ def table_column(translator, op):
ops.StartsWith: fixed_arity("STARTS_WITH", 2),
ops.EndsWith: fixed_arity("ENDS_WITH", 2),
ops.TableColumn: table_column,
ops.CountDistinctStar: _count_distinct_star,
}

_invalid_operations = {
21 changes: 11 additions & 10 deletions ibis/backends/bigquery/tests/conftest.py
@@ -22,9 +22,6 @@
from ibis.backends.tests.data import json_types, non_null_array_types, struct_types, win

if TYPE_CHECKING:
import pathlib
from pathlib import Path

import ibis.expr.types as ir

DATASET_ID = "ibis_gbq_testing"
@@ -83,13 +80,13 @@ class TestConf(UnorderedComparator, BackendTest, RoundAwayFromZero):
supports_structs = True
supports_json = True
check_names = False
deps = ("google.cloud.bigquery",)

@staticmethod
def format_table(name: str) -> str:
return f"{DATASET_ID}.{name}"

@staticmethod
def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
def _load_data(self, **_: Any) -> None:
"""Load test data into a BigQuery instance."""

credentials, default_project_id = google.auth.default(
@@ -198,7 +195,7 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("avro", "struct_table.avro").read_bytes()
self.data_dir.joinpath("avro", "struct_table.avro").read_bytes()
),
bq.TableReference(testing_dataset, "struct_table"),
job_config=bq.LoadJobConfig(
@@ -267,7 +264,9 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
self.data_dir.joinpath(
"parquet", f"{table}.parquet"
).read_bytes()
),
bq.TableReference(testing_dataset, table),
job_config=bq.LoadJobConfig(
@@ -286,7 +285,9 @@ def _load_data(data_dir: Path, script_dir: Path, **_: Any) -> None:
make_job,
client.load_table_from_file,
io.BytesIO(
data_dir.joinpath("parquet", f"{table}.parquet").read_bytes()
self.data_dir.joinpath(
"parquet", f"{table}.parquet"
).read_bytes()
),
bq.TableReference(testing_dataset_tokyo, table),
job_config=bq.LoadJobConfig(
@@ -307,7 +308,7 @@ def functional_alltypes(self) -> ir.Table:
return t.select(~s.c("index", "Unnamed_0"))

@staticmethod
def connect(data_directory: pathlib.Path) -> Backend:
def connect(*, tmpdir, worker_id, **kw) -> Backend:
"""Connect to the test project and dataset."""
credentials, default_project_id = google.auth.default(
scopes=EXTERNAL_DATA_SCOPES
@@ -317,7 +318,7 @@ def connect(data_directory: pathlib.Path) -> Backend:
os.environ.get(PROJECT_ID_ENV_VAR, default_project_id) or DEFAULT_PROJECT_ID
)
con = ibis.bigquery.connect(
project_id=project_id, dataset_id=DATASET_ID, credentials=credentials
project_id=project_id, dataset_id=DATASET_ID, credentials=credentials, **kw
)
expr = ibis.literal(1)
try:
@@ -1,6 +1,6 @@
SELECT t0.`title`, t0.`tags`
FROM (
SELECT t1.*
FROM `bigquery-public-data.stackoverflow`.posts_questions t1
FROM `bigquery-public-data.stackoverflow.posts_questions` t1
WHERE STRPOS(t1.`tags`, 'ibis') - 1 >= 0
) t0
@@ -1,6 +1,6 @@
WITH t0 AS (
SELECT t2.`float_col`, t2.`timestamp_col`, t2.`int_col`, t2.`string_col`
FROM `ibis-gbq.ibis_gbq_testing`.functional_alltypes t2
FROM `ibis-gbq.ibis_gbq_testing.functional_alltypes` t2
WHERE t2.`timestamp_col` < @param_0
)
SELECT count(t1.`foo`) AS `count`
48 changes: 40 additions & 8 deletions ibis/backends/bigquery/tests/system/test_client.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import collections
import datetime
import decimal
Expand All @@ -13,6 +15,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.bigquery.client import bigquery_param
from ibis.util import gen_name


def test_column_execute(alltypes, df):
Expand All @@ -34,7 +37,8 @@ def test_list_tables(con):


def test_current_database(con, dataset_id):
db = con.current_database
with pytest.warns(FutureWarning, match="data project"):
db = con.current_database
assert db == dataset_id
assert db == con.dataset_id
assert con.list_tables(database=db, like="alltypes") == con.list_tables(
@@ -252,8 +256,8 @@ def test_multiple_project_queries(con):
result = join.compile()
expected = """\
SELECT t0.`title`
FROM `bigquery-public-data.stackoverflow`.posts_questions t0
INNER JOIN `nyc-tlc.yellow`.trips t1
FROM `bigquery-public-data.stackoverflow.posts_questions` t0
INNER JOIN `nyc-tlc.yellow.trips` t1
ON t0.`tags` = t1.`rate_code`"""
assert result == expected

@@ -268,8 +272,8 @@ def test_multiple_project_queries_database_api(con):
result = join.compile()
expected = """\
SELECT t0.`title`
FROM `bigquery-public-data.stackoverflow`.posts_questions t0
INNER JOIN `nyc-tlc.yellow`.trips t1
FROM `bigquery-public-data.stackoverflow.posts_questions` t0
INNER JOIN `nyc-tlc.yellow.trips` t1
ON t0.`tags` = t1.`rate_code`"""
assert result == expected

@@ -349,7 +353,7 @@ def test_approx_median(alltypes):
def test_create_table_bignumeric(con, temp_table):
schema = ibis.schema({'col1': dt.Decimal(76, 38)})
temporary_table = con.create_table(temp_table, schema=schema)
con.raw_sql(f"INSERT {con.current_database}.{temp_table} (col1) VALUES (10.2)")
con.raw_sql(f"INSERT {con.current_schema}.{temp_table} (col1) VALUES (10.2)")
df = temporary_table.execute()
assert df.shape == (1, 1)

Expand All @@ -358,7 +362,7 @@ def test_geography_table(con, temp_table):
schema = ibis.schema({'col1': dt.GeoSpatial(geotype="geography", srid=4326)})
temporary_table = con.create_table(temp_table, schema=schema)
con.raw_sql(
f"INSERT {con.current_database}.{temp_table} (col1) VALUES (ST_GEOGPOINT(1,3))"
f"INSERT {con.current_schema}.{temp_table} (col1) VALUES (ST_GEOGPOINT(1,3))"
)
df = temporary_table.execute()
assert df.shape == (1, 1)
@@ -374,7 +378,7 @@ def test_timestamp_table(con, temp_table):
)
temporary_table = con.create_table(temp_table, schema=schema)
con.raw_sql(
f"INSERT {con.current_database}.{temp_table} (datetime_col, timestamp_col) VALUES (CURRENT_DATETIME(), CURRENT_TIMESTAMP())"
f"INSERT {con.current_schema}.{temp_table} (datetime_col, timestamp_col) VALUES (CURRENT_DATETIME(), CURRENT_TIMESTAMP())"
)
df = temporary_table.execute()
assert df.shape == (1, 2)
@@ -385,3 +389,31 @@ def test_timestamp_table(con, temp_table):
("timestamp_col", dt.Timestamp(timezone="UTC")),
]
)


def test_fully_qualified_table_creation(con, project_id, dataset_id, temp_table):
schema = ibis.schema({'col1': dt.GeoSpatial(geotype="geography", srid=4326)})
t = con.create_table(f"{project_id}.{dataset_id}.{temp_table}", schema=schema)
assert t.get_name() == f"{project_id}.{dataset_id}.{temp_table}"


def test_create_table_with_options(con):
name = gen_name("bigquery_temp_table")
schema = ibis.schema(dict(a="int64", b="int64", c="array<string>", d="date"))
t = con.create_table(
name,
schema=schema,
overwrite=True,
default_collate="und:ci",
partition_by="d",
cluster_by=["a", "b"],
options={
"friendly_name": "bigquery_temp_table",
"description": "A table for testing BigQuery's create_table implementation",
"labels": [("org", "ibis")],
},
)
try:
assert t.execute().empty
finally:
con.drop_table(name)
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/system/test_connect.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from unittest import mock

import google.api_core.client_options
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/system/udf/test_udf_execute.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import os

import pandas as pd
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_client.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest

from ibis.backends.bigquery import client
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import datetime
import re
import time
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_datatypes.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
from pytest import param

Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/udf/test_core.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import builtins
import sys
import tempfile
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/udf/test_find.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import ast

from ibis.backends.bigquery.udf.find import find_names
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/bigquery/tests/unit/udf/test_usage.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import pytest
from pytest import param

Expand Down
5 changes: 1 addition & 4 deletions ibis/backends/bigquery/udf/__init__.py
@@ -284,10 +284,7 @@ def compiles_udf_node(t, op):
return f"{udf_node.__name__}({args})"

bigquery_signature = ", ".join(
"{name} {type}".format(
name=name,
type=BigQueryType.from_ibis(dt.dtype(type_)),
)
f"{name} {BigQueryType.from_ibis(dt.dtype(type_))}"
for name, type_ in params.items()
)
return_type = BigQueryType.from_ibis(dt.dtype(output_type))
6 changes: 1 addition & 5 deletions ibis/backends/bigquery/udf/core.py
@@ -321,11 +321,7 @@ def visit_Attribute(self, node):
return f"{self.visit(node.value)}.{node.attr}"

def visit_For(self, node):
lines = [
"for (let {} of {}) {{".format(
self.visit(node.target), self.visit(node.iter)
)
]
lines = [f"for (let {self.visit(node.target)} of {self.visit(node.iter)}) {{"]
with self.local_scope():
lines.append(indent(map(self.visit, node.body)))
lines.append("}")