42 changes: 21 additions & 21 deletions ci/schema/mssql.sql
@@ -1,6 +1,6 @@
DROP TABLE IF EXISTS diamonds;
DROP TABLE IF EXISTS ibis_testing.dbo.diamonds;

CREATE TABLE diamonds (
CREATE TABLE ibis_testing.dbo.diamonds (
carat FLOAT,
cut VARCHAR(MAX),
color VARCHAR(MAX),
Expand All @@ -17,13 +17,13 @@ CREATE TABLE diamonds (
-- /data is a volume mount to the ibis testing data
-- used for snappy test data loading
-- DataFrame.to_sql is unusably slow for loading CSVs
BULK INSERT diamonds
BULK INSERT ibis_testing.dbo.diamonds
FROM '/data/diamonds.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS astronauts;
DROP TABLE IF EXISTS ibis_testing.dbo.astronauts;

CREATE TABLE astronauts (
CREATE TABLE ibis_testing.dbo.astronauts (
"id" BIGINT,
"number" BIGINT,
"nationwide_number" BIGINT,
Expand All @@ -50,13 +50,13 @@ CREATE TABLE astronauts (
"total_eva_hrs" DOUBLE PRECISION
);

BULK INSERT astronauts
BULK INSERT ibis_testing.dbo.astronauts
FROM '/data/astronauts.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS batting;
DROP TABLE IF EXISTS ibis_testing.dbo.batting;

CREATE TABLE batting (
CREATE TABLE ibis_testing.dbo.batting (
"playerID" VARCHAR(MAX),
"yearID" BIGINT,
stint BIGINT,
Expand All @@ -81,13 +81,13 @@ CREATE TABLE batting (
"GIDP" BIGINT
);

BULK INSERT batting
BULK INSERT ibis_testing.dbo.batting
FROM '/data/batting.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS awards_players;
DROP TABLE IF EXISTS ibis_testing.dbo.awards_players;

CREATE TABLE awards_players (
CREATE TABLE ibis_testing.dbo.awards_players (
"playerID" VARCHAR(MAX),
"awardID" VARCHAR(MAX),
"yearID" BIGINT,
Expand All @@ -96,13 +96,13 @@ CREATE TABLE awards_players (
notes VARCHAR(MAX)
);

BULK INSERT awards_players
BULK INSERT ibis_testing.dbo.awards_players
FROM '/data/awards_players.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS functional_alltypes;
DROP TABLE IF EXISTS ibis_testing.dbo.functional_alltypes;

CREATE TABLE functional_alltypes (
CREATE TABLE ibis_testing.dbo.functional_alltypes (
id INTEGER,
bool_col BIT,
tinyint_col SMALLINT,
Expand All @@ -118,21 +118,21 @@ CREATE TABLE functional_alltypes (
month INTEGER
);

BULK INSERT functional_alltypes
BULK INSERT ibis_testing.dbo.functional_alltypes
FROM '/data/functional_alltypes.csv'
WITH (FORMAT = 'CSV', FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2)

DROP TABLE IF EXISTS win;
DROP TABLE IF EXISTS ibis_testing.dbo.win;

CREATE TABLE win (g VARCHAR(MAX), x BIGINT NOT NULL, y BIGINT);
INSERT INTO win VALUES
CREATE TABLE ibis_testing.dbo.win (g VARCHAR(MAX), x BIGINT NOT NULL, y BIGINT);
INSERT INTO ibis_testing.dbo.win VALUES
('a', 0, 3),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1);

DROP TABLE IF EXISTS topk;
DROP TABLE IF EXISTS ibis_testing.dbo.topk;

CREATE TABLE topk (x BIGINT);
INSERT INTO topk VALUES (1), (1), (NULL);
CREATE TABLE ibis_testing.dbo.topk (x BIGINT);
INSERT INTO ibis_testing.dbo.topk VALUES (1), (1), (NULL);
26 changes: 26 additions & 0 deletions ci/set-milestone-on-issue.sh
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -euo pipefail

top="$(dirname "$(readlink -f "$0")")"

# jq query that extracts the issue numbers linked to (closed by) a pull request
linked_issues_query='.data.repository.pullRequest.closingIssuesReferences.nodes[].number'

# find all merged pull requests associated with the commit passed as the first argument
gh pr list --search "$1" --state merged --json number --jq '.[].number' |
sed '/^$/d' |
while read -r pr; do
milestone="$(gh pr view "${pr}" --json milestone --jq '.milestone.title')"

if [ -n "${milestone}" ]; then
# find all issues associated with said pull requests
# taken from https://github.com/cli/cli/discussions/7097#discussioncomment-5229031
readarray -t issues < <(
gh api graphql -F owner=ibis-project -F repo=ibis -F pr="${pr}" -F query="@${top}/linked-issues.gql" \
--jq "${linked_issues_query}" | sed '/^$/d')

if [ "${#issues[@]}" -gt 0 ]; then
gh issue edit "${issues[@]}" --milestone "${milestone}"
fi
fi
done
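A typical invocation (a sketch; the `GITHUB_SHA` variable is assumed to come from the calling CI workflow, it is not defined here) would be `./ci/set-milestone-on-issue.sh "$GITHUB_SHA"`, since the first argument is passed straight to `gh pr list --search` to locate the merged pull request containing that commit.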
15 changes: 10 additions & 5 deletions compose.yaml
@@ -1,6 +1,6 @@
services:
clickhouse:
image: clickhouse/clickhouse-server:24.6.2.17-alpine
image: clickhouse/clickhouse-server:24.7.2.13-alpine
ports:
- 8123:8123 # http port
- 9000:9000 # native protocol port
Expand Down Expand Up @@ -63,12 +63,17 @@ services:
environment:
MSSQL_SA_PASSWORD: 1bis_Testing!
ACCEPT_EULA: "Y"
# The default collation in MSSQL is SQL_Latin1_General_CP1_CI_AS
# where the CI stands for Case Insensitive.
# We use a case-sensitive collation for testing so that we don't
# break users with case-sensitive collations.
MSSQL_COLLATION: Latin1_General_100_BIN2_UTF8
healthcheck:
interval: 1s
retries: 20
test:
- CMD-SHELL
- /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "IF DB_ID('ibis_testing') IS NULL BEGIN CREATE DATABASE [ibis_testing] END"
- $(find /opt -name sqlcmd -type f -executable) -C -S localhost -U sa -P "$$MSSQL_SA_PASSWORD" -Q "SELECT 1"
ports:
- 1433:1433
volumes:
Expand All @@ -94,7 +99,7 @@ services:
- trino

minio:
image: bitnami/minio:2024.7.16
image: bitnami/minio:2024.8.3
environment:
MINIO_ROOT_USER: accesskey
MINIO_ROOT_PASSWORD: secretkey
Expand Down Expand Up @@ -341,7 +346,7 @@ services:
- druid

oracle:
image: gvenzl/oracle-free:23.4-slim
image: gvenzl/oracle-free:23.5-slim
environment:
ORACLE_PASSWORD: ibis
ORACLE_DATABASE: IBIS_TESTING
Expand All @@ -362,7 +367,7 @@ services:
- oracle:/opt/oracle/data

exasol:
image: exasol/docker-db:8.27.0
image: exasol/docker-db:8.29.1
privileged: true
ports:
- 8563:8563
2 changes: 1 addition & 1 deletion conda/environment-arm64-flink.yml
Expand Up @@ -96,4 +96,4 @@ dependencies:
- py4j =0.10.9.7
- pip
- pip:
- apache-flink >=1.19.1
- apache-flink =1.19.1
3 changes: 1 addition & 2 deletions docker/mysql/startup.sql
@@ -1,5 +1,4 @@
CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis';
CREATE SCHEMA IF NOT EXISTS test_schema;
GRANT CREATE, DROP ON *.* TO 'ibis'@'%';
GRANT CREATE,SELECT,DROP ON `test_schema`.* TO 'ibis'@'%';
GRANT CREATE,SELECT,DROP ON *.* TO 'ibis'@'%';
FLUSH PRIVILEGES;
2 changes: 1 addition & 1 deletion docker/trino/jvm.config
@@ -1,5 +1,5 @@
-server
-Xmx2G
-Xmx3G
-XX:InitialRAMPercentage=80
-XX:MaxRAMPercentage=80
-XX:G1HeapRegionSize=32M
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions docs/_quarto.yml
Expand Up @@ -361,6 +361,8 @@ quartodoc:
package: ibis.expr.types.numeric
- name: BooleanValue
package: ibis.expr.types.logical
- name: BooleanColumn
package: ibis.expr.types.logical
- name: and_
dynamic: true
signature_name: full
18 changes: 16 additions & 2 deletions docs/backends/_utils.py
Expand Up @@ -36,7 +36,17 @@ def find_member_with_docstring(member):
return member

cls = member.parent
for base in cls.resolved_bases:
resolved_bases = cls.resolved_bases
# If we're a SQLBackend (likely) then also search through to `BaseBackend`
if resolved_bases and (sqlbackend := resolved_bases[0]).name == "SQLBackend":
for base in sqlbackend.resolved_bases:
if base not in resolved_bases:
resolved_bases.append(base)

# Remove `CanCreateSchema` and `CanListSchema` since they are deprecated
# and we don't want to document their existence.
filtered_bases = filter(lambda x: "schema" not in x.name.lower(), resolved_bases)
for base in filtered_bases:
try:
parent_member = get_callable(base, member.name)
except KeyError:
Expand Down Expand Up @@ -92,7 +102,11 @@ def dump_methods_to_json_for_algolia(backend, methods):
"objectID": base_url,
"href": base_url,
"title": f"{backend_name}.Backend.{method}",
"text": getattr(backend.all_members[method].docstring, "value", ""),
"text": getattr(
find_member_with_docstring(backend.all_members[method]).docstring,
"value",
"",
),
"crumbs": ["Backend API", "API", f"{backend_name} methods"],
}

12 changes: 6 additions & 6 deletions docs/contribute/02_workflow.qmd
Expand Up @@ -95,12 +95,12 @@ all ordinary and edge cases.
Pytest markers can be used to assert that a test should fail or raise a specific error.
We use a number of pytest markers in ibis:

- `pytest.mark.notimpl`: the backend can do a thing, we haven't mapped the op
- `pytest.mark.notyet`: the backend cannot do a thing, but might in the future
- `pytest.mark.never`: the backend will never support this / pass this test (common example
here is a test running on sqlite that relies on strong typing)
- `pytest.mark.broken`: this test broke and it's demonstrably unrelated to the PR I'm working
on and fixing it shouldn't block this PR from going in (but we should fix it up pronto)
- `pytest.mark.notimpl`: We can implement/fix/workaround this on the ibis side, but haven't yet.
- `pytest.mark.notyet`: This requires the backend to implement/fix something.
We can't/won't do it on the ibis side.
- `pytest.mark.never`: The backend will never support this / pass this test.
We shouldn't have any hope of trying to fix this.
A common example here is a test running on sqlite that relies on strong typing.

Refrain from using a generic marker like `pytest.mark.xfail`.
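
As a rough sketch of how these markers are typically applied (the backend names, exception types, and test body below are illustrative assumptions, not taken from the actual test suite):

```python
import pytest

import ibis.common.exceptions as com


# Backends, exceptions, and the expression are placeholders for illustration.
@pytest.mark.notimpl(["datafusion"], raises=NotImplementedError)
@pytest.mark.notyet(["sqlite"], raises=com.UnsupportedOperationError)
def test_some_operation(alltypes):
    expr = alltypes.int_col.median()
    assert expr.execute() is not None
```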

6 changes: 3 additions & 3 deletions docs/posts/run-on-snowflake/index.qmd
Expand Up @@ -38,8 +38,8 @@ figure out, and these are the questions we will answer throughout the post.

## Getting the Ibis connection

The release of Ibis 9.0 includes the introduction of a new method,
[`from_snowpark`](../../backends/snowflake.qmd#ibis.backends.snowflake.Backend.from_snowpark)
The release of Ibis 9.2 includes the introduction of a new method,
[`from_connection`](../../backends/snowflake.qmd#ibis.backends.snowflake.Backend.from_connection)
to provide users with a convenient mechanism to take an existing Snowpark
session and create an Ibis Snowflake backend instance with it.

Expand All @@ -50,7 +50,7 @@ import ibis
import snowflake.snowpark as sp

session = sp.Session.builder.create()
con = ibis.snowflake.from_snowpark(session)
con = ibis.snowflake.from_connection(session)
```

This connection uses the same session within Snowflake, so temporary objects
Binary file added docs/presentations/overview/img/future.png
Binary file added docs/presentations/overview/img/future2.png
Binary file added docs/presentations/overview/img/layers.png
Binary file added docs/presentations/overview/img/uis.png
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,159 @@ format:
footer: <https://ibis-project.org>
# preview-links: true
chalkboard: true
incremental: true
incremental: false
# https://quarto.org/docs/presentations/revealjs/themes.html#using-themes
theme: dark
scrollable: true
# smaller: true
---

# what
# composable data systems

## A Python perspective

["The Road to Composable Data Systems: Thoughts on the Last 15 Years and the Future"](https://wesmckinney.com/blog/looking-back-15-years) by Wes McKinney:

> **pandas solved many problems that database systems also solve**, but almost no one in the data science ecosystem had the expertise to build a data frame library using database techniques. Eagerly-evaluated APIs (as opposed to “lazy” ones) make it more difficult to do efficient “query” planning and execution. **Data interoperability with other systems is always going to be painful**...
## A Python perspective

["The Road to Composable Data Systems: Thoughts on the Last 15 Years and the Future"](https://wesmckinney.com/blog/looking-back-15-years) by Wes McKinney:

> ...**unless faster, more efficient “standards” for interoperability are created**.
## Layers

["The Composable Codex"](https://voltrondata.com/codex) by Voltron Data:

![layers](img/layers.png)

## Future

["The Composable Codex"](https://voltrondata.com/codex) by Voltron Data:

![future](img/future2.png)

## Why composable data systems?

Efficiency:

- time
- money
- data mesh
- engineering productivity
- avoid vendor lock-in

## How can you implement it? {.smaller}

Choose your stack:

:::: {.columns}

::: {.column width="33%"}
**UI**:

- Ibis (Python)
- dplyr (R)
- SQL
- ...
:::

::: {.column width="33%"}
**Execution engine**:

- DuckDB
- DataFusion
- Polars
- Spark
- Trino
- ClickHouse
- Snowflake
- Databricks
- Theseus
- ...
:::

::: {.column width="33%"}
**Storage**:

- Iceberg
- Delta Lake
- Hudi
- Hive-partitioned Parquet files
- ...
:::

::::
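
For instance, a deliberately minimal sketch of one such stack — Ibis as the UI, DuckDB as the engine, a Parquet file as storage (the file path is invented for the example):

```python
import ibis

con = ibis.duckdb.connect()                    # execution engine
t = con.read_parquet("data/penguins.parquet")  # storage: a Parquet file (illustrative path)
t.group_by("species").agg(n=t.count()).to_pandas()  # UI: the Ibis dataframe API
```

Swapping any one layer (say, DataFusion for DuckDB) leaves the rest of the code unchanged.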

## Choose your stack (there's more) {.smaller}

Additionally, choose tools for:

**Orchestration**:

- Airflow
- Prefect
- Dagster
- Kedro
- SQLMesh
- dbt
- ...

**Ingestion**:

- dlt
- Airbyte
- requests
- Ibis
- ...

**Visualization**:

- Altair
- plotnine
- Plotly
- seaborn
- matplotlib
- ...

**Dashboarding**:

- Streamlit
- Quarto dashboards
- Shiny for Python
- Dash
- ...

**Testing**:

- Great Expectations
- Pandera
- Pytest
- assert statements
- ...

**CLI**:

- Click
- Typer
- argparse
- ...

# what is Ibis?

## Ibis is a Python library for:

- exploratory data analysis (EDA)
- analytics
- data engineering
- machine learning
- building your own library (e.g. [Google BigFrames](https://github.com/googleapis/python-bigquery-dataframes))
- building your own library
- ...

::: {.fragment}
::: {.r-fit-text}
development to production with the same API
***development to production with the same API***
:::
:::

Expand Down Expand Up @@ -122,18 +254,19 @@ t.group_by("species", "island").agg(count=t.count()).order_by("count")

:::

## how it works
## How it works

Ibis compiles down to SQL or dataframe code:

```{python}
#| echo: false
import os
import sys
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.abspath("../.."))
from backends_sankey import fig
fig.show()
```
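
To make that compilation step concrete, a small sketch (the table, columns, and dialect are chosen only for illustration):

```python
import ibis

t = ibis.table({"species": "string", "island": "string"}, name="penguins")
expr = t.group_by("species").agg(n=t.count())
print(ibis.to_sql(expr, dialect="duckdb"))  # the SQL Ibis generates for DuckDB
```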

Expand Down Expand Up @@ -199,7 +332,7 @@ Analyzing 10M+ rows from 4+ data sources.

# why

## dataframe lore
## Dataframe lore {.smaller}

::: {.fragment .fade-in-then-semi-out}
Dataframes first appeared in the `S` programming language (*in 1991!*), then evolved into the `R` programming language.
Expand All @@ -225,7 +358,7 @@ This leads to data scientists frequently "throwing their work over the wall" to
But what if there were a new [standard](https://xkcd.com/927/)?
:::

## Ibis origins
## Ibis origins {.smaller}

::: {.fragment .fade-left}
from [Apache Arrow and the "10 Things I Hate About pandas"](https://wesmckinney.com/blog/apache-arrow-pandas-internals/) by Wes McKinney
Expand All @@ -235,7 +368,7 @@ from [Apache Arrow and the "10 Things I Hate About pandas"](https://wesmckinney.
> ...in 2015, I started the Ibis project...to create a pandas-friendly deferred expression system for static analysis and compilation [of] these types of [query planned, multicore execution] operations. Since an efficient multithreaded in-memory engine for pandas was not available when I started Ibis, I instead focused on building compilers for SQL engines (Impala, PostgreSQL, SQLite), similar to the R dplyr package. Phillip Cloud from the pandas core team has been actively working on Ibis with me for quite a long time.
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand All @@ -251,7 +384,7 @@ Python:
::::
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand All @@ -271,7 +404,7 @@ Python:
::::
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand All @@ -293,7 +426,7 @@ Python:
::::
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand All @@ -317,7 +450,7 @@ Python:
::::
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand All @@ -343,7 +476,7 @@ Python:
::::
:::

## two world problem {auto-animate="true"}
## Two world problem {auto-animate="true"}

::: {.nonincremental}
:::: {.columns}
Expand Down Expand Up @@ -375,19 +508,40 @@ SQL:

## Python dataframe history {.smaller}

::: {.incremental}

- **pandas** (2008): dataframes in Python
- **Spark** (2009): distributed dataframes with PySpark
- **Dask** (2014): distributed pandas dataframes
- **Vaex** (2014): multicore dataframes in Python via C++
- [**Ibis**]{style="color:#7C65A0"} (2015): dataframes in Python with SQL-like syntax
- [**Ibis**]{style="color:#7C65A0"} (2015): backend-agnostic dataframes in Python
- **cuDF** (2017): pandas API on GPUs
- **Modin** (2018): pandas API on Ray/Dask
- **Koalas** (2019): pandas API on Spark, later renamed "pandas API on Spark"
- **Polars** (2020): multicore dataframes in Python via Rust
- [**Ibis**]{style="color:#7C65A0"} (2022): Ibis invested in heavily by Voltron Data
- **Snowpark Python** (2022): PySpark-like dataframes on Snowflake
- **Daft** (2022): distributed dataframes in Python via Rust
- **BigQuery DataFrames** (2023): pandas API on Google BigQuery (via [Ibis]{style="color:#7C65A0"}!)
- **Snowpark pandas API** (2024): pandas API on Snowflake
- [**SQLFrame**]{style="color:#7C65A0"} (2024): backend-agnostic dataframes in Python (PySpark API)
- **DataFusion dataframes** (2024): multicore dataframes in Python via Rust

:::

## Obligatory standards xkcd

![standards](https://imgs.xkcd.com/comics/standards.png)

## Standards and composability

All Python dataframe libraries that are not Ibis (or SQLFrame) **lock you into an execution engine**.

::: {.fragment}
::: {.r-fit-text}
***Good [standards are composable]{style="color:#7C65A0"} and adopted by competitors.***
:::
:::

## Python dataframe history (aside) {.smaller}

Expand All @@ -411,6 +565,7 @@ pandas clones:
::: {.column width=33%}
PySpark clones:

- [SQLFrame]{style="color:#7C65A0"}
- Snowpark Python (sort of)
- DuckDB Spark API
- SQLGlot Spark API
Expand All @@ -419,14 +574,16 @@ PySpark clones:
::: {.column width=33%}
something else:

- Ibis
- [Ibis]{style="color:#7C65A0"}
- Polars
- Daft
- DataFusion
:::

::::
:::

## database history
## Database history

- they got faster

Expand Down Expand Up @@ -544,7 +701,7 @@ penguins.group_by(["species", "island"]).agg(penguins.count().name("count"))

A distributed SQL query engine.

## and more!
## ...and more!

:::: {.columns}

Expand Down Expand Up @@ -576,10 +733,9 @@ New backends are easy to add!^\*^
^\*^usually
:::


# how

## try it out now
## Try it out now!

Install:

4 changes: 3 additions & 1 deletion docs/presentations/pycon2024/maintainers.qmd
Expand Up @@ -245,6 +245,7 @@ _CI must complete "quickly"_

::: {.callout-warning}
## Opinions follow
Opinions herein…
:::

- **Env setup needs to be _fast_**: avoid constraint solving
Expand Down Expand Up @@ -282,7 +283,8 @@ _We've added 3 or 4 new backends since the switch_
## Tools: GitHub Actions {.smaller}

::: {.callout-note}
### I don't work for GitHub
## I don't work for GitHub
…even though it might seem like it
:::

- Pay for [the Teams plan](https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration#usage-limits) to get more concurrency
76 changes: 76 additions & 0 deletions docs/release_notes_generated.qmd
@@ -1,6 +1,82 @@
---
---

## [9.3.0](https://github.com/ibis-project/ibis/compare/9.2.0...9.3.0) (2024-08-07)

### Features

* **api:** support `ignore_null` in `collect` ([71271dd](https://github.com/ibis-project/ibis/commit/71271dd262f28dd34e58475d9b2e1e2c70cb4cb8))
* **api:** support `ignore_null` in `first`/`last` ([8d4f97f](https://github.com/ibis-project/ibis/commit/8d4f97f996cf3dacda0323de1704f84a7f4d57ad))
* **api:** support `order_by` in order-sensitive aggregates (`collect`/`group_concat`/`first`/`last`) ([#9729](https://github.com/ibis-project/ibis/issues/9729)) ([a18cb5d](https://github.com/ibis-project/ibis/commit/a18cb5d30b25f73cb990b15cd184eecfdd2c0cc6))
* **api:** support quarterly truncation ([#9715](https://github.com/ibis-project/ibis/issues/9715)) ([75b31c2](https://github.com/ibis-project/ibis/commit/75b31c2ebc0a136a11513b483a35088c38bdb0a9)), closes [#9714](https://github.com/ibis-project/ibis/issues/9714)
* **array:** implement min, max, any, all, sum, mean ([#9704](https://github.com/ibis-project/ibis/issues/9704)) ([793efbc](https://github.com/ibis-project/ibis/commit/793efbca7992423753d85c1fdab8850994c30f1d))
* **bigquery:** support timestamp bucket ([fd61f2c](https://github.com/ibis-project/ibis/commit/fd61f2c34eafcc50d7a8de652222554b45bd2ef6))
* **datafusion:** `pivot_longer` ([2330b0c](https://github.com/ibis-project/ibis/commit/2330b0c86f9b0c18da7d26d4f6e9384f58d6c022))
* **datafusion:** enable array flatten, group concat, and timestamp now ([4d110a0](https://github.com/ibis-project/ibis/commit/4d110a0ac07fafdf55b071953fa24b3186b558f1))
* **datafusion:** struct literals ([a63cee9](https://github.com/ibis-project/ibis/commit/a63cee9ddb2ab20df0e3519df017fc3e02101dfd))
* **datafusion:** unnest ([a706f54](https://github.com/ibis-project/ibis/commit/a706f542477fb5c099f16a097eb39dffb1271eca))
* **duckdb:** add support for passing a subset of column types to `read_csv` ([#9776](https://github.com/ibis-project/ibis/issues/9776)) ([c1dcf67](https://github.com/ibis-project/ibis/commit/c1dcf676a6e9e5b7f581bb51c20b617dbe46ea7e))
* **duckdb:** support arbitrary url prefixes ([#9691](https://github.com/ibis-project/ibis/issues/9691)) ([11af489](https://github.com/ibis-project/ibis/commit/11af489aac9e9fbbdb68fb93c70adedf754a5c00))
* **mssql:** support case-sensitive collations ([#9700](https://github.com/ibis-project/ibis/issues/9700)) ([9382a0e](https://github.com/ibis-project/ibis/commit/9382a0e71dbafae1219979a2833a34f12e8501fc))
* **oracle:** support group_concat operator ([47d97ea](https://github.com/ibis-project/ibis/commit/47d97eafb3517431d1deb59e6240b0775987c6dc))
* **pyspark:** add support for pyarrow and python UDFs ([#9753](https://github.com/ibis-project/ibis/issues/9753)) ([02a1d48](https://github.com/ibis-project/ibis/commit/02a1d48bb3284303c56fde6e5c5b5a16fef2dc9c))
* **snowflake:** add `userinfo` URL parsing ([524a2fa](https://github.com/ibis-project/ibis/commit/524a2fa9e283b8678fc3e04aa72a7d5429162389))
* **ux:** allow window functions in predicates and compile to `QUALIFY` where possible ([#9787](https://github.com/ibis-project/ibis/issues/9787)) ([0370bcb](https://github.com/ibis-project/ibis/commit/0370bcb762ad24c27dee88aa39e7223a42b5b27e))

### Bug Fixes

* **algolia:** add parent class docstring to algolia index ([#9739](https://github.com/ibis-project/ibis/issues/9739)) ([3bc9799](https://github.com/ibis-project/ibis/commit/3bc97991b375b89d72bd83a1b359b8322d5a98ee))
* **bigquery:** repr geospatial values in interactive mode ([#9712](https://github.com/ibis-project/ibis/issues/9712)) ([bd8c93f](https://github.com/ibis-project/ibis/commit/bd8c93f8a6db2976c9f5dbed72c9d68f61435096))
* **case:** fix dshape, error on noncomparable and empty cases ([#9559](https://github.com/ibis-project/ibis/issues/9559)) ([ff2d019](https://github.com/ibis-project/ibis/commit/ff2d0190b29578a02fd7aee96e699621c57383ad))
* **compiler-internals:** define unsupported operations after simple operations ([#9755](https://github.com/ibis-project/ibis/issues/9755)) ([d9b6264](https://github.com/ibis-project/ibis/commit/d9b62640c3b54c3500b59cbdbe09ebd7cdd4a541))
* **deps:** update dependency atpublic to v5 ([#9697](https://github.com/ibis-project/ibis/issues/9697)) ([a4f3940](https://github.com/ibis-project/ibis/commit/a4f39405a113fdcc8b94474a764c58c4637f25f5))
* **deps:** update dependency sqlglot to >=23.4,<25.10 ([#9774](https://github.com/ibis-project/ibis/issues/9774)) ([7144257](https://github.com/ibis-project/ibis/commit/71442570331c1fe52af6cd2140dd2239c08979d6))
* **deps:** update dependency sqlglot to >=23.4,<25.8 ([#9696](https://github.com/ibis-project/ibis/issues/9696)) ([d4a2ea2](https://github.com/ibis-project/ibis/commit/d4a2ea2e9237b8bc0eb1b7faafe6e26c65cd7184))
* **deps:** update dependency sqlglot to >=23.4,<25.9 ([#9719](https://github.com/ibis-project/ibis/issues/9719)) ([b1d8b2e](https://github.com/ibis-project/ibis/commit/b1d8b2ed8e4eedf23967118924fa23df6548f7a2))
* **drop:** ignore order for `DropColumns` equality ([#9677](https://github.com/ibis-project/ibis/issues/9677)) ([ae1e112](https://github.com/ibis-project/ibis/commit/ae1e1128f2838d90dcc74be7b8229f2253434195))
* **druid:** get basic timestamp functionality working ([#9692](https://github.com/ibis-project/ibis/issues/9692)) ([6cd3eee](https://github.com/ibis-project/ibis/commit/6cd3eeeaa88323fd14a4388be9d42a64a22d2ea1))
* **duckdb:** avoid literals casts that might defeat optimization ([e4ff1bd](https://github.com/ibis-project/ibis/commit/e4ff1bd71088b0f49d5d2d40d7bc040710d705ef))
* **duckdb:** ensure that array remove doesn't remove `NULL`s ([f0c3be4](https://github.com/ibis-project/ibis/commit/f0c3be42b049c045a5c8764f8a2aa9021d138ade))
* **duckdb:** use `register` directly instead of calling `read_in_memory` ([597817f](https://github.com/ibis-project/ibis/commit/597817fbd59725d804acdf7d906e25519fee967b))
* **internals:** ensure that CTEs are emitted in topological order ([#9726](https://github.com/ibis-project/ibis/issues/9726)) ([acd7d82](https://github.com/ibis-project/ibis/commit/acd7d82ec3778b593c4aadfba6f62dd0119c7d28))
* **polars:** fix polars `std`/`var` to properly handle `sample`/`population` ([f83d84f](https://github.com/ibis-project/ibis/commit/f83d84f6a978936d27c538bced23164c79eb4de2))
* **polars:** remove bogus minus-one-week truncation ([ac519b2](https://github.com/ibis-project/ibis/commit/ac519b273cd6c7f151c9abee350698e49c7be3b0))
* **postgres:** handle enums by delegating to the parent class ([#9769](https://github.com/ibis-project/ibis/issues/9769)) ([3f01075](https://github.com/ibis-project/ibis/commit/3f010753db2ee8994a1f28528e5b8123b24f5969)), closes [#9295](https://github.com/ibis-project/ibis/issues/9295)
* **snowflake:** bring back `where` filter support in `group_concat`; fix `array_agg` ordering ([#9758](https://github.com/ibis-project/ibis/issues/9758)) ([6e7e4de](https://github.com/ibis-project/ibis/commit/6e7e4de53348085ebe4ecd38b96e7de62f613c83))
* **sql:** only return tables in `current_database` ([#9748](https://github.com/ibis-project/ibis/issues/9748)) ([c7f5717](https://github.com/ibis-project/ibis/commit/c7f57172afd599e6ce2927d4521806b9b74720e8))
* **types:** fix histogram bin allocation ([#9711](https://github.com/ibis-project/ibis/issues/9711)) ([6634864](https://github.com/ibis-project/ibis/commit/6634864365055abd2a6a01f21736718f20def2b8)), closes [#9687](https://github.com/ibis-project/ibis/issues/9687)

### Documentation

* **algolia:** add custom attributes to backend and core methods ([#9730](https://github.com/ibis-project/ibis/issues/9730)) ([d9473cf](https://github.com/ibis-project/ibis/commit/d9473cfb5da7de5b10bc86e431d2d18d696c9125))
* **browser-repl:** fix jupyterlite build ([#9762](https://github.com/ibis-project/ibis/issues/9762)) ([f403aa1](https://github.com/ibis-project/ibis/commit/f403aa1afcb30ff640e9c6ee56f5ddd14a1363e8))
* fix spelling in pivot_longer explanation ([#9780](https://github.com/ibis-project/ibis/issues/9780)) ([3201d8b](https://github.com/ibis-project/ibis/commit/3201d8b57d1fc65c785c51732fb2dc85f8b2bd03))
* fix typo in `drop` method docstring ([#9727](https://github.com/ibis-project/ibis/issues/9727)) ([4cf0014](https://github.com/ibis-project/ibis/commit/4cf0014aafa11ad008389961ab8232c7de87b687))
* **presentations:** update overview slides ([#9685](https://github.com/ibis-project/ibis/issues/9685)) ([d3a2c0c](https://github.com/ibis-project/ibis/commit/d3a2c0c5fb174b0dfa65d9a425bec498a6ad2429))
* replace all double graves with single graves ([#9679](https://github.com/ibis-project/ibis/issues/9679)) ([dd26d60](https://github.com/ibis-project/ibis/commit/dd26d6016e0d1ec08779fac8627d35555ad3d0fa))

### Refactors

* **dependencies:** pandas and numpy are now optional for non-backend installs ([#9564](https://github.com/ibis-project/ibis/issues/9564)) ([cff210a](https://github.com/ibis-project/ibis/commit/cff210af96323e200a31888b070d126be20a5eb0))
* **duckdb:** use replace to generate less sql ([#9713](https://github.com/ibis-project/ibis/issues/9713)) ([f89aa32](https://github.com/ibis-project/ibis/commit/f89aa32e8de1f4a7d2bcc7379c690003fe4e2b51))
* **internals:** remove unnecessary dynamism in `drop` method ([#9682](https://github.com/ibis-project/ibis/issues/9682)) ([5ac84c5](https://github.com/ibis-project/ibis/commit/5ac84c51c985e654bdc6f84540319649481a07c6))
* **pandas:** remove unreachable code in pandas backend ([#9786](https://github.com/ibis-project/ibis/issues/9786)) ([dc6bfe2](https://github.com/ibis-project/ibis/commit/dc6bfe2adae9235b0529aa20d4ef5a65ec2fd4b2))
* **polars:** delete some dead versioning code ([b23c5a3](https://github.com/ibis-project/ibis/commit/b23c5a3a66940086e317e3f1e1aa50f61c587f4c))
* **polars:** remove casting where possible; handle conversion on output ([#9673](https://github.com/ibis-project/ibis/issues/9673)) ([8717629](https://github.com/ibis-project/ibis/commit/871762929489d53cba288ab4b7028b432184734b))
* **polars:** remove extra backwards compatibility code no longer in use after 1.0 upgrade ([feb12f4](https://github.com/ibis-project/ibis/commit/feb12f48fcfd139622db68e3f5982831f8ff9666))
* **sql:** make compilers usable with a base install ([#9766](https://github.com/ibis-project/ibis/issues/9766)) ([84a786d](https://github.com/ibis-project/ibis/commit/84a786d236912839a4be4fb05b6e2e8097bb01a3))
* **table_loc:** return consistent object from catalog.db parsing ([#9743](https://github.com/ibis-project/ibis/issues/9743)) ([1ae2a37](https://github.com/ibis-project/ibis/commit/1ae2a37156f3c347dd2411c601381a2eb13026c5))

### Performance

* **schema:** speed up failure case of duplicate keys when constructing schemas ([#9698](https://github.com/ibis-project/ibis/issues/9698)) ([870ac73](https://github.com/ibis-project/ibis/commit/870ac73ba47ab3ed04ec89da9b786abfeb991112))
* **selectors:** speed up the `c` selector by caching the column existence check ([fdaeb5c](https://github.com/ibis-project/ibis/commit/fdaeb5c010b2dd5ad7abc59899d1933a59b05607))

### Deprecations

* **duckdb:** deprecate `read_in_memory` ([#9666](https://github.com/ibis-project/ibis/issues/9666)) ([e13af72](https://github.com/ibis-project/ibis/commit/e13af724eb996cf3c1c23fd602f049d488dbaa35))
* **snowflake:** deprecate `from_snowpark` method ([#9680](https://github.com/ibis-project/ibis/issues/9680)) ([7254f65](https://github.com/ibis-project/ibis/commit/7254f65fdab82d3e155a9e7487e6e1fa46b005e5))

## [9.2.0](https://github.com/ibis-project/ibis/compare/9.1.0...9.2.0) (2024-07-22)

### Features
12 changes: 6 additions & 6 deletions flake.lock
9 changes: 3 additions & 6 deletions flake.nix
Expand Up @@ -109,10 +109,7 @@

inherit shellHook;

PGPASSWORD = "postgres";
MYSQL_PWD = "ibis";
MSSQL_SA_PASSWORD = "1bis_Testing!";
DRUID_URL = "druid://localhost:8082/druid/v2/sql";
PYSPARK_PYTHON = "${env}/bin/python";

# needed for mssql+pyodbc
ODBCSYSINI = pkgs.writeTextDir "odbcinst.ini" ''
Expand All @@ -125,9 +122,9 @@
in
rec {
packages = {
inherit (pkgs) ibis310 ibis311 ibis312;
inherit (pkgs) ibisCore310 ibisCore311 ibisCore312 ibisLocal310 ibisLocal311 ibisLocal312;

default = pkgs.ibis312;
default = pkgs.ibisCore312;

inherit (pkgs) update-lock-files gen-examples check-release-notes-spelling;
};
5 changes: 2 additions & 3 deletions ibis/__init__.py
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

__version__ = "9.2.0"
__version__ = "9.3.0"

import warnings
from typing import Any
Expand Down Expand Up @@ -100,7 +100,6 @@ def load_backend(name: str) -> BaseBackend:
# - compile
# - has_operation
# - _from_url
# - _to_sqlglot
#
# We also copy over the docstring from `do_connect` to the proxy `connect`
# method, since that's where all the backend-specific kwargs are currently
Expand All @@ -121,7 +120,7 @@ def connect(*args, **kwargs):
proxy.has_operation = backend.has_operation
proxy.name = name
proxy._from_url = backend._from_url
proxy._to_sqlglot = backend._to_sqlglot

# Add any additional methods that should be exposed at the top level
for attr in getattr(backend, "_top_level_methods", ()):
setattr(proxy, attr, getattr(backend, attr))
13 changes: 0 additions & 13 deletions ibis/backends/__init__.py
Expand Up @@ -1032,14 +1032,9 @@ def _register_in_memory_table(self, op: ops.InMemoryTable):

def _run_pre_execute_hooks(self, expr: ir.Expr) -> None:
"""Backend-specific hooks to run before an expression is executed."""
self._define_udf_translation_rules(expr)
self._register_udfs(expr)
self._register_in_memory_tables(expr)

def _define_udf_translation_rules(self, expr: ir.Expr):
if self.supports_python_udfs:
raise NotImplementedError(self.name)

def compile(
self,
expr: ir.Expr,
Expand All @@ -1048,14 +1043,6 @@ def compile(
"""Compile an expression."""
return self.compiler.to_sql(expr, params=params)

def _to_sqlglot(self, expr: ir.Expr, **kwargs) -> sg.exp.Expression:
"""Convert an Ibis expression to a sqlglot expression.
Called by `ibis.to_sql`; gives the backend an opportunity to generate
nicer SQL for human consumption.
"""
raise NotImplementedError(f"Backend '{self.name}' backend doesn't support SQL")

def execute(self, expr: ir.Expr) -> Any:
"""Execute an expression."""

177 changes: 41 additions & 136 deletions ibis/backends/bigquery/__init__.py
Expand Up @@ -19,6 +19,7 @@
from pydata_google_auth import cache

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.expr.operations as ops
import ibis.expr.schema as sch
Expand All @@ -32,9 +33,7 @@
schema_from_bigquery_table,
)
from ibis.backends.bigquery.datatypes import BigQuerySchema
from ibis.backends.bigquery.udf.core import PythonToJavaScriptTranslator
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import BigQueryCompiler
from ibis.backends.sql.datatypes import BigQueryType

if TYPE_CHECKING:
Expand Down Expand Up @@ -150,7 +149,7 @@ def _force_quote_table(table: sge.Table) -> sge.Table:

class Backend(SQLBackend, CanCreateDatabase, CanCreateSchema):
name = "bigquery"
compiler = BigQueryCompiler()
compiler = sc.bigquery.compiler
supports_in_memory_tables = True
supports_python_udfs = False

Expand Down Expand Up @@ -381,27 +380,27 @@ def do_connect(
auth_cache
Selects the behavior of the credentials cache.
``'default'``
`'default'``
Reads credentials from disk if available, otherwise
authenticates and caches credentials to disk.
``'reauth'``
`'reauth'``
Authenticates and caches credentials to disk.
``'none'``
`'none'``
Authenticates and does **not** cache credentials.
Defaults to ``'default'``.
Defaults to `'default'`.
partition_column
Identifier to use instead of default ``_PARTITIONTIME`` partition
column. Defaults to ``'PARTITIONTIME'``.
Identifier to use instead of default `_PARTITIONTIME` partition
column. Defaults to `'PARTITIONTIME'`.
client
A ``Client`` from the ``google.cloud.bigquery`` package. If not
set, one is created using the ``project_id`` and ``credentials``.
A `Client` from the `google.cloud.bigquery` package. If not
set, one is created using the `project_id` and `credentials`.
storage_client
A ``BigQueryReadClient`` from the
``google.cloud.bigquery_storage_v1`` package. If not set, one is
created using the ``project_id`` and ``credentials``.
A `BigQueryReadClient` from the
`google.cloud.bigquery_storage_v1` package. If not set, one is
created using the `project_id` and `credentials`.
location
Default location for BigQuery objects.
Expand Down Expand Up @@ -487,7 +486,7 @@ def from_connection(
storage_client: bqstorage.BigQueryReadClient | None = None,
dataset_id: str = "",
) -> Backend:
"""Create a BigQuery `Backend` from an existing ``Client``.
"""Create a BigQuery `Backend` from an existing `Client`.
Parameters
----------
Expand Down Expand Up @@ -587,19 +586,19 @@ def table(

# Default `catalog` to None unless we've parsed it out of the database/schema kwargs
# Raise if there are path specifications in both the name and as a kwarg
catalog = None if table_loc is None else table_loc.catalog
catalog = table_loc.args["catalog"] # args access will return None, not ''
if table.catalog:
if table_loc is not None and table_loc.catalog:
if table_loc.catalog:
raise com.IbisInputError(
"Cannot specify catalog both in the table name and as an argument"
)
else:
catalog = table.catalog

# Default `db` to None unless we've parsed it out of the database/schema kwargs
db = None if table_loc is None else table_loc.db
db = table_loc.args["db"] # args access will return None, not ''
if table.db:
if table_loc is not None and table_loc.db:
if table_loc.db:
raise com.IbisInputError(
"Cannot specify database both in the table name and as an argument"
)
Expand Down Expand Up @@ -652,44 +651,6 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
)
return BigQuerySchema.to_ibis(job.schema)

def _to_sqlglot(
self,
expr: ir.Expr,
limit: str | None = None,
params: Mapping[ir.Expr, Any] | None = None,
**kwargs,
) -> Any:
"""Compile an Ibis expression.
Parameters
----------
expr
Ibis expression
limit
For expressions yielding result sets; retrieve at most this number
of values/rows. Overrides any limit already set on the expression.
params
Named unbound parameters
kwargs
Keyword arguments passed to the compiler
Returns
-------
Any
The output of compilation. The type of this value depends on the
backend.
"""
self._define_udf_translation_rules(expr)
sql = super()._to_sqlglot(expr, limit=limit, params=params, **kwargs)

query = sql.transform(
_qualify_memtable,
dataset=getattr(self._session_dataset, "dataset_id", None),
project=getattr(self._session_dataset, "project", None),
).transform(_remove_null_ordering_from_unsupported_window)
return query

def raw_sql(self, query: str, params=None, page_size: int | None = None):
query_parameters = [
bigquery_param(
Expand Down Expand Up @@ -723,19 +684,25 @@ def current_database(self) -> str | None:
return self.dataset

def compile(
self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any
self,
expr: ir.Expr,
limit: str | None = None,
params=None,
pretty: bool = True,
**kwargs: Any,
):
"""Compile an Ibis expression to a SQL string."""
query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs)
udf_sources = []
for udf_node in expr.op().find(ops.ScalarUDF):
compile_func = getattr(
self, f"_compile_{udf_node.__input_type__.name.lower()}_udf"
)
if sql := compile_func(udf_node):
udf_sources.append(sql.sql(self.name, pretty=True))

sql = ";\n".join([*udf_sources, query.sql(dialect=self.name, pretty=True)])
session_dataset = self._session_dataset
query = self.compiler.to_sqlglot(
expr,
limit=limit,
params=params,
session_dataset_id=getattr(session_dataset, "dataset_id", None),
session_project=getattr(session_dataset, "project", None),
**kwargs,
)
queries = query if isinstance(query, list) else [query]
sql = ";\n".join(query.sql(self.dialect, pretty=pretty) for query in queries)
self._log(sql)
return sql

Expand Down Expand Up @@ -1178,68 +1145,6 @@ def _clean_up_cached_table(self, name):
force=True,
)

def _get_udf_source(self, udf_node: ops.ScalarUDF):
name = type(udf_node).__name__
type_mapper = self.compiler.udf_type_mapper

body = PythonToJavaScriptTranslator(udf_node.__func__).compile()
config = udf_node.__config__
libraries = config.get("libraries", [])

signature = [
sge.ColumnDef(
this=sg.to_identifier(name, quoted=self.compiler.quoted),
kind=type_mapper.from_ibis(param.annotation.pattern.dtype),
)
for name, param in udf_node.__signature__.parameters.items()
]

lines = ['"""']

if config.get("strict", True):
lines.append('"use strict";')

lines += [
body,
"",
f"return {udf_node.__func_name__}({', '.join(udf_node.argnames)});",
'"""',
]

func = sge.Create(
kind="FUNCTION",
this=sge.UserDefinedFunction(
this=sg.to_identifier(name), expressions=signature, wrapped=True
),
# not exactly what I had in mind, but it works
#
# quoting is too simplistic to handle multiline strings
expression=sge.Var(this="\n".join(lines)),
exists=False,
properties=sge.Properties(
expressions=[
sge.TemporaryProperty(),
sge.ReturnsProperty(this=type_mapper.from_ibis(udf_node.dtype)),
sge.StabilityProperty(
this="IMMUTABLE" if config.get("determinism") else "VOLATILE"
),
sge.LanguageProperty(this=sg.to_identifier("js")),
]
+ [
sge.Property(
this=sg.to_identifier("library"),
value=self.compiler.f.array(*libraries),
)
]
* bool(libraries)
),
)

return func

def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> None:
return self._get_udf_source(udf_node)

def _register_udfs(self, expr: ir.Expr) -> None:
"""No op because UDFs made with CREATE TEMPORARY FUNCTION must be followed by a query."""

Expand Down Expand Up @@ -1318,20 +1223,20 @@ def connect(
auth_cache
Selects the behavior of the credentials cache.
``'default'``
`'default'``
Reads credentials from disk if available, otherwise
authenticates and caches credentials to disk.
``'reauth'``
`'reauth'``
Authenticates and caches credentials to disk.
``'none'``
`'none'``
Authenticates and does **not** cache credentials.
Defaults to ``'default'``.
Defaults to `'default'`.
partition_column
Identifier to use instead of default ``_PARTITIONTIME`` partition
column. Defaults to ``'PARTITIONTIME'``.
Identifier to use instead of default `_PARTITIONTIME` partition
column. Defaults to `'PARTITIONTIME'`.
Returns
-------
21 changes: 13 additions & 8 deletions ibis/backends/bigquery/client.py
Expand Up @@ -2,10 +2,11 @@

from __future__ import annotations

import contextlib
import functools

import dateutil.parser
import google.cloud.bigquery as bq
import pandas as pd

import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -69,9 +70,9 @@ def bq_param_array(dtype: dt.Array, value, name):

@bigquery_param.register
def bq_param_timestamp(_: dt.Timestamp, value, name):
# TODO(phillipc): Not sure if this is the correct way to do this.
timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime()
return bq.ScalarQueryParameter(name, "TIMESTAMP", timestamp_value)
with contextlib.suppress(TypeError):
value = dateutil.parser.parse(value)
return bq.ScalarQueryParameter(name, "TIMESTAMP", value.isoformat())


@bigquery_param.register
Expand All @@ -96,9 +97,13 @@ def bq_param_boolean(_: dt.Boolean, value, name):

@bigquery_param.register
def bq_param_date(_: dt.Date, value, name):
return bq.ScalarQueryParameter(
name, "DATE", pd.Timestamp(value).to_pydatetime().date()
)
with contextlib.suppress(TypeError):
value = dateutil.parser.parse(value)

with contextlib.suppress(AttributeError):
value = value.date()

return bq.ScalarQueryParameter(name, "DATE", value.isoformat())


def rename_partitioned_column(table_expr, bq_table, partition_col):
Expand Down Expand Up @@ -134,7 +139,7 @@ def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str
project : str
A project name
dataset : Optional[str]
A ``<project>.<dataset>`` string or just a dataset name
A `<project>.<dataset>` string or just a dataset name
Examples
--------
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/converter.py
Expand Up @@ -9,7 +9,7 @@ def convert_GeoSpatial(cls, s, dtype, pandas_type):
import geopandas as gpd
import shapely as shp

return gpd.GeoSeries(shp.from_wkt(s))
return gpd.GeoSeries(shp.from_wkb(s))

convert_Point = convert_LineString = convert_Polygon = convert_MultiLineString = (
convert_MultiPoint

This file was deleted.

This file was deleted.

This file was deleted.

29 changes: 15 additions & 14 deletions ibis/backends/bigquery/tests/system/test_client.py
Expand Up @@ -199,11 +199,9 @@ def test_parted_column(con, kind):
assert t.columns == [expected_column, "string_col", "int_col"]


def test_cross_project_query(public, snapshot):
def test_cross_project_query(public):
table = public.table("posts_questions")
expr = table[table.tags.contains("ibis")][["title", "tags"]]
result = expr.compile()
snapshot.assert_match(result, "out.sql")
n = 5
df = expr.limit(n).execute()
assert len(df) == n
Expand All @@ -226,17 +224,6 @@ def test_exists_table_different_project(con):
assert "foobar" not in con.list_tables(database=dataset)


def test_multiple_project_queries(con, snapshot):
so = con.table(
"posts_questions",
database=("bigquery-public-data", "stackoverflow"),
)
trips = con.table("trips", database="nyc-tlc.yellow")
join = so.join(trips, so.tags == trips.rate_code)[[so.title]]
result = join.compile()
snapshot.assert_match(result, "out.sql")


def test_multiple_project_queries_execute(con):
posts_questions = con.table(
"posts_questions", database="bigquery-public-data.stackoverflow"
Expand Down Expand Up @@ -455,3 +442,17 @@ def test_complex_column_name(con):
)
result = con.to_pandas(expr)
assert result == 1


def test_geospatial_interactive(con, monkeypatch):
pytest.importorskip("geopandas")

monkeypatch.setattr(ibis.options, "interactive", True)
t = con.table("bigquery-public-data.geo_us_boundaries.zip_codes")
expr = (
t.filter(lambda t: t.zip_code_geom.geometry_type() == "ST_Polygon")
.head(1)
.zip_code_geom
)
result = repr(expr)
assert "POLYGON" in result
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ SELECT
`t0`.`value` <= 3
)
THEN 1
ELSE CAST(NULL AS INT64)
ELSE NULL
END AS `tmp`
FROM `t` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_difference(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_difference(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_intersection(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_intersection(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_union(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_union(`t0`.`geog0`, `t0`.`geog1`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_geogpoint(`t0`.`lon`, `t0`.`lat`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_geogpoint(`t0`.`lon`, `t0`.`lat`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_simplify(`t0`.`geog`, 5.2) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_simplify(`t0`.`geog`, 5.2) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_buffer(`t0`.`geog`, 5.2) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_buffer(`t0`.`geog`, 5.2) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_centroid(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_centroid(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_endpoint(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_endpoint(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_pointn(`t0`.`geog`, 3) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_pointn(`t0`.`geog`, 3) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_startpoint(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_startpoint(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
)
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
SELECT
st_union_agg(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
*
REPLACE (st_asbinary(`tmp`) AS `tmp`)
FROM (
SELECT
st_union_agg(`t0`.`geog`) AS `tmp`
FROM `t` AS `t0`
)
5 changes: 4 additions & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_core.py
Expand Up @@ -6,7 +6,10 @@

import pytest

from ibis.backends.bigquery.udf.core import PythonToJavaScriptTranslator, SymbolTable
from ibis.backends.sql.compilers.bigquery.udf.core import (
PythonToJavaScriptTranslator,
SymbolTable,
)


def test_symbol_table():
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_find.py
Expand Up @@ -2,7 +2,7 @@

import ast

from ibis.backends.bigquery.udf.find import find_names
from ibis.backends.sql.compilers.bigquery.udf.find import find_names
from ibis.util import is_iterable


8 changes: 4 additions & 4 deletions ibis/backends/clickhouse/__init__.py
Expand Up @@ -17,6 +17,7 @@
from clickhouse_connect.driver.external import ExternalData

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.config
import ibis.expr.operations as ops
Expand All @@ -26,7 +27,6 @@
from ibis.backends import BaseBackend, CanCreateDatabase
from ibis.backends.clickhouse.converter import ClickHousePandasData
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import ClickHouseCompiler
from ibis.backends.sql.compilers.base import C

if TYPE_CHECKING:
Expand All @@ -44,7 +44,7 @@ def _to_memtable(v):

class Backend(SQLBackend, CanCreateDatabase):
name = "clickhouse"
compiler = ClickHouseCompiler()
compiler = sc.clickhouse.compiler

# ClickHouse itself does, but the client driver does not
supports_temporary_tables = False
Expand Down Expand Up @@ -732,7 +732,7 @@ def create_table(
expression = None

if obj is not None:
expression = self._to_sqlglot(obj)
expression = self.compiler.to_sqlglot(obj)
external_tables.update(self._collect_in_memory_tables(obj))

code = sge.Create(
Expand All @@ -759,7 +759,7 @@ def create_view(
database: str | None = None,
overwrite: bool = False,
) -> ir.Table:
expression = self._to_sqlglot(obj)
expression = self.compiler.to_sqlglot(obj)
src = sge.Create(
this=sg.table(name, db=database),
kind="VIEW",
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ SELECT
WHEN empty(groupArray("t0"."string_col"))
THEN NULL
ELSE arrayStringConcat(groupArray("t0"."string_col"), ',')
END AS "GroupConcat(string_col, ',')"
END AS "GroupConcat(string_col, ',', ())"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ SELECT
WHEN empty(groupArrayIf("t0"."string_col", "t0"."bool_col" = 0))
THEN NULL
ELSE arrayStringConcat(groupArrayIf("t0"."string_col", "t0"."bool_col" = 0), ',')
END AS "GroupConcat(string_col, ',', Equals(bool_col, 0))"
END AS "GroupConcat(string_col, ',', (), Equals(bool_col, 0))"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ SELECT
WHEN empty(groupArray("t0"."string_col"))
THEN NULL
ELSE arrayStringConcat(groupArray("t0"."string_col"), '-')
END AS "GroupConcat(string_col, '-')"
END AS "GroupConcat(string_col, '-', ())"
FROM "functional_alltypes" AS "t0"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
indexOf(['a','b','c'], "t0"."string_col") - 1 AS "FindInSet(string_col, ('a', 'b', 'c'))"
indexOf(['a', 'b', 'c'], "t0"."string_col") - 1 AS "FindInSet(string_col, ('a', 'b', 'c'))"
FROM "functional_alltypes" AS "t0"
67 changes: 4 additions & 63 deletions ibis/backends/conftest.py
Expand Up @@ -10,7 +10,6 @@
from typing import TYPE_CHECKING, Any

import _pytest
import pandas as pd
import pytest
from packaging.requirements import Requirement
from packaging.version import parse as vparse
Expand Down Expand Up @@ -370,22 +369,6 @@ def _filter_none_from_raises(kwargs):
kwargs = _filter_none_from_raises(kwargs)
item.add_marker(pytest.mark.xfail(**kwargs))

# Something has been exposed as broken by a new test and it shouldn't be
# imperative for a contributor to fix it just because they happened to
# bring it to attention -- USE SPARINGLY
for marker in item.iter_markers(name="broken"):
if backend in marker.args[0]:
if (
item.location[0] in FIlES_WITH_STRICT_EXCEPTION_CHECK
and "raises" not in marker.kwargs.keys()
):
raise ValueError("broken requires a raises")

kwargs = marker.kwargs.copy()
kwargs.setdefault("reason", f"Feature is failing on {backend}")
kwargs = _filter_none_from_raises(kwargs)
item.add_marker(pytest.mark.xfail(**kwargs))

for marker in item.iter_markers(name="xfail_version"):
kwargs = marker.kwargs.copy()
kwargs = _filter_none_from_raises(kwargs)
Expand Down Expand Up @@ -587,7 +570,7 @@ def geo_df(geo):


@pytest.fixture
def temp_table(con) -> str:
def temp_table(con):
"""Return a temporary table name.
Parameters
Expand All @@ -606,7 +589,7 @@ def temp_table(con) -> str:


@pytest.fixture
def temp_table2(con) -> str:
def temp_table2(con):
name = util.gen_name("temp_table2")
yield name
with contextlib.suppress(NotImplementedError):
Expand All @@ -622,7 +605,7 @@ def temp_table_orig(con, temp_table):


@pytest.fixture
def temp_view(ddl_con) -> str:
def temp_view(ddl_con):
"""Return a temporary view name.
Parameters
Expand All @@ -641,7 +624,7 @@ def temp_view(ddl_con) -> str:


@pytest.fixture
def alternate_current_database(ddl_con, ddl_backend) -> str:
def alternate_current_database(ddl_con, ddl_backend):
"""Create a temporary database and yield its name. Drops the created
database upon completion.
Expand All @@ -664,48 +647,6 @@ def alternate_current_database(ddl_con, ddl_backend) -> str:
ddl_con.drop_database(name, force=True)


@pytest.fixture
def test_employee_schema() -> ibis.schema:
sch = ibis.schema(
[
("first_name", "string"),
("last_name", "string"),
("department_name", "string"),
("salary", "float64"),
]
)

return sch


@pytest.fixture
def test_employee_data_1():
df = pd.DataFrame(
{
"first_name": ["A", "B", "C"],
"last_name": ["D", "E", "F"],
"department_name": ["AA", "BB", "CC"],
"salary": [100.0, 200.0, 300.0],
}
)

return df


@pytest.fixture
def test_employee_data_2():
df2 = pd.DataFrame(
{
"first_name": ["X", "Y", "Z"],
"last_name": ["A", "B", "C"],
"department_name": ["XX", "YY", "ZZ"],
"salary": [400.0, 500.0, 600.0],
}
)

return df2


@pytest.fixture
def assert_sql(con, snapshot):
def checker(expr, file_name="out.sql"):
54 changes: 36 additions & 18 deletions ibis/backends/dask/executor.py
@@ -7,7 +7,6 @@
import dask.dataframe as dd
import numpy as np
import pandas as pd
from packaging.version import parse as vparse

import ibis.backends.dask.kernels as dask_kernels
import ibis.expr.operations as ops
@@ -97,23 +96,6 @@ def mapper(df, cases, results, default):

return cls.partitionwise(mapper, kwargs, name=op.name, dtype=dtype)

@classmethod
def visit(cls, op: ops.TimestampTruncate | ops.DateTruncate, arg, unit):
# TODO(kszucs): should use serieswise()
if vparse(pd.__version__) >= vparse("2.2"):
units = {"m": "min"}
else:
units = {"m": "Min", "ms": "L"}

unit = units.get(unit.short, unit.short)

if unit in "YMWD":
return arg.dt.to_period(unit).dt.to_timestamp()
try:
return arg.dt.floor(unit)
except ValueError:
return arg.dt.to_period(unit).dt.to_timestamp()

@classmethod
def visit(cls, op: ops.IntervalFromInteger, unit, **kwargs):
if unit.short in {"Y", "Q", "M", "W"}:
@@ -221,6 +203,42 @@ def agg(df):

return agg

@classmethod
def visit(cls, op: ops.First, arg, where, order_by, include_null):
if order_by:
raise UnsupportedOperationError(
"ordering of order-sensitive aggregations via `order_by` is "
"not supported for this backend"
)

def first(df):
def inner(arg):
if not include_null:
arg = arg.dropna()
return arg.iat[0] if len(arg) else None

return df.reduction(inner) if isinstance(df, dd.Series) else inner(df)

return cls.agg(first, arg, where)

@classmethod
def visit(cls, op: ops.Last, arg, where, order_by, include_null):
if order_by:
raise UnsupportedOperationError(
"ordering of order-sensitive aggregations via `order_by` is "
"not supported for this backend"
)

def last(df):
def inner(arg):
if not include_null:
arg = arg.dropna()
return arg.iat[-1] if len(arg) else None

return df.reduction(inner) if isinstance(df, dd.Series) else inner(df)

return cls.agg(last, arg, where)

@classmethod
def visit(cls, op: ops.Correlation, left, right, where, how):
if how == "pop":
12 changes: 1 addition & 11 deletions ibis/backends/dask/kernels.py
@@ -17,24 +17,14 @@
}


def maybe_pandas_reduction(func):
def inner(df):
return df.reduction(func) if isinstance(df, dd.Series) else func(df)

return inner


reductions = {
**pandas_kernels.reductions,
ops.Mode: lambda x: x.mode().loc[0],
ops.ApproxMedian: lambda x: x.median_approximate(),
ops.BitAnd: lambda x: x.reduction(np.bitwise_and.reduce),
ops.BitOr: lambda x: x.reduction(np.bitwise_or.reduce),
ops.BitXor: lambda x: x.reduction(np.bitwise_xor.reduce),
ops.Arbitrary: lambda x: x.reduction(pandas_kernels.first),
# Window functions are calculated locally using pandas
ops.Last: maybe_pandas_reduction(pandas_kernels.last),
ops.First: maybe_pandas_reduction(pandas_kernels.first),
ops.Arbitrary: lambda x: x.reduction(pandas_kernels.arbitrary),
}

serieswise = {
9 changes: 5 additions & 4 deletions ibis/backends/datafusion/__init__.py
@@ -15,6 +15,7 @@
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
@@ -23,7 +24,6 @@
from ibis import util
from ibis.backends import CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import DataFusionCompiler
from ibis.backends.sql.compilers.base import C
from ibis.common.dispatch import lazy_singledispatch
from ibis.expr.operations.udf import InputType
@@ -68,7 +68,7 @@ class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema,
name = "datafusion"
supports_in_memory_tables = True
supports_arrays = True
compiler = DataFusionCompiler()
compiler = sc.datafusion.compiler

@property
def version(self):
@@ -629,16 +629,17 @@ def create_table(
# If it's a memtable, it will get registered in the pre-execute hooks
self._run_pre_execute_hooks(table)

compiler = self.compiler
relname = "_"
query = sg.select(
*(
self.compiler.cast(
compiler.cast(
sg.column(col, table=relname, quoted=quoted), dtype
).as_(col, quoted=quoted)
for col, dtype in table.schema().items()
)
).from_(
self._to_sqlglot(table).subquery(
compiler.to_sqlglot(table).subquery(
sg.to_identifier(relname, quoted=quoted)
)
)
4 changes: 2 additions & 2 deletions ibis/backends/druid/__init__.py
@@ -10,11 +10,11 @@
import pydruid.db
import sqlglot as sg

import ibis.backends.sql.compilers as sc
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis import util
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import DruidCompiler
from ibis.backends.sql.compilers.base import STAR
from ibis.backends.sql.datatypes import DruidType

Expand All @@ -31,7 +31,7 @@

class Backend(SQLBackend):
name = "druid"
compiler = DruidCompiler()
compiler = sc.druid.compiler
supports_create_or_replace = False
supports_in_memory_tables = True

23 changes: 18 additions & 5 deletions ibis/backends/druid/tests/conftest.py
@@ -94,7 +94,7 @@ def run_query(session: Session, query: str) -> None:
class TestConf(ServiceBackendTest):
# druid has the same rounding behavior as postgres
check_dtype = False
returned_timestamp_unit = "s"
returned_timestamp_unit = "ms"
supports_arrays = False
native_bool = True
supports_structs = False
@@ -106,13 +106,26 @@ class TestConf(ServiceBackendTest):
@property
def functional_alltypes(self) -> ir.Table:
t = self.connection.table("functional_alltypes")
# The parquet loading for booleans appears to be broken in Druid, so
# I'm using this as a workaround to make the data match what's on disk.
return t.mutate(bool_col=1 - t.id % 2)
return t.mutate(
# The parquet loading for booleans appears to be broken in Druid, so
# I'm using this as a workaround to make the data match what's on disk.
bool_col=1 - t.id % 2,
# timestamp_col is loaded as a long because druid's type system is
# awful: it does 99% of the work of a proper timestamp type, but
# encodes it as an integer. I've never seen or heard of any other
# tool that calls itself a time series database or "good for
# working with time series", that lacks a first-class timestamp
# type.
timestamp_col=t.timestamp_col.to_timestamp(unit="ms"),
)

@property
def test_files(self) -> Iterable[Path]:
return self.data_dir.joinpath("parquet").glob("*.parquet")
return [
path
for path in self.data_dir.joinpath("parquet").glob("*.parquet")
if path.name != "functional_alltypes.parquet"
] + [self.data_dir.joinpath("csv", "functional_alltypes.csv")]

def _load_data(self, **_: Any) -> None:
"""Load test data into a druid backend instance.
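The `timestamp_col` workaround above leans on ibis's integer-to-timestamp conversion. As a rough standalone sketch of that call (the table and column names below are invented for illustration):

import ibis

# Hypothetical table with an epoch-milliseconds integer column
t = ibis.table({"event_id": "int64", "ts_ms": "int64"}, name="events")

# Reinterpret the integer column as a timestamp, assuming millisecond resolution
expr = t.mutate(ts=t.ts_ms.to_timestamp(unit="ms"))

With the fixture above, Druid's long-encoded column reaches the test suite as a proper timestamp rather than a raw integer.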
270 changes: 144 additions & 126 deletions ibis/backends/duckdb/__init__.py

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions ibis/backends/duckdb/converter.py
@@ -1,11 +1,9 @@
from __future__ import annotations

import numpy as np

from ibis.formats.pandas import PandasData


class DuckDBPandasData(PandasData):
@staticmethod
def convert_Array(s, dtype, pandas_type):
return s.replace(np.nan, None)
return s.replace(float("nan"), None)
@@ -1,3 +1,3 @@
SELECT
ST_DWITHIN("t0"."geom", "t0"."geom", CAST(3.0 AS DOUBLE)) AS "tmp"
ST_DWITHIN("t0"."geom", "t0"."geom", 3.0) AS "tmp"
FROM "t" AS "t0"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("p") AS p
*
REPLACE (ST_ASWKB("p") AS "p")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("p") AS p
*
REPLACE (ST_ASWKB("p") AS "p")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (1 0)') AS "p"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (0 0)') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (1 1)') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('POINT (2 2)') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('LINESTRING (0 0, 1 1, 2 2)') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('LINESTRING (2 2, 1 1, 0 0)') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('POLYGON ((0 0, 1 1, 2 2, 0 0))') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('MULTIPOLYGON (((0 0, 1 1, 2 2, 0 0)))') AS "result"
@@ -1,5 +1,6 @@
SELECT
ST_ASWKB("result") AS result
*
REPLACE (ST_ASWKB("result") AS "result")
FROM (
SELECT
ST_GEOMFROMTEXT('MULTILINESTRING ((0 0, 1 1, 2 2), (2 2, 1 1, 0 0))') AS "result"
87 changes: 84 additions & 3 deletions ibis/backends/duckdb/tests/test_client.py
@@ -266,11 +266,13 @@ def test_connect_duckdb(url, tmp_path):
@pytest.mark.parametrize(
"out_method, extension", [("to_csv", "csv"), ("to_parquet", "parquet")]
)
def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
def test_connect_local_file(out_method, extension, tmp_path):
df = pd.DataFrame({"a": [1, 2, 3]})
path = tmp_path / f"out.{extension}"
getattr(df, out_method)(path)
with pytest.warns(FutureWarning, match="v9.1"):
# ibis.connect uses con.register
con = ibis.connect(tmp_path / f"out.{extension}")
con = ibis.connect(path)
t = next(iter(con.tables.values()))
assert not t.head().execute().empty

@@ -322,3 +324,82 @@ def test_connect_named_in_memory_db():

default_memory_db = ibis.duckdb.connect()
assert "ork" not in default_memory_db.list_tables()


@pytest.mark.parametrize(
("url", "method_name"),
[
("hf://datasets/datasets-examples/doc-formats-csv-1/data.csv", "read_csv"),
("hf://datasets/datasets-examples/doc-formats-jsonl-1/data.jsonl", "read_json"),
(
"hf://datasets/datasets-examples/doc-formats-parquet-1/data/train-00000-of-00001.parquet",
"read_parquet",
),
],
ids=["csv", "jsonl", "parquet"],
)
@pytest.mark.xfail(
LINUX and SANDBOXED,
reason="nix on linux is not allowed to access the network and cannot download the httpfs extension",
raises=duckdb.Error,
)
def test_hugging_face(con, url, method_name):
method = getattr(con, method_name)
t = method(url)
assert t.count().execute() > 0


def test_multiple_tables_with_the_same_name(tmp_path):
# check within the same database
path = tmp_path / "test1.ddb"
with duckdb.connect(str(path)) as con:
con.execute("CREATE TABLE t (x INT)")
con.execute("CREATE SCHEMA s")
con.execute("CREATE TABLE s.t (y STRING)")

con = ibis.duckdb.connect(path)
t1 = con.table("t")
t2 = con.table("t", database="s")
assert t1.schema() == ibis.schema({"x": "int32"})
assert t2.schema() == ibis.schema({"y": "string"})

path = tmp_path / "test2.ddb"
with duckdb.connect(str(path)) as c:
c.execute("CREATE TABLE t (y DOUBLE[])")

# attach another catalog and check that too
con.attach(path, name="w")
t1 = con.table("t")
t2 = con.table("t", database="s")
assert t1.schema() == ibis.schema({"x": "int32"})
assert t2.schema() == ibis.schema({"y": "string"})

t3 = con.table("t", database="w.main")

assert t3.schema() == ibis.schema({"y": "array<float64>"})


@pytest.mark.parametrize(
"input",
[
{"columns": {"lat": "float64", "lon": "float64", "geom": "geometry"}},
{"types": {"geom": "geometry"}},
],
)
@pytest.mark.parametrize("all_varchar", [True, False])
@pytest.mark.xfail(
LINUX and SANDBOXED,
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
raises=duckdb.IOException,
)
@pytest.mark.xdist_group(name="duckdb-extensions")
def test_read_csv_with_types(tmp_path, input, all_varchar):
con = ibis.duckdb.connect()
data = b"""\
lat,lon,geom
1.0,2.0,POINT (1 2)
2.0,3.0,POINT (2 3)"""
path = tmp_path / "data.csv"
path.write_bytes(data)
t = con.read_csv(path, all_varchar=all_varchar, **input)
assert t.schema()["geom"].is_geospatial()
15 changes: 10 additions & 5 deletions ibis/backends/duckdb/tests/test_register.py
@@ -321,11 +321,13 @@ def test_re_read_in_memory_overwrite(con):
df_pandas_1 = pd.DataFrame({"a": ["a"], "b": [1], "d": ["hi"]})
df_pandas_2 = pd.DataFrame({"a": [1], "c": [1.4]})

table = con.read_in_memory(df_pandas_1, table_name="df")
with pytest.warns(FutureWarning, match="create_table"):
table = con.read_in_memory(df_pandas_1, table_name="df")
assert len(table.columns) == 3
assert table.schema() == ibis.schema([("a", "str"), ("b", "int"), ("d", "str")])

table = con.read_in_memory(df_pandas_2, table_name="df")
with pytest.warns(FutureWarning, match="create_table"):
table = con.read_in_memory(df_pandas_2, table_name="df")
assert len(table.columns) == 2
assert table.schema() == ibis.schema([("a", "int"), ("c", "float")])

@@ -415,7 +417,8 @@ def test_s3_403_fallback(con, httpserver, monkeypatch):

def test_register_numpy_str(con):
data = pd.DataFrame({"a": [np.str_("xyz"), None]})
result = con.read_in_memory(data)
with pytest.warns(FutureWarning, match="create_table"):
result = con.read_in_memory(data)
tm.assert_frame_equal(result.execute(), data)


@@ -428,7 +431,8 @@ def test_register_recordbatchreader_warns(con):
)
reader = table.to_reader()
sol = table.to_pandas()
t = con.read_in_memory(reader)
with pytest.warns(FutureWarning, match="create_table"):
t = con.read_in_memory(reader)

# First execute is fine
res = t.execute()
Expand All @@ -440,7 +444,8 @@ def test_register_recordbatchreader_warns(con):

# Re-registering over the name with a new reader is fine
reader = table.to_reader()
t = con.read_in_memory(reader, table_name=t.get_name())
with pytest.warns(FutureWarning, match="create_table"):
t = con.read_in_memory(reader, table_name=t.get_name())
res = t.execute()
tm.assert_frame_equal(res, sol)

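These tests now assert the FutureWarning that points users at `create_table` as the replacement for `read_in_memory`. A minimal sketch of the suggested call, assuming an in-memory DuckDB connection and a throwaway frame:

import ibis
import pandas as pd

con = ibis.duckdb.connect()  # in-memory database
df = pd.DataFrame({"a": [1, 2, 3]})

# Materialize the frame as a named table instead of calling read_in_memory
t = con.create_table("df", df)
print(t.count().execute())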
27 changes: 14 additions & 13 deletions ibis/backends/exasol/__init__.py
@@ -12,6 +12,7 @@
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
@@ -20,7 +21,6 @@
from ibis import util
from ibis.backends import CanCreateDatabase, CanCreateSchema
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import ExasolCompiler
from ibis.backends.sql.compilers.base import STAR, C

if TYPE_CHECKING:
@@ -39,7 +39,7 @@

class Backend(SQLBackend, CanCreateDatabase, CanCreateSchema):
name = "exasol"
compiler = ExasolCompiler()
compiler = sc.exasol.compiler
supports_temporary_tables = False
supports_create_or_replace = False
supports_in_memory_tables = False
@@ -345,16 +345,9 @@ def create_table(

if temp:
raise com.UnsupportedOperationError(
"Creating temp tables is not supported by Exasol."
f"Creating temp tables is not supported by {self.name}"
)

if database is not None and database != self.current_database:
raise com.UnsupportedOperationError(
"Creating tables in other databases is not supported by Exasol"
)
else:
database = None

quoted = self.compiler.quoted

temp_memtable_view = None
Expand All @@ -367,7 +360,7 @@ def create_table(

self._run_pre_execute_hooks(table)

query = self._to_sqlglot(table)
query = self.compiler.to_sqlglot(table)
else:
query = None

@@ -435,7 +428,11 @@ def drop_database(
raise NotImplementedError(
"`catalog` argument is not supported for the Exasol backend"
)
drop_schema = sg.exp.Drop(kind="SCHEMA", this=name, exists=force)
drop_schema = sg.exp.Drop(
kind="SCHEMA",
this=sg.to_identifier(name, quoted=self.compiler.quoted),
exists=force,
)
with self.begin() as con:
con.execute(drop_schema.sql(dialect=self.dialect))

@@ -446,7 +443,11 @@ def create_database(
raise NotImplementedError(
"`catalog` argument is not supported for the Exasol backend"
)
create_database = sg.exp.Create(kind="SCHEMA", this=name, exists=force)
create_database = sg.exp.Create(
kind="SCHEMA",
this=sg.to_identifier(name, quoted=self.compiler.quoted),
exists=force,
)
open_database = self.current_database
with self.begin() as con:
con.execute(create_database.sql(dialect=self.dialect))
22 changes: 9 additions & 13 deletions ibis/backends/flink/__init__.py
@@ -7,6 +7,7 @@
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as exc
import ibis.expr.operations as ops
import ibis.expr.schema as sch
@@ -23,7 +24,6 @@
RenameTable,
)
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import FlinkCompiler
from ibis.backends.tests.errors import Py4JJavaError
from ibis.expr.operations.udf import InputType
from ibis.util import gen_name
@@ -44,7 +44,7 @@

class Backend(SQLBackend, CanCreateDatabase, NoUrl):
name = "flink"
compiler = FlinkCompiler()
compiler = sc.flink.compiler
supports_temporary_tables = True
supports_python_udfs = True

@@ -321,26 +321,27 @@ def version(self) -> str:
def _register_udfs(self, expr: ir.Expr) -> None:
for udf_node in expr.op().find(ops.ScalarUDF):
register_func = getattr(
self, f"_compile_{udf_node.__input_type__.name.lower()}_udf"
self, f"_register_{udf_node.__input_type__.name.lower()}_udf"
)
register_func(udf_node)

def _register_udf(self, udf_node: ops.ScalarUDF):
import pyflink.table.udf
from pyflink.table.udf import udf

from ibis.backends.flink.datatypes import FlinkType

name = type(udf_node).__name__
self._table_env.drop_temporary_function(name)
udf = pyflink.table.udf.udf(

func = udf(
udf_node.__func__,
result_type=FlinkType.from_ibis(udf_node.dtype),
func_type=_INPUT_TYPE_TO_FUNC_TYPE[udf_node.__input_type__],
)
self._table_env.create_temporary_function(name, udf)
self._table_env.create_temporary_function(name, func)

_compile_pandas_udf = _register_udf
_compile_python_udf = _register_udf
_register_pandas_udf = _register_udf
_register_python_udf = _register_udf

def compile(
self,
@@ -354,11 +355,6 @@ def compile(
expr, params=params, pretty=pretty
) # Discard `limit` and other kwargs.

def _to_sqlglot(
self, expr: ir.Expr, params: Mapping[ir.Expr, Any] | None = None, **_: Any
) -> str:
return super()._to_sqlglot(expr, params=params)

def execute(self, expr: ir.Expr, **kwargs: Any) -> Any:
"""Execute an expression."""
self._register_udfs(expr)
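The `_register_*_udf` hooks renamed above fire whenever an executed expression contains a scalar UDF. For context, a Python scalar UDF that would take this path can be declared roughly as follows (the function itself is just a placeholder):

import ibis

@ibis.udf.scalar.python
def add_one(x: int) -> int:
    # evaluated row by row through the backend's Python UDF machinery
    return x + 1

On Flink, executing an expression that uses `add_one` wraps the function with pyflink's `udf()` and registers it as a temporary function, as in the diff above.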
6 changes: 3 additions & 3 deletions ibis/backends/impala/__init__.py
@@ -13,6 +13,7 @@
import sqlglot.expressions as sge
from impala.error import Error as ImpylaError

import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.config
import ibis.expr.schema as sch
@@ -38,7 +39,6 @@
wrap_udf,
)
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import ImpalaCompiler

if TYPE_CHECKING:
from collections.abc import Mapping
@@ -64,7 +64,7 @@

class Backend(SQLBackend):
name = "impala"
compiler = ImpalaCompiler()
compiler = sc.impala.compiler

supports_in_memory_tables = True

@@ -132,7 +132,7 @@ def do_connect(
ca_cert
Local path to 3rd party CA certificate or copy of server
certificate for self-signed certificates. If SSL is enabled, but
this argument is ``None``, then certificate validation is skipped.
this argument is `None`, then certificate validation is skipped.
user
LDAP user to authenticate
password
@@ -1,3 +1,3 @@
SELECT
FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `First(double_col)`
FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `First(double_col, ())`
FROM `functional_alltypes` AS `t0`
@@ -1,3 +1,3 @@
SELECT
LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `Last(double_col)`
LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `Last(double_col, ())`
FROM `functional_alltypes` AS `t0`
@@ -34,7 +34,7 @@ FROM (
`t0`.`f` <= 50
)
THEN 3
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `tier`,
COUNT(*) AS `CountStar(alltypes)`
FROM `alltypes` AS `t0`
@@ -18,6 +18,6 @@ SELECT
`t0`.`f` < 50
)
THEN 2
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -18,6 +18,6 @@ SELECT
`t0`.`f` <= 50
)
THEN 2
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -22,6 +22,6 @@ SELECT
THEN 3
WHEN 50 <= `t0`.`f`
THEN 4
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -18,6 +18,6 @@ SELECT
`t0`.`f` <= 50
)
THEN 2
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -20,6 +20,6 @@ SELECT
`t0`.`f` <= 50
)
THEN 3
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -1,9 +1,3 @@
SELECT
CASE
WHEN `t0`.`f` <= 10
THEN 0
WHEN 10 < `t0`.`f`
THEN 1
ELSE CAST(NULL AS TINYINT)
END AS `Bucket(f, ())`
CASE WHEN `t0`.`f` <= 10 THEN 0 WHEN 10 < `t0`.`f` THEN 1 ELSE NULL END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -18,6 +18,6 @@ SELECT
`t0`.`f` <= 50
)
THEN 2
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -1,9 +1,3 @@
SELECT
CASE
WHEN `t0`.`f` < 10
THEN 0
WHEN 10 <= `t0`.`f`
THEN 1
ELSE CAST(NULL AS TINYINT)
END AS `Bucket(f, ())`
CASE WHEN `t0`.`f` < 10 THEN 0 WHEN 10 <= `t0`.`f` THEN 1 ELSE NULL END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -1,9 +1,3 @@
SELECT
CAST(CASE
WHEN `t0`.`f` < 10
THEN 0
WHEN 10 <= `t0`.`f`
THEN 1
ELSE CAST(NULL AS TINYINT)
END AS INT) AS `Cast(Bucket(f, ()), int32)`
CAST(CASE WHEN `t0`.`f` < 10 THEN 0 WHEN 10 <= `t0`.`f` THEN 1 ELSE NULL END AS INT) AS `Cast(Bucket(f, ()), int32)`
FROM `alltypes` AS `t0`
@@ -1,9 +1,3 @@
SELECT
CAST(CASE
WHEN `t0`.`f` < 10
THEN 0
WHEN 10 <= `t0`.`f`
THEN 1
ELSE CAST(NULL AS TINYINT)
END AS DOUBLE) AS `Cast(Bucket(f, ()), float64)`
CAST(CASE WHEN `t0`.`f` < 10 THEN 0 WHEN 10 <= `t0`.`f` THEN 1 ELSE NULL END AS DOUBLE) AS `Cast(Bucket(f, ()), float64)`
FROM `alltypes` AS `t0`
@@ -20,6 +20,6 @@ SELECT
`t0`.`f` <= 50
)
THEN 3
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -22,6 +22,6 @@ SELECT
THEN 3
WHEN 50 < `t0`.`f`
THEN 4
ELSE CAST(NULL AS TINYINT)
ELSE NULL
END AS `Bucket(f, ())`
FROM `alltypes` AS `t0`
@@ -4,6 +4,6 @@ SELECT
THEN `t0`.`d` * 2
WHEN `t0`.`c` < 0
THEN `t0`.`a` * 2
ELSE CAST(NULL AS BIGINT)
END AS `SearchedCase((Greater(f, 0), Less(c, 0)), (Multiply(d, 2), Multiply(a, 2)), Cast(None, int64))`
ELSE NULL
END AS `SearchedCase((Greater(f, 0), Less(c, 0)), (Multiply(d, 2), Multiply(a, 2)), None)`
FROM `alltypes` AS `t0`
113 changes: 62 additions & 51 deletions ibis/backends/mssql/__init__.py
@@ -14,6 +14,7 @@
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
@@ -22,7 +23,6 @@
from ibis import util
from ibis.backends import CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import MSSQLCompiler
from ibis.backends.sql.compilers.base import STAR, C

if TYPE_CHECKING:
@@ -54,9 +54,28 @@ def datetimeoffset_to_datetime(value):
)


# For testing we use the collation "Latin1_General_100_BIN2_UTF8"
# which is case-sensitive and supports UTF8.
# This allows us to (hopefully) support both case-sensitive and case-insensitive
# collations.
# It DOES mean, though, that we need to be correct in our usage of case when
# referring to system tables and views.
# So, the correct casing for the tables and views we use often (and the
# corresponding columns):
#
#
# Info schema tables:
# - INFORMATION_SCHEMA.COLUMNS
# - INFORMATION_SCHEMA.SCHEMATA
# - INFORMATION_SCHEMA.TABLES
# Temp table location: tempdb.dbo
# Catalogs: sys.databases
# Databases: sys.schemas


class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl):
name = "mssql"
compiler = MSSQLCompiler()
compiler = sc.mssql.compiler
supports_create_or_replace = False

@property
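To make the casing note above concrete: under a binary, case-sensitive collation, references to the info schema views have to match the casing listed in the comment. A rough illustration, assuming `con` is an existing ibis MSSQL connection created elsewhere:

from contextlib import closing

query = (
    "SELECT table_name "
    "FROM INFORMATION_SCHEMA.TABLES "  # exact casing required under BIN2 collations
    "WHERE table_schema = 'dbo'"
)
with closing(con.raw_sql(query)) as cur:
    tables = [name for (name,) in cur.fetchall()]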
@@ -112,11 +131,15 @@ def do_connect(
if user is None and password is None:
kwargs.setdefault("Trusted_Connection", "yes")

if database is not None:
# passing database=None tries to interpolate "None" into the
# connection string and use it as a database
kwargs["database"] = database

self.con = pyodbc.connect(
user=user,
server=f"{host},{port}",
password=password,
database=database,
driver=driver,
**kwargs,
)
@@ -155,29 +178,28 @@ def get_schema(
if name.startswith("ibis_cache_"):
catalog, database = ("tempdb", "dbo")
name = "##" + name
conditions = [sg.column("table_name").eq(sge.convert(name))]

if database is not None:
conditions.append(sg.column("table_schema").eq(sge.convert(database)))

query = (
sg.select(
"column_name",
"data_type",
"is_nullable",
"numeric_precision",
"numeric_scale",
"datetime_precision",
C.column_name,
C.data_type,
C.is_nullable,
C.numeric_precision,
C.numeric_scale,
C.datetime_precision,
)
.from_(
sg.table(
"columns",
db="information_schema",
"COLUMNS",
db="INFORMATION_SCHEMA",
catalog=catalog or self.current_catalog,
)
)
.where(*conditions)
.order_by("ordinal_position")
.where(
C.table_name.eq(sge.convert(name)),
C.table_schema.eq(sge.convert(database or self.current_database)),
)
.order_by(C.ordinal_position)
)

with self._safe_raw_sql(query) as cur:
@@ -212,23 +234,33 @@ def get_schema(
return sch.Schema(mapping)

def _get_schema_using_query(self, query: str) -> sch.Schema:
# Docs describing usage of dm_exec_describe_first_result_set
# https://learn.microsoft.com/en-us/sql/relational-databases/system-dynamic-management-views/sys-dm-exec-describe-first-result-set-transact-sql?view=sql-server-ver16
tsql = sge.convert(str(query)).sql(self.dialect)
query = f"EXEC sp_describe_first_result_set @tsql = N{tsql}"

# For some reason when using "Latin1_General_100_BIN2_UTF8"
# the stored procedure `sp_describe_first_result_set` starts throwing errors about DLL loading.
# This "dynamic management function" uses the same algorithm and allows
# us to pre-filter the columns we want back.
# The syntax is:
# `sys.dm_exec_describe_first_result_set(@tsql, @params, @include_browse_information)`
query = f"""SELECT name,
is_nullable AS nullable,
system_type_name,
precision,
scale
FROM
sys.dm_exec_describe_first_result_set({tsql}, NULL, 0)"""
with self._safe_raw_sql(query) as cur:
rows = cur.fetchall()

schema = {}
for (
_,
_,
name,
nullable,
_,
system_type_name,
_,
precision,
scale,
*_,
) in sorted(rows, key=itemgetter(1)):
newtyp = self.compiler.type_mapper.from_string(
system_type_name, nullable=nullable
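The comment above documents the dynamic management function that replaces `sp_describe_first_result_set`. A rough standalone version of the same call, again assuming `con` is an existing ibis MSSQL connection and using an arbitrary query:

from contextlib import closing

describe = """\
SELECT name, is_nullable AS nullable, system_type_name, precision, scale
FROM sys.dm_exec_describe_first_result_set(
    N'SELECT 1 AS x, CAST(NULL AS VARCHAR(10)) AS y', NULL, 0
)"""
with closing(con.raw_sql(describe)) as cur:
    for name, nullable, type_name, precision, scale in cur.fetchall():
        print(name, type_name, nullable, precision, scale)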
@@ -454,26 +486,20 @@ def list_tables(
"""
table_loc = self._warn_and_create_table_loc(database, schema)
catalog, db = self._to_catalog_db_tuple(table_loc)
conditions = []

if table_loc is not None:
conditions.append(C.table_schema.eq(sge.convert(db)))

sql = (
sg.select("table_name")
sg.select(C.table_name)
.from_(
sg.table(
"tables",
db="information_schema",
"TABLES",
db="INFORMATION_SCHEMA",
catalog=catalog if catalog is not None else self.current_catalog,
)
)
.where(C.table_schema.eq(sge.convert(db or self.current_database)))
.distinct()
)

if conditions:
sql = sql.where(*conditions)

sql = sql.sql(self.dialect)

with self._safe_raw_sql(sql) as cur:
@@ -486,8 +512,8 @@ def list_databases(
) -> list[str]:
query = sg.select(C.schema_name).from_(
sg.table(
"schemata",
db="information_schema",
"SCHEMATA",
db="INFORMATION_SCHEMA",
catalog=catalog or self.current_catalog,
)
)
@@ -571,7 +597,7 @@ def create_table(

self._run_pre_execute_hooks(table)

query = self._to_sqlglot(table)
query = self.compiler.to_sqlglot(table)
else:
query = None

@@ -693,21 +719,6 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
if not df.empty:
cur.executemany(insert_stmt, data)

def _to_sqlglot(
self, expr: ir.Expr, *, limit: str | None = None, params=None, **_: Any
):
"""Compile an Ibis expression to a sqlglot object."""
table_expr = expr.as_table()
conversions = {
name: ibis.ifelse(table_expr[name], 1, 0).cast("boolean")
for name, typ in table_expr.schema().items()
if typ.is_boolean()
}

if conversions:
table_expr = table_expr.mutate(**conversions)
return super()._to_sqlglot(table_expr, limit=limit, params=params)

def _cursor_batches(
self,
expr: ir.Expr,
14 changes: 12 additions & 2 deletions ibis/backends/mssql/tests/conftest.py
@@ -1,7 +1,7 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

import pytest

@@ -35,13 +35,23 @@ class TestConf(ServiceBackendTest):
def test_files(self) -> Iterable[Path]:
return self.data_dir.joinpath("csv").glob("*.csv")

def postload(self, **kw: Any):
self.connection = self.connect(database=IBIS_TEST_MSSQL_DB, **kw)

def _load_data(self, *, database: str = IBIS_TEST_MSSQL_DB, **_):
with self.connection._safe_raw_sql(
"IF DB_ID('ibis_testing') is NULL BEGIN CREATE DATABASE [ibis_testing] END"
):
pass

super()._load_data(database=database, **_)

@staticmethod
def connect(*, tmpdir, worker_id, **kw):
return ibis.mssql.connect(
host=MSSQL_HOST,
user=MSSQL_USER,
password=MSSQL_PASS,
database=IBIS_TEST_MSSQL_DB,
port=MSSQL_PORT,
driver=MSSQL_PYODBC_DRIVER,
autocommit=True,
17 changes: 15 additions & 2 deletions ibis/backends/mssql/tests/test_client.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
@@ -35,11 +36,23 @@
("DATETIME", dt.Timestamp(scale=3)),
# Characters strings
("CHAR", dt.string),
("TEXT", dt.string),
param(
"TEXT",
dt.string,
marks=pytest.mark.notyet(
["mssql"], reason="Not supported by UTF-8 aware collations"
),
),
("VARCHAR", dt.string),
# Unicode character strings
("NCHAR", dt.string),
("NTEXT", dt.string),
param(
"NTEXT",
dt.string,
marks=pytest.mark.notyet(
["mssql"], reason="Not supported by UTF-8 aware collations"
),
),
("NVARCHAR", dt.string),
# Binary strings
("BINARY", dt.binary),
28 changes: 10 additions & 18 deletions ibis/backends/mysql/__init__.py
@@ -10,12 +10,12 @@
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import numpy as np
import pymysql
import sqlglot as sg
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as com
import ibis.expr.operations as ops
import ibis.expr.schema as sch
@@ -24,7 +24,6 @@
from ibis.backends import CanCreateDatabase
from ibis.backends.mysql.datatypes import _type_from_cursor_info
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers import MySQLCompiler
from ibis.backends.sql.compilers.base import STAR, TRUE, C

if TYPE_CHECKING:
@@ -38,7 +37,7 @@

class Backend(SQLBackend, CanCreateDatabase):
name = "mysql"
compiler = MySQLCompiler()
compiler = sc.mysql.compiler
supports_create_or_replace = False

def _from_url(self, url: ParseResult, **kwargs):
@@ -318,7 +317,7 @@ def list_tables(
[deprecated] The schema to perform the list against.
database
Database to list tables from. Default behavior is to show tables in
the current database (``self.current_database``).
the current database (`self.current_database`).
"""
if schema is not None:
self._warn_schema()
Expand All @@ -343,11 +342,11 @@ def list_tables(

conditions = [TRUE]

if table_loc is not None:
if (sg_cat := table_loc.args["catalog"]) is not None:
sg_cat.args["quoted"] = False
if (sg_db := table_loc.args["db"]) is not None:
sg_db.args["quoted"] = False
if (sg_cat := table_loc.args["catalog"]) is not None:
sg_cat.args["quoted"] = False
if (sg_db := table_loc.args["db"]) is not None:
sg_db.args["quoted"] = False
if table_loc.catalog or table_loc.db:
conditions = [C.table_schema.eq(sge.convert(table_loc.sql(self.name)))]

col = "table_name"
@@ -397,13 +396,6 @@ def create_table(
if obj is None and schema is None:
raise ValueError("Either `obj` or `schema` must be specified")

if database is not None and database != self.current_database:
raise com.UnsupportedOperationError(
"Creating tables in other databases is not supported by Postgres"
)
else:
database = None

properties = []

if temp:
@@ -419,7 +411,7 @@

self._run_pre_execute_hooks(table)

query = self._to_sqlglot(table)
query = self.compiler.to_sqlglot(table)
else:
query = None

@@ -516,7 +508,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:

df = op.data.to_frame()
# nan can not be used with MySQL
df = df.replace(np.nan, None)
df = df.replace(float("nan"), None)

data = df.itertuples(index=False)
sql = self._build_insert_template(
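Both this hunk and the Oracle change below drop the numpy import and normalize NaN with plain floats. A tiny sketch of the pattern being relied on (the frame is made up):

import pandas as pd

df = pd.DataFrame({"x": [1.0, float("nan"), 3.0]})
# NaN has no MySQL representation; convert it to None so the driver sends NULL
df = df.replace(float("nan"), None)
rows = list(df.itertuples(index=False))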
13 changes: 6 additions & 7 deletions ibis/backends/oracle/__init__.py
@@ -11,22 +11,21 @@
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import numpy as np
import oracledb
import sqlglot as sg
import sqlglot.expressions as sge

import ibis
import ibis.backends.sql.compilers as sc
import ibis.common.exceptions as exc
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends import CanListDatabase, CanListSchema
from ibis.backends.sql import STAR, SQLBackend
from ibis.backends.sql.compilers import OracleCompiler
from ibis.backends.sql.compilers.base import C
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers.base import STAR, C

if TYPE_CHECKING:
from urllib.parse import ParseResult
@@ -79,7 +78,7 @@ def metadata_row_to_type(

class Backend(SQLBackend, CanListDatabase, CanListSchema):
name = "oracle"
compiler = OracleCompiler()
compiler = sc.oracle.compiler

@cached_property
def version(self):
@@ -420,7 +419,7 @@ def create_table(

self._run_pre_execute_hooks(table)

query = self._to_sqlglot(table)
query = self.compiler.to_sqlglot(table)
else:
query = None

@@ -534,7 +533,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
properties=sge.Properties(expressions=[sge.TemporaryProperty()]),
).sql(self.name)

data = op.data.to_frame().replace({np.nan: None})
data = op.data.to_frame().replace(float("nan"), None)
insert_stmt = self._build_insert_template(
name, schema=schema, placeholder=":{i:d}"
)
18 changes: 0 additions & 18 deletions ibis/backends/pandas/__init__.py
@@ -245,24 +245,6 @@ def drop_table(self, name: str, *, force: bool = False) -> None:

def _convert_object(self, obj: Any) -> Any:
return _convert_object(obj, self)
if isinstance(obj, pd.DataFrame):
return obj
elif isinstance(obj, ir.Table):
op = obj.op()
if isinstance(op, ops.InMemoryTable):
return op.data.to_frame()
else:
raise com.BackendConversionError(
f"Unable to convert {obj.__class__} object "
f"to backend type: {self.__class__.backend_table_type}"
)
elif isinstance(obj, pa.Table):
return obj.to_pandas()
else:
raise com.BackendConversionError(
f"Unable to convert {obj.__class__} object "
f"to backend type: {self.__class__.backend_table_type}"
)

@classmethod
@lru_cache
Expand Down
Loading