Showing with 173 additions and 44 deletions.
  1. +3 −0 .github/workflows/ibis-backends.yml
  2. +1 −1 .github/workflows/ibis-tpch-queries.yml
  3. +1 −1 .releaserc.js
  4. +1 −1 ci/release/dry_run.sh
  5. +1 −1 ci/release/run.sh
  6. +1 −0 docker/trino/catalog/hive.properties
  7. +21 −0 docs/release_notes.md
  8. +1 −1 ibis/__init__.py
  9. +16 −1 ibis/backends/base/sql/registry/literal.py
  10. +3 −8 ibis/backends/bigquery/registry.py
  11. +1 −0 ...ds/bigquery/tests/unit/snapshots/test_compiler/test_literal_string/escape_ascii_sequences/out.sql
  12. +1 −0 ...backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_string/escape_backslash/out.sql
  13. +1 −0 ibis/backends/bigquery/tests/unit/snapshots/test_compiler/test_literal_string/escape_quote/out.sql
  14. +1 −0 ...uery/tests/unit/snapshots/test_compiler/test_literal_string/not_escape_special_characters/out.sql
  15. +14 −0 ibis/backends/bigquery/tests/unit/test_compiler.py
  16. +1 −1 ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_extract/out.sql
  17. +1 −1 ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_replace/out.sql
  18. +1 −1 ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/re_search/out.sql
  19. +1 −1 ibis/backends/impala/tests/snapshots/test_string_builtins/test_string_builtins/rlike/out.sql
  20. +0 −14 ibis/backends/tests/test_string.py
  21. +57 −10 ibis/backends/trino/__init__.py
  22. +44 −1 ibis/backends/trino/tests/test_client.py
  23. +1 −1 pyproject.toml
3 changes: 3 additions & 0 deletions .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ jobs:
- name: remove snowflake deps that are not compatible with everything else
run: poetry remove snowflake-sqlalchemy snowflake-connector-python

- name: constrain pandas
run: poetry add 'pandas<2.1'

- name: update deps originally constrained by snowflake
run: poetry update numpy pandas pyarrow datafusion

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ibis-tpch-queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
working-directory: tpc-queries
run: |
python -m pip install -r requirements.txt
python -m pip install -U 'duckdb>=0.4' 'duckdb-engine>=0.6'
python -m pip install -U 'duckdb>=0.4' 'duckdb-engine>=0.6' 'pandas<2.1'
- name: install ibis
run: python -m pip install ".[sqlite,duckdb]"
Expand Down
2 changes: 1 addition & 1 deletion .releaserc.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"use strict";

module.exports = {
branches: ["master"],
branches: ["master", "next-feature"],
tagFormat: "${version}",
preset: "conventionalcommits",
plugins: [
Expand Down
2 changes: 1 addition & 1 deletion ci/release/dry_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ nix develop '.#release' -c npx --yes \
-p "@semantic-release/exec" \
-p "@semantic-release/git" \
-p "semantic-release-replace-plugin@1.2.0" \
-p "conventional-changelog-conventionalcommits" \
-p "conventional-changelog-conventionalcommits@6.1.0" \
semantic-release \
--ci \
--dry-run \
Expand Down
2 changes: 1 addition & 1 deletion ci/release/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ nix develop '.#release' -c npx --yes \
-p "@semantic-release/exec" \
-p "@semantic-release/git" \
-p "semantic-release-replace-plugin@1.2.0" \
-p "conventional-changelog-conventionalcommits" \
-p "conventional-changelog-conventionalcommits@6.1.0" \
semantic-release --ci
1 change: 1 addition & 0 deletions docker/trino/catalog/hive.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
connector.name=hive

hive.allow-drop-table=true
hive.allow-rename-table=true
hive.ignore-absent-partitions=true
hive.metastore.thrift.delete-files-on-drop=true
hive.metastore.uri=thrift://hive-metastore:9083
Expand Down
21 changes: 21 additions & 0 deletions docs/release_notes.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
Release Notes
---

## [6.2.0](https://github.com/ibis-project/ibis/compare/6.1.0...6.2.0) (2023-08-31)


### Features

* **trino:** add source application to trino backend ([cf5fdb9](https://github.com/ibis-project/ibis/commit/cf5fdb9d29567c72680d94f66c9445d76d9ccbef))


### Bug Fixes

* **bigquery,impala:** escape all ASCII escape sequences in string literals ([402f5ca](https://github.com/ibis-project/ibis/commit/402f5ca756acc85e1ddddbc60af2417f371e4ee0))
* **bigquery:** correctly escape ASCII escape sequences in regex patterns ([a455203](https://github.com/ibis-project/ibis/commit/a455203d7dc6f5410ad2627123ec18e7126d6bc9))
* **release:** pin conventional-changelog-conventionalcommits to 6.1.0 ([d6526b8](https://github.com/ibis-project/ibis/commit/d6526b83c2250f4f620ab9e71f0421a65e89d55b))
* **trino:** ensure that `list_databases` looks at all catalogs, not just the current one ([cfbdbf1](https://github.com/ibis-project/ibis/commit/cfbdbf19426e9771cbda9f9e665173da7286e3c5))
* **trino:** override incorrect base sqlalchemy `list_schemas` implementation ([84d38a1](https://github.com/ibis-project/ibis/commit/84d38a1e9cd67eb480ed9faa4fc506f98247dea3))


### Documentation

* **trino:** add connection docstring ([507a00e](https://github.com/ibis-project/ibis/commit/507a00e95784fd4054bd5146f49c2e6e168793db))

## [6.1.0](https://github.com/ibis-project/ibis/compare/6.0.0...6.1.0) (2023-08-03)


Expand Down
2 changes: 1 addition & 1 deletion ibis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Initialize Ibis module."""
from __future__ import annotations

__version__ = "6.1.0"
__version__ = "6.2.0"

from ibis import examples, util
from ibis.backends.base import BaseBackend
Expand Down
17 changes: 16 additions & 1 deletion ibis/backends/base/sql/registry/literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,22 @@ def _boolean_literal_format(translator, op):


def _string_literal_format(translator, op):
return "'{}'".format(op.value.replace("'", "\\'"))
return "'{}'".format(
op.value
# Escape \ first so we don't double escape other characters.
.replace("\\", "\\\\")
# Escape ' since we're using those for the string literal.
.replace("'", "\\'")
# ASCII escape sequences that are recognized in Python:
# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
.replace("\a", "\\a") # Bell
.replace("\b", "\\b") # Backspace
.replace("\f", "\\f") # Formfeed
.replace("\n", "\\n") # Newline / Linefeed
.replace("\r", "\\r") # Carriage return
.replace("\t", "\\t") # Tab
.replace("\v", "\\v") # Vertical tab
)


def _number_literal_format(translator, op):
Expand Down
11 changes: 3 additions & 8 deletions ibis/backends/bigquery/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,20 +156,15 @@ def _string_find(translator, op):
)


def _translate_pattern(translator, op):
# add 'r' to string literals to indicate to BigQuery this is a raw string
return "r" * isinstance(op, ops.Literal) + translator.translate(op)


def _regex_search(translator, op):
arg = translator.translate(op.arg)
regex = _translate_pattern(translator, op.pattern)
regex = translator.translate(op.pattern)
return f"REGEXP_CONTAINS({arg}, {regex})"


def _regex_extract(translator, op):
arg = translator.translate(op.arg)
regex = _translate_pattern(translator, op.pattern)
regex = translator.translate(op.pattern)
index = translator.translate(op.index)
matches = f"REGEXP_CONTAINS({arg}, {regex})"
# non-greedily match the regex's prefix so the regex can match as much as possible
Expand All @@ -185,7 +180,7 @@ def _regex_extract(translator, op):

def _regex_replace(translator, op):
arg = translator.translate(op.arg)
regex = _translate_pattern(translator, op.pattern)
regex = translator.translate(op.pattern)
replacement = translator.translate(op.replacement)
return f"REGEXP_REPLACE({arg}, {regex}, {replacement})"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\ab\bc\fd\ne\rf\tg\vh' AS `'a_x07b_x08c_x0cd_ne_rf_tg_x0bh'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\\b\\c' AS `'a_b_c'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\'b"c' AS `'a_'b_c'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT '`~!@#$%^&*()_=+-|[]{};:/?<>' AS `'_#_%_&__=+-|_:_<>'`
14 changes: 14 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ def test_integer_to_timestamp(case, unit, snapshot):
snapshot.assert_match(to_sql(expr), "out.sql")


# Verify string-literal escaping in the generated BigQuery SQL: backslashes,
# Python-recognized ASCII escape sequences, and quotes are escaped, while
# ordinary punctuation passes through untouched (snapshot-compared).
@pytest.mark.parametrize(
    ("case",),
    [
        param("a\\b\\c", id="escape_backslash"),
        param("a\ab\bc\fd\ne\rf\tg\vh", id="escape_ascii_sequences"),
        param("a'b\"c", id="escape_quote"),
        param("`~!@#$%^&*()_=+-|[]{};:/?<>", id="not_escape_special_characters"),
    ],
)
def test_literal_string(case, snapshot):
    # Compile the literal expression and compare with the stored out.sql snapshot.
    expr = ibis.literal(case)
    snapshot.assert_match(to_sql(expr), "out.sql")


@pytest.mark.parametrize(
("case", "dtype"),
[
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_extract(`string_col`, '[\d]+', 0)
regexp_extract(`string_col`, '[\\d]+', 0)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_replace(`string_col`, '[\d]+', 'aaa')
regexp_replace(`string_col`, '[\\d]+', 'aaa')
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_like(`string_col`, '[\d]+')
regexp_like(`string_col`, '[\\d]+')
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_like(`string_col`, '[\d]+')
regexp_like(`string_col`, '[\\d]+')
14 changes: 0 additions & 14 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.notimpl(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -211,7 +210,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.notimpl(["impala"], raises=AssertionError),
],
),
param(
Expand Down Expand Up @@ -240,7 +238,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -252,7 +249,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -276,7 +272,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -290,7 +285,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -304,7 +298,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -318,7 +311,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -330,7 +322,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -342,7 +333,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -369,10 +359,6 @@ def test_string_col_is_unicode(alltypes, df):
["mysql", "mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(
["impala"],
raises=AssertionError,
),
],
),
param(
Expand Down
67 changes: 57 additions & 10 deletions ibis/backends/trino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,22 @@ def current_database(self) -> str:
return self._scalar_query(sa.select(sa.literal_column("current_catalog")))

def list_databases(self, like: str | None = None) -> list[str]:
s = sa.table(
"schemata",
sa.column("catalog_name", sa.VARCHAR()),
schema="information_schema",
)
query = "SHOW CATALOGS"
with self.begin() as con:
catalogs = list(con.exec_driver_sql(query).scalars())
return self._filter_with_like(catalogs, like=like)

def list_schemas(
self, like: str | None = None, database: str | None = None
) -> list[str]:
query = "SHOW SCHEMAS"

if database is not None:
query += f" IN {self._quote(database)}"

query = sa.select(sa.distinct(s.c.catalog_name)).order_by(s.c.catalog_name)
with self.begin() as con:
results = list(con.execute(query).scalars())
return self._filter_with_like(results, like=like)
schemata = list(con.exec_driver_sql(query).scalars())
return self._filter_with_like(schemata, like)

@property
def current_schema(self) -> str:
Expand All @@ -68,9 +74,49 @@ def do_connect(
port: int = 8080,
database: str | None = None,
schema: str | None = None,
source: str | None = None,
**connect_args,
) -> None:
"""Create an Ibis client connected to a Trino database."""
"""Connect to Trino.
Parameters
----------
user
Username to connect with
password
Password to connect with
host
Hostname of the Trino server
port
Port of the Trino server
database
Catalog to use on the Trino server
schema
Schema to use on the Trino server
source
Application name passed to Trino
connect_args
Additional keyword arguments passed directly to SQLAlchemy's
`create_engine`
Examples
--------
>>> catalog = "hive"
>>> schema = "default"
Connect using a URL, with the default user, password, host and port
>>> con = ibis.connect(f"trino:///{catalog}/{schema}")
Connect using a URL
>>> con = ibis.connect(f"trino://user:password@host:port/{catalog}/{schema}")
Connect using keyword arguments
>>> con = ibis.trino.connect(database=catalog, schema=schema)
>>> con = ibis.trino.connect(database=catalog, schema=schema, source="my-app")
"""
database = "/".join(filter(None, (database, schema)))
url = sa.engine.URL.create(
drivername="trino",
Expand All @@ -79,6 +125,7 @@ def do_connect(
host=host,
port=port,
database=database,
query=dict(source="ibis" if source is None else source),
)
connect_args.setdefault("timezone", "UTC")
with warnings.catch_warnings():
Expand Down Expand Up @@ -110,7 +157,7 @@ def column_reflect(inspector, table, column_info):

@contextlib.contextmanager
def _prepare_metadata(self, query: str) -> Iterator[dict[str, str]]:
name = util.gen_name("ibis_trino_metadata")
name = util.gen_name("trino_metadata")
with self.begin() as con:
con.exec_driver_sql(f"PREPARE {name} FROM {query}")
try:
Expand Down
Loading