diff --git a/.github/workflows/ibis-docs-main.yml b/.github/workflows/ibis-docs-main.yml index 57277bb0de87..6478f0b857d2 100644 --- a/.github/workflows/ibis-docs-main.yml +++ b/.github/workflows/ibis-docs-main.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - "*.x.x" merge_group: # only a single docs job that pushes to `main` can run at any given time diff --git a/.github/workflows/ibis-docs-pr.yml b/.github/workflows/ibis-docs-pr.yml index 9a6e5cca50c8..c45bdb709ba2 100644 --- a/.github/workflows/ibis-docs-pr.yml +++ b/.github/workflows/ibis-docs-pr.yml @@ -5,6 +5,7 @@ on: branches: - main - "*.x.x" + - "the-epic-split" merge_group: concurrency: diff --git a/docs/posts/ibis-duckdb-geospatial/index.qmd b/docs/posts/ibis-duckdb-geospatial/index.qmd index 575b25fdbd57..b78f7764056a 100644 --- a/docs/posts/ibis-duckdb-geospatial/index.qmd +++ b/docs/posts/ibis-duckdb-geospatial/index.qmd @@ -117,7 +117,7 @@ boroughs ``` ```{python} -boroughs.filter(_.geom.intersects(broad_station.geom)) +boroughs.filter(boroughs.geom.intersects(broad_station.select(broad_station.geom).to_array())) ``` ### `d_within` (ST_DWithin) @@ -133,10 +133,15 @@ streets Using the deferred API, we can check which streets are within `d=10` meters of distance. ```{python} -sts_near_broad = streets.filter(_.geom.d_within(broad_station.geom, 10)) +sts_near_broad = streets.filter(_.geom.d_within(broad_station.select(_.geom).to_array(), 10)) sts_near_broad ``` +::: {.callout-note} +In the previous query, `streets` and `broad_station` are different tables. We use [`to_array()`](../../reference/expression-tables.qmd#ibis.expr.types.relations.Table.to_array) to generate a +scalar subquery from a table with a single column (whose shape is scalar). +::: + To visualize the findings, we will convert the tables to GeoPandas DataFrames. ```{python} @@ -196,7 +201,7 @@ To find if there were any homicides in that area, we can find where the polygon 200 meters buffer to our "Broad St" station point intersects with the geometry column in our homicides table. ```{python} -h_near_broad = homicides.filter(_.geom.intersects(broad_station.geom.buffer(200))) +h_near_broad = homicides.filter(_.geom.intersects(broad_station.select(_.geom.buffer(200)).to_array())) h_near_broad ``` @@ -205,7 +210,7 @@ data we can't tell the street near which it happened. However, we can check if t distance of a street. ```{python} -h_street = streets.filter(_.geom.d_within(h_near_broad.geom, 2)) +h_street = streets.filter(_.geom.d_within(h_near_broad.select(_.geom).to_array(), 2)) h_street ``` diff --git a/docs/tutorials/ibis-for-sql-users.qmd b/docs/tutorials/ibis-for-sql-users.qmd index 59a01d889b8b..9fd863a3edd0 100644 --- a/docs/tutorials/ibis-for-sql-users.qmd +++ b/docs/tutorials/ibis-for-sql-users.qmd @@ -807,7 +807,7 @@ ibis.to_sql(expr) You can mix the overlapping key names with other expressions: ```{python} -joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t4.key3.left(4)]) +joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t5.key3.left(4)]) expr = joined[t4, t5.value2] ibis.to_sql(expr) ``` diff --git a/gen_matrix.py b/gen_matrix.py index e6c21381dd7e..9f9745cb7239 100644 --- a/gen_matrix.py +++ b/gen_matrix.py @@ -26,13 +26,7 @@ def get_leaf_classes(op): def main(): - internal_ops = { - # Never translates into anything - ops.UnresolvedExistsSubquery, - ops.ScalarParameter, - } - - public_ops = frozenset(get_leaf_classes(ops.Value)) - internal_ops + public_ops = frozenset(get_leaf_classes(ops.Value)) support = {"operation": [f"{op.__module__}.{op.__name__}" for op in public_ops]} support.update( (name, list(map(backend.has_operation, public_ops))) diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 52febaf80e3a..2ae904f505ba 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -11,7 +11,7 @@ import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir -from ibis.backends.base import BaseBackend, CanCreateDatabase +from ibis.backends.base import BaseBackend, CanCreateDatabase, NoUrl from ibis.backends.base.sql.ddl import fully_qualified_re, is_fully_qualified from ibis.backends.flink.compiler.core import FlinkCompiler from ibis.backends.flink.ddl import ( @@ -38,7 +38,7 @@ from ibis.api import Watermark -class Backend(BaseBackend, CanCreateDatabase): +class Backend(BaseBackend, CanCreateDatabase, NoUrl): name = "flink" compiler = FlinkCompiler supports_temporary_tables = True diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index b4eaff5e428e..cf316217170f 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -127,15 +127,7 @@ def dtype(self): @public class ScalarSubquery(Subquery): - def __init__(self, rel): - from ibis.expr.operations import Reduction - - super().__init__(rel=rel) - if not isinstance(self.value, Reduction): - raise IntegrityError( - f"Subquery {self.value!r} is not a reduction, only " - "reductions can be used as scalar subqueries" - ) + shape = ds.scalar @public diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index f50b72926dc9..673432550446 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -147,11 +147,6 @@ def test_subquery_integrity_check(): with pytest.raises(IntegrityError, match=msg): ops.ScalarSubquery(t) - agg = t.agg(t.a.sum() + 1) - msg = "is not a reduction" - with pytest.raises(IntegrityError, match=msg): - ops.ScalarSubquery(agg) - def test_select_turns_scalar_reduction_into_subquery(): arr = ibis.literal([1, 2, 3]) @@ -180,6 +175,17 @@ def test_select_turns_value_with_multiple_parents_into_subquery(): assert t1.op() == expected +def test_value_to_array_creates_subquery(): + rel = t.int_col.sum().as_table() + with pytest.warns(FutureWarning, match="implicit"): + expr = rel.to_array() + + op = expr.op() + assert op.shape.is_scalar() + assert op.dtype.is_int64() + assert isinstance(op, ops.ScalarSubquery) + + def test_mutate(): proj = t.select(t, other=t.int_col + 1) expected = Project( diff --git a/ibis/expr/types/__init__.py b/ibis/expr/types/__init__.py index 99bd54d2f6e4..9105bc911615 100644 --- a/ibis/expr/types/__init__.py +++ b/ibis/expr/types/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +# ruff: noqa: I001 + from ibis.expr.types.arrays import * # noqa: F403 from ibis.expr.types.binary import * # noqa: F403 from ibis.expr.types.collections import * # noqa: F403 diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index f42e60e47347..5d0b447521b1 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -85,7 +85,7 @@ def __repr__(self) -> str: except TranslationError as e: lines = [ "Translation to backend failed", - f"Error message: {repr(e)}", + f"Error message: {e!r}", "Expression repr follows:", self._repr(), ] diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index ab6c587b8a33..6e3021780507 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -1,31 +1,36 @@ from __future__ import annotations import functools +from typing import TYPE_CHECKING, Any + from public import public -from typing import Any, Optional, TYPE_CHECKING -from collections.abc import Iterator, Mapping import ibis import ibis.expr.operations as ops - from ibis import util -from ibis.expr.types import Table, Value from ibis.common.deferred import Deferred +from ibis.common.egraph import DisjointSet +from ibis.common.exceptions import ( + ExpressionError, + IbisInputError, + InputTypeError, + IntegrityError, +) from ibis.expr.analysis import flatten_predicates -from ibis.common.exceptions import ExpressionError, IntegrityError +from ibis.expr.rewrites import peel_join_field +from ibis.expr.types.generic import Value from ibis.expr.types.relations import ( + Table, bind, - dereference_values, dereference_mapping, unwrap_aliases, ) -from ibis.expr.operations.relations import JoinKind -from ibis.expr.rewrites import peel_join_field -from ibis.common.egraph import DisjointSet if TYPE_CHECKING: from collections.abc import Sequence + from ibis.expr.operations.relations import JoinKind + def disambiguate_fields( how, @@ -36,9 +41,7 @@ def disambiguate_fields( left_template, right_template, ): - """ - Resolve name collisions between the left and right tables. - """ + """Resolve name collisions between the left and right tables.""" collisions = set() left_template = left_template or "{name}" right_template = right_template or "{name}" @@ -190,6 +193,9 @@ def prepare_predicates( The right table predicates Predicates to bind and dereference, see the possible values above + comparison + The comparison operation to construct if the input is a pair of + expression-like objects """ deref_left = dereference_mapping_left(left) deref_right = dereference_mapping_right(right) @@ -266,7 +272,7 @@ def _finish(self) -> Table: return Table(self.op()) @functools.wraps(Table.join) - def join( # noqa: D102 + def join( self, right, predicates: Any, @@ -275,8 +281,8 @@ def join( # noqa: D102 lname: str = "", rname: str = "{name}_right", ): - import pyarrow as pa import pandas as pd + import pyarrow as pa # TODO(kszucs): factor out to a helper function if isinstance(right, (pd.DataFrame, pa.Table)): @@ -324,7 +330,7 @@ def join( # noqa: D102 return self.__class__(left, collisions=collisions, equalities=equalities) @functools.wraps(Table.asof_join) - def asof_join( # noqa: D102 + def asof_join( self: Table, right: Table, on, @@ -403,7 +409,7 @@ def asof_join( # noqa: D102 return self.__class__(left, collisions=collisions, equalities=equalities) @functools.wraps(Table.cross_join) - def cross_join( # noqa: D102 + def cross_join( self: Table, right: Table, *rest: Table, @@ -418,7 +424,7 @@ def cross_join( # noqa: D102 return left @functools.wraps(Table.select) - def select(self, *args, **kwargs): # noqa: D102 + def select(self, *args, **kwargs): chain = self.op() values = bind(self, (args, kwargs)) values = unwrap_aliases(values) diff --git a/ibis/expr/types/logical.py b/ibis/expr/types/logical.py index 09927223f2ac..01483bd241cc 100644 --- a/ibis/expr/types/logical.py +++ b/ibis/expr/types/logical.py @@ -306,7 +306,7 @@ def any(self, where: BooleanValue | None = None) -> BooleanValue: >>> (t.arr == None).any(where=t.arr != None) False """ - from ibis.common.deferred import Call, _, Deferred + from ibis.common.deferred import Call, Deferred, _ parents = self.op().relations diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 951f72e5b64a..4521de905d8f 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -25,6 +25,7 @@ from ibis.expr.types.core import Expr, _FixedTextJupyterMixin from ibis.expr.types.generic import ValueExpr, literal from ibis.selectors import Selector +from ibis.util import deprecated if TYPE_CHECKING: import pandas as pd @@ -1127,9 +1128,9 @@ def aggregate( metrics = unwrap_aliases(metrics) having = unwrap_aliases(having) - groups = dereference_values(self.op(), groups) - metrics = dereference_values(self.op(), metrics) - having = dereference_values(self.op(), having) + groups = dereference_values(node, groups) + metrics = dereference_values(node, metrics) + having = dereference_values(node, having) # the user doesn't need to specify the metrics used in the having clause # explicitly, we implicitly add them to the metrics list by looking for @@ -1804,6 +1805,23 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table: node = ops.Intersection(node, table, distinct=distinct) return node.to_expr().select(self.columns) + @deprecated(as_of="9.0", instead="conversion to scalar subquery is implicit") + def to_array(self) -> ir.Column: + """View a single column table as an array. + + Returns + ------- + Value + A single column view of a table + """ + schema = self.schema() + if len(schema) != 1: + raise com.ExpressionError( + "Table must have exactly one column when viewed as array" + ) + + return ops.ScalarSubquery(self).to_expr() + def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table: """Add columns to a table expression. diff --git a/ibis/expr/types/temporal_windows.py b/ibis/expr/types/temporal_windows.py index 865a9922e6a2..74560d0f36a2 100644 --- a/ibis/expr/types/temporal_windows.py +++ b/ibis/expr/types/temporal_windows.py @@ -5,11 +5,8 @@ from public import public import ibis.common.exceptions as com -import ibis.expr.analysis as an import ibis.expr.operations as ops import ibis.expr.types as ir -from ibis.common.deferred import Deferred -from ibis.selectors import Selector from ibis.expr.types.relations import bind if TYPE_CHECKING: