Skip to content

Commit

Permalink
fix(api): restore and deprecate ir.Table.to_array() (#8227)
Browse files Browse the repository at this point in the history
Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
  • Loading branch information
kszucs and cpcloud committed Feb 12, 2024
1 parent 55146bc commit 22de674
Show file tree
Hide file tree
Showing 14 changed files with 74 additions and 54 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ibis-docs-main.yml
Expand Up @@ -4,7 +4,6 @@ on:
push:
branches:
- main
- "*.x.x"
merge_group:

# only a single docs job that pushes to `main` can run at any given time
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ibis-docs-pr.yml
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- main
- "*.x.x"
- "the-epic-split"
merge_group:

concurrency:
Expand Down
13 changes: 9 additions & 4 deletions docs/posts/ibis-duckdb-geospatial/index.qmd
Expand Up @@ -117,7 +117,7 @@ boroughs
```

```{python}
boroughs.filter(_.geom.intersects(broad_station.geom))
boroughs.filter(boroughs.geom.intersects(broad_station.select(broad_station.geom).to_array()))
```

### `d_within` (ST_DWithin)
Expand All @@ -133,10 +133,15 @@ streets
Using the deferred API, we can check which streets lie within a distance of `d=10` meters.

```{python}
sts_near_broad = streets.filter(_.geom.d_within(broad_station.geom, 10))
sts_near_broad = streets.filter(_.geom.d_within(broad_station.select(_.geom).to_array(), 10))
sts_near_broad
```

::: {.callout-note}
In the previous query, `streets` and `broad_station` are different tables. We use [`to_array()`](../../reference/expression-tables.qmd#ibis.expr.types.relations.Table.to_array) to turn a
table with a single column into a scalar subquery (an expression whose shape is scalar).
:::

To visualize the findings, we will convert the tables to GeoPandas DataFrames.

```{python}
Expand Down Expand Up @@ -196,7 +201,7 @@ To find if there were any homicides in that area, we can find where the polygon
200 meters buffer to our "Broad St" station point intersects with the geometry column in our homicides table.

```{python}
h_near_broad = homicides.filter(_.geom.intersects(broad_station.geom.buffer(200)))
h_near_broad = homicides.filter(_.geom.intersects(broad_station.select(_.geom.buffer(200)).to_array()))
h_near_broad
```

Expand All @@ -205,7 +210,7 @@ data we can't tell the street near which it happened. However, we can check if t
distance of a street.

```{python}
h_street = streets.filter(_.geom.d_within(h_near_broad.geom, 2))
h_street = streets.filter(_.geom.d_within(h_near_broad.select(_.geom).to_array(), 2))
h_street
```

Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/ibis-for-sql-users.qmd
Expand Up @@ -807,7 +807,7 @@ ibis.to_sql(expr)
You can mix the overlapping key names with other expressions:

```{python}
joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t4.key3.left(4)])
joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t5.key3.left(4)])
expr = joined[t4, t5.value2]
ibis.to_sql(expr)
```
Expand Down
8 changes: 1 addition & 7 deletions gen_matrix.py
Expand Up @@ -26,13 +26,7 @@ def get_leaf_classes(op):


def main():
internal_ops = {
# Never translates into anything
ops.UnresolvedExistsSubquery,
ops.ScalarParameter,
}

public_ops = frozenset(get_leaf_classes(ops.Value)) - internal_ops
public_ops = frozenset(get_leaf_classes(ops.Value))
support = {"operation": [f"{op.__module__}.{op.__name__}" for op in public_ops]}
support.update(
(name, list(map(backend.has_operation, public_ops)))
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/flink/__init__.py
Expand Up @@ -11,7 +11,7 @@
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.backends.base import BaseBackend, CanCreateDatabase
from ibis.backends.base import BaseBackend, CanCreateDatabase, NoUrl
from ibis.backends.base.sql.ddl import fully_qualified_re, is_fully_qualified
from ibis.backends.flink.compiler.core import FlinkCompiler
from ibis.backends.flink.ddl import (
Expand All @@ -38,7 +38,7 @@
from ibis.api import Watermark


class Backend(BaseBackend, CanCreateDatabase):
class Backend(BaseBackend, CanCreateDatabase, NoUrl):
name = "flink"
compiler = FlinkCompiler
supports_temporary_tables = True
Expand Down
10 changes: 1 addition & 9 deletions ibis/expr/operations/relations.py
Expand Up @@ -127,15 +127,7 @@ def dtype(self):

@public
class ScalarSubquery(Subquery):
def __init__(self, rel):
from ibis.expr.operations import Reduction

super().__init__(rel=rel)
if not isinstance(self.value, Reduction):
raise IntegrityError(
f"Subquery {self.value!r} is not a reduction, only "
"reductions can be used as scalar subqueries"
)
shape = ds.scalar


@public
Expand Down
16 changes: 11 additions & 5 deletions ibis/expr/tests/test_newrels.py
Expand Up @@ -147,11 +147,6 @@ def test_subquery_integrity_check():
with pytest.raises(IntegrityError, match=msg):
ops.ScalarSubquery(t)

agg = t.agg(t.a.sum() + 1)
msg = "is not a reduction"
with pytest.raises(IntegrityError, match=msg):
ops.ScalarSubquery(agg)


def test_select_turns_scalar_reduction_into_subquery():
arr = ibis.literal([1, 2, 3])
Expand Down Expand Up @@ -180,6 +175,17 @@ def test_select_turns_value_with_multiple_parents_into_subquery():
assert t1.op() == expected


def test_value_to_array_creates_subquery():
    """Check that `Table.to_array()` warns and produces a scalar subquery."""
    single_value_table = t.int_col.sum().as_table()

    # The call must emit a FutureWarning whose message mentions "implicit".
    with pytest.warns(FutureWarning, match="implicit"):
        subquery_expr = single_value_table.to_array()

    node = subquery_expr.op()
    assert isinstance(node, ops.ScalarSubquery)
    assert node.shape.is_scalar()
    assert node.dtype.is_int64()


def test_mutate():
proj = t.select(t, other=t.int_col + 1)
expected = Project(
Expand Down
2 changes: 2 additions & 0 deletions ibis/expr/types/__init__.py
@@ -1,5 +1,7 @@
from __future__ import annotations

# ruff: noqa: I001

from ibis.expr.types.arrays import * # noqa: F403
from ibis.expr.types.binary import * # noqa: F403
from ibis.expr.types.collections import * # noqa: F403
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/types/core.py
Expand Up @@ -85,7 +85,7 @@ def __repr__(self) -> str:
except TranslationError as e:
lines = [
"Translation to backend failed",
f"Error message: {repr(e)}",
f"Error message: {e!r}",
"Expression repr follows:",
self._repr(),
]
Expand Down
40 changes: 23 additions & 17 deletions ibis/expr/types/joins.py
@@ -1,31 +1,36 @@
from __future__ import annotations

import functools
from typing import TYPE_CHECKING, Any

from public import public
from typing import Any, Optional, TYPE_CHECKING
from collections.abc import Iterator, Mapping

import ibis
import ibis.expr.operations as ops

from ibis import util
from ibis.expr.types import Table, Value
from ibis.common.deferred import Deferred
from ibis.common.egraph import DisjointSet
from ibis.common.exceptions import (
ExpressionError,
IbisInputError,
InputTypeError,
IntegrityError,
)
from ibis.expr.analysis import flatten_predicates
from ibis.common.exceptions import ExpressionError, IntegrityError
from ibis.expr.rewrites import peel_join_field
from ibis.expr.types.generic import Value
from ibis.expr.types.relations import (
Table,
bind,
dereference_values,
dereference_mapping,
unwrap_aliases,
)
from ibis.expr.operations.relations import JoinKind
from ibis.expr.rewrites import peel_join_field
from ibis.common.egraph import DisjointSet

if TYPE_CHECKING:
from collections.abc import Sequence

from ibis.expr.operations.relations import JoinKind


def disambiguate_fields(
how,
Expand All @@ -36,9 +41,7 @@ def disambiguate_fields(
left_template,
right_template,
):
"""
Resolve name collisions between the left and right tables.
"""
"""Resolve name collisions between the left and right tables."""
collisions = set()
left_template = left_template or "{name}"
right_template = right_template or "{name}"
Expand Down Expand Up @@ -190,6 +193,9 @@ def prepare_predicates(
The right table
predicates
Predicates to bind and dereference, see the possible values above
comparison
The comparison operation to construct if the input is a pair of
expression-like objects
"""
deref_left = dereference_mapping_left(left)
deref_right = dereference_mapping_right(right)
Expand Down Expand Up @@ -266,7 +272,7 @@ def _finish(self) -> Table:
return Table(self.op())

@functools.wraps(Table.join)
def join( # noqa: D102
def join(
self,
right,
predicates: Any,
Expand All @@ -275,8 +281,8 @@ def join( # noqa: D102
lname: str = "",
rname: str = "{name}_right",
):
import pyarrow as pa
import pandas as pd
import pyarrow as pa

# TODO(kszucs): factor out to a helper function
if isinstance(right, (pd.DataFrame, pa.Table)):
Expand Down Expand Up @@ -324,7 +330,7 @@ def join( # noqa: D102
return self.__class__(left, collisions=collisions, equalities=equalities)

@functools.wraps(Table.asof_join)
def asof_join( # noqa: D102
def asof_join(
self: Table,
right: Table,
on,
Expand Down Expand Up @@ -403,7 +409,7 @@ def asof_join( # noqa: D102
return self.__class__(left, collisions=collisions, equalities=equalities)

@functools.wraps(Table.cross_join)
def cross_join( # noqa: D102
def cross_join(
self: Table,
right: Table,
*rest: Table,
Expand All @@ -418,7 +424,7 @@ def cross_join( # noqa: D102
return left

@functools.wraps(Table.select)
def select(self, *args, **kwargs): # noqa: D102
def select(self, *args, **kwargs):
chain = self.op()
values = bind(self, (args, kwargs))
values = unwrap_aliases(values)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/types/logical.py
Expand Up @@ -306,7 +306,7 @@ def any(self, where: BooleanValue | None = None) -> BooleanValue:
>>> (t.arr == None).any(where=t.arr != None)
False
"""
from ibis.common.deferred import Call, _, Deferred
from ibis.common.deferred import Call, Deferred, _

parents = self.op().relations

Expand Down
24 changes: 21 additions & 3 deletions ibis/expr/types/relations.py
Expand Up @@ -25,6 +25,7 @@
from ibis.expr.types.core import Expr, _FixedTextJupyterMixin
from ibis.expr.types.generic import ValueExpr, literal
from ibis.selectors import Selector
from ibis.util import deprecated

if TYPE_CHECKING:
import pandas as pd
Expand Down Expand Up @@ -1127,9 +1128,9 @@ def aggregate(
metrics = unwrap_aliases(metrics)
having = unwrap_aliases(having)

groups = dereference_values(self.op(), groups)
metrics = dereference_values(self.op(), metrics)
having = dereference_values(self.op(), having)
groups = dereference_values(node, groups)
metrics = dereference_values(node, metrics)
having = dereference_values(node, having)

# the user doesn't need to specify the metrics used in the having clause
# explicitly, we implicitly add them to the metrics list by looking for
Expand Down Expand Up @@ -1804,6 +1805,23 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table:
node = ops.Intersection(node, table, distinct=distinct)
return node.to_expr().select(self.columns)

@deprecated(as_of="9.0", instead="conversion to scalar subquery is implicit")
def to_array(self) -> ir.Column:
    """Wrap a single-column table in a scalar subquery expression.

    Returns
    -------
    Value
        The expression built from an `ops.ScalarSubquery` over this table

    Raises
    ------
    ExpressionError
        If the table's schema does not contain exactly one column
    """
    n_columns = len(self.schema())
    if n_columns == 1:
        return ops.ScalarSubquery(self).to_expr()

    raise com.ExpressionError(
        "Table must have exactly one column when viewed as array"
    )

def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table:
"""Add columns to a table expression.
Expand Down
3 changes: 0 additions & 3 deletions ibis/expr/types/temporal_windows.py
Expand Up @@ -5,11 +5,8 @@
from public import public

import ibis.common.exceptions as com
import ibis.expr.analysis as an
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.common.deferred import Deferred
from ibis.selectors import Selector
from ibis.expr.types.relations import bind

if TYPE_CHECKING:
Expand Down

0 comments on commit 22de674

Please sign in to comment.