Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fix a crash when retrieving rows from a multiple-index table #15826

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 17 additions & 5 deletions astropy/table/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,19 @@
array.view(Column) -> no indices
"""

from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING

import numpy as np

from .bst import MaxValue, MinValue
from .sorted_array import SortedArray

if TYPE_CHECKING:
from collections.abc import Iterable


class QueryError(ValueError):
"""
Expand Down Expand Up @@ -827,21 +833,27 @@
self.table = table
self.indices = table.indices

def _get_rows(self, item):
def _get_rows(
self,
item: int | Iterable[int] | slice | tuple[str, slice],
) -> list[np.int64]:
Comment on lines +836 to +839
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

obviously, these annotations are not part of the fix, rather a byproduct of my time spent looking for the bug :)

"""
Retrieve Table rows indexes by value slice.
"""
if len(self.indices) == 0:
raise ValueError("Can only use TableLoc for a table with indices")

if isinstance(item, tuple):
if (

Check warning on line 846 in astropy/table/index.py

View check run for this annotation

Codecov / codecov/patch

astropy/table/index.py#L846

Added line #L846 was not covered by tests
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I note that this block is identical to

if isinstance(item, tuple):
key, item = item
else:
key = self.table.primary_key

So I suppose it should be updated as well, and probably even abstracted away in a helper function to avoid repeated code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand I don't know if that would fix anything currently broken. In other words, I don't know that iloc needs it.

isinstance(item, tuple)
and len(item) == 2
and isinstance(item[0], str)
and isinstance(item[1], slice)
):
key, item = item
else:
key = self.table.primary_key

index = self.indices[key]
if len(index.columns) > 1:
raise ValueError("Cannot use .loc on multi-column indices")

if isinstance(item, slice):
# None signifies no upper/lower bound
Expand All @@ -854,7 +866,7 @@
# item should be a list or ndarray of values
rows = []
for key in item:
p = index.find((key,))
p = index.find(key if isinstance(key, tuple) else (key,))

Check warning on line 869 in astropy/table/index.py

View check run for this annotation

Codecov / codecov/patch

astropy/table/index.py#L869

Added line #L869 was not covered by tests
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@taldcroft this part of the fix is yours. Let me know if I should explicitly add you as a co-author of the commit.

if len(p) == 0:
raise KeyError(f"No matches found for key {key}")
else:
Expand Down
3 changes: 1 addition & 2 deletions astropy/table/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,8 +1039,7 @@ def indices(self):
def loc(self):
"""
Return a TableLoc object that can be used for retrieving
rows by index in a given data range. Note that both loc
and iloc work only with single-column indices.
rows by index in a given data range.
"""
return TableLoc(self)

Expand Down
61 changes: 61 additions & 0 deletions astropy/table/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,3 +604,64 @@ def test_hstack_qtable_table():
def test_index_slice_exception():
with pytest.raises(TypeError, match="index_slice must be tuple or slice"):
SlicedIndex(None, None)


@pytest.mark.parametrize(
"col_a_constructor, expected_row_repr_lines, expected_table_pformat",
[
pytest.param(
lambda vals: vals,
[
"<Row index=0>",
" c b a ",
"int64 int64 int64",
"----- ----- -----",
" 0 1 3",
],
[
" c b a ",
"--- --- ---",
" 2 2 2",
" 3 2 2",
],
id="list",
),
pytest.param(
lambda vals: Time(vals, format="cxcsec"),
[
"<Row index=0>",
" c b a ",
"int64 int64 Time",
"----- ----- ----",
" 0 1 3.0",
],
[
" c b a ",
"--- --- ---",
" 2 2 2.0",
" 3 2 2.0",
],
id="Time",
),
],
)
def test_multi_index(
col_a_constructor, expected_row_repr_lines, expected_table_pformat
):
# see https://github.com/astropy/astropy/issues/13176
# forcing dtype to int64 for portability
# (Windows + numpy 1.x uses int32 by default)
vals_a = np.array([3, 3, 2, 2, 1], dtype="int64")
vals_b = np.array([1, 2, 2, 2, 1], dtype="int64")
vals_c = np.array([0, 1, 2, 3, 4], dtype="int64")

col_a = col_a_constructor(vals_a)
t = QTable([vals_c, vals_b, col_a], names=("c", "b", "a"))
t.add_index(["a", "b"])
res1 = t.loc[t["a"][0], t["b"][0]]
assert isinstance(res1, Row)
assert repr(res1).splitlines() == expected_row_repr_lines

res2 = t.loc[t["a"][2], t["b"][2]]
assert isinstance(res2, Table)
assert res2.pformat() == expected_table_pformat
1 change: 1 addition & 0 deletions docs/changes/table/15826.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a crash when retrieving rows from a multiple-index table.