Skip to content

Commit

Permalink
TST: more explicit conditions for selecting alignment records
Browse files Browse the repository at this point in the history
[FIXED] any alignment record whose segment overlaps the query
    coordinates is now returned
  • Loading branch information
GavinHuttley committed Dec 31, 2023
1 parent 03f3089 commit 9f194b4
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 9 deletions.
21 changes: 12 additions & 9 deletions src/ensembl_lite/_aligndb.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,18 @@ def _get_block_id(
) -> list[int]:
sql = f"SELECT block_id from {self.table_name} WHERE species = ? AND coord_name = ?"
values = species, coord_name
if start and end:
sql = f"{sql} AND start > ? AND end < ?"
values += (start, end)
elif start:
sql = f"{sql} AND start > ?"
values += (start,)
elif end:
sql = f"{sql} AND end < ?"
values += (end,)
if start is not None and end is not None:
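# a record matches when the query start or the query end lies within
# [record start, record end); NOTE: a record lying strictly inside the
# query range (query start < record start and record end <= query end)
# satisfies neither clause and is not returned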
sql = f"{sql} AND ((start <= ? AND ? < end) OR (start <= ? AND ? < end))"
values += (start, start, end, end)
elif start is not None:
# the aligned segment overlaps start
sql = f"{sql} AND start <= ? AND ? < end"
values += (start, start)
elif end is not None:
# the aligned segment overlaps end
sql = f"{sql} AND start <= ? AND ? < end"
values += (end, end)

return self.db.execute(sql, values).fetchall()

Expand Down
50 changes: 50 additions & 0 deletions tests/test_aligndb.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,53 @@ def test_select_alignment_rc():
align_db=align_db, genomes=genomes, species="human", coord_name="s1"
)
assert got.to_dict() == expect.to_dict()


@pytest.mark.parametrize(
    "coord",
    [
        ("human", "s1", None, 11),  # finish within
        ("human", "s1", 3, None),  # start within
        ("human", "s1", 3, 9),  # within
        ("human", "s1", 3, 13),  # extends past
    ],
)
def test_align_db_get_records(coord):
    """Each coordinate query against the sample db yields exactly one record.

    Per the original author's note, the sample human record has the form
    ('blah', 0, 'human', 's1', 1, 12, '+', array([], dtype=int32)).
    """
    species, coord_name, start, end = coord
    _, align_db = make_sample(two_aligns=True)
    hits = list(
        align_db.get_records_matching(
            species=species,
            coord_name=coord_name,
            start=start,
            end=end,
        )
    )
    assert len(hits) == 1


@pytest.mark.parametrize(
    "coord",
    [
        ("human", "s1"),
        ("mouse", "s2"),
        ("dog", "s3"),
    ],
)
def test_align_db_get_records_required_only(coord):
    """Querying by species and coord_name alone returns two records each."""
    species, coord_name = coord
    _, align_db = make_sample(two_aligns=True)
    hits = list(
        align_db.get_records_matching(species=species, coord_name=coord_name)
    )
    # two hits for each species
    assert len(hits) == 2


@pytest.mark.parametrize(
    "coord",
    [
        ("human", "s2"),
        ("mouse", "xx"),
        ("blah", "s3"),
    ],
)
def test_align_db_get_records_no_matches(coord):
    """Species/coord_name pairs absent from the sample db return no records."""
    species, coord_name = coord
    _, align_db = make_sample()
    hits = list(
        align_db.get_records_matching(species=species, coord_name=coord_name)
    )
    # no hits at all
    assert len(hits) == 0

0 comments on commit 9f194b4

Please sign in to comment.