Skip to content

Commit

Permalink
Merge pull request #488 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
BUG: new Map.zeroed() method and usage, fixes #487
  • Loading branch information
GavinHuttley committed Jan 14, 2020
2 parents f204c4d + 782343c commit fbadc0f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 8 deletions.
10 changes: 7 additions & 3 deletions src/cogent3/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,9 +1734,9 @@ def dotplot(

# Deep copying Aligned instance to ensure only region specified by Aligned.map is displayed.
if isinstance(seq1, Aligned):
seq1 = seq1.deepcopy()
seq1 = seq1.deepcopy(sliced=True)
if isinstance(seq2, Aligned):
seq2 = seq2.deepcopy()
seq2 = seq2.deepcopy(sliced=True)

if seq1.is_annotated() or seq2.is_annotated():
annotated = True
Expand Down Expand Up @@ -2056,7 +2056,11 @@ def deepcopy(self, sliced=True):
if sliced:
span = self.map.get_covering_span()
new_seq = new_seq[span.start : span.end]
return self.__class__(self.map, new_seq)
new_map = self.map.zeroed()
else:
new_map = self.map

return self.__class__(new_map, new_seq)

def __repr__(self):
return "%s of %s" % (repr(self.map), repr(self.data))
Expand Down
26 changes: 26 additions & 0 deletions src/cogent3/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,32 @@ def to_rich_dict(self):
data["version"] = __version__
return data

def zeroed(self):
    """Return a new instance whose spans are shifted towards 0.

    Every span that is not flagged as ``lost`` has ``min(start, end)`` of
    the new instance subtracted from both its ``start`` and its ``end``.

    Notes
    -----
    Useful when an Annotatable object is sliced, but the connection to
    the original parent is being deliberately broken as in the
    Sequence.deepcopy(sliced=True) case.
    """
    # function-scope import, kept local as in the original implementation
    from cogent3.util.deserialise import deserialise_map_spans

    # todo there's probably a more efficient way to do this
    # The new instance is built by a serialise -> deserialise round trip
    # (presumably so it shares no span objects with self -- confirm).
    shifted = deserialise_map_spans(self.to_rich_dict())

    # NOTE(review): parent_length is set to len(self), not to a length
    # taken from the parent sequence -- confirm this is intended for maps
    # that contain lost spans.
    shifted.parent_length = len(self)

    # The offset is computed once, before any span is modified.
    offset = min(shifted.start, shifted.end)
    for segment in shifted.spans:
        if not segment.lost:
            segment.start -= offset
            segment.end -= offset

    return shifted


class SpansOnly(ConstrainedList):
"""List that converts elements to Spans on addition."""
Expand Down
33 changes: 28 additions & 5 deletions tests/test_core/test_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1281,8 +1281,10 @@ def test_dotplot(self):

def test_dotplot_annotated(self):
"""exercising dotplot method with annotated sequences"""
seqs = self.Class(data=self.brca1_data, moltype=DNA)
seqs = self.Class(data={"Human": "CAGATTTGGCAGTT-", "Mouse": "CAGATTCAGCAGGTG"})

seqs = seqs.take_seqs(["Human", "Mouse"])

if type(self.Class) != ArrayAlignment:
# we annotated Human
seq = seqs.get_seq("Human")
Expand Down Expand Up @@ -2255,7 +2257,7 @@ def test_counts_per_pos(self):
[0, 0, 3, 0, 0],
[0, 2, 0, 1, 0],
[0, 1, 2, 0, 0],
[0, 2, 0, 1, 0]
[0, 2, 0, 1, 0],
]
)

Expand Down Expand Up @@ -2659,7 +2661,7 @@ def test_annotate_matches_to(self):
self.assertEqual(str(aln[a].named_seqs["x"]), "TTCCACTTC")

def test_deepcopy(self):
"""correctly deep copy aligned objects in an alignment"""
"""correctly deepcopy Aligned objects in an alignment"""
path = "data/brca1_5.paml"
# generates an annotatable Alignment object
aln = load_aligned_seqs(path, array_align=False, moltype="dna")
Expand Down Expand Up @@ -2687,17 +2689,21 @@ def test_deepcopy(self):

# for these species, each has an annotation spanning slice boundary or within it
for name in ["Mouse", "Human", "HowlerMon"]:
new_seq = aln.named_seqs[name].deepcopy()
new_seq = aln.named_seqs[name].deepcopy(sliced=True)
seq = aln.named_seqs[name]
self.assertNotEqual(new_seq.map.parent_length, seq.map.parent_length)

self.assertEqual(len(new_seq.data), 10)
self.assertTrue(new_seq.data.is_annotated())
self.assertEqual(len(new_seq.data.annotations), 1)
# tests the case when sliced argument is False
new_seq = aln.named_seqs[name].deepcopy(sliced=False)
self.assertEqual(new_seq.map.parent_length, seq.map.parent_length)
self.assertEqual(len(new_seq.data), len(aln.named_seqs[name].data))
self.assertTrue(new_seq.data.is_annotated())
# for these species, each has an annotation outside slice
for name in ["NineBande", "DogFaced"]:
new_seq = aln.named_seqs[name].deepcopy()
new_seq = aln.named_seqs[name].deepcopy(sliced=True)
self.assertEqual(len(new_seq.data), 10)
self.assertFalse(new_seq.data.is_annotated())
# tests the case when sliced argument is False
Expand All @@ -2716,6 +2722,23 @@ def test_deepcopy(self):
self.assertTrue(new_seq.data.is_annotated())
self.assertEqual(len(new_seq.data.annotations), 2)

def test_deepcopy2(self):
    """Aligned.deepcopy correctly handles gapped sequences"""
    # "a" ends with a gap, "b" starts with a gap, "c" has an internal gap
    # and "d" has no gaps at all.
    seqs = self.Class(
        data={
            "a": "CAGATTTGGCAGTT-",
            "b": "-AGATTCAGCAGGTG",
            "c": "CAGAT-CAGCAGGTG",
            "d": "CAGATTCAGCAGGTG",
        },
        moltype="dna",
    )
    # every sliced deep copy must report the same length as the alignment
    expected = {len(seqs)}

    lengths = {len(s.deepcopy(sliced=True)) for s in seqs.seqs}
    self.assertEqual(lengths, expected)

    # the same must hold after reverse complementing the alignment
    rc = seqs.rc()
    lengths = {len(s.deepcopy(sliced=True)) for s in rc.seqs}
    self.assertEqual(lengths, expected)

def test_dotplot(self):
"""exercising dotplot method"""
aln = self.Class([["name1", "TTTTTTAAAA"], ["name2", "AAAATTTTTT"]])
Expand Down

0 comments on commit fbadc0f

Please sign in to comment.