Skip to content

Commit

Permalink
Merge pull request #463 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
BUG: fixed a recently introduced testing bug
  • Loading branch information
GavinHuttley committed Jan 2, 2020
2 parents 42b65ee + bb1ef0d commit 594fbe4
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 94 deletions.
1 change: 0 additions & 1 deletion src/cogent3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
SequenceCollection,
)
from cogent3.core.genetic_code import available_codes, get_code

# note that moltype has to be imported last, because it sets the moltype in
# the objects created by the other modules.
from cogent3.core.moltype import (
Expand Down
69 changes: 33 additions & 36 deletions src/cogent3/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,42 +98,6 @@ def __str__(self):
"""__str__ returns self._seq unmodified."""
return self._seq

def annotate_matches_to(self, pattern, annot_type, name, allow_multiple=False):
    """Add an annotation wherever the sequence matches pattern.

    The pattern is converted with ``self.moltype.to_regex`` before the
    search, which is how IUPAC ambiguity codes are supported.

    Parameters
    ----------
    pattern : string
        The search string for which annotations are made.
    annot_type : string
        The type of the annotation (e.g. exon).
    name : string
        The name of the annotation. When allow_multiple is True each
        feature is named "<name>:<i>", where i is the zero-based index
        of the match; otherwise the single feature is named <name>.
    allow_multiple : boolean
        If True, every occurrence of the pattern is annotated. Otherwise
        only the first occurrence is annotated.

    Returns
    -------
    A list of the annotations returned by ``self.add_feature``, one per
    annotated match. The list is empty when the pattern does not match.
    """
    pattern = self.moltype.to_regex(seq=pattern)

    if not allow_multiple:
        # only the first match is wanted, so stop scanning once it is found
        first = re.search(pattern, self._seq)
        if first is None:
            return []
        return [self.add_feature(annot_type, name, [first.span()])]

    # collect all spans up front, before any feature is attached
    spans = [m.span() for m in re.finditer(pattern, self._seq)]
    return [
        self.add_feature(annot_type, f"{name}:{i}", [span])
        for i, span in enumerate(spans)
    ]

def to_fasta(self, make_seqlabel=None, block_size=60):
"""Return string of self in FASTA format, no trailing newline
Expand Down Expand Up @@ -1086,6 +1050,39 @@ def is_annotated(self):
"""returns True if sequence has any annotations"""
return len(self.annotations) != 0

def annotate_matches_to(self, pattern, annot_type, name, allow_multiple=False):
    """Adds an annotation at sequence positions matching pattern.

    Parameters
    ----------
    pattern : string
        The search string for which annotations are made. It is converted
        with ``self.moltype.to_regex`` before searching, so IUPAC
        ambiguities are converted to regex on sequences with the
        appropriate MolType.
    annot_type : string
        The type of the annotation (e.g. "domain").
    name : string
        The name of the annotation. Each feature is named "<name>:<i>",
        where i is the zero-based index of the match.
    allow_multiple : boolean
        If True, allows multiple occurrences of the input pattern. Otherwise
        only the first match is used.

    Returns
    -------
    A list of the annotations returned by ``self.add_feature``, one per
    annotated match. The list is empty when the pattern does not match.
    """
    pattern = self.moltype.to_regex(seq=pattern)

    if allow_multiple:
        spans = [m.span() for m in re.finditer(pattern, self._seq)]
    else:
        # only the first match is wanted, so stop scanning once it is found
        first = re.search(pattern, self._seq)
        spans = [] if first is None else [first.span()]

    return [
        self.add_feature(annot_type, f"{name}:{i}", [span])
        for i, span in enumerate(spans)
    ]


class ProteinSequence(Sequence):
"""Holds the standard Protein sequence. MolType set in moltype module."""
Expand Down
29 changes: 9 additions & 20 deletions tests/test_core/test_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2572,32 +2572,21 @@ def test_add_from_ref_aln(self):
) # test wrong_refseq

def test_annotate_matches_to(self):
    """Aligned.annotate_matches_to correctly delegates to sequence"""
    aln = Alignment(dict(x="TTCCACTTCCGCTT"), moltype="dna")
    seq = aln.named_seqs["x"]
    pattern = "CCRC"
    # every occurrence of the ambiguous pattern should be annotated
    annot = seq.annotate_matches_to(
        pattern=pattern, annot_type="domain", name="fred", allow_multiple=True
    )
    got = [a.get_slice() for a in annot]
    matches = ["CCAC", "CCGC"]
    self.assertEqual(got, matches)
    # without allow_multiple only the first occurrence is annotated
    annot = seq.annotate_matches_to(
        pattern=pattern, annot_type="domain", name="fred", allow_multiple=False
    )
    got = [a.get_slice() for a in annot]
    self.assertEqual(got, matches[:1])

def test_deepcopy(self):
"""correctly deep copy aligned objects in an alignment"""
Expand Down
25 changes: 24 additions & 1 deletion tests/test_core/test_features.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from unittest import TestCase, main

from cogent3 import DNA, make_aligned_seqs
from cogent3 import ASCII, DNA, make_aligned_seqs
from cogent3.core.annotation import Feature, Variable
# Complete version of manipulating sequence annotations
from cogent3.util.deserialise import deserialise_object
Expand Down Expand Up @@ -724,6 +724,29 @@ def test_nested_deserialise_annotation(self):
new_nested_feature.to_rich_dict(), self.nested_feature.to_rich_dict()
)

def test_annotate_matches_to(self):
    """annotate_matches_to attaches annotations correctly to a Sequence"""
    raw = "TTCCACTTCCGCTT"
    # arguments shared by every call below
    common = dict(pattern="CCRC", annot_type="domain", name="fred")

    dna_seq = DNA.make_seq(raw, name="x")

    # all occurrences are annotated when multiple matches are allowed
    multi = dna_seq.annotate_matches_to(allow_multiple=True, **common)
    slices = [feature.get_slice() for feature in multi]
    self.assertEqual(slices, ["CCAC", "CCGC"])

    # only the first occurrence is annotated otherwise
    single = dna_seq.annotate_matches_to(allow_multiple=False, **common)
    self.assertEqual(len(single), 1)
    first_slice = single[0].get_slice()
    self.assertEqual(str(first_slice), "CCAC")

    # For Sequence objects of a non-IUPAC MolType, annotate_matches_to
    # should return an empty annotation.
    ascii_seq = ASCII.make_seq(seq=raw)
    none_found = ascii_seq.annotate_matches_to(allow_multiple=False, **common)
    self.assertEqual(none_found, [])


if __name__ == "__main__":
main()
37 changes: 1 addition & 36 deletions tests/test_core/test_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,42 +50,6 @@ class SequenceTests(TestCase):
DNA = DnaSequence
PROT = ProteinSequence

def test_annotate_matches_to(self):
    """annotate_matches_to attaches annotations correctly to a Sequence.

    Tested for both multiple and singular annotations. For Sequence
    objects of MolType ASCII, annotate_matches_to should return an empty
    annotation.
    """
    seq = self.DNA("TTCCACTTCCGCTT", name="x")
    pattern = "CCRC"
    annot = seq.annotate_matches_to(
        pattern=pattern,
        annot_type="domain",
        name="fred",
        allow_multiple=True,
    )
    self.assertEqual([a.get_slice() for a in annot], ["CCAC", "CCGC"])
    annot = seq.annotate_matches_to(
        pattern=pattern,
        annot_type="domain",
        name="fred",
        allow_multiple=False,
    )
    # check the count before indexing, so a wrong count is reported as an
    # assertion failure rather than an IndexError
    self.assertEqual(len(annot), 1)
    # compare the whole slice: trimming it to len(pattern) would hide a
    # feature that is longer than the match
    fred = annot[0].get_slice()
    self.assertEqual(str(fred), "CCAC")
    # NOTE(review): relies on ASCII being available at module level in this
    # test file -- confirm it is imported
    seq = ASCII.make_seq(seq="TTCCACTTCCGCTT")
    annot = seq.annotate_matches_to(
        pattern=pattern,
        annot_type="domain",
        name="fred",
        allow_multiple=False,
    )
    self.assertEqual(annot, [])


def test_init_empty(self):
"""Sequence and subclasses should init correctly."""
# NOTE: ModelSequences can't be initialized empty because it screws up
Expand Down Expand Up @@ -994,6 +958,7 @@ def test_DnaSequence(self):
self.assertEqual(DnaSequence("TTTAc").rc(), "GTAAA")


# TODO move methods of this class onto the single class that inherits from it!
class ModelSequenceTests(object):
"""base class for tests of specific ArraySequence objects."""

Expand Down

0 comments on commit 594fbe4

Please sign in to comment.