Merge pull request #463 from GavinHuttley/develop

BUG: fixed a recently introduced testing bug
cogent3 · Jan 2, 2020 · commit 594fbe4
2 parents: 42b65ee + bb1ef0d
Show file tree

Hide file tree

Showing 5 changed files with 67 additions and 94 deletions.
diff --git a/src/cogent3/__init__.py b/src/cogent3/__init__.py
@@ -18,7 +18,6 @@
     SequenceCollection,
 )
 from cogent3.core.genetic_code import available_codes, get_code
-
 # note that moltype has to be imported last, because it sets the moltype in
 # the objects created by the other modules.
 from cogent3.core.moltype import (

diff --git a/src/cogent3/core/sequence.py b/src/cogent3/core/sequence.py
@@ -98,42 +98,6 @@ def __str__(self):
         """__str__ returns self._seq unmodified."""
         return self._seq
 
-    def annotate_matches_to(self, pattern, annot_type, name, allow_multiple=False):
-        """
-        Adds an annotation at the specified pattern in a sequence.
-        The pattern allows for IUPAC ambiguities,
-        as they are converted to regex.
-
-        Parameters
-        ----------
-        pattern : string
-            The search string for which annotations are made.
-        annot_type : string
-            The type of the annotation (e.g. exon).
-        name : string
-            The name of the annotation.
-        allow_multiple : boolean
-            If True, checks for multiple occurences of the input pattern.
-
-        Returns
-        -------
-        Returns a list of Annotation instances.
-        """
-        pattern = self.moltype.to_regex(seq=pattern)
-        pos = [m.span() for m in re.finditer(pattern, self._seq)]
-        if not pos:
-            return []
-        annot = []
-        if allow_multiple:
-            for i in range(0, len(pos)):
-                annot.append(
-                    self.add_feature(annot_type, f"{name}:{i}", [pos[i]])
-                )
-        else:
-            pos = pos[:1]
-            annot.append(self.add_feature(annot_type, name, pos))
-        return annot
-
     def to_fasta(self, make_seqlabel=None, block_size=60):
         """Return string of self in FASTA format, no trailing newline
 
@@ -1086,6 +1050,39 @@ def is_annotated(self):
         """returns True if sequence has any annotations"""
         return len(self.annotations) != 0
 
+    def annotate_matches_to(self, pattern, annot_type, name, allow_multiple=False):
+        """Adds a feature at each annotated match of pattern in the sequence.
+
+        Parameters
+        ----------
+        pattern : string
+            The search string for which annotations are made. It is passed
+            through self.moltype.to_regex() before searching self._seq.
+        annot_type : string
+            The type of the annotation (e.g. "domain").
+        name : string
+            Name prefix; each feature is named "<name>:<i>", i = match index.
+        allow_multiple : boolean
+            If True, every match is annotated. Otherwise only the first match
+            is annotated (it is still named "<name>:0").
+
+        Returns
+        -------
+        List of Annotation instances, one per annotated match; [] if no match.
+        """
+        pattern = self.moltype.to_regex(seq=pattern)
+        pos = [m.span() for m in re.finditer(pattern, self._seq)]
+        if not pos:
+            return []
+
+        num_match = len(pos) if allow_multiple else 1
+        annot = [
+            self.add_feature(annot_type, f"{name}:{i}", [pos[i]])
+            for i in range(num_match)
+        ]
+
+        return annot
+
 
 class ProteinSequence(Sequence):
     """Holds the standard Protein sequence. MolType set in moltype module."""

diff --git a/tests/test_core/test_alignment.py b/tests/test_core/test_alignment.py
@@ -2572,32 +2572,21 @@ def test_add_from_ref_aln(self):
         )  # test wrong_refseq
 
     def test_annotate_matches_to(self):
-        """Aligned. annotate_matches_to correctly delegates to sequence"""
+        """Aligned.annotate_matches_to correctly delegates to sequence"""
         aln = Alignment(dict(x="TTCCACTTCCGCTT"), moltype="dna")
         seq = aln.named_seqs["x"]
         pattern = "CCRC"
         annot = seq.annotate_matches_to(
-            pattern=pattern,
-            annot_type="domain",
-            name="fred",
-            allow_multiple=True,
-        )
-        regular_expression = DNA.to_regex(seq=pattern)
-        for i in range(0, len(annot)):
-            fred = annot[i].get_slice()
-            self.assertEqual(
-                str(fred), re.search(regular_expression, str(fred)).group()
-            )
-        annot = seq.annotate_matches_to(
-            pattern=pattern,
-            annot_type="domain",
-            name="fred",
-            allow_multiple=False,
+            pattern=pattern, annot_type="domain", name="fred", allow_multiple=True
         )
-        fred = annot[0].get_slice()
-        self.assertEqual(
-            str(fred), re.search(regular_expression, str(fred)).group()
+        got = [a.get_slice() for a in annot]
+        matches = ["CCAC", "CCGC"]
+        self.assertEqual(got, matches)
+        annot = seq.annotate_matches_to(
+            pattern=pattern, annot_type="domain", name="fred", allow_multiple=False
         )
+        got = [a.get_slice() for a in annot]
+        self.assertEqual(got, matches[:1])
 
     def test_deepcopy(self):
         """correctly deep copy aligned objects in an alignment"""

diff --git a/tests/test_core/test_features.py b/tests/test_core/test_features.py
@@ -1,6 +1,6 @@
 from unittest import TestCase, main
 
-from cogent3 import DNA, make_aligned_seqs
+from cogent3 import ASCII, DNA, make_aligned_seqs
 from cogent3.core.annotation import Feature, Variable
 # Complete version of manipulating sequence annotations
 from cogent3.util.deserialise import deserialise_object
@@ -724,6 +724,29 @@ def test_nested_deserialise_annotation(self):
             new_nested_feature.to_rich_dict(), self.nested_feature.to_rich_dict()
         )
 
+    def test_annotate_matches_to(self):
+        """annotate_matches_to annotates all matches, or only the first one
+        """
+        seq = DNA.make_seq("TTCCACTTCCGCTT", name="x")
+        pattern = "CCRC"
+        annot = seq.annotate_matches_to(
+            pattern=pattern, annot_type="domain", name="fred", allow_multiple=True
+        )
+        self.assertEqual([a.get_slice() for a in annot], ["CCAC", "CCGC"])
+        annot = seq.annotate_matches_to(
+            pattern=pattern, annot_type="domain", name="fred", allow_multiple=False
+        )
+        self.assertEqual(len(annot), 1)
+        fred = annot[0].get_slice()
+        self.assertEqual(str(fred), "CCAC")
+        # For Sequence objects of a non-IUPAC MolType (ASCII here),
+        # annotate_matches_to should find no match and return an empty list.
+        seq = ASCII.make_seq(seq="TTCCACTTCCGCTT")
+        annot = seq.annotate_matches_to(
+            pattern=pattern, annot_type="domain", name="fred", allow_multiple=False
+        )
+        self.assertEqual(annot, [])
+
 
 if __name__ == "__main__":
     main()
diff --git a/tests/test_core/test_sequence.py b/tests/test_core/test_sequence.py
@@ -50,42 +50,6 @@ class SequenceTests(TestCase):
     DNA = DnaSequence
     PROT = ProteinSequence
 
-    def test_annotate_matches_to(self):
-        """
-        annotate_matches_to method should attach
-        annotations correctly to a Sequence object, tested
-        for both multiple and singular annotations.
-        For Sequence objects of MolType
-        ASCII, annotate_matches_to should return an empty annotation.
-        """
-        seq = self.DNA("TTCCACTTCCGCTT", name="x")
-        pattern = "CCRC"
-        annot = seq.annotate_matches_to(
-            pattern=pattern,
-            annot_type="domain",
-            name="fred",
-            allow_multiple=True,
-        )
-        self.assertEqual([a.get_slice() for a in annot], ["CCAC", "CCGC"])
-        annot = seq.annotate_matches_to(
-            pattern=pattern,
-            annot_type="domain",
-            name="fred",
-            allow_multiple=False,
-        )
-        fred = annot[0].get_slice()[0:len(pattern)]
-        self.assertEqual(len(annot), 1)
-        self.assertEqual(str(fred), "CCAC")
-        seq = ASCII.make_seq(seq="TTCCACTTCCGCTT")
-        annot = seq.annotate_matches_to(
-            pattern=pattern,
-            annot_type="domain",
-            name="fred",
-            allow_multiple=False,
-        )
-        self.assertEqual(annot, [])
-
-
     def test_init_empty(self):
         """Sequence and subclasses should init correctly."""
         # NOTE: ModelSequences can't be initialized empty because it screws up
@@ -994,6 +958,7 @@ def test_DnaSequence(self):
         self.assertEqual(DnaSequence("TTTAc").rc(), "GTAAA")
 
 
+# TODO move methods of this class onto the single class that inherits from it!
 class ModelSequenceTests(object):
     """base class for tests of specific ArraySequence objects."""