Skip to content

Commit

Permalink
Merge pull request #374 from wjjmjh/370_with_masked_annotations
Browse files Browse the repository at this point in the history
`with_masked_annotations()` handles nested annotations
  • Loading branch information
GavinHuttley committed Nov 12, 2019
2 parents 5611777 + 0643cc0 commit f757046
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 5 deletions.
26 changes: 24 additions & 2 deletions src/cogent3/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def detach_annotations(self, annots):
def add_feature(self, type, name, spans):
    """Attach a ``Feature`` annotation to this object and return it.

    Convenience wrapper that delegates to ``add_annotation`` with
    ``Feature`` as the annotation class.

    Parameters
    ----------
    type : string
        the annotation type, passed through to ``add_annotation``
    name : string
        the name of this annotation instance
    spans
        the spans covered by the feature, passed through unchanged

    Returns
    -------
    whatever ``add_annotation`` returns for the new feature
    """
    feature = self.add_annotation(Feature, type, name, spans)
    return feature

def get_annotations_matching(self, annotation_type, name=None):
def get_annotations_matching(self, annotation_type, name=None, extend_query=False):
"""
Parameters
Expand All @@ -182,19 +182,33 @@ def get_annotations_matching(self, annotation_type, name=None):
name of the annotation type. Wild-cards allowed.
name : string
name of the instance. Wild-cards allowed.
extend_query : boolean
queries sub-annotations if True
Returns
-------
list of AnnotatableFeatures
"""
result = []
if len(self.annotations) == 0:
return result
for annotation in self.annotations:
if fnmatch(annotation.type, annotation_type) and (
name is None or fnmatch(annotation.name, name)
):
result.append(annotation)
if extend_query:
result.extend(
annotation.get_annotations_matching(
annotation_type, name, extend_query=extend_query
)
)
return result

def get_region_covering_all(self, annotations, feature_class=None):
def get_region_covering_all(
self, annotations, feature_class=None, extend_query=False
):
if extend_query:
annotations = [annot._projected_to_base(self) for annot in annotations]
spans = []
annotation_types = []
for annot in annotations:
Expand Down Expand Up @@ -241,6 +255,9 @@ def _annotations_nucleic_reversed_on(self, new):
annotations.append(annot.__class__(new, new_map, annot))
new.attach_annotations(annotations)

def _projected_to_base(self, base):
raise NotImplementedError


class _Serialisable:
def to_rich_dict(self):
Expand Down Expand Up @@ -360,6 +377,11 @@ def __repr__(self):
name = ' "%s"' % name
return "%s%s at %s" % (self.type, name, self.map)

def _projected_to_base(self, base):
if self.parent == base:
return self.__class__(base, self.map, original=self)
return self.remapped_to(base, self.parent._projected_to_base(base).map)

def remapped_to(self, grandparent, gmap):
    """Return a copy of this annotation re-expressed on ``grandparent``.

    Parameters
    ----------
    grandparent
        the object the new annotation is attached to
    gmap
        a map that is indexed with this annotation's own map to obtain
        the map of the new annotation

    Returns
    -------
    A new instance of this annotation's class, created with
    ``original=self``.
    """
    projected_map = gmap[self.map]
    return self.__class__(grandparent, projected_map, original=self)
Expand Down
12 changes: 9 additions & 3 deletions src/cogent3/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,9 @@ def annotate_from_gff(self, f):
feat_label = gff.parse_attributes(attributes)
self.add_feature(feature, feat_label, [(start, end)])

def with_masked_annotations(self, annot_types, mask_char=None, shadow=False):
def with_masked_annotations(
self, annot_types, mask_char=None, shadow=False, extend_query=False
):
"""returns a sequence with annot_types regions replaced by mask_char
if shadow is False, otherwise all other regions are masked.
Expand All @@ -771,6 +773,8 @@ def with_masked_annotations(self, annot_types, mask_char=None, shadow=False):
shadow
whether to mask the annotated regions, or everything but
the annotated regions
extend_query : boolean
queries sub-annotations if True
"""
if mask_char is None:
Expand All @@ -782,9 +786,11 @@ def with_masked_annotations(self, annot_types, mask_char=None, shadow=False):
annotations = []
annot_types = [annot_types, [annot_types]][isinstance(annot_types, str)]
for annot_type in annot_types:
annotations += self.get_annotations_matching(annot_type)
annotations += self.get_annotations_matching(
annot_type, extend_query=extend_query
)

region = self.get_region_covering_all(annotations)
region = self.get_region_covering_all(annotations, extend_query=extend_query)
if shadow:
region = region.get_shadow()

Expand Down
78 changes: 78 additions & 0 deletions tests/test_core/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,28 @@ def test_get_annotations_matching(self):
str(exons), '[exon "fred" at [10:15]/48, exon "trev" at [30:40]/48]'
)

def test_get_nested_annotations_matching(self):
    """extend_query=True also returns matching features nested in features"""

    seq = DNA.make_seq("AAAAAAAAA", name="x")
    outer = seq.add_annotation(Feature, "exon", "fred", [(3, 8)])
    inner = outer.add_annotation(Feature, "exon", "fred", [(3, 7)])

    # one level of nesting: the outer exon is listed before its child
    found = seq.get_annotations_matching("exon", extend_query=True)
    self.assertEqual(len(found), 2)
    expected = '[exon "fred" at [3:8]/9, exon "fred" at [3:7]/5]'
    self.assertEqual(str(found), expected)

    # a third layer, attached to the already nested exon, is found as well
    inner.add_annotation(Feature, "exon", "fred", [(3, 6)])
    found = seq.get_annotations_matching("exon", extend_query=True)
    self.assertEqual(len(found), 3)
    expected = '[exon "fred" at [3:8]/9, exon "fred" at [3:7]/5, exon "fred" at [3:6]/4]'
    self.assertEqual(str(found), expected)

    # the default (extend_query=False) only reports the top-level exon
    found = seq.get_annotations_matching("exon")
    self.assertEqual(len(found), 1)
    expected = '[exon "fred" at [3:8]/9]'
    self.assertEqual(str(found), expected)

def test_get_annotations_matching2(self):
"""get_annotations_matching returns empty feature if no matches"""

Expand Down Expand Up @@ -426,6 +448,62 @@ def test_annotated_region_masks(self):
">x\nC-CCC?????GGG??\n>y\n-?----????G-G??\n",
)

def test_projected_to_base(self):
    """nested features project onto the coordinates of the base sequence"""

    def check_projection(feature, start, end):
        # project onto the base sequence and verify the resulting map
        projected = feature._projected_to_base(seq)
        self.assertEqual(projected.map.start, start)
        self.assertEqual(projected.map.end, end)
        self.assertEqual(projected.map.parent_length, len(seq))

    seq = DNA.make_seq("AAAAAAAAATTTTTTTTT", name="x")

    # each layer is defined relative to the layer that contains it, so the
    # expected coordinates are the accumulated offsets of all layers
    first = seq.add_feature("repeat", "frog", [(1, 17)])
    second = first.add_feature("repeat", "frog", [(2, 16)])
    check_projection(second, 3, 17)

    third = second.add_feature("repeat", "frog", [(5, 10)])
    check_projection(third, 8, 13)

    fourth = third.add_feature("repeat", "frog", [(0, 4)])
    fifth = fourth.add_feature("repeat", "frog", [(1, 2)])
    check_projection(fifth, 9, 10)

def test_nested_annotated_region_masks(self):
    """masking by feature type reaches features nested in other features"""

    def masked_repeats(seq_name):
        # mask "repeat" features on the named sequence, nested ones included
        seq = aln.get_seq(seq_name)
        return str(
            seq.with_masked_annotations("repeat", mask_char="?", extend_query=True)
        )

    aln = make_aligned_seqs(
        data=[["x", "C-GGCAAAAATTTAA"], ["y", "-T----TTTTG-GTT"]], array_align=False
    )

    # sequence x: the repeat is nested inside a gene feature
    gene = aln.get_seq("x").add_feature("gene", "norwegian", [(0, 4)])
    self.assertEqual(str(gene.get_slice()), "CGGC")
    gene.add_feature("repeat", "blue", [(1, 3)])
    self.assertEqual(masked_repeats("x"), "C??CAAAAATTTAA")

    # sequence y: the repeat is attached directly to the sequence
    repeat = aln.get_seq("y").add_feature("repeat", "frog", [(1, 4)])
    self.assertEqual(str(repeat.get_slice()), "TTT")
    self.assertEqual(masked_repeats("y"), "T???TGGTT")

    # masking the alignment by the top-level "gene" type, default query
    masked_aln = aln.with_masked_annotations("gene", mask_char="?")
    by_name = masked_aln.to_dict()
    self.assertEqual(by_name["x"], "?-???AAAAATTTAA")
    self.assertEqual(by_name["y"], "-T----TTTTG-GTT")

def test_annotated_separately_equivalence(self):
"""allow defining features as a series or individually"""

Expand Down

0 comments on commit f757046

Please sign in to comment.