From ad5e0620c46b778b00edd4f588d87c8e8d621323 Mon Sep 17 00:00:00 2001 From: Hayden Metsky Date: Tue, 12 May 2020 20:31:55 -0400 Subject: [PATCH] Fix bug when fragment length is longer than sequence length but <2x as long This fixes a bug in genome.Genome.break_into_fragments() when --cluster-from-fragments is specified with a fragment_length that is longer than the sequence length and include_full_end is True. In this case, break_from_fragments() ought to yield seq in its entirety. However, in this case, len(seq)-fragment_length is negative. seq[len(seq)-fragment_length] would pull out the last len(seq)-fragment_length nt of seq. If fragment_length is sufficiently long (>=2*len(seq)), the result would be fine: it would yield seq in its entirety, as desired. However, if fragment_length is >len(seq) but <2*len(seq), it would not yield all of seq. --- catch/genome.py | 2 +- catch/tests/test_genome.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/catch/genome.py b/catch/genome.py index 4fb6c4f03..cd4e905f4 100644 --- a/catch/genome.py +++ b/catch/genome.py @@ -83,7 +83,7 @@ def fragments(seq): if include_full_end and len(fragment) < fragment_length: # This is at the end of seq; instead, use the last # fragment_length nt - yield seq[(len(seq) - fragment_length):] + yield seq[max(0, len(seq) - fragment_length):] else: yield fragment diff --git a/catch/tests/test_genome.py b/catch/tests/test_genome.py index edfcbef8b..c89a2468b 100644 --- a/catch/tests/test_genome.py +++ b/catch/tests/test_genome.py @@ -92,6 +92,13 @@ def test_break_into_fragments_from_one_seq_with_full_end(self): OrderedDict([('0', 'ATC'), ('1', 'GTT'), ('2', 'TAA')])) self.assertEqual(broken, expected_genome) + def test_break_into_fragments_from_one_seq_with_full_end_and_long_fragment(self): + genome_one = genome.Genome.from_one_seq('ATCGTTAA') + broken = genome_one.break_into_fragments(12, include_full_end=True) + expected_genome = genome.Genome.from_chrs( + OrderedDict([('0', 'ATCGTTAA')])) + self.assertEqual(broken, expected_genome) + def test_break_into_fragments_from_chrs(self): genome_two_chrs = genome.Genome.from_chrs( OrderedDict([("chr1", 'ATCGTTAA'), ("chr2", 'AATTCCGGG')]))