Skip to content

Commit

Permalink
update test structure to allow for gaps aligning to gaps or adjacent …
Browse files Browse the repository at this point in the history
…to gaps (now that we have multi-alignment inputs)
  • Loading branch information
dpark01 committed Aug 5, 2015
1 parent b80d081 commit a29814d
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 38 deletions.
15 changes: 4 additions & 11 deletions interhost.py
Expand Up @@ -250,19 +250,11 @@ def __init__(self, seq0, seq1) :
baseCount1 = 0 # Number of real bases in seq1 up to and including cur pos
beforeStart = True # Haven't yet reached first pair of aligned real bases
gapSinceLast = False # Have encountered a gap since last pair in mapArrays
prevRealBase0 = prevRealBase1 = True
for b0, b1 in zip_longest(seq0, seq1) :
assert b0 != None and b1 != None, 'CoordMapper2Seqs: sequences '\
'must be same length.'
if b0 is None or b1 is None:
raise Exception('CoordMapper2Seqs: sequences must be same length.')
realBase0 = b0 != '-'
realBase1 = b1 != '-'
# commented out 7/8/15 since with multi-alignments
# sequences can have gaps where they may not in pairwise alignments
#assert realBase0 or realBase1, 'CoordMapper2Seqs: gap aligned to gap.'
#assert (realBase0 or prevRealBase1) and (realBase1 or prevRealBase0),\
# 'CoordMapper2Seqs: gap in one sequence adjacent to gap in other.'
prevRealBase0 = realBase0
prevRealBase1 = realBase1
baseCount0 += realBase0
baseCount1 += realBase1
if realBase0 and realBase1 :
Expand All @@ -275,7 +267,8 @@ def __init__(self, seq0, seq1) :
finalPos1 = baseCount1 # Last pair of aligned real bases so far
else :
gapSinceLast = True
assert len(self.mapArrays[0]) != 0, 'CoordMapper2Seqs: no aligned bases.'
if len(self.mapArrays[0]) == 0:
raise Exception('CoordMapper2Seqs: no aligned bases.')
if self.mapArrays[0][-1] != finalPos0 :
self.mapArrays[0].append(finalPos0)
self.mapArrays[1].append(finalPos1)
Expand Down
143 changes: 116 additions & 27 deletions test/unit/test_interhost.py
Expand Up @@ -4,11 +4,7 @@

import interhost
import test, util.file
import unittest, shutil, argparse, os
from interhost import CoordMapper2Seqs as Cm2s

import logging
log = logging.getLogger(__name__)
import unittest, argparse, itertools

class TestCommandHelp(unittest.TestCase):
def test_help_parser_for_each_command(self):
Expand Down Expand Up @@ -231,39 +227,132 @@ def test_map_chr_only(self):
self.assertEqual(self.cm.mapChr('third_chr', 'chr3'), 'chr3')
self.assertRaises(KeyError, self.cm.mapChr, 'nonexistentchr', 'chr1')

class TestCoordMapper2Seqs(test.TestCaseWithTmp):
class TestSpecificAlignments(test.TestCaseWithTmp):
""" For the most part, CoordMapper2Seqs is tested implicitly when
CoordMapper is tested. Focus here on special cases that are hard
or impossible to get out of the aligner.
"""
def test_basic_alignment(self):
    # Smoke test: loading a three-sequence alignment (one of which contains
    # a gap) must complete without raising. No mappings are checked here.
    records = [
        ('s1', 'ATCG'),
        ('s2', 'ACCG'),
        ('s3', 'AG-T'),
    ]
    fasta = makeTempFasta(records)
    mapper = interhost.CoordMapper()
    mapper.load_alignments([fasta])

def test_unequal_len(self) :
with self.assertRaises(AssertionError) :
cm2s = Cm2s('AA', 'A')
alignment = makeTempFasta([
('s1', 'AA'),
('s2', 'A'),
])
cm = interhost.CoordMapper()
with self.assertRaises(Exception) :
cm.load_alignments([alignment])

def test_no_real_bases(self) :
with self.assertRaises(AssertionError) :
cm2s = Cm2s('AA', '--')
with self.assertRaises(AssertionError) :
cm2s = Cm2s('--', 'AA')
def test_no_real_bases_in_sample(self) :
alignment1 = makeTempFasta([
('s1', 'AA'),
('s2', '--'),
])
cm = interhost.CoordMapper()
with self.assertRaises(Exception) :
cm.load_alignments([alignment1])

alignment2 = makeTempFasta([
('s1', '--'),
('s2', 'AA'),
('s3', 'TT'),
])
cm = interhost.CoordMapper()
with self.assertRaises(Exception) :
cm.load_alignments([alignment2])

# commented out 7/8/15 since with multi-alignments
# sequences can have gaps where they may not in pairwise alignments
#def test_aligned_gaps(self) :
# with self.assertRaises(AssertionError) :
# cm2s = Cm2s('A-A', 'A-A')
def test_no_real_bases_at_position(self):
    # The third alignment column is a gap in every sequence, so each
    # sequence has exactly three real bases. The test expects position i
    # to map to position i for every ordered pair of sequences.
    fasta = makeTempFasta([
        ('s1', 'AT-G'),
        ('s2', 'AC-G'),
        ('s3', 'AG-T'),
    ])
    mapper = interhost.CoordMapper()
    mapper.load_alignments([fasta])
    for pos in (1, 2, 3):
        # Each unordered pair is checked in both directions.
        for left, right in itertools.combinations(('s1', 's2', 's3'), 2):
            self.assertEqual(mapper.mapChr(left, right, pos), (right, pos))
            self.assertEqual(mapper.mapChr(right, left, pos), (left, pos))

#def test_adjacent_gaps(self) :
# with self.assertRaises(AssertionError) :
# cm2s = Cm2s('AC-T', 'A-GT')
def test_aligned_gaps(self):
    # s2 and s3 both carry a gap in the third alignment column, opposite a
    # real base in s1.
    fasta = makeTempFasta([
        ('s1', 'ATCG'),
        ('s2', 'AC-G'),
        ('s3', 'AG-T'),
    ])
    mapper = interhost.CoordMapper()
    mapper.load_alignments([fasta])

    # The two gapped sequences are expected to map onto each other 1:1.
    for pos in (1, 2, 3):
        self.assertEqual(mapper.mapChr('s2', 's3', pos), ('s3', pos))
        self.assertEqual(mapper.mapChr('s3', 's2', pos), ('s2', pos))

    # s1 -> gapped sequence: (position in s1, expected position in target).
    s1_to_gapped = ((1, 1), (2, 2), (3, 2), (4, 3))
    for src_pos, dst_pos in s1_to_gapped:
        for target in ('s2', 's3'):
            self.assertEqual(mapper.mapChr('s1', target, src_pos),
                             (target, dst_pos))

    # Gapped sequence -> s1: one expected value is a two-element list.
    gapped_to_s1 = ((1, 1), (2, [2, 3]), (3, 4))
    for src_pos, dst_pos in gapped_to_s1:
        for origin in ('s2', 's3'):
            self.assertEqual(mapper.mapChr(origin, 's1', src_pos),
                             ('s1', dst_pos))

def test_adjacent_gaps(self):
    # Five sequences whose gaps sit next to, or on top of, gaps in the
    # other sequences.
    fasta = makeTempFasta([
        ('s1', 'ATCTG'),
        ('s2', 'AC--G'),
        ('s3', 'A-TTG'),
        ('s4', 'A-C-G'),
        ('s5', 'A--CG'),
    ])
    mapper = interhost.CoordMapper()
    mapper.load_alignments([fasta])

    # (source, target, ((source position, expected target position), ...))
    expectations = (
        ('s1', 's2', ((1, 1), (2, 2), (3, 2), (4, 2), (5, 3))),
        ('s2', 's1', ((1, 1), (2, [2, 4]), (3, 5))),
        ('s1', 's3', ((1, 1), (2, 1), (3, 2), (4, 3), (5, 4))),
        ('s3', 's1', ((1, [1, 2]), (2, 3), (3, 4), (4, 5))),
        ('s2', 's3', ((1, 1), (2, [2, 3]), (3, 4))),
        ('s3', 's2', ((1, 1), (2, 2), (3, 2), (4, 3))),
    )
    for source, target, pairs in expectations:
        for src_pos, dst_pos in pairs:
            self.assertEqual(mapper.mapChr(source, target, src_pos),
                             (target, dst_pos))

    # s2, s4 and s5 each have three real bases; the test expects them to
    # map onto one another position-for-position, in both directions.
    for first, second in itertools.combinations(('s2', 's4', 's5'), 2):
        for pos in (1, 2, 3):
            self.assertEqual(mapper.mapChr(first, second, pos), (second, pos))
            self.assertEqual(mapper.mapChr(second, first, pos), (first, pos))

def test_one_real_base(self) :
cm2s = Cm2s('AC-', '-CA')
self.assertEqual(cm2s(2, 0), 1)
self.assertEqual(cm2s(1, 1), 2)
alignment = makeTempFasta([
('s1', 'AC-'),
('s2', '-CA'),
])
cm = interhost.CoordMapper()
cm.load_alignments([alignment])
self.assertEqual(cm.mapChr('s1', 's2', 1), ('s2', None))
self.assertEqual(cm.mapChr('s1', 's2', 2), ('s2', 1))
self.assertEqual(cm.mapChr('s2', 's1', 1), ('s1', 2))
self.assertEqual(cm.mapChr('s2', 's1', 2), ('s1', None))

def test_exactly_two_pairs(self) :
cm2s = Cm2s('A--T', 'AGGT')
self.assertEqual([cm2s(n, 0) for n in [1, 2]], [[1, 3], 4])
self.assertEqual([cm2s(n, 1) for n in [1, 2, 3, 4]], [1, 1, 1, 2])
alignment = makeTempFasta([
('s1', 'A--T'),
('s2', 'AGGT'),
])
cm = interhost.CoordMapper()
cm.load_alignments([alignment])
self.assertEqual(cm.mapChr('s1', 's2', 1), ('s2', [1,3]))
self.assertEqual(cm.mapChr('s1', 's2', 2), ('s2', 4))
self.assertEqual(cm.mapChr('s2', 's1', 1), ('s1', 1))
self.assertEqual(cm.mapChr('s2', 's1', 2), ('s1', 1))
self.assertEqual(cm.mapChr('s2', 's1', 3), ('s1', 1))
self.assertEqual(cm.mapChr('s2', 's1', 4), ('s1', 2))


0 comments on commit a29814d

Please sign in to comment.