Skip to content

Commit

Permalink
fix left edge of chromosome bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Rasmussen committed Dec 11, 2014
1 parent eabbb08 commit 85cfd42
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 10 deletions.
1 change: 0 additions & 1 deletion .gitignore
Expand Up @@ -18,7 +18,6 @@ dist
build
eggs
parts
bin
var
sdist
develop-eggs
Expand Down
13 changes: 10 additions & 3 deletions hgvs/tests/test_variants.py
Expand Up @@ -6,6 +6,8 @@


_genome_seq = dict([
(('chr1', 0, 41), 'N' * 41),
(('chr1', 1, 31), 'N' * 30),
(('chr17', 41246230, 41246271), 'AGCCTCATGAGGATCACTGGCCAGTAAGTCTATTTTCTCTG'), # nopep8
(('chr17', 41246218, 41246248), 'TTTACATATTAAAGCCTCATGAGGATCACT'),
(('chr17', 41246249, 41246279), 'GCCAGTAAGTCTATTTTCTCTGAAGAACCA'),
Expand Down Expand Up @@ -36,6 +38,10 @@
# Trim common prefix, triallelic
(('chr17', 41246248, 'TGGC', ['TGGA', 'TGAC']),
('chr17', 41246249, 'GGC', ['GAC', 'GGA'])),

# Left edge of chromosome left justify, right pad.
(('chr1', 5, 'NN', ['N']),
('chr1', 1, 'NN', ['N'])),
]


Expand All @@ -50,6 +56,7 @@ def test_normalize_variant(self):
chrom, offset, ref, alts = variant
norm_variant = normalize_variant(
chrom, offset, ref, alts, genome).variant
self.assertEqual(norm_variant, true_variant,
'Variant failed to normalize: %s' %
repr(variant))
self.assertEqual(
norm_variant, true_variant,
'Variant failed to normalize %s: %s != %s' %
(repr(variant), repr(norm_variant), repr(true_variant)))
21 changes: 15 additions & 6 deletions hgvs/variants.py
Expand Up @@ -20,6 +20,9 @@ def get_sequence(genome, chrom, start, end, is_forward_strand=True):
Coordinates are 0-based, end-exclusive.
"""
# Prevent fetching negative coordinates.
start = max(start, 0)

if start >= end:
return ''
else:
Expand Down Expand Up @@ -80,7 +83,7 @@ def justify_genomic_indel(genome, chrom, start, end, indel, justify,
start, end: 0-based, end-exclusive coordinates of 'indel'.
"""
while True:
seq_start = start - flank_length
seq_start = max(start - flank_length, 0)
indel_len = len(indel)
fetch_len = indel_len + 2 * flank_length
seq = get_sequence(
Expand Down Expand Up @@ -261,7 +264,7 @@ def _1bp_pad(self):
return

# Pad sequences with one 5-prime base before the mutation event.
empty_seq = any(1 for allele in self.alleles if not allele)
empty_seq = any(not allele for allele in self.alleles)
uniq_starts = set(allele[0] for allele in self.alleles if allele)
if empty_seq or len(uniq_starts) > 1:
# Fetch more 5p flanking sequence if needed.
Expand All @@ -271,11 +274,17 @@ def _1bp_pad(self):
self.genome, self.position.chrom, start - 5, start)

self.log.append('1bp pad')
for i, allele in enumerate(self.alleles):
self.alleles[i] = self.seq_5p[-1] + self.alleles[i]
if self.seq_5p:
for i, allele in enumerate(self.alleles):
self.alleles[i] = self.seq_5p[-1] + self.alleles[i]

self.seq_5p = self.seq_5p[:-1]
self.position.chrom_start -= 1
self.seq_5p = self.seq_5p[:-1]
self.position.chrom_start -= 1
else:
# According to VCF standard, if there is no 5prime sequence,
# use 3prime sequence instead.
for i, allele in enumerate(self.alleles):
self.alleles[i] = self.alleles[i] + self.seq_3p[0]

if len(set(a[0] for a in self.alleles)) != 1:
raise AssertionError(
Expand Down

0 comments on commit 85cfd42

Please sign in to comment.