Skip to content

Commit

Permalink
Closes #557: Add option to format translation initiation codon variants as `p.Met1?`
Browse files Browse the repository at this point in the history

p.Met1? is the new default
  • Loading branch information
andreas-invitae authored and reece committed Apr 21, 2019
1 parent d682a42 commit ae87ff4
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 19 deletions.
1 change: 1 addition & 0 deletions hgvs/_data/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ replace_reference = True
max_ref_length = 0
p_3_letter = True
p_term_asterisk = False
p_init_met = True

[validator]
strict = True
Expand Down
21 changes: 15 additions & 6 deletions hgvs/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ def _format_config_na(self, conf=None):
def _format_config_aa(self, conf=None):
p_3_letter = hgvs.global_config.formatting.p_3_letter
p_term_asterisk = hgvs.global_config.formatting.p_term_asterisk
p_init_met = hgvs.global_config.formatting.p_init_met

if conf and "p_3_letter" in conf and conf["p_3_letter"] is not None:
p_3_letter = conf["p_3_letter"]
if conf and "p_term_asterisk" in conf and conf["p_term_asterisk"] is not None:
p_term_asterisk = conf["p_term_asterisk"]
return p_3_letter, p_term_asterisk
if conf and "p_init_met" in conf and conf["p_init_met"] is not None:
p_init_met = conf["p_init_met"]
return p_3_letter, p_term_asterisk, p_init_met

def _del_ins_lengths(self, ilen):
raise HGVSUnsupportedOperationError(
Expand Down Expand Up @@ -169,6 +173,7 @@ class AARefAlt(Edit):
ref = attr.ib(default=None)
alt = attr.ib(default=None)
uncertain = attr.ib(default=False)
init_met = attr.ib(default=False)

def __attrs_post_init__(self):
self.ref = aa_to_aa1(self.ref)
Expand All @@ -179,10 +184,14 @@ def format(self, conf=None):
# raise HGVSError("RefAlt: ref and alt sequences are both undefined")
return "="

p_3_letter, p_term_asterisk = self._format_config_aa(conf)
p_3_letter, p_term_asterisk, p_init_met = self._format_config_aa(conf)

if self.init_met and p_init_met:
s = "Met1?"
elif self.init_met and not p_init_met:
s = "?"
# subst and delins
if self.ref is not None and self.alt is not None:
elif self.ref is not None and self.alt is not None:
if self.ref == self.alt:
if p_3_letter:
s = "{ref}=".format(ref=aa1_to_aa3(self.ref))
Expand Down Expand Up @@ -267,7 +276,7 @@ def _del_ins_lengths(self, ilen):
@attr.s(slots=True)
class AASub(AARefAlt):
def format(self, conf=None):
p_3_letter, p_term_asterisk = self._format_config_aa(conf)
p_3_letter, p_term_asterisk, p_init_met = self._format_config_aa(conf)

if p_3_letter:
s = aa1_to_aa3(self.alt) if self.alt != "?" else self.alt
Expand Down Expand Up @@ -300,7 +309,7 @@ def __attrs_post_init__(self):
self.alt = aa_to_aa1(self.alt)

def format(self, conf=None):
p_3_letter, p_term_asterisk = self._format_config_aa(conf)
p_3_letter, p_term_asterisk, p_init_met = self._format_config_aa(conf)

st_length = self.length or ""
if p_3_letter:
Expand Down Expand Up @@ -345,7 +354,7 @@ def __attrs_post_init__(self):
self.aaterm = aa_to_aa1(self.aaterm)

def format(self, conf=None):
p_3_letter, p_term_asterisk = self._format_config_aa(conf)
p_3_letter, p_term_asterisk, p_init_met = self._format_config_aa(conf)

st_alt = self.alt or ""
st_aaterm = self.aaterm or ""
Expand Down
8 changes: 5 additions & 3 deletions hgvs/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import copy
import logging

from bioutils.sequences import reverse_complement

Expand All @@ -14,7 +13,6 @@
import hgvs.variantmapper
from hgvs.utils.norm import normalize_alleles
from hgvs.exceptions import HGVSDataNotAvailableError, HGVSUnsupportedOperationError, HGVSInvalidVariantError
from six.moves import range


class Normalizer(object):
Expand Down Expand Up @@ -57,7 +55,11 @@ def normalize(self, var):
if self.validator:
self.validator.validate(var)

if var.posedit is None or var.posedit.uncertain or var.posedit.pos is None:
init_met = False
if var.posedit is not None and isinstance(var.posedit, hgvs.edit.AARefAlt):
init_met = var.posedit.init_met

if var.posedit is None or var.posedit.uncertain or init_met or var.posedit.pos is None:
return var

type = var.type
Expand Down
14 changes: 11 additions & 3 deletions hgvs/utils/altseq_to_hgvsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self, ref_data, alt_data):
self._frameshift_start = self._alt_data.frameshift_start
self._is_substitution = self._alt_data.is_substitution
self._is_ambiguous = self._alt_data.is_ambiguous
self._is_init_met = False

if DBG:
print("len ref seq:{} len alt seq:{}".format(len(self._ref_seq), len(self._alt_seq)))
Expand Down Expand Up @@ -186,6 +187,7 @@ def _convert_to_sequence_variants(self, variant, acc):
aa_start = aa_end = AAPosition(base=start, aa=deletion)
ref = ''
alt = ''
self._is_init_met = True
self._is_ambiguous = True # side-effect

if insertion and insertion.find("*") == 0: # stop codon at variant position
Expand Down Expand Up @@ -288,7 +290,9 @@ def _convert_to_sequence_variants(self, variant, acc):
acc=acc,
is_ambiguous=self._is_ambiguous,
is_sub=is_sub,
is_ext=is_ext)
is_ext=is_ext,
is_init_met=self._is_init_met
)

return var_p

Expand Down Expand Up @@ -324,9 +328,13 @@ def _create_variant(self,
is_ambiguous=False,
is_sub=False,
is_ext=False,
is_no_protein=False):
is_no_protein=False,
is_init_met=False):
"""Creates a SequenceVariant object"""
if is_ambiguous:

if is_init_met:
posedit = AARefAlt(ref=ref, alt=alt, init_met=True)
elif is_ambiguous:
posedit = None
else:
interval = Interval(start=start, end=end)
Expand Down
4 changes: 4 additions & 0 deletions tests/data/ext.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ EXT06 NM_152263.2:c.855delA NP_689476.2:p.(Ter286Asnext*73)
MULTISTOP01 NC_000001.10:g.26136244G>A NM_020451.2:c.943G>A NP_065184.2:p.?
WHOLEGENE01 NM_000249.3:c.-7_*46del NP_000240.1:p.0?
WHOLEGENE02 NC_000003.11:g.37035019_37092164del NM_000249.3:c.-20_*20del NP_000240.1:p.0?
INITMET01 NC_000012.11:g.66583080A>G NM_007199.2:c.1A>G NP_009130.2:p.Met1?
INITMET02 NC_000001.10:g.231557634_231557635insGGC NM_022051.2:c.-1_1insGCC NP_071334.1:p.Met1?
INITMET03 NC_000016.9:g.89985654_89985683del NM_002386.3:c.-11_19del NP_002377.4:p.Met1?

2 changes: 1 addition & 1 deletion tests/data/gcp/real.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,5 @@ ID00057 NC_000010.10:g.89711873A>C NM_000314.4:c.493-2A>C NP_000305.3:p.?
ID00058 NC_000010.10:g.89717676G>A NM_000314.4:c.701G>A NP_000305.3:p.(Arg234Gln)
ID00059 NC_000010.10:g.89717777G>A NM_000314.4:c.801+1G>A NP_000305.3:p.?
ID00060 NC_000010.10:g.89720648dupT NM_000314.4:c.802-3dupT NP_000305.3:p.?
ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.?
ID00061 NC_000005.9:g.131705667G>T NM_003060.3:c.3G>T NP_003051.1:p.Met1?
ID00062 NC_000005.9:g.131706014G>A NM_003060.3:c.350G>A NP_003051.1:p.(Trp117*)
18 changes: 16 additions & 2 deletions tests/test_hgvs_variantmapper_cp_real.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ def gcp_file_reader(fn):
continue
yield rec


class TestHgvsCToPReal(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.hdp = hgvs.dataproviders.uta.connect(
mode=os.environ.get("HGVS_CACHE_MODE", "run"), cache=CACHE)
mode=os.environ.get("HGVS_CACHE_MODE", "learn"), cache=CACHE)
cls._hm = hgvs.variantmapper.VariantMapper(cls.hdp)
cls._hp = hgvs.parser.Parser()
cls._failed = []
Expand Down Expand Up @@ -86,6 +85,21 @@ def _append_fail(self, out, row_id, hgvsg, hgvsc, hgvsp_expected, hgvsp_actual,
out.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(row_id, hgvsg, hgvsc, hgvsp_expected,
hgvsp_actual, msg))

def test_c_to_p_format(self):
    """Check both renderings of a start-codon substitution projected to protein.

    The c. variant is parsed and projected with ``c_to_p``; the result is
    expected to format as ``p.Met1?`` when no conf is given and as ``p.?``
    when ``p_init_met`` is switched off through the per-call conf.
    """
    with_init_met = 'NP_071909.1:p.Met1?'
    without_init_met = 'NP_071909.1:p.?'

    parsed_c = self._hp.parse_hgvs_variant('NM_022464.4:c.3G>A')
    # The protein accession is the part of the expected string before the colon.
    projected_p = self._hm.c_to_p(parsed_c, with_init_met.split(":")[0])

    # No conf: formatting falls back to the configured defaults.
    self.assertEqual(with_init_met, projected_p.format())

    # Per-call override of the init-met formatting flag.
    self.assertEqual(without_init_met, projected_p.format(conf={'p_init_met': False}))


if __name__ == "__main__":
unittest.main()
Expand Down
8 changes: 4 additions & 4 deletions tests/test_hgvs_variantmapper_cp_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,22 +184,22 @@ def test_delins_into_three_prime_utr_no_frameshift(self):
# http://varnomen.hgvs.org/recommendations/protein/variant/substitution/
def test_substitution_removes_start_codon(self):
hgvsc = "NM_999999.1:c.1A>G"
hgvsp_expected = "MOCK:p.?"
hgvsp_expected = "MOCK:p.Met1?"
self._run_conversion(hgvsc, hgvsp_expected)

def test_deletion_from_five_prime_utr_frameshift(self):
hgvsc = "NM_999999.1:c.-3_1del"
hgvsp_expected = "MOCK:p.?"
hgvsp_expected = "MOCK:p.Met1?"
self._run_conversion(hgvsc, hgvsp_expected)

def test_deletion_from_five_prime_utr_no_frameshift(self):
hgvsc = "NM_999999.1:c.-3_3del"
hgvsp_expected = "MOCK:p.?"
hgvsp_expected = "MOCK:p.Met1?"
self._run_conversion(hgvsc, hgvsp_expected)

def test_delins_from_five_prime_utr_no_frameshift(self):
hgvsc = "NM_999999.1:c.-3_3delinsAAA"
hgvsp_expected = "MOCK:p.?"
hgvsp_expected = "MOCK:p.Met1?"
self._run_conversion(hgvsc, hgvsp_expected)

def test_delete_entire_gene(self):
Expand Down

0 comments on commit ae87ff4

Please sign in to comment.