# Parse and Format a Variant

In [5]:
# Build one parser instance; later cells reuse `hp` to parse more variants.
import hgvs.parser
hp = hgvs.parser.Parser()

# HGVS expression for a genomic (g.) deletion-insertion on NC_000001.10.
g_hgvs = 'NC_000001.10:g.150550916_150550920delGACAAinsCAATACC'
# Parse the string into a structured SequenceVariant.
g_var = hp.parse_hgvs_variant(g_hgvs)
# Display the parsed variant. The repr shows `delins...` without the explicit
# deleted bases; they remain available as `g_var.posedit.edit.ref`.
g_var

SequenceVariant(ac=NC_000001.10, type=g, posedit=150550916_150550920delinsCAATACC, gene=None)

# Map variants between sequences

In [6]:
import hgvs.assemblymapper
import hgvs.dataproviders.uta

# Data provider handle shared by the mapper, normalizer and validator below.
# NOTE(review): connect() is called with no arguments, so it presumably uses the
# library's default UTA instance and needs network access -- confirm.
hdp = hgvs.dataproviders.uta.connect()

# Mapper bound to the GRCh37 assembly, using splign transcript alignments.
am = hgvs.assemblymapper.AssemblyMapper(
    hdp,
    assembly_name='GRCh37',
    alt_aln_method='splign',
)

In [7]:
# Ask the mapper which transcripts are relevant to the genomic variant, then
# project the variant onto each of them (g. -> c.).
tx_accessions = am.relevant_transcripts(g_var)
c_vars = [am.g_to_c(g_var, tx_ac) for tx_ac in tx_accessions]
c_vars

[SequenceVariant(ac=NM_001197320.1, type=c, posedit=277_281delinsGGTATTG, gene=None),
 SequenceVariant(ac=NM_001197320.2, type=c, posedit=277_281delinsGGTATTG, gene=None),
 SequenceVariant(ac=NM_021960.4, type=c, posedit=736_740delinsGGTATTG, gene=None),
 SequenceVariant(ac=NM_021960.5, type=c, posedit=736_740delinsGGTATTG, gene=None),
 SequenceVariant(ac=NM_182763.2, type=c, posedit=688+399_688+403delinsGGTATTG, gene=None),
 SequenceVariant(ac=NM_182763.3, type=c, posedit=688+399_688+403delinsGGTATTG, gene=None)]

In [8]:
# Translate every coding variant to its protein-level consequence (c. -> p.),
# keeping the same order as `c_vars` so the two lists can be zipped later.
p_vars = list(map(am.c_to_p, c_vars))
p_vars

[SequenceVariant(ac=NP_001184249.1, type=p, posedit=(Leu93GlyfsTer5), gene=None),
 SequenceVariant(ac=NP_001184249.1, type=p, posedit=(Leu93GlyfsTer5), gene=None),
 SequenceVariant(ac=NP_068779.1, type=p, posedit=(Leu246GlyfsTer5), gene=None),
 SequenceVariant(ac=NP_068779.1, type=p, posedit=(Leu246GlyfsTer5), gene=None),
 SequenceVariant(ac=NP_877495.1, type=p, posedit=None, gene=None),
 SequenceVariant(ac=NP_877495.1, type=p, posedit=None, gene=None)]

# Normalize (Shuffle and Rewrite) Variants

In [9]:
import hgvs.normalizer

# The normalizer uses the same data provider as the mapper.
hn = hgvs.normalizer.Normalizer(hdp)

# Parse an insertion, then normalize it; the result shown below is the
# insertion rewritten as a duplication.
ins_var = hp.parse_hgvs_variant('NM_021960.4:c.735_736insT')
hn.normalize(ins_var)

SequenceVariant(ac=NM_021960.4, type=c, posedit=737dup, gene=None)

# Validate Variants

In [10]:
import hgvs.exceptions
import hgvs.validator

# The validator checks a variant against the reference data from `hdp`.
hv = hgvs.validator.Validator(hdp)

# This variant deliberately states the wrong deleted bases (ATGTC) so that
# validation fails and the error message can be shown.
try:
    hv.validate(hp.parse_hgvs_variant('NM_021960.4:c.736_740delATGTCinsGGTATTG'))
except hgvs.exceptions.HGVSError as e:
    # Catch only hgvs's own errors: the expected validation failure is printed,
    # while unrelated problems (typos, broken environment) still raise.
    print(e)

NM_021960.4:c.736_740delinsGGTATTG: Variant reference (ATGTC) does not agree with reference sequence (TTGTC)


# `SequenceVariant` instances are structured variant representations

In [11]:
# A SequenceVariant exposes its accession, variant type and position/edit as attributes.
g_var.ac, g_var.type, g_var.posedit

('NC_000001.10',
 'g',
 PosEdit(pos=150550916_150550920, edit=delinsCAATACC, uncertain=False))

In [12]:
# Drill into the PosEdit: `pos` is the affected interval and `start` is its first position.
g_var.posedit.pos.start

SimplePosition(base=150550916, uncertain=False)

In [13]:
# The edit keeps both the reference (deleted) and the alternate (inserted) sequence.
g_var.posedit.edit.ref, g_var.posedit.edit.alt

('GACAA', 'CAATACC')

# Format variants simply by "stringifying" them with `print` or `format`

In [14]:
# Formatting a SequenceVariant with `format` yields its HGVS string.
header = "Your variant was {v} mapped to:".format(v=g_var)

# One indented line per transcript: the coding variant followed by its
# protein consequence in parentheses.
mapped_lines = [
    "  {c_var} ({p_var})".format(c_var=c_var, p_var=p_var)
    for c_var, p_var in zip(c_vars, p_vars)
]

print("\n".join([header] + mapped_lines))

Your variant was NC_000001.10:g.150550916_150550920delinsCAATACC mapped to:
  NM_001197320.1:c.277_281delinsGGTATTG (NP_001184249.1:p.(Leu93GlyfsTer5))
  NM_001197320.2:c.277_281delinsGGTATTG (NP_001184249.1:p.(Leu93GlyfsTer5))
  NM_021960.4:c.736_740delinsGGTATTG (NP_068779.1:p.(Leu246GlyfsTer5))
  NM_021960.5:c.736_740delinsGGTATTG (NP_068779.1:p.(Leu246GlyfsTer5))
  NM_182763.2:c.688+399_688+403delinsGGTATTG (NP_877495.1:p.?)
  NM_182763.3:c.688+399_688+403delinsGGTATTG (NP_877495.1:p.?)


NCBI Sequence Viewer display of the MCL1 region on GRCh37.p13:
<img src="images/MCL1.png">
<img src="images/MCL1-zoom.png">