# Parse and Format a Variant

In [1]:
import hgvs.parser

# Genomic (type g) deletion-insertion written as an HGVS string.
g_hgvs = "NC_000001.10:g.150550916_150550920delGACAAinsCAATACC"

# Parse the string into a structured variant object; ending the cell on the
# bare name displays its repr.
hp = hgvs.parser.Parser()
g_var = hp.parse_hgvs_variant(g_hgvs)
g_var

SequenceVariant(ac=NC_000001.10, type=g, posedit=150550916_150550920delGACAAinsCAATACC)

# Map Variants Between Sequences

In [2]:
import hgvs.dataproviders.uta
import hgvs.variantmapper

# Data provider handle; it is also passed to the normalizer and validator
# constructed in later cells.
hdp = hgvs.dataproviders.uta.connect()

# Mapper configured for the GRCh37 primary assembly and splign alignments.
evm = hgvs.variantmapper.EasyVariantMapper(
    hdp,
    primary_assembly='GRCh37',
    alt_aln_method='splign',
)

In [3]:
# Project the genomic variant onto every transcript accession the mapper
# reports as relevant to it, collecting one c. variant per transcript.
c_vars = []
for tx_ac in evm.relevant_transcripts(g_var):
    c_vars.append(evm.g_to_c(g_var, tx_ac))
c_vars

[SequenceVariant(ac=NM_182763.2, type=c, posedit=688+399_688+403delTTGTCinsGGTATTG),
 SequenceVariant(ac=NM_021960.4, type=c, posedit=736_740delTTGTCinsGGTATTG),
 SequenceVariant(ac=NM_001197320.1, type=c, posedit=277_281delTTGTCinsGGTATTG)]

In [4]:
# Derive one protein-level variant per coding variant, preserving the order
# of c_vars so the two lists can be zipped together later.
p_vars = list(map(evm.c_to_p, c_vars))
p_vars

[SequenceVariant(ac=NP_877495.1, type=p, posedit=?),
 SequenceVariant(ac=NP_068779.1, type=p, posedit=(Leu246GlyfsTer5)),
 SequenceVariant(ac=NP_001184249.1, type=p, posedit=(Leu93GlyfsTer5))]

# Normalize (Shuffle and Rewrite) Variants

In [5]:
import hgvs.normalizer

hn = hgvs.normalizer.Normalizer(hdp)

# Parse an insertion first, then normalize it; the normalized variant is the
# cell's displayed result.
ins_var = hp.parse_hgvs_variant('NM_021960.4:c.735_736insT')
hn.normalize(ins_var)

SequenceVariant(ac=NM_021960.4, type=c, posedit=737dupT)

# Validate Variants

In [6]:
import hgvs.validator

hv = hgvs.validator.Validator(hdp)

# The failure is the demonstration here: catch whatever parsing or validation
# raises and print its message instead of letting the cell error out.
try:
    candidate_var = hp.parse_hgvs_variant('NM_021960.4:c.736_740delATGTCinsGGTATTG')
    hv.validate(candidate_var)
except Exception as err:
    print(err)

NM_021960.4:c.736_740delATGTCinsGGTATTG: Variant reference (ATGTC) does not agree with reference sequence (TTGTC)


# `SequenceVariant` instances are structured variant representations

In [7]:
# Display the three top-level attributes of the parsed variant as a tuple:
# accession, variant type, and the combined position/edit object.
g_var.ac, g_var.type, g_var.posedit

('NC_000001.10',
 'g',
 PosEdit(pos=150550916_150550920, edit=delGACAAinsCAATACC, uncertain=False))

In [8]:
# Drill into the position/edit object: `pos` is the position interval and
# `start` is its first endpoint.
g_var.posedit.pos.start

SimplePosition(base=150550916, uncertain=False)

In [9]:
# Display the reference and alternate sequences carried by the edit
# (the "del..." and "ins..." parts of the original HGVS string).
g_var.posedit.edit.ref, g_var.posedit.edit.alt

('GACAA', 'CAATACC')

# Format variants simply by "stringifying" them with `print` or `format`

In [10]:
# Formatting a variant with str.format() yields its HGVS string, so the
# summary is assembled as plain text: a header line for the genomic variant
# followed by one indented line per (transcript, protein) pair.
summary_lines = ["Your variant was {v} mapped to:".format(v=g_var)]
summary_lines.extend(
    "  {c_var} ({p_var})".format(c_var=c_var, p_var=p_var)
    for c_var, p_var in zip(c_vars, p_vars)
)
print("\n".join(summary_lines))

Your variant was NC_000001.10:g.150550916_150550920delGACAAinsCAATACC mapped to:
  NM_182763.2:c.688+399_688+403delTTGTCinsGGTATTG (NP_877495.1:p.?)
  NM_021960.4:c.736_740delTTGTCinsGGTATTG (NP_068779.1:p.(Leu246GlyfsTer5))
  NM_001197320.1:c.277_281delTTGTCinsGGTATTG (NP_001184249.1:p.(Leu93GlyfsTer5))


NCBI SeqViewer for MCL1 region on GRCh37.p13
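<img src="images/MCL1.png" alt="NCBI SeqViewer showing the MCL1 region on GRCh37.p13">
<img src="images/MCL1-zoom.png" alt="NCBI SeqViewer zoomed in on the MCL1 region on GRCh37.p13">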