# HGVS VMC Demo
Demonstrates converting bidirectionally between VMC data structures and HGVS expressions

---

In [1]:
import pprint

from support.utils import json_pretty_format
from support.vmchelper import VMCHelper

def opp(o):
    """Object pretty print.

    Serializes the given VMC object via its ``serialize()`` method, formats
    the result with ``json_pretty_format`` and writes it to stdout.
    """
    serialized = o.serialize()
    formatted = json_pretty_format(serialized)
    print(formatted)

In [2]:
# Single helper instance shared by all cells below; the alleles, haplotypes and
# genotypes added through it accumulate and are reported by vh.as_bundle().
vh = VMCHelper()

# Allele
Add two alleles, then inspect bundle

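* Accepts the HGVS `=` (no-change) syntax, e.g. `c.388=` ⇒ requires an on-the-fly reference sequence lookup to determine the allele state
* Two alleles at the same location ⇒ two allele records that reference a single shared location record
* The sequence identifier is stored in the bundle's `identifiers` section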

In [3]:
# Substitution C>T at position c.388 of NM_000041.3; the returned Allele has state=T.
vh.add_hgvs_allele("NM_000041.3:c.388C>T")

<Allele id=VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ location_id=VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ state=T>

In [4]:
# "=" means "same as reference" at c.388: the expression carries no base, yet the
# returned Allele has state=C and the same location_id as the C>T allele above.
vh.add_hgvs_allele("NM_000041.3:c.388=")

<Allele id=VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I location_id=VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ state=C>

In [7]:
# Dump everything recorded so far as a VMC bundle: two alleles, one shared
# location, and one sequence identifier.
opp(vh.as_bundle())

{
    "alleles": {
        "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I": {
            "id": "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "C"
        },
        "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ": {
            "id": "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "T"
        }
    },
    "genotypes": {},
    "haplotypes": {},
    "identifiers": {
        "VMC:GS_RARDhX0ZLluRb9MPWPP0UxWuba-L4RiN": [
            "NCBI:NM_000041.3"
        ]
    },
    "locations": {
        "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ": {
            "id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "interval": {
                "end": 388,
                "start": 387
            },
            "sequence_id": "VMC:GS_RARDhX0ZLluRb9MPWPP0UxWuba-L4RiN"
        }
    },
    "meta": {
        "generated_at": "2018-09-17T09:

# Haplotype
* Adds new allele at new location
* Creates a haplotype record at a third location (a span) that bounds the locations of the two alleles

In [8]:
# A haplotype is passed as a list of HGVS allele expressions. c.388= was already
# added above (its allele id is reused in the output); c.526C>T is new.
vh.add_hgvs_haplotype(["NM_000041.3:c.388=",
                       "NM_000041.3:c.526C>T"])

<Haplotype allele_ids=<#/definitions/Id=['VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I', 'VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf']> completeness=UNKNOWN id=VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji location_id=VMC:GL_1my1_jue4nJslOX2YrXX12CsFIpFzanZ>

In [9]:
# Re-dump the bundle: it now also contains the c.526C>T allele and the haplotype record.
opp(vh.as_bundle())

{
    "alleles": {
        "VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf": {
            "id": "VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf",
            "location_id": "VMC:GL_YxX7xmdBNfU4PsUR-SGVbx2UpFplLOfO",
            "state": "T"
        },
        "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I": {
            "id": "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "C"
        },
        "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ": {
            "id": "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "T"
        }
    },
    "genotypes": {},
    "haplotypes": {
        "VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji": {
            "allele_ids": [
                "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I",
                "VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf"
            ],
            "completeness": "UNKNOWN",
            "id": "VMC

# Genotype
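* Adds only a genotype record: no new locations, alleles, or haplotypes are created, because both haplotypes resolve to the existing haplotype record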

In [10]:
# A genotype is passed as a list of haplotypes, each itself a list of HGVS allele
# expressions. Both haplotypes here are identical, so the resulting Genotype
# lists the same haplotype id twice.
vh.add_hgvs_genotype([["NM_000041.3:c.388=",
                       "NM_000041.3:c.526C>T"],
                      ["NM_000041.3:c.388=",
                       "NM_000041.3:c.526C>T"]])

<Genotype completeness=UNKNOWN haplotype_ids=<#/definitions/Id=['VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji', 'VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji']> id=VMC:GG_M7YwvMeamjp69uu8f4omgXD3PacuGb3->

In [9]:
# Re-dump the bundle: the "genotypes" section is now populated.
opp(vh.as_bundle())

{
    "alleles": {
        "VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf": {
            "id": "VMC:GA_8GXRvwUiPqedmbLe8XC5bjU0MAtGzVaf",
            "location_id": "VMC:GL_YxX7xmdBNfU4PsUR-SGVbx2UpFplLOfO",
            "state": "T"
        },
        "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I": {
            "id": "VMC:GA_FDez5GA0kZdSJ--h3stLv9LF8l4OQ-_I",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "C"
        },
        "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ": {
            "id": "VMC:GA_xlhfF8gmHpo-veJxu405TWlKsf9VQOtJ",
            "location_id": "VMC:GL_8-ViSIvP5iAmlIt4bXVLOLrA_Nf_emgJ",
            "state": "T"
        }
    },
    "genotypes": {
        "VMC:GG_M7YwvMeamjp69uu8f4omgXD3PacuGb3-": {
            "completeness": "UNKNOWN",
            "haplotype_ids": [
                "VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji",
                "VMC:GH_XUqd-RLjlrwDuZHZsGlkSQoLo-qgNkji"
            ],
            "id": "VMC:GG_M7YwvMeamjp69uu

In [10]:
# Reverse direction: render every stored VMC object back as HGVS strings,
# grouped under 'alleles', 'haplotypes' and 'genotypes'.
# (pprint is imported in the notebook's first cell with the other imports.)
# NOTE(review): the output uses a "g." prefix on a transcript accession, and the
# input c.526C>T comes back as g.526G>T. Together with the 387-388 interval
# stored for c.388, this suggests c. offsets are treated as raw sequence
# positions without CDS projection -- confirm against VMCHelper.
pprint.pprint(vh.as_hgvs(), indent=2)

{ 'alleles': [ 'NM_000041.3:g.388C>T',
               'NM_000041.3:g.388=',
               'NM_000041.3:g.526G>T'],
  'genotypes': [ [ ['NM_000041.3:g.388=', 'NM_000041.3:g.526G>T'],
                   ['NM_000041.3:g.388=', 'NM_000041.3:g.526G>T']]],
  'haplotypes': [['NM_000041.3:g.388=', 'NM_000041.3:g.526G>T']]}
