# Creating ApoE Bundles from HGVS

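HGVS nomenclature is the de facto standard for representing sequence variation in a human-readable form. This notebook demonstrates the VMC BundleManager, which translates HGVS expressions into Alleles, Haplotypes, and Genotypes and collects them in a bundle. The table below shows how the alleles of the two APOE variants, rs429358 and rs7412, combine to define the ε1–ε4 haplotypes.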


```
                             rs7412 
                             NC_000019.10:g.44908822
                             NM_000041.3:c.526
                             C          T
rs429358                 C   APOE-ε4    APOE-ε1
NC_000019.10:g.44908684  T   APOE-ε3    APOE-ε2
NM_000041.3:c.388
```

In [1]:
# HGVS expressions for each allele of the two APOE variants, keyed by
# "<rsID><base>". Every allele is written against four reference sequences:
#   g37, g38 : genomic accessions NC_000019.9 and NC_000019.10
#   c        : transcript NM_000041.3
#   p        : protein NP_000032.1
# Expressions ending in "=" are the alleles that match the reference sequence.
apoe_alleles = {
    'rs7412C': dict(
        g37='NC_000019.9:g.45412079=',
        g38='NC_000019.10:g.44908822=',
        c='NM_000041.3:c.526=',
        p='NP_000032.1:p.Arg176=',
    ),
    'rs7412T': dict(
        g37='NC_000019.9:g.45412079C>T',
        g38='NC_000019.10:g.44908822C>T',
        c='NM_000041.3:c.526C>T',
        p='NP_000032.1:p.Arg176Cys',
    ),
    'rs429358T': dict(
        g37='NC_000019.9:g.45411941=',
        g38='NC_000019.10:g.44908684=',
        c='NM_000041.3:c.388=',
        p='NP_000032.1:p.Cys130=',
    ),
    'rs429358C': dict(
        g37='NC_000019.9:g.45411941T>C',
        g38='NC_000019.10:g.44908684T>C',
        c='NM_000041.3:c.388T>C',
        p='NP_000032.1:p.Cys130Arg',
    ),
}

# Each APOE haplotype is a pair of keys into apoe_alleles, always ordered
# [rs429358 allele, rs7412 allele].
apoe_haplotypes = {
    "ε1": ["rs429358C", "rs7412T"],  # C / T
    "ε2": ["rs429358T", "rs7412T"],  # T / T
    "ε3": ["rs429358T", "rs7412C"],  # T / C
    "ε4": ["rs429358C", "rs7412C"],  # C / C
}

In [2]:
from vmc.extra.bundlemanager import BundleManager
# One BundleManager instance is shared by the rest of the notebook: the cells
# below add alleles, haplotypes, and genotypes to it and then export its contents.
bm = BundleManager()

# Alleles

In [3]:
# Register every HGVS allele expression (each sequence type of each APOE
# allele) with the bundle manager. Only the expressions themselves are needed,
# so iterate over the dict values; the value returned by add_hgvs_allele is
# not used in this notebook and is therefore not bound to a name.
for hgvs_by_type in apoe_alleles.values():
    for hgvs_allele in hgvs_by_type.values():
        bm.add_hgvs_allele(hgvs_allele)

# Haplotypes

In [4]:
# Resolve each haplotype's allele names into HGVS expressions, once per
# sequence type. Resulting shape:
#   {sequence type: {haplotype name: [HGVS expression, HGVS expression]}}
haplotypes_hgvs = {
    seq_type: {
        hap_name: [
            apoe_alleles[allele_name][seq_type]
            for allele_name in allele_names
        ]
        for hap_name, allele_names in apoe_haplotypes.items()
    }
    for seq_type in "g37 g38 c p".split()
}
# Bare last expression so the notebook displays the mapping.
haplotypes_hgvs

{'g37': {'ε1': ['NC_000019.9:g.45411941T>C', 'NC_000019.9:g.45412079C>T'],
  'ε2': ['NC_000019.9:g.45411941=', 'NC_000019.9:g.45412079C>T'],
  'ε3': ['NC_000019.9:g.45411941=', 'NC_000019.9:g.45412079='],
  'ε4': ['NC_000019.9:g.45411941T>C', 'NC_000019.9:g.45412079=']},
 'g38': {'ε1': ['NC_000019.10:g.44908684T>C', 'NC_000019.10:g.44908822C>T'],
  'ε2': ['NC_000019.10:g.44908684=', 'NC_000019.10:g.44908822C>T'],
  'ε3': ['NC_000019.10:g.44908684=', 'NC_000019.10:g.44908822='],
  'ε4': ['NC_000019.10:g.44908684T>C', 'NC_000019.10:g.44908822=']},
 'c': {'ε1': ['NM_000041.3:c.388T>C', 'NM_000041.3:c.526C>T'],
  'ε2': ['NM_000041.3:c.388=', 'NM_000041.3:c.526C>T'],
  'ε3': ['NM_000041.3:c.388=', 'NM_000041.3:c.526='],
  'ε4': ['NM_000041.3:c.388T>C', 'NM_000041.3:c.526=']},
 'p': {'ε1': ['NP_000032.1:p.Cys130Arg', 'NP_000032.1:p.Arg176Cys'],
  'ε2': ['NP_000032.1:p.Cys130=', 'NP_000032.1:p.Arg176Cys'],
  'ε3': ['NP_000032.1:p.Cys130=', 'NP_000032.1:p.Arg176='],
  'ε4': ['NP_000032.1:p.Cys

In [5]:
# Create the haplotypes for every sequence type held in haplotypes_hgvs
# (genomic, transcript, and protein), not only the protein ones. Each
# haplotype is submitted as its list of HGVS allele expressions; the haplotype
# name and the value returned by add_hgvs_haplotype are not needed here.
for hgvs_by_haplotype in haplotypes_hgvs.values():
    for allele_exprs in hgvs_by_haplotype.values():
        bm.add_hgvs_haplotype(allele_exprs)

In [14]:
# Genotypes: for every sequence type, pair the ε3 and ε4 haplotypes in all
# four ordered combinations (ε3/ε3, ε3/ε4, ε4/ε3, ε4/ε4). Each genotype is
# submitted as a tuple of two haplotypes, each haplotype being its list of
# HGVS allele expressions.
# NOTE(review): ε3/ε4 and ε4/ε3 are both submitted; whether BundleManager
# treats them as the same genotype is not visible from this notebook — confirm.
genotype_haplotype_names = "ε3 ε4".split()  # computed once, reused by both loops
for hgvs_by_haplotype in haplotypes_hgvs.values():
    for first_name in genotype_haplotype_names:
        for second_name in genotype_haplotype_names:
            bm.add_hgvs_genotype(
                (hgvs_by_haplotype[first_name], hgvs_by_haplotype[second_name])
            )

In [15]:
# Display everything added to the manager so far as a nested dict
# (bare last expression, so the notebook renders it as the cell output).
bm.as_bundle().as_dict()

{'alleles': {'VMC:GA_eMSY7YTp8vF94KLHb--gTFIw7ZTCL-g9': {'id': 'VMC:GA_eMSY7YTp8vF94KLHb--gTFIw7ZTCL-g9',
   'location_id': 'VMC:GL_pP3VNZk9XnBbKCpbmWZ_4uo7glzy52B8',
   'state': 'C'},
  'VMC:GA__8rLiy7YkQDNy-t536RpVFGxIDiWLr6J': {'id': 'VMC:GA__8rLiy7YkQDNy-t536RpVFGxIDiWLr6J',
   'location_id': 'VMC:GL_heG_PQopyF0Vgi4zH6yiLf3diyfyJcV3',
   'state': 'C'},
  'VMC:GA_FF1sjhCulonKevwWyUtHz0elzKkMGOia': {'id': 'VMC:GA_FF1sjhCulonKevwWyUtHz0elzKkMGOia',
   'location_id': 'VMC:GL_YxX7xmdBNfU4PsUR-SGVbx2UpFplLOfO',
   'state': 'G'},
  'VMC:GA_HXfVz-_qMdKeEuF8-jw6JYrjR2aFWC9d': {'id': 'VMC:GA_HXfVz-_qMdKeEuF8-jw6JYrjR2aFWC9d',
   'location_id': 'VMC:GL_dTK1c6jdZmkSUqVYQBESKi8-G3U1SvAR',
   'state': 'R'},
  'VMC:GA_juxtrStjpvLUPWoHIeJ7rEhpYCMe37vJ': {'id': 'VMC:GA_juxtrStjpvLUPWoHIeJ7rEhpYCMe37vJ',
   'location_id': 'VMC:GL_pP3VNZk9XnBbKCpbmWZ_4uo7glzy52B8',
   'state': 'T'},
  'VMC:GA_BqFKgQ350SUi4-uaGCvMM1ag7z0dNmvC': {'id': 'VMC:GA_BqFKgQ350SUi4-uaGCvMM1ag7z0dNmvC',
   'location_id': 'VMC:G

In [16]:
# Display the manager's contents rendered back as HGVS strings, grouped under
# keys such as 'alleles' and 'haplotypes' (see the cell output).
bm.as_hgvs()

{'alleles': ['NC_000019.9:g.45412079=',
  'NC_000019.10:g.44908822=',
  'NM_000041.3:g.526=',
  'NP_000032.1:g.176=',
  'NC_000019.9:g.45412079C>T',
  'NC_000019.10:g.44908822C>T',
  'NM_000041.3:g.526G>T',
  'NP_000032.1:g.176R>C',
  'NC_000019.9:g.45411941=',
  'NC_000019.10:g.44908684=',
  'NM_000041.3:g.388=',
  'NP_000032.1:g.130=',
  'NC_000019.9:g.45411941T>C',
  'NC_000019.10:g.44908684T>C',
  'NP_000032.1:g.130C>R'],
 'haplotypes': [['NC_000019.9:g.45411941T>C', 'NC_000019.9:g.45412079C>T'],
  ['NC_000019.9:g.45411941=', 'NC_000019.9:g.45412079C>T'],
  ['NC_000019.9:g.45411941=', 'NC_000019.9:g.45412079='],
  ['NC_000019.9:g.45411941T>C', 'NC_000019.9:g.45412079='],
  ['NC_000019.10:g.44908684T>C', 'NC_000019.10:g.44908822C>T'],
  ['NC_000019.10:g.44908684=', 'NC_000019.10:g.44908822C>T'],
  ['NC_000019.10:g.44908684=', 'NC_000019.10:g.44908822='],
  ['NC_000019.10:g.44908684T>C', 'NC_000019.10:g.44908822='],
  ['NM_000041.3:g.388=', 'NM_000041.3:g.526G>T'],
  ['NM_000041.3:g.