# Associated Data Demo
This notebook demonstrates how VMC might be used in conjunction with application-specific data (here, population frequencies and PharmGKB links attached to TPMT haplotypes).

---

In [1]:
import json

from vmc.extra.demoapp import DemoApp 
from vmc.extra.utils import json_pretty_format

# Single DemoApp instance shared by every cell below: haplotypes are registered
# through demoapp.bm, annotations are stored in demoapp.annotations, and the
# final cells read it back via as_dict() / get_record().
demoapp = DemoApp()

In [2]:
# PharmGKB haplotype pages for the TPMT star alleles defined in this cell:
#   *1:  https://www.pharmgkb.org/haplotype/PA165819268
#   *3A: https://www.pharmgkb.org/haplotype/PA165819270
#   *3B: https://www.pharmgkb.org/haplotype/PA165819271
#   *3C: https://www.pharmgkb.org/haplotype/PA165819272

# TPMT_hgvs_alleles
# HGVS expressions for the alleles at the two TPMT SNP positions. In HGVS a
# trailing "=" means "no change from the reference sequence", so each position
# gets one reference allele and one substituted allele.
rs1142345T = "NC_000006.11:g.18130918="
rs1142345C = "NC_000006.11:g.18130918T>C"
rs1800460C = "NC_000006.11:g.18139228="
rs1800460T = "NC_000006.11:g.18139228C>T"

# Star-allele name -> list of HGVS alleles that define the haplotype.
TPMT_hgvs_haplotypes = {
    "TPMT*1": [rs1142345T, rs1800460C],
    "TPMT*3A": [rs1142345C, rs1800460T],
    "TPMT*3B": [rs1800460T],
    "TPMT*3C": [rs1142345C],
}

# Data from CPIC via Bob Freimuth (thanks!)
# Each row holds one frequency per entry of pop_labels, in the same order.
pop_labels = "CEU MED SA AFR".split()
_pop_freq_rows = {
    "TPMT*1": [0.95726, 0.96081, 0.95233, 0.93901],
    "TPMT*3A": [0.0356, 0.0254, 0.0287, 0.00198],
    "TPMT*3B": [0.000561, 0.00426, 0.000486, 0],
    "TPMT*3C": [0.004205, 0.00545, 0.00924, 0.0495],
}
# zip() silently drops unmatched items, so reject a short or long row up front.
assert all(len(row) == len(pop_labels) for row in _pop_freq_rows.values()), (
    "every pop_freq row needs exactly one value per population label"
)
# Star-allele name -> {population label: frequency}.
pop_freq = {
    hapname: dict(zip(pop_labels, row))
    for hapname, row in _pop_freq_rows.items()
}

# Star-allele name -> PharmGKB haplotype accession (see the URLs above).
pharmgkb_ids = {
    "TPMT*1": "PA165819268",
    "TPMT*3A": "PA165819270",
    "TPMT*3B": "PA165819271",
    "TPMT*3C": "PA165819272",
}

# The three tables are looked up by the same haplotype names, so a missing or
# misspelled key should fail here rather than midway through registration.
assert set(TPMT_hgvs_haplotypes) == set(pop_freq) == set(pharmgkb_ids), (
    "haplotype, pop_freq and pharmgkb_ids tables must share the same keys"
)

def make_pharmgkb_info(name):
    """Build the PharmGKB annotation record for the haplotype called `name`.

    `name` must be a key of the module-level `pharmgkb_ids` table; an unknown
    name raises KeyError. The returned dict has three entries: "id" (the
    PharmGKB accession), "name" (the fixed string "TPMT"), and "links" (the
    haplotype page URL and its clinical-annotation sub-page URL).
    """
    accession = pharmgkb_ids[name]
    haplotype_url = f"https://www.pharmgkb.org/haplotype/{accession}"
    links = {
        "haplotype": haplotype_url,
        # The clinical-annotation page lives directly under the haplotype page.
        "clinicalAnnotation": f"{haplotype_url}/clinicalAnnotation",
    }
    # NOTE(review): "name" is "TPMT" for every haplotype regardless of the
    # argument -- presumably the gene label rather than the star-allele name;
    # confirm before relying on it.
    return {"id": accession, "name": "TPMT", "links": links}

In [3]:
# Register each TPMT haplotype with the demo app and attach its
# application-specific data. Iterating the haplotype table directly (rather
# than a second hard-coded list of names) keeps this cell in step with the
# definitions; dicts iterate in insertion order, so the order is unchanged.
for hapname, hgvs_alleles in TPMT_hgvs_haplotypes.items():
    haplotype = demoapp.bm.add_hgvs_haplotype(hgvs_alleles)
    # Record the star-allele name against the id of the returned haplotype
    # (presumably identifiers maps id -> set of names; confirm in DemoApp).
    demoapp.bm.identifiers[haplotype.id].add(hapname)
    # Annotations are keyed by annotation type, then by haplotype id.
    demoapp.annotations["pop_freq"][haplotype.id] = pop_freq[hapname]
    demoapp.annotations["pharmgkb"][haplotype.id] = make_pharmgkb_info(hapname)

In [4]:
# Show the application's whole state (as returned by demoapp.as_dict()) as
# indented JSON. `json` is already imported in the first cell, so it is not
# re-imported here.
print(json.dumps(demoapp.as_dict(), indent=2))

{
  "appinfo": {
    "what": "ever"
  },
  "vmcbundle": {
    "alleles": {
      "VMC:GA_j5vIldfzLiAk15mlHnNxiP9EZo532j5T": {
        "id": "VMC:GA_j5vIldfzLiAk15mlHnNxiP9EZo532j5T",
        "location_id": "VMC:GL_5P6nPZ3SFyN0kMBsByRKRWHh2ioBoQZY",
        "state": "T"
      },
      "VMC:GA_oScxrygZCMJZZ7Fpz_4IA-gaqcUUuIuD": {
        "id": "VMC:GA_oScxrygZCMJZZ7Fpz_4IA-gaqcUUuIuD",
        "location_id": "VMC:GL_tIJkI1050v0af2MdPkwZ-DSTYIEZsAz3",
        "state": "C"
      },
      "VMC:GA_b4hNJQg8nhjf1GP_lPFyokj0cC9EMyGE": {
        "id": "VMC:GA_b4hNJQg8nhjf1GP_lPFyokj0cC9EMyGE",
        "location_id": "VMC:GL_5P6nPZ3SFyN0kMBsByRKRWHh2ioBoQZY",
        "state": "C"
      },
      "VMC:GA_MIaib3N_InVs1kMsE_4dC8-tx6eVP9j_": {
        "id": "VMC:GA_MIaib3N_InVs1kMsE_4dC8-tx6eVP9j_",
        "location_id": "VMC:GL_tIJkI1050v0af2MdPkwZ-DSTYIEZsAz3",
        "state": "T"
      }
    },
    "genotypes": {},
    "haplotypes": {
      "VMC:GH_BsOxQ7mEbHhNXNkpiDrEHzFbYQ9oA4dB": {
        "al

In [5]:
# Fetch the combined record (object, type and annotations) for one haplotype.
# The annotations shown in this cell's output are the TPMT*3B entries
# (PharmGKB PA165819271), hence the variable name.
tpmt_3b_id = "VMC:GH_Pi0KRfcBXsMlBW2th1tBNul53qJ5P_TM"
demoapp.get_record(tpmt_3b_id)

{'object': <Haplotype allele_ids=<#/definitions/Id=['VMC:GA_MIaib3N_InVs1kMsE_4dC8-tx6eVP9j_']> completeness=UNKNOWN id=VMC:GH_Pi0KRfcBXsMlBW2th1tBNul53qJ5P_TM location_id=VMC:GL_tIJkI1050v0af2MdPkwZ-DSTYIEZsAz3>,
 'type': 'Haplotype',
 'pop_freq': {'CEU': 0.000561, 'MED': 0.00426, 'SA': 0.000486, 'AFR': 0},
 'pharmgkb': {'id': 'PA165819271',
  'name': 'TPMT',
  'links': {'haplotype': 'https://www.pharmgkb.org/haplotype/PA165819271',
   'clinicalAnnotation': 'https://www.pharmgkb.org/haplotype/PA165819271/clinicalAnnotation'}}}