In [7]:
from ga4gh.vrs import models, vrs_deref, vrs_enref
from ga4gh.core import ga4gh_identify, ga4gh_serialize, ga4gh_digest, ga4gh_deref

import json
def ppo(o, indent=2):
    """pretty print object as json"""
    print(json.dumps(o.as_dict(), sort_keys=True, indent=indent))
    
from ga4gh.vrs.dataproxy import SeqRepoRESTDataProxy
from biocommons.seqrepo import SeqRepo
from ga4gh.vrs.extras.translator import Translator

In [8]:
seqrepo_rest_service_url = "https://services.genomicmedlab.org/seqrepo"
dp = SeqRepoRESTDataProxy(base_url=seqrepo_rest_service_url)

tlr = Translator(data_proxy=dp)

## Representing PharmGKB CYP2C19*3/*4

In [9]:
# Haplotype 1: PharmGKB CYP2C19*3
# H1 Allele 1: "NC_000010.11:g.94780653G>A"
# H1 Allele 2: "NC_000010.11:g.94842866A>G"

hgvs_expression1 = "NC_000010.11:g.94780653G>A"
h1_allele1 = tlr.translate_from(hgvs_expression1,'hgvs')

hgvs_expression2 = "NC_000010.11:g.94842866A>G"
h1_allele2 = tlr.translate_from(hgvs_expression2,'hgvs')

In [10]:
#hgvs_expression = "NC_000010.11:g.94761665G>A"

In [11]:
#h1_allele1 = tlr.translate_from(hgvs_expression,'hgvs')

In [12]:
# Haplotype 2: PharmGKB CYP2C19*4
# H2 Allele 1: "NC_000010.11:g.94761900C>T"
# H2 Allele 2: "NC_000010.11:g.94762706A>G"
# H2 Allele 3: "NC_000010.11:g.94842866A>G"

hgvs_expression1 = "NC_000010.11:g.94761900C>T"
h2_allele1 = tlr.translate_from(hgvs_expression1,'hgvs')

hgvs_expression2 = "NC_000010.11:g.94762706A>G"
h2_allele2 = tlr.translate_from(hgvs_expression2,'hgvs')

hgvs_expression3 = "NC_000010.11:g.94842866A>G"
h2_allele3 = tlr.translate_from(hgvs_expression3, 'hgvs')

In [13]:
h1 = models.Haplotype(members=[h1_allele1, h1_allele2])

In [14]:
h2 = models.Haplotype(members=[h2_allele1, h2_allele2, h2_allele3])

In [15]:
h2

<Haplotype _id=None members=<#/definitions/Haplotype/members_<anonymous_field>=[<Allele _id=<Literal<str> ga4gh:VA.jWqv036CdZJs4YjwEYptDIBcoT7Uxv5I> location=<SequenceLocation _id=None interval=<SequenceInterval end=<Number type=<Literal<str> Number> value=<Literal<int> 94761900>> start=<Number type=<Literal<str> Number> value=<Literal<int> 94761899>> type=<Literal<str> SequenceInterval>> sequence_id=<Literal<str> ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB> type=<Literal<str> SequenceLocation>> state=<LiteralSequenceExpression sequence=<Literal<str> T> type=<Literal<str> LiteralSequenceExpression>> type=<Literal<str> Allele>>, <Allele _id=<Literal<str> ga4gh:VA.bUbyRF0WcbsoJIAJIe6e8YlBVjTrhjdh> location=<SequenceLocation _id=None interval=<SequenceInterval end=<Number type=<Literal<str> Number> value=<Literal<int> 94762706>> start=<Number type=<Literal<str> Number> value=<Literal<int> 94762705>> type=<Literal<str> SequenceInterval>> sequence_id=<Literal<str> ga4gh:SQ.ss8r_wB0-b9r44TQTMm

In [9]:
models.Number(value=1)

<Number type=<Literal<str> Number> value=<Literal<int> 1>>

In [10]:
gt_mem1 = models.GenotypeMember(copies=models.Number(value=1), variation=h1_allele1)

In [11]:
gt_mem2 = models.GenotypeMember(variation=h2, copies=models.Number(value=1))
gt = models.Genotype(members=[gt_mem1, gt_mem2], copies=models.Number(value=2))

In [12]:
ppo(gt)

{
  "copies": {
    "type": "Number",
    "value": 2
  },
  "members": [
    {
      "copies": {
        "type": "Number",
        "value": 1
      },
      "type": "GenotypeMember",
      "variation": {
        "_id": "ga4gh:VA.3AIu_q8I6dxuX76aZIq43GvogEsLuvAN",
        "location": {
          "interval": {
            "end": {
              "type": "Number",
              "value": 94761665
            },
            "start": {
              "type": "Number",
              "value": 94761664
            },
            "type": "SequenceInterval"
          },
          "sequence_id": "ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB",
          "type": "SequenceLocation"
        },
        "state": {
          "sequence": "A",
          "type": "LiteralSequenceExpression"
        },
        "type": "Allele"
      }
    },
    {
      "copies": {
        "type": "Number",
        "value": 1
      },
      "type": "GenotypeMember",
      "variation": {
        "members": [
          {
           

In [13]:
gt._id = ga4gh_identify(gt)

In [14]:
ppo(gt)

{
  "_id": "ga4gh:GT.W2sHtmm7mse3anXQ3KvlVk1TL-YeA37b",
  "copies": {
    "type": "Number",
    "value": 2
  },
  "members": [
    {
      "copies": {
        "type": "Number",
        "value": 1
      },
      "type": "GenotypeMember",
      "variation": {
        "_id": "ga4gh:VA.3AIu_q8I6dxuX76aZIq43GvogEsLuvAN",
        "location": {
          "interval": {
            "end": {
              "type": "Number",
              "value": 94761665
            },
            "start": {
              "type": "Number",
              "value": 94761664
            },
            "type": "SequenceInterval"
          },
          "sequence_id": "ga4gh:SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB",
          "type": "SequenceLocation"
        },
        "state": {
          "sequence": "A",
          "type": "LiteralSequenceExpression"
        },
        "type": "Allele"
      }
    },
    {
      "copies": {
        "type": "Number",
        "value": 1
      },
      "type": "GenotypeMember",
      "vari