In [1]:
import vmc

# Regions

Regions refer to contiguous spans of an implied sequence.
Regions are not identifiable objects, so no computed identifier is defined for them.

In [2]:
# Build a SimpleRegion from explicit start/end coordinates and show its
# plain-dict form as the cell output.
sr = vmc.models.SimpleRegion(
    start=42,
    end=43,
)
sr.as_dict()

{'end': 43, 'start': 42, 'type': 'SimpleRegion'}

In [3]:
# Compose a NestedRegion from two SimpleRegions passed as `inner` and `outer`.
# NOTE(review): inner (29-30) does not fall inside outer (30-39) -- confirm
# that these example coordinates are intended.
nr = vmc.models.NestedRegion(
    inner=vmc.models.SimpleRegion(start=29, end=30),
    outer=vmc.models.SimpleRegion(start=30, end=39),
)
nr.as_dict()

{'inner': {'end': 30, 'start': 29, 'type': 'SimpleRegion'},
 'outer': {'end': 39, 'start': 30, 'type': 'SimpleRegion'},
 'type': 'NestedRegion'}

In [4]:
# Compose a RangedRegion whose `start` and `end` are themselves SimpleRegions
# (20-29 and 30-39 respectively), then show its plain-dict form.
rr = vmc.models.RangedRegion(
    start=vmc.models.SimpleRegion(start=20, end=29),
    end=vmc.models.SimpleRegion(start=30, end=39),
)
rr.as_dict()

{'end': {'end': 39, 'start': 30, 'type': 'SimpleRegion'},
 'start': {'end': 29, 'start': 20, 'type': 'SimpleRegion'},
 'type': 'RangedRegion'}

# Locations
Locations imply a coordinate system and coordinates within it.

The most common Location is a SequenceLocation, i.e., a Region on a named sequence.
Locations may also be more conceptual, such as a cytoband region or a gene.
Any of these may be used as the Location for Variation.

In [5]:
# Place the SimpleRegion `sr` on a named sequence, then store the identifier
# returned by vmc.computed_id() on the object before displaying it.
slsr = vmc.models.SequenceLocation(
    sequence_id="NM_0001234.5",
    region=sr,
)
slsr.id = vmc.computed_id(slsr)
slsr.as_dict()

{'id': 'VMC:GL_yKxYQ4j-D1f43mjYbobEKk74CVfwSEQj',
 'region': {'end': 43, 'start': 42, 'type': 'SimpleRegion'},
 'sequence_id': 'NM_0001234.5',
 'type': 'SequenceLocation'}

In [6]:
# Same sequence as above, but located by the NestedRegion `nr`; the id is
# assigned from vmc.computed_id() before the dict form is displayed.
slnr = vmc.models.SequenceLocation(
    sequence_id="NM_0001234.5",
    region=nr,
)
slnr.id = vmc.computed_id(slnr)
slnr.as_dict()

{'id': 'VMC:GL_y1KI63endgw2MbqVIy5n4Ef2BWbnEVH2',
 'region': {'inner': {'end': 30, 'start': 29, 'type': 'SimpleRegion'},
  'outer': {'end': 39, 'start': 30, 'type': 'SimpleRegion'},
  'type': 'NestedRegion'},
 'sequence_id': 'NM_0001234.5',
 'type': 'SequenceLocation'}

In [7]:
# Same sequence again, this time located by the RangedRegion `rr`; the id is
# assigned from vmc.computed_id() before the dict form is displayed.
slrr = vmc.models.SequenceLocation(
    sequence_id="NM_0001234.5",
    region=rr,
)
slrr.id = vmc.computed_id(slrr)
slrr.as_dict()

{'id': 'VMC:GL_SS97Ti3Tjad9M8GW1F0miTgS06RJ6fKB',
 'region': {'end': {'end': 39, 'start': 30, 'type': 'SimpleRegion'},
  'start': {'end': 29, 'start': 20, 'type': 'SimpleRegion'},
  'type': 'RangedRegion'},
 'sequence_id': 'NM_0001234.5',
 'type': 'SequenceLocation'}

In [8]:
# A CytobandLocation is described by a chromosome plus start/end band labels
# rather than by sequence coordinates.
cbl = vmc.models.CytobandLocation(
    chr="11",
    start="q22.3",
    end="q23.1",
)
cbl.id = vmc.computed_id(cbl)
cbl.as_dict()

{'chr': '11',
 'end': 'q23.1',
 'id': 'VMC:GL_R2RiNOcD_3F-NNEQUrIst3M84LTsVQWF',
 'start': 'q22.3',
 'type': 'CytobandLocation'}

In [9]:
# A GeneLocation is described only by a gene identifier string.
gl = vmc.models.GeneLocation(
    gene="HGNC:MSH2",
)
gl.id = vmc.computed_id(gl)
gl.as_dict()

{'gene': 'HGNC:MSH2',
 'id': 'VMC:GL_HUswIoUpNqPZa2rBwJR_32At9A3wnWJJ',
 'type': 'GeneLocation'}

# Alleles

In [10]:
# An Allele pairs a Location with a state; here the location is the
# SimpleRegion-based SequenceLocation `slsr`.
a = vmc.models.Allele(
    location=slsr,
    state="del",
)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_-8HFqBFAse_G8aG5X7-4goA02dRpccqU',
 'location': {'id': 'VMC:GL_yKxYQ4j-D1f43mjYbobEKk74CVfwSEQj',
  'region': {'end': 43, 'start': 42, 'type': 'SimpleRegion'},
  'sequence_id': 'NM_0001234.5',
  'type': 'SequenceLocation'},
 'state': 'del',
 'type': 'Allele'}

In [11]:
# Same state, but located by the RangedRegion-based SequenceLocation `slrr`.
# Note that this rebinds `a`, replacing the Allele from the previous cell.
a = vmc.models.Allele(
    location=slrr,
    state="del",
)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_N4VBPe4xX1LHeydXlWWNqFfeCLqVG1K5',
 'location': {'id': 'VMC:GL_SS97Ti3Tjad9M8GW1F0miTgS06RJ6fKB',
  'region': {'end': {'end': 39, 'start': 30, 'type': 'SimpleRegion'},
   'start': {'end': 29, 'start': 20, 'type': 'SimpleRegion'},
   'type': 'RangedRegion'},
  'sequence_id': 'NM_0001234.5',
  'type': 'SequenceLocation'},
 'state': 'del',
 'type': 'Allele'}

In [12]:
# Same state, but located by the GeneLocation `gl` instead of a sequence.
# Note that this rebinds `a` once more.
a = vmc.models.Allele(
    location=gl,
    state="del",
)
a.id = vmc.computed_id(a)
a.as_dict()

{'id': 'VMC:GA_NmaqpwVF2tj2LuvHATGMW-qqYUHwhiOL',
 'location': {'gene': 'HGNC:MSH2',
  'id': 'VMC:GL_HUswIoUpNqPZa2rBwJR_32At9A3wnWJJ',
  'type': 'GeneLocation'},
 'state': 'del',
 'type': 'Allele'}

# Haplotypes
A Haplotype is a collection of allele_ids, with an optional specification of the covered location and completeness.

In [13]:
# Build a Haplotype that refers to its location and alleles by id only.
# The 'BOGUS:' ids are placeholder strings, not ids computed in this notebook.
h = vmc.models.Haplotype(
    location_id=slsr.id,
    allele_ids=[
        'BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
        'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
        'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
        'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j',
    ],
    completeness="PARTIAL",
)
h.id = vmc.computed_id(h)
h.as_dict()

{'allele_ids': ['BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
  'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
  'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
  'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j'],
 'completeness': 'PARTIAL',
 'id': 'VMC:GH_d-arw1iegJ7WghhHiKzv90K0diZf-YTY',
 'location_id': 'VMC:GL_yKxYQ4j-D1f43mjYbobEKk74CVfwSEQj',
 'type': 'Haplotype'}

# Genotypes
A Genotype is a collection of haplotype_ids, with an optional specification of completeness.

In [14]:
# Build a Genotype that refers to its haplotypes by id only.
# The 'BOGUS:' ids are placeholder strings, not ids computed in this notebook.
g = vmc.models.Genotype(
    haplotype_ids=[
        'BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
        'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
        'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
        'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j',
    ],
    completeness="PARTIAL",
)
g.id = vmc.computed_id(g)
g.as_dict()

{'completeness': 'PARTIAL',
 'haplotype_ids': ['BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
  'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
  'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
  'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j'],
 'id': 'VMC:GG_5IARhH263DsFl6P7WfCLAfqkcnCbl4tn',
 'type': 'Genotype'}

# VariationSet
A VariationSet is just a bucket of ids; the objects those ids refer to may not even exist.

In [15]:
# Build a VariationSet from a flat list of member ids.
# The 'BOGUS:' ids are placeholder strings, not ids computed in this notebook.
vs = vmc.models.VariationSet(
    member_ids=[
        'BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
        'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
        'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
        'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j',
        'BOGUS:XX_pel3HzoNSMCEvPoQQD-AOBE8I8s0eCn9',
        'BOGUS:XX_X2x6a4Xvil365Ea-Po8WcuuQPWx973U8',
        'BOGUS:XX_QHDx_0DbssgtGljy-K1q7WAcNkqD5TY-',
        'BOGUS:XX_3x2p-8eCIc0pU-if_6CFBKGLziZRSWdz',
        'BOGUS:XX_RXF8gSNDDyPQ0opTA8ordEE6hGGm2GYJ',
        'BOGUS:XX_7tDaRPzXL4rfOLoYtRUUGCTy65ptDs8J',
    ],
)
vs.id = vmc.computed_id(vs)
vs.as_dict()

{'id': 'VMC:GVS_6m4w9eXBvYrs3FKOZq62E_955VZ7Oo7V',
 'member_ids': ['BOGUS:XX_WMv1y-3Q460hi_S3ND5N5Ct2Ci58TOZd',
  'BOGUS:XX_jW7bSR3Obmx3IewIRSJkJMf6t7b73LVU',
  'BOGUS:XX_23DL4svp8FvWdMrkhuOckbyjM-0I1Dov',
  'BOGUS:XX_n363FutAEo79HhjNl7wea61SGc_tU40j',
  'BOGUS:XX_pel3HzoNSMCEvPoQQD-AOBE8I8s0eCn9',
  'BOGUS:XX_X2x6a4Xvil365Ea-Po8WcuuQPWx973U8',
  'BOGUS:XX_QHDx_0DbssgtGljy-K1q7WAcNkqD5TY-',
  'BOGUS:XX_3x2p-8eCIc0pU-if_6CFBKGLziZRSWdz',
  'BOGUS:XX_RXF8gSNDDyPQ0opTA8ordEE6hGGm2GYJ',
  'BOGUS:XX_7tDaRPzXL4rfOLoYtRUUGCTy65ptDs8J'],
 'type': 'VariationSet'}