# Aggregate Variation

In [1]:
from ga4gh.vr import models
from ga4gh.core import ga4gh_identify, ga4gh_serialize, ga4gh_digest

## Set Up Sample Alleles

In [2]:
# Three sample alleles on the same sequence. Each covers a one-position
# interval (start, start + 1) and carries the state "C"; only the start
# position differs between them.
a1, a2, a3 = (
    models.Allele(
        location=models.SequenceLocation(
            sequence_id="GA4GH:SQ.01234abcde",
            interval=models.SimpleInterval(start=start, end=start + 1),
        ),
        state=models.SequenceState(sequence="C"),
    )
    for start in (10, 20, 30)
)

## DiscreteVariationSet

In [3]:
# Group the three sample alleles into one set, listed in the order a1, a2, a3.
dvs = models.DiscreteVariationSet(members=[a1, a2, a3])

In [4]:
# Show the set as a plain nested dict; every member is expanded inline in full.
dvs.as_dict()

{'members': [{'location': {'interval': {'end': 11,
     'start': 10,
     'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'},
  {'location': {'interval': {'end': 21, 'start': 20, 'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'},
  {'location': {'interval': {'end': 31, 'start': 30, 'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'}],
 'type': 'DiscreteVariationSet'}

In [5]:
# Serialized form of the set. In the output each member is a short opaque
# string (presumably the member's digest -- confirm against the library docs)
# rather than an inline object.
ga4gh_serialize(dvs)

b'{"members":["5Rl7J7r03Otb2JiXordEhXzaehpFxw9T","_vL0Ebg2EO2EAEBp_M28-d7Tv7yeLFG5","q4Vjf3bdgSF-cosPhTWZUXFWOSschANK"],"type":"DiscreteVariationSet"}'

In [6]:
# Identifier for the set built from [a1, a2, a3].
ga4gh_identify(dvs)

'ga4gh:DVS.c8NvWrImfJgfb85XW0XCPRSC1j2ehJ4d'

In [7]:
# Rebuild the set with the same three alleles in reverse order, to check
# whether member order influences the identifier computed in the next cell.
dvs = models.DiscreteVariationSet(members=[a3, a2, a1])

In [8]:
# Identifier for the reversed-order set. The output matches the identifier
# obtained for [a1, a2, a3], so in this example member order does not change it.
ga4gh_identify(dvs)

'ga4gh:DVS.c8NvWrImfJgfb85XW0XCPRSC1j2ehJ4d'

## Haplotype

In [9]:
# Build a haplotype from the same three sample alleles.
hap = models.Haplotype(members=[a1, a2, a3])

In [10]:
# Plain-dict view of the haplotype. Note the 'completeness' field in the
# output, which was not passed to the constructor.
hap.as_dict()

{'completeness': 'UNKNOWN',
 'members': [{'location': {'interval': {'end': 11,
     'start': 10,
     'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'},
  {'location': {'interval': {'end': 21, 'start': 20, 'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'},
  {'location': {'interval': {'end': 31, 'start': 30, 'type': 'SimpleInterval'},
    'sequence_id': 'GA4GH:SQ.01234abcde',
    'type': 'SequenceLocation'},
   'state': {'sequence': 'C', 'type': 'SequenceState'},
   'type': 'Allele'}],
 'type': 'Haplotype'}

In [11]:
# Identifier for the haplotype.
ga4gh_identify(hap)

'ga4gh:VH.eH7q9fjHrBocCxZPR15LtIIzoJcIU5Pw'