-
Notifications
You must be signed in to change notification settings - Fork 8
/
test_for_vcf
49 lines (44 loc) · 1.79 KB
/
test_for_vcf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
from vcf import VCFReader
import random
def load_vcf_example(vcf_file):
reader = VCFReader(filename=vcf_file)
for record in reader:
sequence_tmpl = {
'text': {'status': 'generated'},
'resourceType': 'Sequence',
'coordinate': {
'chromosome': {'text': record.CHROM,
'start': record.POS,
'end': record.end,
'genomeBuild': {'text': 'GRCh37'}
},
'source': 'germline' if random.random() < 0.5 else 'somatic',
'species': {'text': 'Homo sapiens',
'coding': [{
'system': 'http://snomed.info/sct',
'code': '337915000'}]},
'referenceAllele': record.REF
}
for sample in record.samples:
sample_id = sample.sample
reads = sample.gt_bases
if '/' in reads:
delimiter = '/'
elif '|' in reads:
delimiter = '|'
else:
delimiter = '.'
seq_data = dict(sequence_tmpl)
print reads, reads.split(delimiter), record.REF
seq_data['observedAllele'] = reads.split(delimiter)
# get name of the variant
variant_id = record.ID
variant = variant_id if variant_id is not None else 'anonymous variant'
seq_data['variantID'] = variant_id
seq_data['text']['div'] = '<div>Genotype of %s is %s</div>'% (variant, reads)
print 'Created Sequence at %s:%s-%s'% (record.CHROM, record.POS, record.end)
break
BASEDIR = 'fhir'
for example_file in os.listdir(os.path.join(BASEDIR, 'examples/vcf')):
load_vcf_example(os.path.join(BASEDIR, 'examples/vcf', example_file))