-
Notifications
You must be signed in to change notification settings - Fork 8
/
vcflib.pyx
134 lines (102 loc) · 3.53 KB
/
vcflib.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Cython wrapper for classes defined in Variant.cpp.
Try to keep it simple and stay close to the C++ API.
"""
from cython.operator cimport dereference as deref
from collections import namedtuple
import numpy as np
cimport numpy as np
import time
import sys
# Lightweight record type used for the items yielded when iterating over a
# PyVariantCallFile; the type itself is named 'Variant'.
VariantTuple = namedtuple(
    'Variant',
    'CHROM POS ID REF ALT QUAL FILTER INFO samples',
)
# Expose constants to Python.
# Each TYPE_* name is a module-level alias of the corresponding FIELD_* name.
# NOTE(review): the FIELD_* names are not defined in this file; presumably they
# are C-level enum values declared in the accompanying .pxd - confirm.
TYPE_FLOAT = FIELD_FLOAT
TYPE_INTEGER = FIELD_INTEGER
TYPE_BOOL = FIELD_BOOL
TYPE_STRING = FIELD_STRING
TYPE_UNKNOWN = FIELD_UNKNOWN
cdef class PyVariantCallFile:
    """
    Thin wrapper around a heap-allocated C++ ``VariantCallFile``.

    The wrapped object is created in ``__cinit__`` and destroyed in
    ``__dealloc__``. Iterating over an instance yields one ``VariantTuple``
    per record returned by ``getNextVariant``; the remaining members forward
    directly to methods or attributes of the C++ object.
    """

    def __cinit__(self, filename):
        """Allocate the C++ reader and open ``filename`` with it."""
        self.thisptr = new VariantCallFile()
        # NOTE(review): the result of open() is not inspected here; if the C++
        # method reports failure through its return value, that failure is
        # silently ignored - confirm against the .pxd / Variant.h.
        self.thisptr.open(filename)

    def __dealloc__(self):
        """Release the C++ reader owned by this instance."""
        del self.thisptr

    def __len__(self):
        """
        Count records by calling ``getNextVariant`` until it returns false.

        NOTE(review): counting advances the same underlying reader that
        ``__iter__`` uses, so records counted here have been consumed;
        presumably a later iteration will not see them again - confirm.
        """
        cdef Variant var
        # Typed counter keeps the loop free of Python-object arithmetic.
        cdef Py_ssize_t n = 0
        var.setVariantCallFile(self.thisptr)
        while self.thisptr.getNextVariant(var):
            n += 1
        return n

    def __iter__(self):
        """
        Yield a ``VariantTuple`` for every record the reader returns.

        The FILTER field is split on ``';'`` before being placed in the tuple;
        all other fields are passed through from the C++ ``Variant`` members.
        """
        cdef Variant *var
        cdef vector[string] filters
        cdef char semicolon = ';'
        var = new Variant(deref(self.thisptr))
        # This is a generator: the consumer may stop early (break, exception,
        # or simply dropping the generator). The try/finally guarantees the
        # heap-allocated Variant is freed in those cases too, not only when
        # the loop runs to completion.
        try:
            while self.thisptr.getNextVariant(deref(var)):
                # split the filter field here in C++ to avoid having to do it
                # in Python later
                filters = split(var.filter, semicolon)
                yield VariantTuple(var.sequenceName,
                                   var.position,
                                   var.id,
                                   var.ref,
                                   var.alt,
                                   var.quality,
                                   filters,
                                   var.info,
                                   var.samples)
        finally:
            del var

    def setRegion(self, *args):
        """
        Forward a region selection to the C++ reader.

        Accepts either one argument (a region string) or three arguments
        (seq, start, end); both forms are passed unchanged to the C++
        ``setRegion`` overloads.

        Raises:
            Exception: if called with any other number of arguments.
        """
        if len(args) == 1:
            self.thisptr.setRegion(args[0])
        elif len(args) == 3:
            self.thisptr.setRegion(args[0], args[1], args[2])
        else:
            raise Exception('either provide a single region string or provide seq, start, end')

    # ------------------------------------------------------------------
    # Read-only views onto the C++ object. The first three call C++ methods;
    # the rest read public attributes. Values are handed back to Python via
    # Cython's automatic C++ -> Python conversion.
    # ------------------------------------------------------------------

    property infoIds:
        """Result of the C++ ``infoIds()`` call."""
        def __get__(self):
            return self.thisptr.infoIds()

    property formatIds:
        """Result of the C++ ``formatIds()`` call."""
        def __get__(self):
            return self.thisptr.formatIds()

    property filterIds:
        """Result of the C++ ``filterIds()`` call."""
        def __get__(self):
            return self.thisptr.filterIds()

    property infoTypes:
        """Value of the C++ ``infoTypes`` attribute."""
        def __get__(self):
            return self.thisptr.infoTypes

    property formatTypes:
        """Value of the C++ ``formatTypes`` attribute."""
        def __get__(self):
            return self.thisptr.formatTypes

    property infoCounts:
        """Value of the C++ ``infoCounts`` attribute."""
        def __get__(self):
            return self.thisptr.infoCounts

    property formatCounts:
        """Value of the C++ ``formatCounts`` attribute."""
        def __get__(self):
            return self.thisptr.formatCounts

    property parseSamples:
        """Readable and writable view of the C++ ``parseSamples`` attribute."""
        def __get__(self):
            return self.thisptr.parseSamples

        def __set__(self, v):
            self.thisptr.parseSamples = v

    property header:
        """Value of the C++ ``header`` attribute."""
        def __get__(self):
            return self.thisptr.header

    property fileformat:  # [sic] no camel case
        """Value of the C++ ``fileformat`` attribute."""
        def __get__(self):
            return self.thisptr.fileformat

    property fileDate:
        """Value of the C++ ``fileDate`` attribute."""
        def __get__(self):
            return self.thisptr.fileDate

    property source:
        """Value of the C++ ``source`` attribute."""
        def __get__(self):
            return self.thisptr.source

    property reference:
        """Value of the C++ ``reference`` attribute."""
        def __get__(self):
            return self.thisptr.reference

    property phasing:
        """Value of the C++ ``phasing`` attribute."""
        def __get__(self):
            return self.thisptr.phasing

    property sampleNames:
        """Value of the C++ ``sampleNames`` attribute."""
        def __get__(self):
            return self.thisptr.sampleNames