Permalink
Browse files

documentation, fix bug on parsing null genotypes

  • Loading branch information...
1 parent b67b497 commit fa752c46920276acd18a594c3ca5992046930022 James Casbon committed Jan 23, 2012
Showing with 42 additions and 6 deletions.
  1. +42 −6 vcf.py
View
48 vcf.py
@@ -58,6 +58,18 @@
>>> print record.INFO['AF']
[0.5]
+There are a number of convienience functions for each ``Record`` allowing you to
+examine properties of interest::
+
+ >>> print record.num_called, record.call_rate, record.num_unknown
+ 3 1.0 0
+ >>> print record.num_hom_ref, record.num_het, record.num_hom_alt
+ 1 1 1
+ >>> print record.nucl_diversity, record.aaf
+ 0.6 0.5
+ >>> print record.get_hets()
+ [Call(sample=NA00002, GT=1|0)]
+
``record.FORMAT`` will be a string specifying the format of the genotype
fields. In case the FORMAT column does not exist, ``record.FORMAT`` is
``None``. Finally, ``record.samples`` is a list of dictionaries containing the
@@ -73,6 +85,24 @@
>>> print record.genotype('NA00001')['GT']
0|0
+The genotypes are represented by ``Call`` objects, which have three attributes: the
+corresponding Record ``site``, the sample name in ``sample`` and a dictionary of
+call data in ``data``::
+
+ >>> call = record.genotype('NA00001')
+ >>> print call.site
+ Record(CHROM=20, POS=17330, REF=T, ALT=['A'])
+ >>> print call.sample
+ NA00001
+ >>> print call.data
+ {'GT': '0|0', 'HQ': [58, 50], 'DP': [3], 'GQ': [49]}
+
+There are also a number of methods::
+
+ >>> print call.called, call.gt_type, call.gt_bases, call.phased
+ True 0 T|T True
+
+
Metadata regarding the VCF file itself can be investigated through the
following attributes:
@@ -273,7 +303,7 @@ def gt_type(self):
if (int(a1) == 0) and (int(a2) == 0): return 0
elif (int(a1) == 0) and (int(a2) >= 1): return 1
elif (int(a2) == 0) and (int(a1) >= 1): return 1
- elif (int(a1) >= 1) and (int(a2) >= 1):
+ elif (int(a1) >= 1) and (int(a2) >= 1):
# same alt, so hom_alt
if a1 == a2: return 2
# diff alts, so het
@@ -335,7 +365,7 @@ def genotype(self, name):
def num_called(self):
"""Return the number of called samples"""
return sum(s.called for s in self.samples)
-
+
@property
def call_rate(self):
""" return the fraction of genotypes that were actually called. """
@@ -576,11 +606,17 @@ def _parse_sample(self, sample, samp_fmt, samp_fmt_types):
vals = vals.split(',')
if fmt == 'GT':
+
gt = vals[0]
- if self.aggro and gt == './.':
- gt = None
- sampdict[fmt] = gt
- break
+
+
+ if gt == './.':
+ if self.aggro:
+ gt = None
+ sampdict[fmt] = gt
+ break
+ else:
+ sampdict[fmt] = gt
else:
if entry_type == 'Integer':
sampdict[fmt] = mapper(int, vals)

0 comments on commit fa752c4

Please sign in to comment.