Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

ready 0.4.0 release

  • Loading branch information...
commit d748c5c7281599f4b8e794fe008c80ffbb6cf7fb 1 parent d3b841d
James Casbon authored
Showing with 55 additions and 25 deletions.
  1. +16 −5 README.rst
  2. +10 −2 docs/HISTORY.rst
  3. +17 −6 vcf/__init__.py
  4. +12 −12 vcf/parser.py
View
21 README.rst
@@ -1,5 +1,7 @@
A VCFv4.0 parser for Python.
+The online version of the PyVCF documentation is available at http://pyvcf.rtfd.org/
+
The intent of this module is to mimic the ``csv`` module in the Python stdlib,
as opposed to more flexible serialization formats like JSON or YAML. ``vcf``
will attempt to parse the content of each record based on the data types
@@ -12,7 +14,7 @@ The main interface is the class: ``Reader``. It takes a file-like
object and acts as a reader::
>>> import vcf
- >>> vcf_reader = vcf.Reader(open('test/example.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('test/example-4.0.vcf', 'rb'))
>>> for record in vcf_reader:
... print record
Record(CHROM=20, POS=14370, REF=G, ALT=['A'])
@@ -47,7 +49,7 @@ one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists
of key=value pairs are converted to Python dictionaries, with flags being given
a ``True`` value. Integers and floats are handled exactly as you'd expect::
- >>> vcf_reader = vcf.Reader(open('test/example.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('test/example-4.0.vcf', 'rb'))
>>> record = vcf_reader.next()
>>> print record.POS
14370
@@ -66,8 +68,13 @@ examine properties of interest::
>>> print record.nucl_diversity, record.aaf
0.6 0.5
>>> print record.get_hets()
- [Call(sample=NA00002, GT=1|0, GQ=[48])]
-
+ [Call(sample=NA00002, GT=1|0, GQ=48)]
+ >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
+ True False True False
+ >>> print record.var_type, record.var_subtype
+ snp ts
+ >>> print record.is_monomorphic
+ False
``record.FORMAT`` will be a string specifying the format of the genotype
fields. In case the FORMAT column does not exist, ``record.FORMAT`` is
@@ -94,7 +101,11 @@ call data in ``data``::
>>> print call.sample
NA00001
>>> print call.data
- {'GT': '0|0', 'HQ': [58, 50], 'DP': [3], 'GQ': [49]}
+ {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
+
+Please note that as of release 0.4.0, attributes declared to hold a single value (such as
+``DP`` and ``GQ`` above) are returned as plain values rather than one-element lists.
+Attributes that may hold several values (such as ``HQ`` above) are still returned as lists.
There are also a number of methods::
View
12 docs/HISTORY.rst
@@ -8,11 +8,19 @@ Issues should be reported at the github issue tracker.
Changes
=======
-Pending
--------
+0.4.0 Release
+-------------
* Package structure
* add ``vcf.utils`` module with ``walk_together`` method
+* samtools tests
+* support Freebayes' non-standard '.' for no call
+* fix vcf_melt
+* support monomorphic sites, add ``is_monomorphic`` property, handle null QUALs
+* filter support for files with monomorphic calls
+* Values declared as single are no longer returned in lists
+* several performance improvements
+
0.3.0 Release
-------------
View
23 vcf/__init__.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python
'''A VCFv4.0 parser for Python.
+The online version of the PyVCF documentation is available at http://pyvcf.rtfd.org/
+
The intent of this module is to mimic the ``csv`` module in the Python stdlib,
as opposed to more flexible serialization formats like JSON or YAML. ``vcf``
will attempt to parse the content of each record based on the data types
@@ -13,7 +15,7 @@
object and acts as a reader::
>>> import vcf
- >>> vcf_reader = vcf.Reader(open('test/example.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('test/example-4.0.vcf', 'rb'))
>>> for record in vcf_reader:
... print record
Record(CHROM=20, POS=14370, REF=G, ALT=['A'])
@@ -48,7 +50,7 @@
of key=value pairs are converted to Python dictionaries, with flags being given
a ``True`` value. Integers and floats are handled exactly as you'd expect::
- >>> vcf_reader = vcf.Reader(open('test/example.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('test/example-4.0.vcf', 'rb'))
>>> record = vcf_reader.next()
>>> print record.POS
14370
@@ -67,8 +69,13 @@
>>> print record.nucl_diversity, record.aaf
0.6 0.5
>>> print record.get_hets()
- [Call(sample=NA00002, GT=1|0, GQ=[48])]
-
+ [Call(sample=NA00002, GT=1|0, GQ=48)]
+ >>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
+ True False True False
+ >>> print record.var_type, record.var_subtype
+ snp ts
+ >>> print record.is_monomorphic
+ False
``record.FORMAT`` will be a string specifying the format of the genotype
fields. In case the FORMAT column does not exist, ``record.FORMAT`` is
@@ -95,7 +102,11 @@
>>> print call.sample
NA00001
>>> print call.data
- {'GT': '0|0', 'HQ': [58, 50], 'DP': [3], 'GQ': [49]}
+ {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
+
+Please note that as of release 0.4.0, attributes declared to hold a single value (such as
+``DP`` and ``GQ`` above) are returned as plain values rather than one-element lists.
+Attributes that may hold several values (such as ``HQ`` above) are still returned as lists.
There are also a number of methods::
@@ -157,4 +168,4 @@
from filters import Base as Filter
from parser import RESERVED_INFO, RESERVED_FORMAT
-VERSION = '0.4.0pre'
+VERSION = '0.4.0'
View
24 vcf/parser.py
@@ -348,7 +348,7 @@ def get_hets(self):
def get_unknowns(self):
""" The list of unknown genotypes"""
return [s for s in self.samples if s.gt_type is None]
-
+
@property
def is_snp(self):
""" Return whether or not the variant is a SNP """
@@ -357,7 +357,7 @@ def is_snp(self):
if alt not in ['A', 'C', 'G', 'T']:
return False
return True
-
+
@property
def is_indel(self):
""" Return whether or not the variant is an INDEL """
@@ -374,13 +374,13 @@ def is_transition(self):
""" Return whether or not the SNP is a transition """
# if multiple alts, it is unclear if we have a transition
if len(self.ALT) > 1: return False
-
+
if self.is_snp:
# just one alt allele
alt_allele = self.ALT[0]
- if ((self.REF == "A" and alt_allele == "G") or
+ if ((self.REF == "A" and alt_allele == "G") or
(self.REF == "G" and alt_allele == "A") or
- (self.REF == "C" and alt_allele == "T") or
+ (self.REF == "C" and alt_allele == "T") or
(self.REF == "T" and alt_allele == "C")):
return True
else: return False
@@ -391,7 +391,7 @@ def is_deletion(self):
""" Return whether or not the INDEL is a deletion """
# if multiple alts, it is unclear if we have a deletion
if len(self.ALT) > 1: return False
-
+
if self.is_indel:
# just one alt allele
alt_allele = self.ALT[0]
@@ -401,7 +401,7 @@ def is_deletion(self):
return True
else: return False
else: return False
-
+
@property
def var_type(self):
"""
@@ -437,7 +437,7 @@ def var_subtype(self):
return "unknown"
else:
return "unknown"
-
+
@property
def is_monomorphic(self):
""" Return True for reference calls """
@@ -541,7 +541,7 @@ def _parse_info(self, info_str):
'''
if info_str == '.':
return {}
-
+
entries = info_str.split(';')
retdict = {}
@@ -588,7 +588,7 @@ def _parse_samples(self, samples, samp_fmt, site):
samp_fmt_types = []
samp_fmt_nums = []
-
+
for fmt in samp_fmt:
try:
entry_type = self.formats[fmt].type
@@ -775,8 +775,8 @@ def _map(self, func, iterable, none='.'):
for x in iterable]
def __update_readme():
- import sys
- file('README.rst', 'w').write(sys.modules[__name__].__doc__)
+ import sys, vcf
+ file('README.rst', 'w').write(vcf.__doc__)
# backwards compatibility
Please sign in to comment.
Something went wrong with that request. Please try again.