Permalink
Browse files

Merge branch 'master' of https://github.com/jamescasbon/PyVCF into lenna

Conflicts:
	vcf/parser.py
	vcf/test/test_vcf.py
  • Loading branch information...
2 parents 49f8897 + d1a9fdc commit ba00d83950547eb6dae98cfb3bfbeed471be26e1 Lenna Peterson committed Feb 22, 2014
View
@@ -10,3 +10,4 @@ docs/_build
.DS_Store
vcf/cparse.c
vcf/cparse.so
+.coverage
View
@@ -4,9 +4,10 @@ python:
- "2.6"
- "2.7"
- "3.2"
+ - "3.3"
- "pypy"
install:
- - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors pysam argparse ordereddict; fi"
- - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors pysam; fi"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict; fi"
+ - "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
- python setup.py install
script: python setup.py test
View
@@ -0,0 +1 @@
+recursive-include vcf *.pyx
View
@@ -14,7 +14,7 @@ The main interface is the class: ``Reader``. It takes a file-like
object and acts as a reader::
>>> import vcf
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
>>> for record in vcf_reader:
... print record
Record(CHROM=20, POS=14370, REF=G, ALT=[A])
@@ -49,7 +49,7 @@ one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists
of key=value pairs are converted to Python dictionaries, with flags being given
a ``True`` value. Integers and floats are handled exactly as you'd expect::
- >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
+ >>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
>>> record = vcf_reader.next()
>>> print record.POS
14370
@@ -65,10 +65,10 @@ examine properties of interest::
3 1.0 0
>>> print record.num_hom_ref, record.num_het, record.num_hom_alt
1 1 1
- >>> print record.nucl_diversity, record.aaf
- 0.6 0.5
+ >>> print record.nucl_diversity, record.aaf, record.heterozygosity
+ 0.6 [0.5] 0.5
>>> print record.get_hets()
- [Call(sample=NA00002, GT=1|0, HQ=[51, 51], DP=8, GQ=48)]
+ [Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
>>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
True False True False
>>> print record.var_type, record.var_subtype
@@ -101,7 +101,7 @@ call data in ``data``::
>>> print call.sample
NA00001
>>> print call.data
- {'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
+ CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
Please note that as of release 0.4.0, attributes known to have single values (such as
``DP`` and ``GQ`` above) are returned as values. Other attributes are returned
@@ -134,7 +134,7 @@ For example::
ALT records are actually classes, so that you can interrogate them::
- >>> reader = vcf.Reader(file('vcf/test/example-4.1-bnd.vcf'))
+ >>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
>>> _ = reader.next(); row = reader.next()
>>> print row
Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
@@ -146,22 +146,22 @@ Random access is supported for files with tabix indexes. Simply call fetch for
the region you are interested in::
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> for record in vcf_reader.fetch('20', 1110696, 1230237):
+ >>> for record in vcf_reader.fetch('20', 1110696, 1230237): # doctest: +SKIP
... print record
Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
Or extract a single row::
- >>> print vcf_reader.fetch('20', 1110696)
+ >>> print vcf_reader.fetch('20', 1110696) # doctest: +SKIP
Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a
template ``Reader`` which provides the metadata::
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
- >>> vcf_writer = vcf.Writer(file('/dev/null', 'w'), vcf_reader)
+ >>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
>>> for record in vcf_reader:
... vcf_writer.write_record(record)
View
@@ -17,6 +17,72 @@ New features should have test code sent with them.
Changes
=======
+0.6.7 Release
+-------------
+
+* Include missing .pyx files
+
+0.6.6 Release
+-------------
+
+* better walk together record ordering (Thanks @datagram, #141)
+
+0.6.5 Release
+-------------
+
+* Better contig handling (#115, #116, #119 thanks Martijn)
+* INFO lines with type character (#120, #121 thanks @AndrewUzilov, Martijn)
+* Single breakends fix (#126 thanks @pkrushe)
+* Speedup by losing ordering of INFO (#128 thanks Martijn)
+* HOMSEQ and other missing fields in INFO (#130 thanks Martijn)
+* Add aaf property (thanks @mgymrek, #131)
+* Custom equality for walk_together, thanks bow #132
+* Change default line encoding to '\n'
+* Improved __eq__ (#134, thanks bow)
+
+
+0.6.4 Release
+-------------
+
+* Handle INFO fields with multiple values, thanks
+* Support writing records without GT data #88, thanks @bow
+* Pickleable call data #112, thanks @superbobry
+* Write files without FORMAT #95 thanks Martijn
+* Strict whitespace mode, thanks Martijn, Lee Lichtenstein and Manawsi Gupta
+* Add support for contigs in header, thanks @gcnh and Martijn
+* Fix GATK header parsing, thanks @alimanfoo
+
+0.6.3 Release
+-------------
+
+* cython port of #79
+* correct writing of meta lines #84
+
+0.6.2 Release
+-------------
+
+* issues #78, #79 (thanks Sean, Brad)
+
+0.6.1 Release
+-------------
+
+* Add strict whitespace mode for well formed VCFs with spaces
+ in sample names (thanks Marco)
+* Ignore blank lines in files (thanks Martijn)
+* Tweaks for handling missing data (thanks Sean)
+* bcftools tests (thanks Martijn)
+* record.FILTER is always a list
+
+0.6.0 Release
+-------------
+
+* Backwards incompatible change: _Call.data is now a
+ namedtuple (previously it was a dict)
+* Optional cython version, much improved performance.
+* Improvements to writer (thanks @cmclean)
+* Improvements to inheritance of classes (thanks @lennax)
+
+
0.5.0 Release
-------------
View
@@ -162,7 +162,7 @@ def addfilt(filt):
if output_record:
# use PASS only if other filter names appear in the FILTER column
#FIXME: is this good idea?
- if record.FILTER == '.' and not drop_filtered: record.FILTER = 'PASS'
+ if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS'
output.write_record(record)
if __name__ == '__main__': main()
View
@@ -39,7 +39,9 @@ for record in reader:
for sample in record.samples:
row = [sample.sample]
- row += [flatten(sample.data.get(x, None)) for x in formats]
+ # Format fields not present will simply end up "blank"
+ # in the output
+ row += [flatten(getattr(sample.data, x, None)) for x in formats]
row += [record.FILTER or '.']
row += fixed
row += info_row
View
@@ -16,9 +16,12 @@
except ImportError:
requires.append('argparse')
-
+import collections
+try:
+ collections.Counter
+except AttributeError:
+ requires.append('counter')
try:
- import collections
collections.OrderedDict
except AttributeError:
requires.append('ordereddict')
View
@@ -4,7 +4,7 @@
# and then run "tox" from this directory.
[tox]
-envlist = py26, py27, py32
+envlist = py26, py27, py32, py33
[testenv]
commands =
@@ -14,7 +14,9 @@ commands =
[testenv:py26]
deps =
argparse
+ counter
ordereddict
+ cython
pysam
[testenv:py27]
@@ -23,6 +25,9 @@ deps =
cython
[testenv:py32]
-deps =
+deps =
cython
+[testenv:py33]
+deps =
+ cython
View
@@ -66,8 +66,8 @@
3 1.0 0
>>> print record.num_hom_ref, record.num_het, record.num_hom_alt
1 1 1
- >>> print record.nucl_diversity, record.aaf
- 0.6 0.5
+ >>> print record.nucl_diversity, record.aaf, record.heterozygosity
+ 0.6 [0.5] 0.5
>>> print record.get_hets()
[Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
>>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
@@ -178,4 +178,4 @@
from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
from vcf.sample_filter import SampleFilter
-VERSION = '0.5.0'
+VERSION = '0.6.7'
View
@@ -48,7 +48,10 @@ def parse_samples(
if entry_num == 1 or ',' not in vals:
if entry_type == INTEGER:
- sampdat[j] = int(vals)
+ try:
+ sampdat[j] = int(vals)
+ except ValueError:
+ sampdat[j] = float(vals)
elif entry_type == FLOAT or entry_type == NUMERIC:
sampdat[j] = float(vals)
else:
@@ -62,7 +65,10 @@ def parse_samples(
vals = vals.split(',')
if entry_type == INTEGER:
- sampdat[j] = _map(int, vals)
+ try:
+ sampdat[j] = _map(int, vals)
+ except ValueError:
+ sampdat[j] = map(float, vals)
elif entry_type == FLOAT or entry_type == NUMERIC:
sampdat[j] = _map(float, vals)
else:
View
@@ -138,11 +138,12 @@ def __call__(self, record):
def bias_test(self, calls):
calls = [x for x in calls if x.called]
#TODO: single genotype assumption
+
try:
# freebayes
ra = robjects.IntVector([x['RO'][0] for x in calls])
aa = robjects.IntVector([x['AO'][0] for x in calls])
- except KeyError:
+ except AttributeError:
# GATK
ra = robjects.IntVector([x['AD'][0] for x in calls])
aa = robjects.IntVector([x['AD'][1] for x in calls])
Oops, something went wrong.

0 comments on commit ba00d83

Please sign in to comment.