Permalink
Browse files

use ordereddict for headers

  • Loading branch information...
1 parent debbb56 commit ad9d9a20e04ef99e275ff55b07664be226695c8a James Casbon committed Jun 10, 2012
Showing with 30 additions and 18 deletions.
  1. +8 −1 setup.py
  2. +22 −17 vcf/parser.py
View
@@ -8,6 +8,13 @@
except ImportError:
requires.append('argparse')
+
+try:
+ import collections
+ collections.OrderedDict
+except AttributeError:
+ requires.append('ordereddict')
+
# get the version without an import
VERSION = "Undefined"
DOC = ""
@@ -42,7 +49,7 @@
url='https://github.com/jamescasbon/PyVCF',
version=VERSION,
classifiers = [
- 'Development Status :: 3 - Alpha',
+ 'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'Operating System :: OS Independent',
View
@@ -8,12 +8,16 @@
import codecs
try:
+ from collections import OrderedDict
+except ImportError:
+ from ordereddict import OrderedDict
+
+try:
import pysam
except ImportError:
pysam = None
-
# Metadata parsers/constants
RESERVED_INFO = {
'AA': 'String', 'AC': 'Integer', 'AF': 'Float', 'AN': 'Integer',
@@ -363,7 +367,7 @@ def is_snp(self):
def is_indel(self):
""" Return whether or not the variant is an INDEL """
is_sv = self.is_sv
-
+
if len(self.REF) > 1 and not is_sv: return True
for alt in self.ALT:
if alt is None:
@@ -377,7 +381,7 @@ def is_indel(self):
# 1 2827693 . CCCCTCGCA C . PASS SVTYPE=DEL;
return False
return False
-
+
@property
def is_sv(self):
""" Return whether or not the variant is a structural variant """
@@ -444,17 +448,17 @@ def var_subtype(self):
<DEL> -> DEL
<INS:ME:L1> -> INS:ME:L1
<DUP> -> DUP
-
+
The logic is meant to follow the rules outlined in the following
paragraph at:
-
+
http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
-
- "For precisely known variants, the REF and ALT fields should contain
- the full sequences for the alleles, following the usual VCF conventions.
- For imprecise variants, the REF field may contain a single base and the
- ALT fields should contain symbolic alleles (e.g. <ID>), described in more
- detail below. Imprecise variants should also be marked by the presence
+
+ "For precisely known variants, the REF and ALT fields should contain
+ the full sequences for the alleles, following the usual VCF conventions.
+ For imprecise variants, the REF field may contain a single base and the
+ ALT fields should contain symbolic alleles (e.g. <ID>), described in more
+ detail below. Imprecise variants should also be marked by the presence
of an IMPRECISE flag in the INFO field."
"""
if self.is_snp:
@@ -478,7 +482,7 @@ def var_subtype(self):
return self.INFO['SVTYPE']
else:
# first remove both "<" and ">" from ALT
- return self.ALT[0].strip('<>')
+ return self.ALT[0].strip('<>')
else:
return "unknown"
@@ -488,10 +492,10 @@ def sv_end(self):
if self.is_sv:
return self.INFO['END']
return None
-
+
@property
def is_sv_precise(self):
- """ Return whether the SV coordinates are mapped
+ """ Return whether the SV coordinates are mapped
to 1 b.p. resolution.
"""
if self.INFO.get('IMPRECISE') is None and not self.is_sv:
@@ -506,6 +510,7 @@ def is_monomorphic(self):
""" Return True for reference calls """
return len(self.ALT) == 1 and self.ALT[0] is None
+
class Reader(object):
""" Reader for a VCF v 4.0 file, an iterator returning ``_Record`` objects """
@@ -560,7 +565,7 @@ def _parse_metainfo(self):
The end user shouldn't have to use this. She can access the metainfo
directly with ``self.metadata``.'''
for attr in ('metadata', 'infos', 'filters', 'formats'):
- setattr(self, attr, {})
+ setattr(self, attr, OrderedDict())
parser = _vcf_metadata_parser()
@@ -649,7 +654,7 @@ def _parse_samples(self, samples, samp_fmt, site):
gt_bases = []# A/A, A|G, G/G, etc.
gt_types = []# 0, 1, 2, etc.
gt_phases = []# T, F, T, etc.
-
+
samp_fmt = samp_fmt.split(':')
samp_fmt_types = []
@@ -679,7 +684,7 @@ def _parse_samples(self, samples, samp_fmt, site):
gt_bases.append(bases) if bases is not None else './.'
gt_types.append(type) if type is not None else -1
gt_phases.append(phase) if phase is not None else False
-
+
return _SampleInfo(samp_data, gt_bases, gt_types, gt_phases)
def _parse_sample(self, sample, samp_fmt, samp_fmt_types, samp_fmt_nums):

0 comments on commit ad9d9a2

Please sign in to comment.