Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge commit '889ab04c6a824f1cd18a08b21eeaef16c3a90862' into sv

  • Loading branch information...
commit be394037f33146b1873ef74a24eb7b82c5ae10b3 2 parents 8537a6a + 889ab04
James Casbon authored
Showing with 199 additions and 10 deletions.
  1. +133 −6 test/test_vcf.py
  2. +66 −4 vcf/parser.py
View
139 test/test_vcf.py
@@ -316,21 +316,148 @@ def test_var_type(self):
self.assertEqual("indel", type)
elif var.POS == 1234567:
self.assertEqual("indel", type)
+ # SV tests
+ reader = vcf.Reader(fh('example-4.1-sv.vcf'))
+ for var in reader:
+ type = var.var_type
+ if var.POS == 2827693:
+ self.assertEqual("sv", type)
+ if var.POS == 321682:
+ self.assertEqual("sv", type)
+ if var.POS == 14477084:
+ self.assertEqual("sv", type)
+ if var.POS == 9425916:
+ self.assertEqual("sv", type)
+ elif var.POS == 12665100:
+ self.assertEqual("sv", type)
+ elif var.POS == 18665128:
+ self.assertEqual("sv", type)
+
def test_var_subtype(self):
reader = vcf.Reader(fh('example-4.0.vcf'))
for var in reader:
- type = var.var_subtype
+ subtype = var.var_subtype
+ if var.POS == 14370:
+ self.assertEqual("ts", subtype)
+ if var.POS == 17330:
+ self.assertEqual("tv", subtype)
+ if var.POS == 1110696:
+ self.assertEqual("unknown", subtype)
+ if var.POS == 1230237:
+ self.assertEqual("del", subtype)
+ elif var.POS == 1234567:
+ self.assertEqual("unknown", subtype)
+ # SV tests
+ reader = vcf.Reader(fh('example-4.1-sv.vcf'))
+ for var in reader:
+ subtype = var.var_subtype
+ if var.POS == 2827693:
+ self.assertEqual("DEL", subtype)
+ if var.POS == 321682:
+ self.assertEqual("DEL", subtype)
+ if var.POS == 14477084:
+ self.assertEqual("DEL:ME:ALU", subtype)
+ if var.POS == 9425916:
+ self.assertEqual("INS:ME:L1", subtype)
+ elif var.POS == 12665100:
+ self.assertEqual("DUP", subtype)
+ elif var.POS == 18665128:
+ self.assertEqual("DUP:TANDEM", subtype)
+
+ def test_is_sv(self):
+ reader = vcf.Reader(fh('example-4.1-sv.vcf'))
+ for var in reader:
+ is_sv = var.is_sv
+ if var.POS == 2827693:
+ self.assertEqual(True, is_sv)
+ if var.POS == 321682:
+ self.assertEqual(True, is_sv)
+ if var.POS == 14477084:
+ self.assertEqual(True, is_sv)
+ if var.POS == 9425916:
+ self.assertEqual(True, is_sv)
+ elif var.POS == 12665100:
+ self.assertEqual(True, is_sv)
+ elif var.POS == 18665128:
+ self.assertEqual(True, is_sv)
+
+ reader = vcf.Reader(fh('example-4.0.vcf'))
+ for var in reader:
+ is_sv = var.is_sv
if var.POS == 14370:
- self.assertEqual("ts", type)
+ self.assertEqual(False, is_sv)
if var.POS == 17330:
- self.assertEqual("tv", type)
+ self.assertEqual(False, is_sv)
if var.POS == 1110696:
- self.assertEqual("unknown", type)
+ self.assertEqual(False, is_sv)
if var.POS == 1230237:
- self.assertEqual("del", type)
+ self.assertEqual(False, is_sv)
elif var.POS == 1234567:
- self.assertEqual("unknown", type)
+ self.assertEqual(False, is_sv)
+
+ def test_is_sv_precise(self):
+ reader = vcf.Reader(fh('example-4.1-sv.vcf'))
+ for var in reader:
+ is_precise = var.is_sv_precise
+ if var.POS == 2827693:
+ self.assertEqual(True, is_precise)
+ if var.POS == 321682:
+ self.assertEqual(False, is_precise)
+ if var.POS == 14477084:
+ self.assertEqual(False, is_precise)
+ if var.POS == 9425916:
+ self.assertEqual(False, is_precise)
+ elif var.POS == 12665100:
+ self.assertEqual(False, is_precise)
+ elif var.POS == 18665128:
+ self.assertEqual(False, is_precise)
+
+ reader = vcf.Reader(fh('example-4.0.vcf'))
+ for var in reader:
+ is_precise = var.is_sv_precise
+ if var.POS == 14370:
+ self.assertEqual(False, is_precise)
+ if var.POS == 17330:
+ self.assertEqual(False, is_precise)
+ if var.POS == 1110696:
+ self.assertEqual(False, is_precise)
+ if var.POS == 1230237:
+ self.assertEqual(False, is_precise)
+ elif var.POS == 1234567:
+ self.assertEqual(False, is_precise)
+
+ def test_sv_end(self):
+ reader = vcf.Reader(fh('example-4.1-sv.vcf'))
+ for var in reader:
+ sv_end = var.sv_end
+ if var.POS == 2827693:
+ self.assertEqual(2827680, sv_end)
+ if var.POS == 321682:
+ self.assertEqual(321887, sv_end)
+ if var.POS == 14477084:
+ self.assertEqual(14477381, sv_end)
+ if var.POS == 9425916:
+ self.assertEqual(9425916, sv_end)
+ elif var.POS == 12665100:
+ self.assertEqual(12686200, sv_end)
+ elif var.POS == 18665128:
+ self.assertEqual(18665204, sv_end)
+
+ reader = vcf.Reader(fh('example-4.0.vcf'))
+ for var in reader:
+ sv_end = var.sv_end
+ if var.POS == 14370:
+ self.assertEqual(None, sv_end)
+ if var.POS == 17330:
+ self.assertEqual(None, sv_end)
+ if var.POS == 1110696:
+ self.assertEqual(None, sv_end)
+ if var.POS == 1230237:
+ self.assertEqual(None, sv_end)
+ elif var.POS == 1234567:
+ self.assertEqual(None, sv_end)
+
class TestCall(unittest.TestCase):
View
70 vcf/parser.py
@@ -354,13 +354,28 @@ def is_snp(self):
@property
def is_indel(self):
""" Return whether or not the variant is an INDEL """
- if len(self.REF) > 1: return True
+ is_sv = self.is_sv
+
+ if len(self.REF) > 1 and not is_sv: return True
for alt in self.ALT:
if alt is None:
return True
elif len(alt) != len(self.REF):
- return True
+ # The distinction between INDELs and SVs can be murky.
+ if not is_sv:
+ # 1 2827693 . CCCCTCGCA C . PASS AC=10;
+ return True
+ else:
+ # 1 2827693 . CCCCTCGCA C . PASS SVTYPE=DEL;
+ return False
return False
+
+ @property
+ def is_sv(self):
+ """ Return whether or not the variant is a structural variant """
+ if self.INFO.get('SVTYPE') is None:
+ return False
+ return True
@property
def is_transition(self):
@@ -405,14 +420,34 @@ def var_type(self):
return "snp"
elif self.is_indel:
return "indel"
+ elif self.is_sv:
+ return "sv"
else:
return "unknown"
@property
def var_subtype(self):
"""
- Return the subtype of variant [ts, tv, ins, del]
- TO DO: support SV sub_types
+ Return the subtype of variant.
+ - For SNPs and INDELs, yield one of: [ts, tv, ins, del]
+ - For SVs, yield "complex" when SVTYPE is BND, the INFO SVTYPE
+ value for precise SVs, or the SV type defined in the first ALT
+ field (removing the angle brackets) for imprecise SVs.
+ E.g.:
+ <DEL> -> DEL
+ <INS:ME:L1> -> INS:ME:L1
+ <DUP> -> DUP
+
+ The logic is meant to follow the rules outlined in the following
+ paragraph at:
+
+ http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
+
+ "For precisely known variants, the REF and ALT fields should contain
+ the full sequences for the alleles, following the usual VCF conventions.
+ For imprecise variants, the REF field may contain a single base and the
+ ALT fields should contain symbolic alleles (e.g. <ID>), described in more
+ detail below. Imprecise variants should also be marked by the presence
+ of an IMPRECISE flag in the INFO field."
"""
if self.is_snp:
if self.is_transition:
@@ -428,10 +463,37 @@ def var_subtype(self):
return "ins"
else: # multiple ALT alleles. unclear
return "unknown"
+ elif self.is_sv:
+ if self.INFO['SVTYPE'] == "BND":
+ return "complex"
+ elif self.is_sv_precise:
+ return self.INFO['SVTYPE']
+ else:
+ # first remove both "<" and ">" from ALT
+ return self.ALT[0].strip('<>')
else:
return "unknown"
@property
+ def sv_end(self):
+ """ Return the end position for the SV """
+ if self.is_sv:
+ return self.INFO['END']
+ return None
+
+ @property
+ def is_sv_precise(self):
+ """ Return whether the SV coordinates are mapped
+ to 1 bp resolution.
+ """
+ if self.INFO.get('IMPRECISE') is None and not self.is_sv:
+ return False
+ elif self.INFO.get('IMPRECISE') is not None and self.is_sv:
+ return False
+ elif self.INFO.get('IMPRECISE') is None and self.is_sv:
+ return True
+
+ @property
def is_monomorphic(self):
""" Return True for reference calls """
return len(self.ALT) == 1 and self.ALT[0] is None
Please sign in to comment.
Something went wrong with that request. Please try again.