Skip to content

Commit

Permalink
Added vcf and testcases to demonstrate issue214
Browse files Browse the repository at this point in the history
  • Loading branch information
redmar authored and redmar committed Nov 12, 2015
1 parent ee0208a commit d15a375
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
32 changes: 32 additions & 0 deletions vcf/test/issue-214.vcf
@@ -0,0 +1,32 @@
##fileformat=VCFv4.1
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FORMAT=<ID=RGQ,Number=1,Type=Integer,Description="Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)">
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2
1 456904 . T C,* 6162.77 . AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976 GT:AD:DP:GQ:PL 0:106,0,0:106:99:0,1800,1800 0:110,0,0:110:99:0,1800,1800
1 456940 . * C,T 6162.77 . AC=1,1;AF=8.333e-03,8.333e-03;AN=120;DP=7693;FS=0.000;MLEAC=1,1;MLEAF=8.333e-03,8.333e-03;MQ=60.00;QD=31.36;SOR=0.976 GT:AD:DP:GQ:PL 0:106,0,0:106:99:0,1800,1800 0:110,0,0:110:99:0,1800,1800
31 changes: 30 additions & 1 deletion vcf/test/test_vcf.py
Expand Up @@ -229,7 +229,35 @@ def testParse(self):
for s in r.samples:
s.phased


class TestIssue214(unittest.TestCase):
    """Regression tests for records that involve the ``*`` allele.

    See https://github.com/jamescasbon/PyVCF/issues/214

    The fixture ``issue-214.vcf`` holds two records:

    * record 0 -- REF ``T``, ALT ``C,*`` (``*`` among the ALT alleles);
      this is the record that demonstrates the issue.
    * record 1 -- REF ``*``, ALT ``C,T``. It is an open question whether
      REF may be ``*`` at all; this record does not trigger issue 214 and
      is covered for completeness only.
    """

    def _record(self, index):
        """Return the record at zero-based ``index`` of issue-214.vcf.

        A fresh reader is built on every call so each test stays
        independent of the others.
        """
        reader = vcf.Reader(fh('issue-214.vcf'))
        # Use the builtin next() so the tests do not depend on the reader
        # exposing the Python 2 style ``.next()`` method.
        for _ in range(index):
            next(reader)
        return next(reader)

    def test_issue_214_is_snp(self):
        """Record 0 (ALT ``C,*``) must be reported as a SNP."""
        self.assertTrue(self._record(0).is_snp)

    def test_issue_214_var_type(self):
        """Record 0 (ALT ``C,*``) must have ``var_type == 'snp'``."""
        self.assertEqual(self._record(0).var_type, 'snp')

    def test_issue_214_ref_is_del_is_snp(self):
        """Record 1 (REF ``*``) must be reported as a SNP."""
        self.assertTrue(self._record(1).is_snp)

    def test_issue_214_ref_is_del_var_type(self):
        """Record 1 (REF ``*``) must have ``var_type == 'snp'``."""
        self.assertEqual(self._record(1).var_type, 'snp')

class Test1kg(unittest.TestCase):

def testParse(self):
Expand Down Expand Up @@ -1532,6 +1560,7 @@ def test_write_uncalled(self):
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamtoolsOutput))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue214))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites))
suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL))
Expand Down

0 comments on commit d15a375

Please sign in to comment.