Permalink
Browse files

update tests for stranded option

  • Loading branch information...
yang-yangfeng committed Jan 6, 2018
1 parent 5690304 commit 93cd0941598cbd152b07774b235b2cafc2ea97d0
View
BIN +6 KB .DS_Store
Binary file not shown.
View
BIN +6 KB src/.DS_Store
Binary file not shown.
@@ -161,7 +161,6 @@ class JunctionsExtractor {
region_ = ".";
}
JunctionsExtractor(string bam1, string region1, int strandness1) : bam_(bam1), region_(region1), strandness_(strandness1) {
//cerr << "param constructor called" << endl;
min_anchor_length_ = 8;
min_intron_length_ = 70;
max_intron_length_ = 500000;
@@ -0,0 +1,2 @@
chrom start end name score strand splice_site acceptors_skipped exons_skipped donors_skipped anchor known_donor known_acceptor known_junction genes transcripts variant_info
22 93668 97252 JUNC00000001 5 - CT-AC 0 0 0 N 0 0 0 NA NA 22:94626-94627
@@ -0,0 +1,35 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=TEST,Number=1,Type=Integer,Description="Testing Tag">
##FORMAT=<ID=TT,Number=A,Type=Integer,Description="Testing Tag, with commas and \"escapes\" and escaped escapes combined with \\\"quotes\\\\\"">
##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=test,Description="Testing filter">
##contig=<ID=22,assembly=b37,length=51304566>
##test=<ID=4,IE=5>
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
##readme=AAAAAA
##readme=BBBBBB
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
##INFO=<ID=STR,Number=1,Type=String,Description="Test string type">
##INFO=<ID=genes,Number=1,Type=String,Description="The Variant falls in the splice region of these genes">
##INFO=<ID=transcripts,Number=1,Type=String,Description="The Variant falls in the splice region of these transcripts">
##INFO=<ID=distances,Number=1,Type=String,Description="Vector of Min(Distance from start/end of exon in the transcript.)">
##INFO=<ID=annotations,Number=1,Type=String,Description="Does the variant fall in exonic/intronic splicing related space in the transcript.">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
22 38192 . G T 19.2 PASS AN=4;AC=2;genes=EP300;transcripts=ENST00000263253;distances=0;annotations=splicing_exonic GT:GQ 0/1:215 0/1:225
22 94627 . G C 29.2 PASS AN=4;AC=2;genes=EP300;transcripts=ENST00000263253;distances=1;annotations=splicing_intronic GT:GQ 0/1:215 0/1:225
22 97780 . G T 29.2 PASS AN=4;AC=2;genes=EP300;transcripts=ENST00000263253;distances=2;annotations=splicing_exonic GT:GQ 0/1:215 0/1:225
22 167675 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244,ENST00000405486,ENST00000407260,ENST00000455915;distances=2,2,2,2;annotations=splicing_exonic,splicing_exonic,splicing_exonic,splicing_exonic GT:GQ 0/1:215 0/1:225
22 167677 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244,ENST00000405486,ENST00000407260,ENST00000455915;distances=0,0,0,0;annotations=splicing_exonic,splicing_exonic,splicing_exonic,splicing_exonic GT:GQ 0/1:215 0/1:225
22 167679 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244,ENST00000405486,ENST00000407260,ENST00000455915;distances=2,2,2,2;annotations=splicing_intronic,splicing_intronic,splicing_intronic,splicing_intronic GT:GQ 0/1:215 0/1:225
22 175311 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244,ENST00000405486,ENST00000407260,ENST00000455915;distances=2,2,2,2;annotations=splicing_intronic,splicing_intronic,splicing_intronic,splicing_intronic GT:GQ 0/1:215 0/1:225
22 175501 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244,ENST00000405486,ENST00000407260,ENST00000455915;distances=2,2,2,2;annotations=splicing_intronic,splicing_intronic,splicing_intronic,splicing_intronic GT:GQ 0/1:215 0/1:225
22 206991 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244;distances=0;annotations=splicing_exonic GT:GQ 0/1:215 0/1:225
22 206993 . G T 29.2 PASS AN=4;AC=2;genes=RANGAP1;transcripts=ENST00000356244;distances=2;annotations=splicing_exonic GT:GQ 0/1:215 0/1:225
@@ -0,0 +1 @@
22 93614 97301 JUNC00000001 5 - 93614 97301 255,0,0 2 54,50 0,3637
@@ -0,0 +1,49 @@
1 22379140 22405017 JUNC00000001 236 - 22379140 22405017 255,0,0 2 95,96 0,25781
1 22379151 22405017 JUNC00000002 261 + 22379151 22405017 255,0,0 2 84,96 0,25770
1 22379167 22405020 JUNC00000003 245 ? 22379167 22405020 255,0,0 2 68,99 0,25754
1 22379198 22400683 JUNC00000004 17 + 22379198 22400683 255,0,0 2 37,97 0,21388
1 22379202 22400683 JUNC00000005 18 - 22379202 22400683 255,0,0 2 33,97 0,21384
1 22379210 22400675 JUNC00000006 6 ? 22379210 22400675 255,0,0 2 25,89 0,21376
1 22379367 22404979 JUNC00000007 1 - 22379367 22404979 255,0,0 2 42,58 0,25554
1 22380382 22404963 JUNC00000010 1 - 22380382 22404963 255,0,0 2 58,42 0,24539
1 22400613 22405020 JUNC00000011 94 - 22400613 22405020 255,0,0 2 99,99 0,4308
1 22400615 22405020 JUNC00000012 119 + 22400615 22405020 255,0,0 2 97,99 0,4306
1 22400616 22405019 JUNC00000013 27 ? 22400616 22405019 255,0,0 2 96,98 0,4305
1 22404977 22408287 JUNC00000016 315 ? 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404977 22408287 JUNC00000014 1767 + 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404977 22408287 JUNC00000015 1814 - 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404983 22413017 JUNC00000017 7 - 22404983 22413017 255,0,0 2 93,86 0,7948
1 22405013 22405292 JUNC00000018 3 - 22405013 22405292 255,0,0 2 63,94 0,185
1 22405022 22413024 JUNC00000019 5 + 22405022 22413024 255,0,0 2 54,93 0,7909
1 22405283 22408281 JUNC00000024 6 + 22405283 22408281 255,0,0 2 91,67 0,2931
1 22405301 22408241 JUNC00000025 1 ? 22405301 22408241 255,0,0 2 73,27 0,2913
1 22405322 22408287 JUNC00000026 5 - 22405322 22408287 255,0,0 2 52,73 0,2892
1 22408214 22413030 JUNC00000022 241 ? 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408214 22413030 JUNC00000021 2371 + 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408214 22413030 JUNC00000020 2266 - 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408221 22412968 JUNC00000027 1 - 22408221 22412968 255,0,0 2 62,37 0,4710
1 22412942 22413260 JUNC00000028 1894 + 22412942 22413260 255,0,0 2 99,99 0,219
1 22412942 22413260 JUNC00000029 2028 - 22412942 22413260 255,0,0 2 99,99 0,219
1 22412942 22413260 JUNC00000030 196 ? 22412942 22413260 255,0,0 2 99,99 0,219
1 22413260 22416446 JUNC00000032 31 ? 22413260 22416446 255,0,0 2 99,11 0,3175
1 22413260 22418018 JUNC00000033 144 ? 22413260 22418018 255,0,0 2 99,98 0,4660
1 22413260 22418019 JUNC00000031 326 + 22413260 22418019 255,0,0 2 99,99 0,4660
1 22413261 22418019 JUNC00000034 450 - 22413261 22418019 255,0,0 2 98,99 0,4659
1 22413262 22416494 JUNC00000035 12 - 22413262 22416494 255,0,0 2 97,59 0,3173
1 22413274 22416494 JUNC00000036 15 + 22413274 22416494 255,0,0 2 85,59 0,3161
1 22413276 22481449 JUNC00000037 4 - 22413276 22481449 255,0,0 2 83,54 0,68119
1 22413278 22481419 JUNC00000038 4 + 22413278 22481419 255,0,0 2 81,24 0,68117
1 22413280 22418023 JUNC00000039 10 - 22413280 22418023 255,0,0 2 79,99 0,4644
1 22413282 22481418 JUNC00000040 1 ? 22413282 22481418 255,0,0 2 77,23 0,68113
1 22413287 22498575 JUNC00000041 1 + 22413287 22498575 255,0,0 2 72,28 0,85260
1 22413289 22418023 JUNC00000042 6 + 22413289 22418023 255,0,0 2 70,99 0,4635
1 22413295 22417924 JUNC00000043 2 + 22413295 22417924 255,0,0 2 64,36 0,4593
1 22413316 22456130 JUNC00000044 1 ? 22413316 22456130 255,0,0 2 43,55 0,42759
1 22413326 22498614 JUNC00000045 1 - 22413326 22498614 255,0,0 2 33,67 0,85221
1 22413335 22445230 JUNC00000046 1 + 22413335 22445230 255,0,0 2 24,76 0,31819
1 22446966 22447774 JUNC00000048 2 + 22446966 22447774 255,0,0 2 44,71 0,737
1 22446980 22447761 JUNC00000049 1 - 22446980 22447761 255,0,0 2 30,58 0,723
1 22447808 22448007 JUNC00000050 2 ? 22447808 22448007 255,0,0 2 38,70 0,129
1 22447816 22448007 JUNC00000051 1 + 22447816 22448007 255,0,0 2 30,70 0,121
1 22447997 22456136 JUNC00000052 1 + 22447997 22456136 255,0,0 2 72,28 0,8111
1 22469441 22481452 JUNC00000053 1 + 22469441 22481452 255,0,0 2 43,57 0,11954
@@ -0,0 +1,41 @@
1 22379140 22405017 JUNC00000001 236 - 22379140 22405017 255,0,0 2 95,96 0,25781
1 22379151 22405017 JUNC00000002 261 + 22379151 22405017 255,0,0 2 84,96 0,25770
1 22379167 22405020 JUNC00000003 245 ? 22379167 22405020 255,0,0 2 68,99 0,25754
1 22379198 22400683 JUNC00000004 17 + 22379198 22400683 255,0,0 2 37,97 0,21388
1 22379202 22400683 JUNC00000005 18 - 22379202 22400683 255,0,0 2 33,97 0,21384
1 22379367 22404979 JUNC00000007 1 - 22379367 22404979 255,0,0 2 42,58 0,25554
1 22380382 22404963 JUNC00000010 1 - 22380382 22404963 255,0,0 2 58,42 0,24539
1 22400613 22405020 JUNC00000011 94 - 22400613 22405020 255,0,0 2 99,99 0,4308
1 22400615 22405020 JUNC00000012 119 + 22400615 22405020 255,0,0 2 97,99 0,4306
1 22400616 22405019 JUNC00000013 27 ? 22400616 22405019 255,0,0 2 96,98 0,4305
1 22404977 22408287 JUNC00000016 315 ? 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404977 22408287 JUNC00000014 1767 + 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404977 22408287 JUNC00000015 1814 - 22404977 22408287 255,0,0 2 99,73 0,3237
1 22404983 22413017 JUNC00000017 7 - 22404983 22413017 255,0,0 2 93,86 0,7948
1 22405013 22405292 JUNC00000018 3 - 22405013 22405292 255,0,0 2 63,94 0,185
1 22405022 22413024 JUNC00000019 5 + 22405022 22413024 255,0,0 2 54,93 0,7909
1 22405283 22408281 JUNC00000024 6 + 22405283 22408281 255,0,0 2 91,67 0,2931
1 22405322 22408287 JUNC00000026 5 - 22405322 22408287 255,0,0 2 52,73 0,2892
1 22408214 22413030 JUNC00000022 241 ? 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408214 22413030 JUNC00000021 2371 + 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408214 22413030 JUNC00000020 2266 - 22408214 22413030 255,0,0 2 73,99 0,4717
1 22408221 22412968 JUNC00000027 1 - 22408221 22412968 255,0,0 2 62,37 0,4710
1 22412942 22413260 JUNC00000028 1894 + 22412942 22413260 255,0,0 2 99,99 0,219
1 22412942 22413260 JUNC00000029 2028 - 22412942 22413260 255,0,0 2 99,99 0,219
1 22412942 22413260 JUNC00000030 196 ? 22412942 22413260 255,0,0 2 99,99 0,219
1 22413260 22418018 JUNC00000033 144 ? 22413260 22418018 255,0,0 2 99,98 0,4660
1 22413260 22418019 JUNC00000031 326 + 22413260 22418019 255,0,0 2 99,99 0,4660
1 22413261 22418019 JUNC00000034 450 - 22413261 22418019 255,0,0 2 98,99 0,4659
1 22413262 22416494 JUNC00000035 12 - 22413262 22416494 255,0,0 2 97,59 0,3173
1 22413274 22416494 JUNC00000036 15 + 22413274 22416494 255,0,0 2 85,59 0,3161
1 22413276 22481449 JUNC00000037 4 - 22413276 22481449 255,0,0 2 83,54 0,68119
1 22413280 22418023 JUNC00000039 10 - 22413280 22418023 255,0,0 2 79,99 0,4644
1 22413289 22418023 JUNC00000042 6 + 22413289 22418023 255,0,0 2 70,99 0,4635
1 22413295 22417924 JUNC00000043 2 + 22413295 22417924 255,0,0 2 64,36 0,4593
1 22413316 22456130 JUNC00000044 1 ? 22413316 22456130 255,0,0 2 43,55 0,42759
1 22413326 22498614 JUNC00000045 1 - 22413326 22498614 255,0,0 2 33,67 0,85221
1 22446966 22447774 JUNC00000048 2 + 22446966 22447774 255,0,0 2 44,71 0,737
1 22446980 22447761 JUNC00000049 1 - 22446980 22447761 255,0,0 2 30,58 0,723
1 22447808 22448007 JUNC00000050 2 ? 22447808 22448007 255,0,0 2 38,70 0,129
1 22447816 22448007 JUNC00000051 1 + 22447816 22448007 255,0,0 2 30,70 0,121
1 22469441 22481452 JUNC00000053 1 + 22469441 22481452 255,0,0 2 43,57 0,11954
@@ -30,7 +30,30 @@
import unittest
class TestCisSpliceEffectsIdentify(IntegrationTest, unittest.TestCase):
#Test default options.
#Test default options (but with RF strandedness).
def test_default_stranded(self):
    # Runs `cis-splice-effects identify` on the chr22 fixtures and compares
    # its three output files against the "default-stranded" expected files.
    # No strandness option is passed, so the tool's own default is exercised.

    # Input fixtures; inputFiles() returns a sequence, the first entry is used.
    vcf_path = self.inputFiles("vcf/test1.vcf")[0]
    bam_path = self.inputFiles("bam/test_hcc1395.2.bam")[0]
    fasta_path = self.inputFiles("fa/test_chr22.fa")[0]
    gtf_path = self.inputFiles("gtf/test_ensemble_chr22.2.gtf")[0]

    # Temporary files the command writes its results to.
    observed_annotated_junctions = self.tempFile("observed-cse-identify.out")
    observed_annotated_variants = self.tempFile("observed-cse-identify-variants.out")
    observed_junctions = self.tempFile("observed-cse-identify-junctions.out")

    # Reference outputs the observed files must match.
    wanted_annotated_junctions = self.inputFiles(
        "cis-splice-effects-identify/expected-cis-splice-effects-identify-default-stranded-annotatedjunctions.out")[0]
    wanted_annotated_variants = self.inputFiles(
        "cis-splice-effects-identify/expected-cis-splice-effects-identify-default-stranded-annotatedvariants.out")[0]
    wanted_junctions = self.inputFiles(
        "cis-splice-effects-identify/expected-cis-splice-effects-identify-default-stranded-junctions.out")[0]

    # Sub-command, then the three output options, then positional inputs.
    command = ["cis-splice-effects", "identify"]
    command += ["-o ", observed_annotated_junctions,
                "-v ", observed_annotated_variants,
                "-j ", observed_junctions]
    command += [vcf_path, bam_path, fasta_path, gtf_path]

    exit_code, message = self.execute(command)
    # `message` is attached to every assertion so a failure shows what
    # execute() reported alongside the mismatch.
    self.assertEqual(exit_code, 0, message)

    # Compared in the same order as before: annotated junctions,
    # annotated variants, then plain junctions.
    comparisons = (
        (wanted_annotated_junctions, observed_annotated_junctions),
        (wanted_annotated_variants, observed_annotated_variants),
        (wanted_junctions, observed_junctions),
    )
    for wanted_path, observed_path in comparisons:
        self.assertFilesEqual(wanted_path, observed_path, message)
#Test default options (but with unstranded).
def test_default(self):
variants = self.inputFiles("vcf/test1.vcf")[0]
bam1 = self.inputFiles("bam/test_hcc1395.2.bam")[0]
@@ -30,6 +30,20 @@
import unittest
class TestExtract(IntegrationTest, unittest.TestCase):
def test_junctions_extract_anchor_stranded(self):
    # Runs `junctions extract` twice on the same BAM: once without an
    # anchor option and once with "-a 30". Each run's output is compared
    # against the matching "expected-stranded-a<anchor>.out" file.
    bam1 = self.inputFiles("bam/test_hcc1395.bam")[0]
    # The same temp file is reused by both runs; it is compared right after
    # each execute() call, before the next run writes to it again.
    output_file = self.tempFile("extract.out")
    # Single-argument parenthesised print emits the same text under
    # Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
    print("BAM1 is  %s" % bam1)
    for anchor in ["", "30"]:
        # The expected file name embeds the bare anchor value, giving
        # "expected-stranded-a.out" and "expected-stranded-a30.out".
        expected_file = self.inputFiles(
            "junctions-extract/expected-stranded-a" + anchor + ".out")[0]
        # Build the CLI option in its own name instead of overwriting the
        # loop variable; an empty anchor means no "-a" option is passed.
        anchor_option = ("-a " + anchor) if anchor != "" else ""
        params = ["junctions", "extract", anchor_option, "-o", output_file, bam1]
        rv, err = self.execute(params)
        # Pass err as the failure message so a non-zero exit status is
        # reported together with what execute() returned for the run.
        self.assertEqual(rv, 0, err)
        self.assertFilesEqual(expected_file, output_file, err)
def test_junctions_extract_anchor(self):
bam1 = self.inputFiles("bam/test_hcc1395.bam")[0]
output_file = self.tempFile("extract.out")

0 comments on commit 93cd094

Please sign in to comment.