Skip to content

Commit

Permalink
Merge pull request #256 from dib-lab/refactor/vcf-take2
Browse files (browse the repository at this point in the history)
VCF i/o
  • Loading branch information
standage committed May 16, 2018
2 parents 4ff6cc2 + fbb7940 commit dc568ab
Show file tree
Hide file tree
Showing 10 changed files with 443 additions and 6 deletions.
6 changes: 5 additions & 1 deletion kevlar/alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,9 @@ def main(args):
logstream=args.logfile
)

writer = kevlar.vcf.VCFWriter(
outstream, source='kevlar::alac', refr=args.refr,
)
writer.write_header()
for varcall in workflow:
print(varcall.vcf, file=outstream)
writer.write(varcall)
6 changes: 5 additions & 1 deletion kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,5 +136,9 @@ def main(args):
args.match, args.mismatch, args.open, args.extend,
args.ksize, args.refr, args.debug, 5, args.logfile
)
writer = kevlar.vcf.VCFWriter(
outstream, source='kevlar::call', refr=args.refr,
)
writer.write_header()
for varcall in caller:
print(varcall.vcf, file=outstream)
writer.write(varcall)
8 changes: 6 additions & 2 deletions kevlar/simplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,9 @@ def main(args):
match=args.match, mismatch=args.mismatch, gapopen=args.open,
gapextend=args.extend, threads=args.threads, logstream=args.logfile
)
for variant in workflow:
print(variant.vcf, file=outstream)
writer = kevlar.vcf.VCFWriter(
outstream, source='kevlar::simplex', refr=args.refr,
)
writer.write_header()
for varcall in workflow:
writer.write(varcall)
28 changes: 28 additions & 0 deletions kevlar/tests/data/five-snvs-fmt-mismatch.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
##fileformat=VCFv4.2
##reference=GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
##FILTER=<ID=PerfectMatch,Description="No mismatches between contig with putatively novel content and reference target">
##FILTER=<ID=InscrutableCigar,Description="Alignment path/structure cannot be interpreted as a variant">
##FILTER=<ID=PassengerVariant,Description="A mismatch between contig and reference that is not spanned by any novel k-mers">
##FILTER=<ID=MateFail,Description="Aligning mate reads suggests a better location for this variant call">
##FILTER=<ID=PartitionScore,Description="Expectation is 1 variant call per partition, so all call(s) with suboptimal likelihood scores are filtered">
##INFO=<ID=REFRWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant reference allele">
##INFO=<ID=LIKESCORE,Number=1,Type=Float,Description="likelihood score of the variant, computed as `LLDN - max(LLIH, LLFP)`">
##INFO=<ID=CONTIG,Number=1,Type=String,Description="contig assembled from reads containing novel k-mers, aligned to reference to call variants">
##INFO=<ID=ALTWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant alternate allele">
##INFO=<ID=LLDN,Number=1,Type=Float,Description="log likelihood that the variant is a de novo variant">
##INFO=<ID=KSW2,Number=1,Type=Float,Description="alignment score">
##INFO=<ID=MATEDIST,Number=1,Type=Float,Description="average distance of aligned mates of assembled novel reads">
##INFO=<ID=LLFP,Number=1,Type=Float,Description="log likelihood that the variant is a false call">
##INFO=<ID=DROPPED,Number=1,Type=Integer,Description="number of k-mers dropped from ALTWINDOW for likelihood calculations because it is present elsewhere in the genome (not novel)">
##INFO=<ID=LLIH,Number=1,Type=Float,Description="log likelihood that the variant is an inherited variant">
##INFO=<ID=IKMERS,Number=1,Type=Integer,Description="number of "interesting" (novel) k-mers spanning the variant alternate allele">
##INFO=<ID=CIGAR,Number=1,Type=String,Description="alignment path">
##FORMAT=<ID=ALTABUND,Number=.,Type=Integer,Description="abundance of alternate allele k-mers">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Kid Mom Dad Sibling
chr17 36385018 . G A . PASS ALTWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGG;CIGAR=50D191M50D;IKMERS=31;KSW2=178;MATEDIST=200.38;PART=11;REFRWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTGACACAAGCGCGCAGCCACCCCCAAGCCAGG;CONTIG=AGCACCAGTGGGCTGGCTTTGGGACCCCGGGATGTACCATCCTCAGGCCACAGACACACCAGTCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGGACTGTGGTTCTCCTTTTGGAATTTTATCAAACTGCCAAAGTGAACAGCAACCTGGGGTCAGGTCCAGC;LIKESCORE=360.19;LLDN=-143.08;LLFP=-682.92;LLIH=-503.27;DROPPED=1 ALTABUND:GT 23,19,19,19,20,21,24,26,25,24,23,23,23,24,23,25,25,24,23,23,23,24,23,22,21,21,24,25,25,25:0/1 0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0:0/0 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0:0/0
chr17 40212447 . C G . PASS ALTWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CIGAR=50D198M50D;IKMERS=31;KSW2=185;PART=54;REFRWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCCCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CONTIG=ATCTTCTAGTGTAATGGTGCCTGGGTATTTGCATATTGAAACGCCATCCTTCTATTTGTCTTTTTTTTTTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGCTCACTGCAACCTCTGCTTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGCGATTAA;LIKESCORE=351.10;LLDN=-103.60;LLFP=-750.58;LLIH=-454.70;DROPPED=1 ALTABUND:GT 16,17,16,18,17,17,16,18,18,19,19,19,20,19,18,17,16,16,20,20,20,19,19,20,19,19,18,17,17,17:0/1 0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0:0/0 0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0:0/0
chr17 3547691 . A G . PASS ALTWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CIGAR=50D174M50D;IKMERS=31;KSW2=161;PART=89;REFRWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGAGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CONTIG=GCTGGGTGACAGAGGGAGACTCCTCTAGAAAAAAAATGGCTGATCTAGAGCAGGGAGGACTTGAGGGGTCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGGGCAGCGGAGGGGAGGGGCAGGGCAAGGGCAGCTTTCTCCTTGGGC;LIKESCORE=349.59;LLDN=-100.91;LLFP=-1883.18;LLIH=-450.50;DROPPED=0 ALTABUND:GT 19,17,17,17,17,17,18,18,18,18,18,19,19,17,18,18,16,16,17,16,17,16,16,17,18,17,17,17,17,16,16:0/1 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0:0/0
# This is a comment and should be ignored
chr17 9090636 . T A . PASS ALTWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CIGAR=50D146M25I1M;IKMERS=30;KSW2=131;PART=62;REFRWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCTTGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CONTIG=GTCATGGATGTTTCTGGTCATTTAGGAGCAGAAAGAGGGGGATACAGATGCCGGTTTATAAAGGCTGAGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTTCTCTGGCCATTTCTCTGC;LIKESCORE=349.15;LLDN=-116.68;LLFP=-1946.59;LLIH=-465.84;DROPPED=0 ALTABUND:GT 21,20,20,19,17,19,20,19,18,17,17,17,17,17,17,17,18,19,19,19,18,18,18,17,19,18,17,17,17,15,15:0/1 0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,1,0,0,0:0/0
chr17 47453989 . G C . PASS ALTWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATAT;CIGAR=50D183M50D;IKMERS=31;KSW2=170;PART=67;REFRWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGGTCATTCTACAAAATAACTGACCTGTAATAT;CONTIG=AAAACACAGAATCACGTATGTGATACTCCTGCTAAAAACATAACTGCTTTTAATCATGACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATATTCAGAAGTATCAAAGTTATGAAACTCAGGGAAAGACTGAGTTACTATTTGAGATTTAAAAAGACA;LIKESCORE=347.95;LLDN=-104.49;LLFP=-1763.31;LLIH=-452.44;DROPPED=0 ALTABUND:GT 19,18,18,18,17,18,18,20,20,19,19,19,18,17,17,17,17,17,17,17,17,17,19,18,17,17,18,18,18,16,16:0/1 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0:0/0 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0:0/0
27 changes: 27 additions & 0 deletions kevlar/tests/data/five-snvs-fmtstr-mismatch.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
##fileformat=VCFv4.2
##reference=GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
##FILTER=<ID=PerfectMatch,Description="No mismatches between contig with putatively novel content and reference target">
##FILTER=<ID=InscrutableCigar,Description="Alignment path/structure cannot be interpreted as a variant">
##FILTER=<ID=PassengerVariant,Description="A mismatch between contig and reference that is not spanned by any novel k-mers">
##FILTER=<ID=MateFail,Description="Aligning mate reads suggests a better location for this variant call">
##FILTER=<ID=PartitionScore,Description="Expectation is 1 variant call per partition, so all call(s) with suboptimal likelihood scores are filtered">
##INFO=<ID=REFRWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant reference allele">
##INFO=<ID=LIKESCORE,Number=1,Type=Float,Description="likelihood score of the variant, computed as `LLDN - max(LLIH, LLFP)`">
##INFO=<ID=CONTIG,Number=1,Type=String,Description="contig assembled from reads containing novel k-mers, aligned to reference to call variants">
##INFO=<ID=ALTWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant alternate allele">
##INFO=<ID=LLDN,Number=1,Type=Float,Description="log likelihood that the variant is a de novo variant">
##INFO=<ID=KSW2,Number=1,Type=Float,Description="alignment score">
##INFO=<ID=MATEDIST,Number=1,Type=Float,Description="average distance of aligned mates of assembled novel reads">
##INFO=<ID=LLFP,Number=1,Type=Float,Description="log likelihood that the variant is a false call">
##INFO=<ID=DROPPED,Number=1,Type=Integer,Description="number of k-mers dropped from ALTWINDOW for likelihood calculations because it is present elsewhere in the genome (not novel)">
##INFO=<ID=LLIH,Number=1,Type=Float,Description="log likelihood that the variant is an inherited variant">
##INFO=<ID=IKMERS,Number=1,Type=Integer,Description="number of "interesting" (novel) k-mers spanning the variant alternate allele">
##INFO=<ID=CIGAR,Number=1,Type=String,Description="alignment path">
##FORMAT=<ID=ALTABUND,Number=.,Type=Integer,Description="abundance of alternate allele k-mers">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Kid Mom Dad
chr17 36385018 . G A . PASS ALTWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGG;CIGAR=50D191M50D;IKMERS=31;KSW2=178;MATEDIST=200.38;PART=11;REFRWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTGACACAAGCGCGCAGCCACCCCCAAGCCAGG;CONTIG=AGCACCAGTGGGCTGGCTTTGGGACCCCGGGATGTACCATCCTCAGGCCACAGACACACCAGTCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGGACTGTGGTTCTCCTTTTGGAATTTTATCAAACTGCCAAAGTGAACAGCAACCTGGGGTCAGGTCCAGC;LIKESCORE=360.19;LLDN=-143.08;LLFP=-682.92;LLIH=-503.27;DROPPED=1 ALTABUND:GT 23,19,19,19,20,21,24,26,25,24,23,23,23,24,23,25,25,24,23,23,23,24,23,22,21,21,24,25,25,25:0/1 0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0:0/0 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0:0/0
chr17 40212447 . C G . PASS ALTWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CIGAR=50D198M50D;IKMERS=31;KSW2=185;PART=54;REFRWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCCCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CONTIG=ATCTTCTAGTGTAATGGTGCCTGGGTATTTGCATATTGAAACGCCATCCTTCTATTTGTCTTTTTTTTTTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGCTCACTGCAACCTCTGCTTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGCGATTAA;LIKESCORE=351.10;LLDN=-103.60;LLFP=-750.58;LLIH=-454.70;DROPPED=1 ALTABUND:GT 16,17,16,18,17,17,16,18,18,19,19,19,20,19,18,17,16,16,20,20,20,19,19,20,19,19,18,17,17,17:0/1 0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0:0/0 0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0:0/0
chr17 3547691 . A G . PASS ALTWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CIGAR=50D174M50D;IKMERS=31;KSW2=161;PART=89;REFRWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGAGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CONTIG=GCTGGGTGACAGAGGGAGACTCCTCTAGAAAAAAAATGGCTGATCTAGAGCAGGGAGGACTTGAGGGGTCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGGGCAGCGGAGGGGAGGGGCAGGGCAAGGGCAGCTTTCTCCTTGGGC;LIKESCORE=349.59;LLDN=-100.91;LLFP=-1883.18;LLIH=-450.50;DROPPED=0 ALTABUND:GT 19,17,17,17,17,17,18,18,18,18,18,19,19,17,18,18,16,16,17,16,17,16,16,17,18,17,17,17,17,16,16:0/1 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0:0/0
chr17 9090636 . T A . PASS ALTWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CIGAR=50D146M25I1M;IKMERS=30;KSW2=131;PART=62;REFRWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCTTGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CONTIG=GTCATGGATGTTTCTGGTCATTTAGGAGCAGAAAGAGGGGGATACAGATGCCGGTTTATAAAGGCTGAGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTTCTCTGGCCATTTCTCTGC;LIKESCORE=349.15;LLDN=-116.68;LLFP=-1946.59;LLIH=-465.84;DROPPED=0 ALTABUND:GT:XYZ:PDQ 21,20,20,19,17,19,20,19,18,17,17,17,17,17,17,17,18,19,19,19,18,18,18,17,19,18,17,17,17,15,15:0/1 0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,1,0,0,0:0/0
chr17 47453989 . G C . PASS ALTWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATAT;CIGAR=50D183M50D;IKMERS=31;KSW2=170;PART=67;REFRWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGGTCATTCTACAAAATAACTGACCTGTAATAT;CONTIG=AAAACACAGAATCACGTATGTGATACTCCTGCTAAAAACATAACTGCTTTTAATCATGACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATATTCAGAAGTATCAAAGTTATGAAACTCAGGGAAAGACTGAGTTACTATTTGAGATTTAAAAAGACA;LIKESCORE=347.95;LLDN=-104.49;LLFP=-1763.31;LLIH=-452.44;DROPPED=0 ALTABUND:GT 19,18,18,18,17,18,18,20,20,19,19,19,18,17,17,17,17,17,17,17,17,17,19,18,17,17,18,18,18,16,16:0/1 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0:0/0 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0:0/0
27 changes: 27 additions & 0 deletions kevlar/tests/data/five-snvs-with-likelihood.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
##fileformat=VCFv4.2
##reference=GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
##FILTER=<ID=PerfectMatch,Description="No mismatches between contig with putatively novel content and reference target">
##FILTER=<ID=InscrutableCigar,Description="Alignment path/structure cannot be interpreted as a variant">
##FILTER=<ID=PassengerVariant,Description="A mismatch between contig and reference that is not spanned by any novel k-mers">
##FILTER=<ID=MateFail,Description="Aligning mate reads suggests a better location for this variant call">
##FILTER=<ID=PartitionScore,Description="Expectation is 1 variant call per partition, so all call(s) with suboptimal likelihood scores are filtered">
##INFO=<ID=REFRWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant reference allele">
##INFO=<ID=LIKESCORE,Number=1,Type=Float,Description="likelihood score of the variant, computed as `LLDN - max(LLIH, LLFP)`">
##INFO=<ID=CONTIG,Number=1,Type=String,Description="contig assembled from reads containing novel k-mers, aligned to reference to call variants">
##INFO=<ID=ALTWINDOW,Number=1,Type=String,Description="window containing all k-mers that span the variant alternate allele">
##INFO=<ID=LLDN,Number=1,Type=Float,Description="log likelihood that the variant is a de novo variant">
##INFO=<ID=KSW2,Number=1,Type=Float,Description="alignment score">
##INFO=<ID=MATEDIST,Number=1,Type=Float,Description="average distance of aligned mates of assembled novel reads">
##INFO=<ID=LLFP,Number=1,Type=Float,Description="log likelihood that the variant is a false call">
##INFO=<ID=DROPPED,Number=1,Type=Integer,Description="number of k-mers dropped from ALTWINDOW for likelihood calculations because it is present elsewhere in the genome (not novel)">
##INFO=<ID=LLIH,Number=1,Type=Float,Description="log likelihood that the variant is an inherited variant">
##INFO=<ID=IKMERS,Number=1,Type=Integer,Description="number of "interesting" (novel) k-mers spanning the variant alternate allele">
##INFO=<ID=CIGAR,Number=1,Type=String,Description="alignment path">
##FORMAT=<ID=ALTABUND,Number=.,Type=Integer,Description="abundance of alternate allele k-mers">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Kid Mom Dad
chr17 36385018 . G A . PASS ALTWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGG;CIGAR=50D191M50D;IKMERS=31;KSW2=178;MATEDIST=200.38;PART=11;REFRWINDOW=TCTTAGGTCCCAGCCTCTAGGTGGGGTCCTGACACAAGCGCGCAGCCACCCCCAAGCCAGG;CONTIG=AGCACCAGTGGGCTGGCTTTGGGACCCCGGGATGTACCATCCTCAGGCCACAGACACACCAGTCTTAGGTCCCAGCCTCTAGGTGGGGTCCTAACACAAGCGCGCAGCCACCCCCAAGCCAGGACTGTGGTTCTCCTTTTGGAATTTTATCAAACTGCCAAAGTGAACAGCAACCTGGGGTCAGGTCCAGC;LIKESCORE=360.19;LLDN=-143.08;LLFP=-682.92;LLIH=-503.27;DROPPED=1 ALTABUND:GT 23,19,19,19,20,21,24,26,25,24,23,23,23,24,23,25,25,24,23,23,23,24,23,22,21,21,24,25,25,25:0/1 0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0:0/0 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0:0/0
chr17 40212447 . C G . PASS ALTWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CIGAR=50D198M50D;IKMERS=31;KSW2=185;PART=54;REFRWINDOW=TTTTTTGAGACGGAGTTTCACTCTCATTGCCCAGGCTGGAGTGCAATGGCATGCTCTTAGC;CONTIG=ATCTTCTAGTGTAATGGTGCCTGGGTATTTGCATATTGAAACGCCATCCTTCTATTTGTCTTTTTTTTTTTTTTGAGACGGAGTTTCACTCTCATTGCGCAGGCTGGAGTGCAATGGCATGCTCTTAGCTCACTGCAACCTCTGCTTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGCGATTAA;LIKESCORE=351.10;LLDN=-103.60;LLFP=-750.58;LLIH=-454.70;DROPPED=1 ALTABUND:GT 16,17,16,18,17,17,16,18,18,19,19,19,20,19,18,17,16,16,20,20,20,19,19,20,19,19,18,17,17,17:0/1 0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0:0/0 0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0:0/0
chr17 3547691 . A G . PASS ALTWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CIGAR=50D174M50D;IKMERS=31;KSW2=161;PART=89;REFRWINDOW=TCAGAGGACAGGGCTGGGAATTCCCTGTGGAGAAGGAAGCCATCTCAGGCGGTGGAGGGGG;CONTIG=GCTGGGTGACAGAGGGAGACTCCTCTAGAAAAAAAATGGCTGATCTAGAGCAGGGAGGACTTGAGGGGTCAGAGGACAGGGCTGGGAATTCCCTGTGGGGAAGGAAGCCATCTCAGGCGGTGGAGGGGGGCAGCGGAGGGGAGGGGCAGGGCAAGGGCAGCTTTCTCCTTGGGC;LIKESCORE=349.59;LLDN=-100.91;LLFP=-1883.18;LLIH=-450.50;DROPPED=0 ALTABUND:GT 19,17,17,17,17,17,18,18,18,18,18,19,19,17,18,18,16,16,17,16,17,16,16,17,18,17,17,17,17,16,16:0/1 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0:0/0
chr17 9090636 . T A . PASS ALTWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CIGAR=50D146M25I1M;IKMERS=30;KSW2=131;PART=62;REFRWINDOW=AGAGTGCAGGAGGCAGCCACTCAGGAGACCTTGCTAAGGCTGGCTTCCCGCTGCTTGTCTT;CONTIG=GTCATGGATGTTTCTGGTCATTTAGGAGCAGAAAGAGGGGGATACAGATGCCGGTTTATAAAGGCTGAGAGTGCAGGAGGCAGCCACTCAGGAGACCATGCTAAGGCTGGCTTCCCGCTGCTTGTCTTCTCTGGCCATTTCTCTGC;LIKESCORE=349.15;LLDN=-116.68;LLFP=-1946.59;LLIH=-465.84;DROPPED=0 ALTABUND:GT 21,20,20,19,17,19,20,19,18,17,17,17,17,17,17,17,18,19,19,19,18,18,18,17,19,18,17,17,17,15,15:0/1 0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0:0/0 0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,1,0,0,0:0/0
chr17 47453989 . G C . PASS ALTWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATAT;CIGAR=50D183M50D;IKMERS=31;KSW2=170;PART=67;REFRWINDOW=GACGAAACATCAGGCAAATCCAAATAGGAGGTCATTCTACAAAATAACTGACCTGTAATAT;CONTIG=AAAACACAGAATCACGTATGTGATACTCCTGCTAAAAACATAACTGCTTTTAATCATGACGAAACATCAGGCAAATCCAAATAGGAGCTCATTCTACAAAATAACTGACCTGTAATATTCAGAAGTATCAAAGTTATGAAACTCAGGGAAAGACTGAGTTACTATTTGAGATTTAAAAAGACA;LIKESCORE=347.95;LLDN=-104.49;LLFP=-1763.31;LLIH=-452.44;DROPPED=0 ALTABUND:GT 19,18,18,18,17,18,18,20,20,19,19,19,18,17,17,17,17,17,17,17,17,17,19,18,17,17,18,18,18,16,16:0/1 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0:0/0 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0:0/0
8 changes: 8 additions & 0 deletions kevlar/tests/test_alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def test_pico_4(greedy, capsys):
kevlar.alac.main(args)
out, err = capsys.readouterr()

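# Drop the VCF header lines (anything starting with '#') so that only
# variant records remain; equivalent to `grep -v '^#'` on the output.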
out = '\n'.join([l for l in out.split('\n') if not l.startswith('#')])

vcf = '\t'.join([
'seq1', '1175768', '.', 'T', 'C', '.', 'PASS',
'ALTWINDOW=CCCTGCCATTATAGATGCTAGATTCACATCTTCATTTATTTTTACTTTT;'
Expand Down Expand Up @@ -70,6 +73,8 @@ def test_pico_partitioned(capsys):

out, err = capsys.readouterr()
lines = out.strip().split('\n')
assert len(lines) == 33
lines = [l for l in lines if not l.startswith('#')]
assert len(lines) == 10
numnocalls = sum([1 for line in lines if '\t.\t.\t.\t.\t' in line])
assert numnocalls == 2
Expand Down Expand Up @@ -135,6 +140,9 @@ def test_alac_single_partition_badlabel(capsys):
args = kevlar.cli.parser().parse_args(arglist)
kevlar.alac.main(args)
out, err = capsys.readouterr()

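# Drop the VCF header lines (anything starting with '#'); equivalent to
# `grep -v '^#'` on the output. Nothing should remain afterwards.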
out = '\n'.join([l for l in out.split('\n') if not l.startswith('#')])
assert out == ''


Expand Down

0 comments on commit dc568ab

Please sign in to comment.