Skip to content

Commit

Permalink
Use VCF header lines in VCFInFormatter
Browse files (browse the repository at this point in the history)
  • Loading branch information
heuermh committed Nov 18, 2016
1 parent d155f66 commit 447ca9a
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 32 deletions.
Expand Up @@ -23,10 +23,7 @@ import htsjdk.variant.variantcontext.writer.{
}
import htsjdk.variant.vcf.{ VCFHeader, VCFHeaderLine }
import java.io.OutputStream
import org.bdgenomics.adam.converters.{
SupportedHeaderLines,
VariantContextConverter
}
import org.bdgenomics.adam.converters.VariantContextConverter
import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext
Expand All @@ -47,13 +44,14 @@ object VCFInFormatter extends InFormatterCompanion[VariantContext, VariantContex
* VCF header.
*/
def apply(gRdd: VariantContextRDD): VCFInFormatter = {
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId))
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId), gRdd.headerLines)
}
}

private[variant] case class VCFInFormatter private (
sequences: SequenceDictionary,
samples: Seq[String]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {
samples: Seq[String],
headerLines: Seq[VCFHeaderLine]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {

protected val companion = VCFInFormatter

Expand All @@ -75,9 +73,7 @@ private[variant] case class VCFInFormatter private (
.unsetOption(Options.INDEX_ON_THE_FLY)
.build()

val headerLines: Set[VCFHeaderLine] = (SupportedHeaderLines.infoHeaderLines ++
SupportedHeaderLines.formatHeaderLines).toSet
val header = new VCFHeader(headerLines, samples)
val header = new VCFHeader(headerLines.toSet, samples)
header.setSequenceDictionary(sequences.toSAMSequenceDictionary)
writer.writeHeader(header)

Expand Down
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.lex.vcf
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.vcf
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
Expand Up @@ -92,10 +92,6 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("Convert somatic htsjdk site-only SNV to ADAM") {
val converter = new VariantContextConverter

// not sure why this doesn't work
//htsjdkSNVBuilder.attribute("SOMATIC", true)
//val adamVCs = converter.convert(htsjdkSNVBuilder.make)

val vcb: VariantContextBuilder = new VariantContextBuilder()
.alleles(List(Allele.create("A", true), Allele.create("T")))
.start(1L)
Expand Down
Expand Up @@ -154,10 +154,6 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(vcs.size === 6)

val vc = vcs.head

/*
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.800;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
*/
val variant = vc.variant.variant
assert(variant.getContigName === "1")
assert(variant.getStart === 14396L)
Expand Down
Expand Up @@ -122,9 +122,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
}

sparkTest("don't lose any variants when piping as VCF") {
val smallVcf = Thread.currentThread()
.getContextClassLoader
.getResource("small.vcf").getFile
val smallVcf = testFile("small.vcf")
val rdd: VariantContextRDD = sc.loadVcf(smallVcf)
val records = rdd.rdd.count

Expand Down

0 comments on commit 447ca9a

Please sign in to comment.