diff --git a/adam-apis/src/test/scala/org/bdgenomics/adam/apis/java/JavaADAMContextSuite.scala b/adam-apis/src/test/scala/org/bdgenomics/adam/apis/java/JavaADAMContextSuite.scala index 923ccb12e7..c0e2c4e373 100644 --- a/adam-apis/src/test/scala/org/bdgenomics/adam/apis/java/JavaADAMContextSuite.scala +++ b/adam-apis/src/test/scala/org/bdgenomics/adam/apis/java/JavaADAMContextSuite.scala @@ -69,30 +69,30 @@ class JavaADAMContextSuite extends ADAMFunSuite { sparkTest("can read and write a small .vcf as genotypes") { val path = copyResource("small.vcf") val aRdd = sc.loadGenotypes(path) - assert(aRdd.jrdd.count() === 15) + assert(aRdd.jrdd.count() === 18) val newRdd = JavaADAMGenotypeConduit.conduit(aRdd, sc) - assert(newRdd.jrdd.count() === 15) + assert(newRdd.jrdd.count() === 18) } sparkTest("can read and write a small .vcf as variants") { val path = copyResource("small.vcf") val aRdd = sc.loadVariants(path) - assert(aRdd.jrdd.count() === 5) + assert(aRdd.jrdd.count() === 6) val newRdd = JavaADAMVariantConduit.conduit(aRdd, sc) - assert(newRdd.jrdd.count() === 5) + assert(newRdd.jrdd.count() === 6) } ignore("can read and write a small .vcf as annotations") { val path = copyResource("small.vcf") val aRdd = sc.loadVariantAnnotations(path) - assert(aRdd.jrdd.count() === 5) + assert(aRdd.jrdd.count() === 6) val newRdd = JavaADAMAnnotationConduit.conduit(aRdd, sc) - assert(newRdd.jrdd.count() === 5) + assert(newRdd.jrdd.count() === 6) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala index 2ae4b2b72a..b291fd88c2 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantAnnotationConverter.scala @@ -17,15 +17,16 @@ */ package org.bdgenomics.adam.converters -import org.apache.avro.Schema -import org.apache.avro.specific.SpecificRecord import htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.vcf._ +import org.apache.avro.Schema +import org.apache.avro.specific.SpecificRecord import org.bdgenomics.formats.avro.{ DatabaseVariantAnnotation, Genotype, VariantCallingAnnotations } +import scala.collection.JavaConversions._ /** * Singleton object for building AttrKey instances. @@ -123,6 +124,17 @@ private[converters] object VariantAnnotationConverter extends Serializable { case a: String => java.lang.Boolean.valueOf(a) } + /** + * Converts a java String of comma delimited integers to a + * java.util.List of Integer + * + * @param attr Attribute to convert. + * @return Attribute as a java.util.List[Integer] + */ + private def attrAsIntList(attr: Object): Object = attr match { + case a: java.lang.String => seqAsJavaList(a.split(",").map(java.lang.Integer.valueOf _)) + } + /** * Keys corresponding to the COSMIC mutation database. */ @@ -187,7 +199,7 @@ private[converters] object VariantAnnotationConverter extends Serializable { AttrKey("phaseQuality", attrAsInt _, new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality")), AttrKey("phaseSetId", attrAsInt _, new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.Integer, "Phase set")), AttrKey("minReadDepth", attrAsInt _, new VCFFormatHeaderLine("MIN_DP", 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block")), - AttrKey("strandBiasComponents", attrAsInt _, new VCFFormatHeaderLine("SB", 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.")) + AttrKey("strandBiasComponents", attrAsIntList _, new VCFFormatHeaderLine("SB", 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.")) ) /** diff --git a/adam-core/src/test/resources/small.vcf b/adam-core/src/test/resources/small.vcf index 70c6b75d36..e7d776ed29 100644 --- a/adam-core/src/test/resources/small.vcf +++ b/adam-core/src/test/resources/small.vcf @@ -20,6 +20,7 @@ ##FORMAT= ##FORMAT= ##FORMAT= +##FORMAT= ##GATKCommandLine= ##GATKCommandLine= ##GATKCommandLine= @@ -53,3 +54,4 @@ 1 19190 . GC G 1186.88 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.500;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,201 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107 1 63735 rs201888535 CCTA C 2994.09 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=1.138;ClippingRankSum=0.448;DB;DP=176;FS=13.597;MLEAC=1;MLEAF=0.167;MQ=31.06;MQ0=0;MQRankSum=0.636;QD=9.98;ReadPosRankSum=-1.180 GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942 1 752721 rs3131972 A G 2486.90 PASS AC=6;AF=1.00;AN=6;DB;DP=69;FS=0.000;MLEAC=6;MLEAF=1.00;MQ=60.00;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0 +1 752791 . A G 2486.90 PASS AC=6;AF=1.00;AN=6;DB;DP=69;FS=0.000;MLEAC=6;MLEAF=1.00;MQ=60.00;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5 \ No newline at end of file diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantAnnotationsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantAnnotationsSuite.scala index de38314df8..3061e953ac 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantAnnotationsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantAnnotationsSuite.scala @@ -204,4 +204,22 @@ class VariantAnnotationsSuite extends ADAMFunSuite { assert(te.getDistance == 453) assert(te.getMessages.isEmpty) } + + test("create java.util.List[Int] from SB tag String value") { + val sb_tagData = "2,3,4,5" + val sb_converter = VariantAnnotationConverter.FORMAT_KEYS + .filter(x => x.adamKey == "strandBiasComponents").head.attrConverter + + val sb_parsed = sb_converter(sb_tagData).asInstanceOf[java.util.List[Int]] + val sb_component1 = sb_parsed.get(0) + val sb_component2 = sb_parsed.get(1) + val sb_component3 = sb_parsed.get(2) + val sb_component4 = sb_parsed.get(3) + + assert(sb_component1 == 2 && + sb_component2 == 3 && + sb_component3 == 4 && + sb_component4 == 5) + + } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 10f32b6bb5..8bd599e197 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -149,7 +149,7 @@ class ADAMContextSuite extends ADAMFunSuite { val path = testFile("small.vcf") val vcs = sc.loadGenotypes(path).toVariantContextRDD.rdd.collect.sortBy(_.position) - assert(vcs.size === 5) + assert(vcs.size === 6) val vc = vcs.head assert(vc.genotypes.size === 3) @@ -372,7 +372,7 @@ class ADAMContextSuite extends ADAMFunSuite { val path = testFile("bqsr1.vcf").replace("bqsr1", "*") val variants = sc.loadVcf(path).toVariantRDD - assert(variants.rdd.count === 691) + assert(variants.rdd.count === 692) } sparkTest("load vcf from a directory") {