diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala index ebb0007594..cc9435695a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala @@ -155,8 +155,11 @@ private[adam] class VariantContextConverter( headerLines: Seq[VCFHeaderLine]) extends Serializable with Logging { import VariantContextConverter._ - private val htsjdkConvFn = makeHtsjdkGenotypeConverter(headerLines) - private val bdgConvFn = makeBdgGenotypeConverter(headerLines) + // format fns gatk --> bdg, extract fns bdg --> gatk + private val variantFormatFn = makeVariantFormatFn(headerLines) + private val variantExtractFn = makeVariantExtractFn(headerLines) + private val genotypeFormatFn = makeGenotypeFormatFn(headerLines) + private val genotypeExtractFn = makeGenotypeExtractFn(headerLines) /** * Converts a Scala float to a Java float. @@ -167,47 +170,45 @@ private[adam] class VariantContextConverter( private def jFloat(f: Float): java.lang.Float = f /** - * Converts a single GATK variant into ADAMVariantContext(s). + * Converts a GATK variant context into one or more ADAM variant context(s). * - * @param vc GATK Variant context to convert. - * @return ADAM variant contexts + * @param vc GATK variant context to convert. + * @param stringency Validation stringency. + * @return The specified GATK variant context converted into one or more ADAM variant context(s) */ - def convert(vc: HtsjdkVariantContext, - variantAnnotationConvFn: (HtsjdkVariantContext, Option[String], Int) => VariantAnnotation, - genotypeConvFn: (HtsjdkGenotype, Variant, Allele, Int, Option[Int], Boolean) => Genotype): Seq[ADAMVariantContext] = { + def convert( + vc: HtsjdkVariantContext, + stringency: ValidationStringency): Seq[ADAMVariantContext] = { vc.getAlternateAlleles.toList match { case List(NON_REF_ALLELE) => { - val variantAnnotation = variantAnnotationConvFn(vc, None, 0) - val variant = variantAnnotation.variant + val variant = variantFormatFn(vc, None, 0) val genotypes = vc.getGenotypes.map(g => { - genotypeConvFn(g, variant, NON_REF_ALLELE, 0, Some(1), false) + genotypeFormatFn(g, variant, NON_REF_ALLELE, 0, Some(1), false) }) - return Seq(ADAMVariantContext(variant, genotypes, Some(variantAnnotation))) + return Seq(ADAMVariantContext(variant, genotypes, Some(variant.getAnnotation))) } case List(allele) => { require( allele.isNonReference, "Assertion failed when converting: " + vc.toString ) - val variantAnnotation = variantAnnotationConvFn(vc, Some(allele.getDisplayString), 0) - val variant = variantAnnotation.variant + val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0) val genotypes = vc.getGenotypes.map(g => { - genotypeConvFn(g, variant, allele, 1, None, false) + genotypeFormatFn(g, variant, allele, 1, None, false) }) - return Seq(ADAMVariantContext(variant, genotypes, Some(variantAnnotation))) + return Seq(ADAMVariantContext(variant, genotypes, Some(variant.getAnnotation))) } case List(allele, NON_REF_ALLELE) => { require( allele.isNonReference, "Assertion failed when converting: " + vc.toString ) - val variantAnnotation = variantAnnotationConvFn(vc, Some(allele.getDisplayString), 0) - val variant = variantAnnotation.variant + val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0) val genotypes = vc.getGenotypes.map(g => { - genotypeConvFn(g, variant, allele, 1, Some(2), false) + genotypeFormatFn(g, variant, allele, 1, Some(2), false) }) - return Seq(ADAMVariantContext(variant, genotypes, Some(variantAnnotation))) + return Seq(ADAMVariantContext(variant, genotypes, Some(variant.getAnnotation))) } case _ => { val vcb = new VariantContextBuilder(vc) @@ -229,13 +230,11 @@ private[adam] class VariantContextConverter( val idx = vc.getAlleleIndex(allele) require(idx >= 1, "Unexpected index for alternate allele: " + vc.toString) - val variantAnnotation = variantAnnotationConvFn(vc, Some(allele.getDisplayString), idx) - val variant = variantAnnotation.variant - + val variant = variantFormatFn(vc, Some(allele.getDisplayString), idx) val genotypes = vc.getGenotypes.map(g => { - genotypeConvFn(g, variant, allele, idx, referenceModelIndex, true) + genotypeFormatFn(g, variant, allele, idx, referenceModelIndex, true) }) - Seq(ADAMVariantContext(variant, genotypes, Some(variantAnnotation))) + Seq(ADAMVariantContext(variant, genotypes, Some(variant.getAnnotation))) }) } } @@ -271,7 +270,7 @@ private[adam] class VariantContextConverter( } } - // variant conversion functions + // htsjdk --> variant format functions private[converters] def formatNames( vc: HtsjdkVariantContext, @@ -294,20 +293,13 @@ private[adam] class VariantContextConverter( vb } - private[converters] def formatSomatic( - vc: HtsjdkVariantContext, - vb: Variant.Builder): Variant.Builder = { - - Option(vc.getAttribute("SOMATIC").asInstanceOf[java.lang.Boolean]) - .fold(vb)(vb.setSomatic(_)) - } - - private val variantConversionFns: Iterable[(HtsjdkVariantContext, Variant.Builder) => Variant.Builder] = Iterable( + private val variantFormatFns: Iterable[(HtsjdkVariantContext, Variant.Builder) => Variant.Builder] = Iterable( formatNames(_, _), - formatFilters(_, _), - formatSomatic(_, _) + formatFilters(_, _) ) + // variant --> htsjdk extract functions + private[converters] def extractNames( v: Variant, vcb: VariantContextBuilder): VariantContextBuilder = { @@ -337,24 +329,12 @@ private[adam] class VariantContextConverter( }).getOrElse(vcb.unfiltered) } - private[converters] def extractSomatic( - v: Variant, - vcb: VariantContextBuilder): VariantContextBuilder = { - - val somatic: java.lang.Boolean = Option(v.getSomatic).getOrElse(false) - if (somatic) { - vcb.attribute("SOMATIC", true) - } - vcb - } - - private val variantExtractorFns: Iterable[(Variant, VariantContextBuilder) => VariantContextBuilder] = Iterable( + private val variantExtractFns: Iterable[(Variant, VariantContextBuilder) => VariantContextBuilder] = Iterable( extractNames(_, _), - extractFilters(_, _), - extractSomatic(_, _) + extractFilters(_, _) ) - // variant annotation conversion functions + // htsjdk --> variant annotation format functions private[converters] def formatAncestralAllele( vc: HtsjdkVariantContext, @@ -416,6 +396,16 @@ private[adam] class VariantContextConverter( .fold(vab)(vab.setThousandGenomes(_)) } + private[converters] def formatSomatic( + vc: HtsjdkVariantContext, + vab: VariantAnnotation.Builder, + v: Variant, + index: Int): VariantAnnotation.Builder = { + + Option(vc.getAttribute("SOMATIC").asInstanceOf[java.lang.Boolean]) + .fold(vab)(vab.setSomatic(_)) + } + private[converters] def formatAlleleCount( vc: HtsjdkVariantContext, vab: VariantAnnotation.Builder, @@ -504,21 +494,24 @@ private[adam] class VariantContextConverter( .fold(vab)(vab.setTranscriptEffects(_)) } - private val variantAnnotationConversionFns: Iterable[(HtsjdkVariantContext, VariantAnnotation.Builder, Variant, Int) => VariantAnnotation.Builder] = Iterable( + private val variantAnnotationFormatFns: Iterable[(HtsjdkVariantContext, VariantAnnotation.Builder, Variant, Int) => VariantAnnotation.Builder] = Iterable( formatAncestralAllele(_, _, _, _), formatDbSnp(_, _, _, _), formatHapMap2(_, _, _, _), formatHapMap3(_, _, _, _), formatThousandGenomes(_, _, _, _), - formatCigar(_, _, _, _), + formatSomatic(_, _, _, _), formatAlleleCount(_, _, _, _), formatAlleleFrequency(_, _, _, _), + formatCigar(_, _, _, _), formatReadDepth(_, _, _, _), formatForwardReadDepth(_, _, _, _), formatReverseReadDepth(_, _, _, _), formatTranscriptEffects(_, _, _, _) ) + // variant annotation --> htsjdk extract functions + private[converters] def extractAncestralAllele( va: VariantAnnotation, vcb: VariantContextBuilder): VariantContextBuilder = { @@ -554,11 +547,11 @@ private[adam] class VariantContextConverter( Option(va.getThousandGenomes).fold(vcb)(vcb.attribute("1000G", _)) } - private[converters] def extractCigar( + private[converters] def extractSomatic( va: VariantAnnotation, vcb: VariantContextBuilder): VariantContextBuilder = { - Option(va.getCigar).fold(vcb)(vcb.attribute("CIGAR", _)) + Option(va.getSomatic).fold(vcb)(vcb.attribute("SOMATIC", _)) } private[converters] def extractAlleleCount( @@ -575,6 +568,13 @@ private[adam] class VariantContextConverter( Option(va.getAlleleCount).fold(vcb)(vcb.attribute("AF", _)) } + private[converters] def extractCigar( + va: VariantAnnotation, + vcb: VariantContextBuilder): VariantContextBuilder = { + + Option(va.getCigar).fold(vcb)(vcb.attribute("CIGAR", _)) + } + private[converters] def extractReadDepth( va: VariantAnnotation, vcb: VariantContextBuilder): VariantContextBuilder = { @@ -614,22 +614,23 @@ private[adam] class VariantContextConverter( "-1," + v } - private val variantAnnotationExtractorFns: Iterable[(VariantAnnotation, VariantContextBuilder) => VariantContextBuilder] = Iterable( + private val variantAnnotationExtractFns: Iterable[(VariantAnnotation, VariantContextBuilder) => VariantContextBuilder] = Iterable( extractAncestralAllele(_, _), extractDbSnp(_, _), extractHapMap2(_, _), extractHapMap3(_, _), extractThousandGenomes(_, _), - extractCigar(_, _), + extractSomatic(_, _), extractAlleleCount(_, _), extractAlleleFrequency(_, _), + extractCigar(_, _), extractReadDepth(_, _), extractForwardReadDepth(_, _), extractReverseReadDepth(_, _), extractTranscriptEffects(_, _) ) - // genotype conversion functions + // htsjdk --> genotype format functions private[converters] def formatAllelicDepth(g: HtsjdkGenotype, gb: Genotype.Builder, @@ -778,7 +779,7 @@ private[adam] class VariantContextConverter( gb } - private val coreFormatFieldConversionFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable( + private val genotypeFormatFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable( formatAllelicDepth(_, _, _, _), formatReadDepth(_, _, _, _), formatMinReadDepth(_, _, _, _), @@ -788,6 +789,8 @@ private[adam] class VariantContextConverter( formatPhaseInfo(_, _, _, _) ) + // genotype --> htsjdk extract functions + private[converters] def extractAllelicDepth(g: Genotype, gb: GenotypeBuilder): GenotypeBuilder = { (Option(g.getReferenceReadDepth), Option(g.getAlternateReadDepth)) match { @@ -861,7 +864,7 @@ private[adam] class VariantContextConverter( }).getOrElse(gb.phased(false)) } - private val coreFormatFieldExtractorFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable( + private val genotypeExtractFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable( extractAllelicDepth(_, _), extractReadDepth(_, _), extractMinReadDepth(_, _), @@ -871,7 +874,7 @@ private[adam] class VariantContextConverter( extractPhaseInfo(_, _) ) - // genotype annotation conversion functions + // htsjdk --> genotype annotation format functions private[converters] def formatFilters(g: HtsjdkGenotype, vcab: VariantCallingAnnotations.Builder, @@ -934,13 +937,15 @@ private[adam] class VariantContextConverter( }).getOrElse(vcab) } - private val annotationFormatFieldConversionFns: Iterable[(HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => VariantCallingAnnotations.Builder] = Iterable( + private val genotypeAnnotationFormatFns: Iterable[(HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => VariantCallingAnnotations.Builder] = Iterable( formatFilters(_, _, _, _), formatFisherStrandBias(_, _, _, _), formatRmsMapQ(_, _, _, _), formatMapQ0(_, _, _, _) ) + // genotype annotation --> htsjdk extract functions + private[converters] def extractFilters(vca: VariantCallingAnnotations, gb: GenotypeBuilder): GenotypeBuilder = { Option(vca.getFiltersApplied) @@ -982,13 +987,15 @@ private[adam] class VariantContextConverter( }).getOrElse(gb) } - private val annotationFormatFieldExtractorFns: Iterable[(VariantCallingAnnotations, GenotypeBuilder) => GenotypeBuilder] = Iterable( + private val genotypeAnnotationExtractFns: Iterable[(VariantCallingAnnotations, GenotypeBuilder) => GenotypeBuilder] = Iterable( extractFilters(_, _), extractFisherStrandBias(_, _), extractRmsMapQ(_, _), extractMapQ0(_, _) ) + // safe type conversions + private def toBoolean(obj: java.lang.Object): Boolean = { tryAndCatchStringCast(obj, o => { o.asInstanceOf[java.lang.Boolean] @@ -1303,8 +1310,8 @@ private[adam] class VariantContextConverter( } } - def makeHtsjdkVariantContextConverter( - headerLines: Seq[VCFHeaderLine]): (HtsjdkVariantContext, Option[String], Int) => VariantAnnotation = { + def makeVariantFormatFn( + headerLines: Seq[VCFHeaderLine]): (HtsjdkVariantContext, Option[String], Int) => Variant = { val attributeFns: Iterable[(HtsjdkVariantContext, Int, Array[Int]) => Option[(String, String)]] = headerLines .flatMap(hl => hl match { @@ -1327,7 +1334,7 @@ private[adam] class VariantContextConverter( def convert(vc: HtsjdkVariantContext, alt: Option[String], - alleleIdx: Int): VariantAnnotation = { + alleleIdx: Int): Variant = { // create the builder val variantBuilder = Variant.newBuilder @@ -1339,7 +1346,7 @@ private[adam] class VariantContextConverter( alt.foreach(variantBuilder.setAlternateAllele(_)) // bind the conversion functions and fold - val boundFns: Iterable[Variant.Builder => Variant.Builder] = variantConversionFns + val boundFns: Iterable[Variant.Builder => Variant.Builder] = variantFormatFns .map(fn => { fn(vc, _: Variant.Builder) }) @@ -1347,9 +1354,8 @@ private[adam] class VariantContextConverter( val variant = variantBuilder.build val variantAnnotationBuilder = VariantAnnotation.newBuilder - .setVariant(variant) - val boundAnnotationFns: Iterable[VariantAnnotation.Builder => VariantAnnotation.Builder] = variantAnnotationConversionFns + val boundAnnotationFns: Iterable[VariantAnnotation.Builder => VariantAnnotation.Builder] = variantAnnotationFormatFns .map(fn => { fn(vc, _: VariantAnnotation.Builder, variant, alleleIdx) }) @@ -1370,16 +1376,14 @@ private[adam] class VariantContextConverter( convertedAnnotation.setAttributes(attrMap) } - convertedAnnotationWithAttrs.build + variant.setAnnotation(convertedAnnotationWithAttrs.build) + variant } convert(_, _, _) } - /** - * - */ - def makeHtsjdkGenotypeConverter( + def makeGenotypeFormatFn( headerLines: Seq[VCFHeaderLine]): (HtsjdkGenotype, Variant, Allele, Int, Option[Int], Boolean) => Genotype = { val attributeFns: Iterable[(HtsjdkGenotype, Int, Array[Int]) => Option[(String, String)]] = headerLines @@ -1441,7 +1445,7 @@ private[adam] class VariantContextConverter( } // bind the conversion functions and fold - val boundFns: Iterable[Genotype.Builder => Genotype.Builder] = coreFormatFieldConversionFns + val boundFns: Iterable[Genotype.Builder => Genotype.Builder] = genotypeFormatFns .map(fn => { fn(g, _: Genotype.Builder, alleleIdx, indices) }) @@ -1459,7 +1463,7 @@ private[adam] class VariantContextConverter( val vcAnns = VariantCallingAnnotations.newBuilder // bind the annotation conversion functions and fold - val boundAnnotationFns: Iterable[VariantCallingAnnotations.Builder => VariantCallingAnnotations.Builder] = annotationFormatFieldConversionFns + val boundAnnotationFns: Iterable[VariantCallingAnnotations.Builder => VariantCallingAnnotations.Builder] = genotypeAnnotationFormatFns .map(fn => { fn(g, _: VariantCallingAnnotations.Builder, alleleIdx, indices) }) @@ -1730,7 +1734,7 @@ private[adam] class VariantContextConverter( } } - def makeBdgVariantContextConverter( + def makeVariantExtractFn( headerLines: Seq[VCFHeaderLine]): (ADAMVariantContext) => HtsjdkVariantContext = { val attributeFns: Iterable[(Map[String, String]) => Option[(String, java.lang.Object)]] = headerLines @@ -1762,13 +1766,13 @@ private[adam] class VariantContextConverter( .alleles(VariantContextConverter.convertAlleles(v)) // bind the conversion functions and fold - val convertedWithVariants = variantExtractorFns.foldLeft(builder)( + val convertedWithVariants = variantExtractFns.foldLeft(builder)( (vcb: VariantContextBuilder, fn) => fn(v, vcb)) // extract from annotations, if present val convertedWithAttrs = vc.annotations .fold(convertedWithVariants)(va => { - val convertedWithAnnotations = variantAnnotationExtractorFns + val convertedWithAnnotations = variantAnnotationExtractFns .foldLeft(convertedWithVariants)((vcb: VariantContextBuilder, fn) => fn(va, vcb)) // get the attribute map @@ -1788,10 +1792,7 @@ private[adam] class VariantContextConverter( convert(_) } - /** - * - */ - def makeBdgGenotypeConverter( + def makeGenotypeExtractFn( headerLines: Seq[VCFHeaderLine]): (Genotype) => HtsjdkGenotype = { val attributeFns: Iterable[(Map[String, String]) => Option[(String, java.lang.Object)]] = headerLines @@ -1821,7 +1822,7 @@ private[adam] class VariantContextConverter( VariantContextConverter.convertAlleles(g)) // bind the conversion functions and fold - val convertedCore = coreFormatFieldExtractorFns.foldLeft(builder)( + val convertedCore = genotypeExtractFns.foldLeft(builder)( (gb: GenotypeBuilder, fn) => fn(g, gb)) // convert the annotations if they exist @@ -1829,7 +1830,7 @@ private[adam] class VariantContextConverter( .fold(convertedCore)(vca => { // bind the annotation conversion functions and fold - val convertedAnnotations = annotationFormatFieldExtractorFns.foldLeft(convertedCore)( + val convertedAnnotations = genotypeAnnotationExtractFns.foldLeft(convertedCore)( (gb: GenotypeBuilder, fn) => fn(vca, gb)) // get the attribute map @@ -1851,22 +1852,21 @@ private[adam] class VariantContextConverter( } /** - * Convert an ADAMVariantContext into the equivalent GATK VariantContext - * @param vc - * @return GATK VariantContext + * Convert an ADAM variant context into a GATK variant context. + * + * @param vc ADAM variant context to convert. + * @param stringency Validation stringency. + * @return The specified ADAM variant context converted into a GATK variant context. */ def convert( vc: ADAMVariantContext, - variantConvFn: (ADAMVariantContext) => HtsjdkVariantContext, - genotypeConvFn: (Genotype) => HtsjdkGenotype, - stringency: ValidationStringency = ValidationStringency.LENIENT): Option[HtsjdkVariantContext] = { + stringency: ValidationStringency): Option[HtsjdkVariantContext] = { - // todo: should variantConvFn return builder? - val vcb = new VariantContextBuilder(variantConvFn(vc)) + val vcb = new VariantContextBuilder(variantExtractFn(vc)) // attach genotypes try { - Some(vcb.genotypes(vc.genotypes.map(g => genotypeConvFn(g))) + Some(vcb.genotypes(vc.genotypes.map(g => genotypeExtractFn(g))) .make) } catch { case t: Throwable => { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index 09e5ab28b4..65856dd637 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -927,11 +927,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log // load vcf metadata val (sd, samples, headers) = loadVcfMetadata(filePath) - val vcc = new VariantContextConverter() - val variantContextConvFn = vcc.makeHtsjdkVariantContextConverter(headers) - val genotypeConvFn = vcc.makeHtsjdkGenotypeConverter(headers) - - VariantContextRDD(records.flatMap(p => vcc.convert(p._2.get, variantContextConvFn, genotypeConvFn)), + val vcc = new VariantContextConverter(headers) + VariantContextRDD(records.flatMap(p => vcc.convert(p._2.get, stringency)), sd, samples, cleanAndMixInSupportedLines(headers, stringency)) @@ -971,11 +968,8 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log // load vcf metadata val (sd, samples, headers) = loadVcfMetadata(filePath) - val vcc = new VariantContextConverter() - val variantContextConvFn = vcc.makeHtsjdkVariantContextConverter(headers) - val genotypeConvFn = vcc.makeHtsjdkGenotypeConverter(headers) - - VariantContextRDD(records.flatMap(p => vcc.convert(p._2.get, variantContextConvFn, genotypeConvFn)), + val vcc = new VariantContextConverter(headers) + VariantContextRDD(records.flatMap(p => vcc.convert(p._2.get, stringency)), sd, samples, cleanAndMixInSupportedLines(headers, stringency)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFInFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFInFormatter.scala index cc2cbf7356..88ba06c5c4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFInFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFInFormatter.scala @@ -17,6 +17,7 @@ */ package org.bdgenomics.adam.rdd.variant +import htsjdk.samtools.ValidationStringency import htsjdk.variant.variantcontext.writer.{ Options, VariantContextWriterBuilder @@ -58,9 +59,7 @@ private[variant] case class VCFInFormatter private ( protected val companion = VCFInFormatter // make a converter - val converter = new VariantContextConverter() - val variantContextConvFn = converter.makeBdgVariantContextConverter(headerLines) - val genotypeConvFn = converter.makeBdgGenotypeConverter(headerLines) + val converter = new VariantContextConverter(headerLines) /** * Writes variant contexts to an output stream in VCF format. @@ -83,7 +82,7 @@ private[variant] case class VCFInFormatter private ( // write the records iter.foreach(r => { - val optVc = converter.convert(r, variantContextConvFn, genotypeConvFn) + val optVc = converter.convert(r, ValidationStringency.LENIENT) optVc.foreach(vc => { writer.add(vc) }) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala index 5511c6b339..fcebeda26d 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala @@ -17,6 +17,7 @@ */ package org.bdgenomics.adam.rdd.variant +import htsjdk.samtools.ValidationStringency import htsjdk.variant.vcf.{ VCFCodec, VCFHeaderLine @@ -46,9 +47,7 @@ case class VCFOutFormatter(headerLines: Seq[VCFHeaderLine]) extends OutFormatter def read(is: InputStream): Iterator[VariantContext] = { // make converter and empty dicts - val converter = new VariantContextConverter - val variantContextConvFn = converter.makeHtsjdkVariantContextConverter(headerLines) - val genotypeConvFn = converter.makeHtsjdkGenotypeConverter(headerLines) + val converter = new VariantContextConverter(headerLines) // make line reader iterator val lri = new AsciiLineReaderIterator(new AsciiLineReader(is)) @@ -65,7 +64,7 @@ case class VCFOutFormatter(headerLines: Seq[VCFHeaderLine]) extends OutFormatter iter.close() records.toIterator } else { - val nextRecords = records ++ converter.convert(codec.decode(iter.next), variantContextConvFn, genotypeConvFn) + val nextRecords = records ++ converter.convert(codec.decode(iter.next), ValidationStringency.LENIENT) convertIterator(iter, nextRecords) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDD.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDD.scala index c15e7d77ce..130a73cec0 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDD.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDD.scala @@ -113,11 +113,9 @@ case class VariantContextRDD(rdd: RDD[VariantContext], val sampleIds = samples.map(_.getSampleId) // convert the variants to htsjdk VCs - val converter = new VariantContextConverter() - val variantContextConvFn = converter.makeBdgVariantContextConverter(headerLines) - val genotypeConvFn = converter.makeBdgGenotypeConverter(headerLines) + val converter = new VariantContextConverter(headerLines) val writableVCs: RDD[(LongWritable, VariantContextWritable)] = rdd.flatMap(vc => { - converter.convert(vc, variantContextConvFn, genotypeConvFn, stringency = stringency) + converter.convert(vc, stringency) .map(htsjdkVc => { val vcw = new VariantContextWritable vcw.set(htsjdkVc) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala index 3516ba7fae..64afe6a28d 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala @@ -18,6 +18,7 @@ package org.bdgenomics.adam.converters import com.google.common.collect.ImmutableList +import htsjdk.samtools.ValidationStringency import htsjdk.variant.utils.SAMSequenceDictionaryExtractor import htsjdk.variant.variantcontext.{ Allele, @@ -41,15 +42,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { SequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(new File(path))) } - val converter = new VariantContextConverter - val adamToHtsjdkGenotypeConvFn = converter.makeBdgGenotypeConverter( - SupportedHeaderLines.allHeaderLines) - val adamToHtsjdkVariantContextConvFn = converter.makeBdgVariantContextConverter( - SupportedHeaderLines.allHeaderLines) - val htsjdkToAdamGenotypeConvFn = converter.makeHtsjdkGenotypeConverter( - SupportedHeaderLines.allHeaderLines) - val htsjdkToAdamVariantContextConvFn = converter.makeHtsjdkVariantContextConverter( - SupportedHeaderLines.allHeaderLines) + val lenient = ValidationStringency.LENIENT + val converter = new VariantContextConverter(SupportedHeaderLines.allHeaderLines) def htsjdkSNVBuilder: VariantContextBuilder = new VariantContextBuilder() .alleles(List(Allele.create("A", true), Allele.create("T"))) @@ -82,7 +76,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .setAlternateAllele("T") test("Convert htsjdk site-only SNV to ADAM") { - val adamVCs = converter.convert(htsjdkSNVBuilder.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(htsjdkSNVBuilder.make, lenient) assert(adamVCs.length === 1) val adamVC = adamVCs.head @@ -96,8 +90,6 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("Convert somatic htsjdk site-only SNV to ADAM") { - val converter = new VariantContextConverter - val vcb: VariantContextBuilder = new VariantContextBuilder() .alleles(List(Allele.create("A", true), Allele.create("T"))) .start(1L) @@ -105,14 +97,14 @@ class VariantContextConverterSuite extends ADAMFunSuite { .chr("1") .attribute("SOMATIC", true) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamVC = adamVCs.head val variant = adamVC.variant.variant - assert(variant.getSomatic === true) + assert(variant.getAnnotation.getSomatic === true) } test("Convert htsjdk site-only CNV to ADAM") { - val adamVCs = converter.convert(htsjdkCNVBuilder.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(htsjdkCNVBuilder.make, lenient) assert(adamVCs.length === 1) val adamVC = adamVCs.head @@ -128,7 +120,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(variant.getEnd === 20L) } - ignore("Convert htsjdk SNV w/ genotypes w/ phase information to ADAM") { + test("Convert htsjdk SNV w/ genotypes w/ phase information to ADAM") { val vcb = htsjdkSNVBuilder val genotypeAttributes = Map[String, Object]("PQ" -> new Integer(50), "PS" -> new Integer(1)) @@ -138,7 +130,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .make) .make() - val adamVCs = converter.convert(vc, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vc, lenient) assert(adamVCs.length === 1) val adamGTs = adamVCs.flatMap(_.genotypes) @@ -149,12 +141,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamGT.getPhaseQuality === 50) } - ignore("Convert htsjdk SNV with different variant filters to ADAM") { + test("Convert htsjdk SNV with different variant filters to ADAM") { val vcb = htsjdkSNVBuilder vcb.genotypes(GenotypeBuilder.create("NA12878", vcb.getAlleles)) { // No filters - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamVariant = adamVCs.map(_.variant).head assert(adamVariant.variant.getFiltersApplied === false) assert(adamVariant.variant.getFiltersPassed === null) @@ -162,7 +154,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } { // PASSing vcb.unfiltered.passFilters - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamVariant = adamVCs.map(_.variant).head assert(adamVariant.variant.getFiltersApplied === true) assert(adamVariant.variant.getFiltersPassed === true) @@ -170,7 +162,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } { // not PASSing vcb.unfiltered.filter("LowMQ") - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamVariant = adamVCs.map(_.variant).head assert(adamVariant.variant.getFiltersApplied === true) assert(adamVariant.variant.getFiltersPassed === false) @@ -178,14 +170,14 @@ class VariantContextConverterSuite extends ADAMFunSuite { } } - ignore("Convert htsjdk SNV with different genotype filters to ADAM") { + test("Convert htsjdk SNV with different genotype filters to ADAM") { val vcb = htsjdkSNVBuilder val gb = new GenotypeBuilder("NA12878", vcb.getAlleles) { // No filters gb.unfiltered vcb.genotypes(gb.make) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamGT = adamVCs.flatMap(_.genotypes).head // htsjdk does not distinguish between filters not applied and filters passed in Genotype assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) @@ -195,7 +187,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { { // PASSing gb.filter("PASS") vcb.genotypes(gb.make) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamGT = adamVCs.flatMap(_.genotypes).head assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true) @@ -205,7 +197,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { gb.filter("LowMQ") vcb.genotypes(gb.make) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) val adamGT = adamVCs.flatMap(_.genotypes).head assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === false) @@ -213,11 +205,10 @@ class VariantContextConverterSuite extends ADAMFunSuite { } } - ignore("Convert ADAM site-only SNV to htsjdk") { + test("Convert ADAM site-only SNV to htsjdk") { val vc = ADAMVariantContext(adamSNVBuilder().build) - val optHtsjdkVC = converter.convert(vc, - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(vc, lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -231,16 +222,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(!htsjdkVC.filtersWereApplied) } - ignore("Convert ADAM site-only SNV to htsjdk with contig conversion") { - val vc = ADAMVariantContext(adamSNVBuilder("NC_000001.10").build) - - val converter = new VariantContextConverter(dict = Some(dictionary)) - - val htsjdkVC = converter.convert(vc) - assert(htsjdkVC.getContig === "1") - } - - ignore("Convert ADAM SNV w/ genotypes to htsjdk") { + test("Convert ADAM SNV w/ genotypes to htsjdk") { val variant = adamSNVBuilder().build val genotype = Genotype.newBuilder .setVariant(variant) @@ -254,8 +236,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .build) .build - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant, Seq(genotype)), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant, Seq(genotype)), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -275,9 +256,9 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(sbComponents(3) === 6) } - ignore("Convert htsjdk multi-allelic sites-only SNVs to ADAM") { + test("Convert htsjdk multi-allelic sites-only SNVs to ADAM") { val vc = htsjdkMultiAllelicSNVBuilder.make - val adamVCs = converter.convert(vc, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vc, lenient) assert(adamVCs.length === 2) for ((allele, idx) <- vc.getAlternateAlleles.zipWithIndex) { @@ -287,14 +268,14 @@ class VariantContextConverterSuite extends ADAMFunSuite { } } - ignore("Convert htsjdk multi-allelic SNVs to ADAM") { + test("Convert htsjdk multi-allelic SNVs to ADAM") { val gb = new GenotypeBuilder("NA12878", List(Allele.create("T"), Allele.create("G"))) gb.AD(Array(4, 2, 3)).PL(Array(59, 0, 181, 1, 66, 102)) val vcb = htsjdkMultiAllelicSNVBuilder vcb.genotypes(gb.make) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) assert(adamVCs.length === 2) for (adamVC <- adamVCs) { @@ -321,14 +302,14 @@ class VariantContextConverterSuite extends ADAMFunSuite { .sameElements(List(59, 1, 102))) } - ignore("Convert gVCF reference records to ADAM") { + test("Convert gVCF reference records to ADAM") { val gb = new GenotypeBuilder("NA12878", List(Allele.create("A", true), Allele.create("A", true))) gb.PL(Array(0, 1, 2)).DP(44).attribute("MIN_DP", 38) val vcb = htsjdkRefSNV vcb.genotypes(gb.make) - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) assert(adamVCs.length == 1) val adamGTs = adamVCs.flatMap(_.genotypes) @@ -344,22 +325,22 @@ class VariantContextConverterSuite extends ADAMFunSuite { .sameElements(List(0, 1, 2))) } - ignore("Convert htsjdk variant context with no IDs to ADAM") { + test("Convert htsjdk variant context with no IDs to ADAM") { val vcb = htsjdkSNVBuilder vcb.noID() - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) assert(adamVCs.length == 1) val variant = adamVCs.head.variant assert(variant.variant.getNames.isEmpty) } - ignore("Convert htsjdk variant context with one ID to ADAM") { + test("Convert htsjdk variant context with one ID to ADAM") { val vcb = htsjdkSNVBuilder vcb.id("rs3131972") - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) assert(adamVCs.length == 1) val variant = adamVCs.head.variant @@ -367,11 +348,11 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(variant.variant.getNames.get(0) === "rs3131972") } - ignore("Convert htsjdk variant context with multiple IDs to ADAM") { + test("Convert htsjdk variant context with multiple IDs to ADAM") { val vcb = htsjdkSNVBuilder vcb.id("rs3131972;rs201888535") - val adamVCs = converter.convert(vcb.make, htsjdkToAdamVariantContextConvFn, htsjdkToAdamGenotypeConvFn) + val adamVCs = converter.convert(vcb.make, lenient) assert(adamVCs.length == 1) val variant = adamVCs.head.variant @@ -380,27 +361,25 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(variant.variant.getNames.get(1) === "rs201888535") } - ignore("Convert ADAM variant context with no names to htsjdk") { + test("Convert ADAM variant context with no names to htsjdk") { val variant = adamSNVBuilder() .build assert(variant.getNames.isEmpty) - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get assert(!htsjdkVC.hasID) } - ignore("Convert ADAM variant context with one name to htsjdk") { + test("Convert ADAM variant context with one name to htsjdk") { val variant = adamSNVBuilder() .setNames(ImmutableList.of("rs3131972")) .build - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -408,13 +387,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(htsjdkVC.getID === "rs3131972") } - ignore("Convert ADAM variant context with multiple names to htsjdk") { + test("Convert ADAM variant context with multiple names to htsjdk") { val variant = adamSNVBuilder() .setNames(ImmutableList.of("rs3131972", "rs201888535")) .build - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -427,10 +405,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .setFiltersApplied(null) .build - val converter = new VariantContextConverter - - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -444,10 +419,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .setFiltersApplied(false) .build - val converter = new VariantContextConverter - - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -462,10 +434,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .setFiltersPassed(true) .build - val converter = new VariantContextConverter - - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get @@ -481,10 +450,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { .setFiltersFailed(ImmutableList.of("FILTER1", "FILTER2")) .build - val converter = new VariantContextConverter - - val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), - adamToHtsjdkVariantContextConvFn, adamToHtsjdkGenotypeConvFn) + val optHtsjdkVC = converter.convert(ADAMVariantContext(variant), lenient) assert(optHtsjdkVC.isDefined) val htsjdkVC = optHtsjdkVC.get diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index a4aa0240b8..d6a398c6d5 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -17,6 +17,7 @@ */ package org.bdgenomics.adam.models +import htsjdk.samtools.ValidationStringency import htsjdk.variant.variantcontext.{ Allele, GenotypeBuilder, @@ -452,7 +453,7 @@ class ReferenceRegionSuite extends FunSuite { val vc = vcb.genotypes(GenotypeBuilder.create("NA12878", vcb.getAlleles(), Map.empty[String, java.lang.Object])).make() - val gts = converter.convert(vc).flatMap(_.genotypes) + val gts = converter.convert(vc, ValidationStringency.LENIENT).flatMap(_.genotypes) assert(gts.size === 1) val gt = gts.head diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 83630e82c9..6688d7dfd3 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -145,7 +145,7 @@ class ADAMContextSuite extends ADAMFunSuite { assert(last.getName === "gn|BPY2C;ccds|CCDS44030;ens|ENSG00000185894;vega|OTTHUMG00000045199") } - ignore("can read a small .vcf file") { + sparkTest("can read a small .vcf file") { val path = testFile("small.vcf") val gts = sc.loadGenotypes(path) @@ -172,44 +172,44 @@ class ADAMContextSuite extends ADAMFunSuite { assert(gt.getReadDepth === 20) } - ignore("can read a gzipped .vcf file") { + sparkTest("can read a gzipped .vcf file") { val path = testFile("test.vcf.gz") val vcs = sc.loadVcf(path) assert(vcs.rdd.count === 6) } - ignore("can read a BGZF gzipped .vcf file with .gz file extension") { + sparkTest("can read a BGZF gzipped .vcf file with .gz file extension") { val path = testFile("test.vcf.bgzf.gz") val vcs = sc.loadVcf(path) assert(vcs.rdd.count === 6) } - ignore("can read a BGZF gzipped .vcf file with .bgz file extension") { + sparkTest("can read a BGZF gzipped .vcf file with .bgz file extension") { val path = testFile("test.vcf.bgz") val vcs = sc.loadVcf(path) assert(vcs.rdd.count === 6) } - ignore("can read an uncompressed BCFv2.2 file") { // see https://github.com/samtools/htsjdk/issues/507 + sparkTest("can read an uncompressed BCFv2.2 file") { // see https://github.com/samtools/htsjdk/issues/507 val path = testFile("test.uncompressed.bcf") val vcs = sc.loadVcf(path) assert(vcs.rdd.count === 6) } - ignore("can read a BGZF compressed BCFv2.2 file") { // see https://github.com/samtools/htsjdk/issues/507 + sparkTest("can read a BGZF compressed BCFv2.2 file") { // see https://github.com/samtools/htsjdk/issues/507 val path = testFile("test.compressed.bcf") val vcs = sc.loadVcf(path) assert(vcs.rdd.count === 6) } - ignore("loadIndexedVcf with 1 ReferenceRegion") { + sparkTest("loadIndexedVcf with 1 ReferenceRegion") { val path = testFile("bqsr1.vcf") val refRegion = ReferenceRegion("22", 16097643, 16098647) val vcs = sc.loadIndexedVcf(path, refRegion) assert(vcs.rdd.count == 17) } - ignore("loadIndexedVcf with multiple ReferenceRegions") { + sparkTest("loadIndexedVcf with multiple ReferenceRegions") { val path = testFile("bqsr1.vcf") val refRegion1 = ReferenceRegion("22", 16050677, 16050822) val refRegion2 = ReferenceRegion("22", 16097643, 16098647) @@ -264,7 +264,7 @@ class ADAMContextSuite extends ADAMFunSuite { } } - ignore("filter on load using the filter2 API") { + sparkTest("filter on load using the filter2 API") { val path = testFile("bqsr1.vcf") val variants = sc.loadVariants(path) @@ -381,21 +381,21 @@ class ADAMContextSuite extends ADAMFunSuite { assert(reads.rdd.count == 10) } - ignore("load vcf with a glob") { + sparkTest("load vcf with a glob") { val path = testFile("bqsr1.vcf").replace("bqsr1", "*") val variants = sc.loadVcf(path).toVariantRDD assert(variants.rdd.count === 710) } - ignore("load vcf from a directory") { + sparkTest("load vcf from a directory") { val path = new File(testFile("vcf_dir/1.vcf")).getParent() val variants = sc.loadVcf(path).toVariantRDD assert(variants.rdd.count === 681) } - ignore("load gvcf which contains a multi-allelic row from a directory") { + sparkTest("load gvcf which contains a multi-allelic row from a directory") { val path = new File(testFile("gvcf_dir/gvcf_multiallelic.g.vcf")).getParent() val variants = sc.loadVcf(path).toVariantRDD diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala index eddf8b0543..192ae44873 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala @@ -22,8 +22,7 @@ import org.bdgenomics.adam.util.ADAMFunSuite class GenotypeRDDSuite extends ADAMFunSuite { - // these tests will all fail until https://github.com/bigdatagenomics/adam/pull/1291 merges - ignore("use broadcast join to pull down genotypes mapped to targets") { + sparkTest("use broadcast join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -35,7 +34,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(jRdd.rdd.count === 9L) } - ignore("use right outer broadcast join to pull down genotypes mapped to targets") { + sparkTest("use right outer broadcast join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -49,7 +48,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(c.count(_._1.isDefined) === 9) } - ignore("use shuffle join to pull down genotypes mapped to targets") { + sparkTest("use shuffle join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -70,7 +69,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(jRdd0.rdd.count === 3L) } - ignore("use right outer shuffle join to pull down genotypes mapped to targets") { + sparkTest("use right outer shuffle join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -95,7 +94,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(c0.count(_._1.isDefined) === 9) } - ignore("use left outer shuffle join to pull down genotypes mapped to targets") { + sparkTest("use left outer shuffle join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -120,7 +119,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(c0.count(_._2.isDefined) === 9) } - ignore("use full outer shuffle join to pull down genotypes mapped to targets") { + sparkTest("use full outer shuffle join to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -149,7 +148,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 9) } - ignore("use shuffle join with group by to pull down genotypes mapped to targets") { + sparkTest("use shuffle join with group by to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") @@ -174,7 +173,7 @@ class GenotypeRDDSuite extends ADAMFunSuite { assert(c0.forall(_._2.size == 1)) } - ignore("use right outer shuffle join with group by to pull down genotypes mapped to targets") { + sparkTest("use right outer shuffle join with group by to pull down genotypes mapped to targets") { val genotypesPath = testFile("small.vcf") val targetsPath = testFile("small.1.bed") diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala index c94ea64ef4..45c059f838 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala @@ -61,7 +61,7 @@ class VariantContextRDDSuite extends ADAMFunSuite { .build)) } - ignore("can write, then read in .vcf file") { + sparkTest("can write, then read in .vcf file") { val path = new File(tempDir, "test.vcf") variants.saveAsVcf(TestSaveArgs(path.getAbsolutePath), false) assert(path.exists) @@ -85,7 +85,7 @@ class VariantContextRDDSuite extends ADAMFunSuite { assert(vcRdd.sequences.records(0).name === "chr11") } - ignore("can write as a single file, then read in .vcf file") { + sparkTest("can write as a single file, then read in .vcf file") { val path = new File(tempDir, "test_single.vcf") variants.saveAsVcf(path.getAbsolutePath, asSingleFile = true) assert(path.exists) @@ -111,7 +111,7 @@ class VariantContextRDDSuite extends ADAMFunSuite { assert(pipedRdd.rdd.flatMap(_.genotypes).count === 18) } - ignore("save a file sorted by contig index") { + sparkTest("save a file sorted by contig index") { val inputPath = testFile("random.vcf") val variants = sc.loadVcf(inputPath) val outputPath = tmpFile("sorted.vcf") @@ -123,7 +123,7 @@ class VariantContextRDDSuite extends ADAMFunSuite { checkFiles(outputPath, testFile("sorted.vcf")) } - ignore("save a lexicographically sorted file") { + sparkTest("save a lexicographically sorted file") { val inputPath = testFile("random.vcf") val variants = sc.loadVcf(inputPath) val outputPath = tmpFile("sorted.lex.vcf")