diff --git a/adam-core/pom.xml b/adam-core/pom.xml index 9bdcd7f0e4..0e1281aae4 100644 --- a/adam-core/pom.xml +++ b/adam-core/pom.xml @@ -106,6 +106,7 @@ org.bdgenomics.formats.avro.Feature org.bdgenomics.formats.avro.Fragment org.bdgenomics.formats.avro.Genotype + org.bdgenomics.formats.avro.GenotypeAnnotation org.bdgenomics.formats.avro.NucleotideContigFragment org.bdgenomics.formats.avro.OntologyTerm org.bdgenomics.formats.avro.ProcessingStep @@ -113,7 +114,6 @@ org.bdgenomics.formats.avro.TranscriptEffect org.bdgenomics.formats.avro.Variant org.bdgenomics.formats.avro.VariantAnnotation - org.bdgenomics.formats.avro.VariantCallingAnnotations ${project.basedir}/target/generated-sources/src/main/scala/org/bdgenomics/adam/sql/Schemas.scala compile @@ -134,6 +134,7 @@ org.bdgenomics.formats.avro.Feature org.bdgenomics.formats.avro.Fragment org.bdgenomics.formats.avro.Genotype + org.bdgenomics.formats.avro.GenotypeAnnotation org.bdgenomics.formats.avro.NucleotideContigFragment org.bdgenomics.formats.avro.OntologyTerm org.bdgenomics.formats.avro.Read @@ -145,7 +146,6 @@ org.bdgenomics.formats.avro.TranscriptEffect org.bdgenomics.formats.avro.Variant org.bdgenomics.formats.avro.VariantAnnotation - org.bdgenomics.formats.avro.VariantCallingAnnotations ${project.basedir}/target/generated-sources/src/main/scala/org/bdgenomics/adam/projections/Enums.scala compile diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala index 96905fe62c..74a71f5ae4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala @@ -21,7 +21,7 @@ import com.google.common.base.Splitter import com.google.common.collect.ImmutableList import htsjdk.samtools.ValidationStringency import htsjdk.variant.variantcontext.{ - Allele, + Allele => 
HtsjdkAllele, Genotype => HtsjdkGenotype, GenotypeBuilder, GenotypesContext, @@ -55,58 +55,23 @@ import scala.collection.mutable.{ Buffer, HashMap } /** * Object for converting between htsjdk and ADAM VariantContexts. - * - * Handles Variant, Genotype, Allele, and various genotype annotation - * conversions. Does not handle Variant annotations. Genotype annotations are - * annotations in the VCF GT field while Variant annotations are annotations - * contained in the VCF INFO field. */ object VariantContextConverter { - /** - * If set to true, this property will ensure that the variant.annotation field - * in the Genotype record is populated after conversion from an htsjdk - * VariantContext. By default, this property is false. - */ - val nestAnnotationInGenotypesProperty = "org.bdgenomics.adam.converters.VariantContextConverter.NEST_ANN_IN_GENOTYPES" - - /** - * Sets the value of the nest annotation in genotypes property. - * - * @param conf Hadoop configuration to set the property in. - * @param populateNestedAnn If true, the nested field is populated. - */ - def setNestAnnotationInGenotypesProperty(conf: Configuration, - populateNestedAnn: Boolean) { - conf.setBoolean(nestAnnotationInGenotypesProperty, populateNestedAnn) - } - - /** - * Gets the value of the nest annotation in genotypes property. - * - * @param conf Hadoop configuration to set the property in. - * @return Returns whether or not to nest the variant annotation under each - * genotype record. - */ - private[adam] def getNestAnnotationInGenotypesProperty( - conf: Configuration): Boolean = { - conf.getBoolean(nestAnnotationInGenotypesProperty, false) - } - /** * Representation for an unknown non-ref/symbolic allele in VCF. */ - private val NON_REF_ALLELE = Allele.create("", false /* !Reference */ ) + private val NON_REF_ALLELE = HtsjdkAllele.create("", false /* !Reference */ ) /** - * The index in the Avro genotype record for the splitFromMultiAllelec field. 
+ * The index in the Avro genotype record for the splitFromMultiallelec field. * * This field is true if the VCF site was not biallelic. */ - private lazy val splitFromMultiAllelicField = Genotype.SCHEMA$.getField("splitFromMultiAllelic") + private lazy val splitFromMultiallelicField = Genotype.SCHEMA$.getField("splitFromMultiallelic") /** - * One conversion method for each way of representing an Allele + * Convert a htsjdk allele to an Avro allele. * * An htsjdk Allele can represent a reference or alternate allele call, or a * site where no call could be made. If the allele is an alternate allele, we @@ -117,11 +82,11 @@ object VariantContextConverter { * @param allele The allele we are converting. * @return The Avro representation for this allele. */ - private def convertAllele(vc: HtsjdkVariantContext, allele: Allele): GenotypeAllele = { - if (allele.isNoCall) GenotypeAllele.NO_CALL - else if (allele.isReference) GenotypeAllele.REF - else if (allele == NON_REF_ALLELE || !vc.hasAlternateAllele(allele)) GenotypeAllele.OTHER_ALT - else GenotypeAllele.ALT + private def convertAllele(vc: HtsjdkVariantContext, allele: HtsjdkAllele): Allele = { + if (allele.isNoCall) Allele.NO_CALL + else if (allele.isReference) Allele.REF + else if (allele == NON_REF_ALLELE || !vc.hasAlternateAllele(allele)) Allele.OTHER_ALT + else Allele.ALT } /** @@ -133,17 +98,17 @@ object VariantContextConverter { * @return If the allele is defined, returns a wrapped allele. Else, returns * a None. 
*/ - private def convertAlleleOpt(allele: String, isRef: Boolean = false): Option[Allele] = { + private def convertAlleleOpt(allele: String, isRef: Boolean = false): Option[HtsjdkAllele] = { if (allele == null) { None } else { - Some(Allele.create(allele, isRef)) + Some(HtsjdkAllele.create(allele, isRef)) } } - private val OPT_NON_REF = Some(Allele.create("", false)) + private val OPT_NON_REF = Some(HtsjdkAllele.create("", false)) - private def optNonRef(v: Variant, hasNonRefModel: Boolean): Option[Allele] = { + private def optNonRef(v: Variant, hasNonRefModel: Boolean): Option[HtsjdkAllele] = { if (hasNonRefModel || v.getAlternateAllele == null) { OPT_NON_REF } else { @@ -160,7 +125,7 @@ object VariantContextConverter { * @return Returns a Java collection representing the reference allele and any * alternate allele at the site. */ - private def convertAlleles(v: Variant, hasNonRefModel: Boolean): java.util.Collection[Allele] = { + private def convertAlleles(v: Variant, hasNonRefModel: Boolean): java.util.Collection[HtsjdkAllele] = { val asSeq = Seq(convertAlleleOpt(v.getReferenceAllele, true), convertAlleleOpt(v.getAlternateAllele), optNonRef(v, hasNonRefModel)).flatten @@ -178,13 +143,13 @@ object VariantContextConverter { * @param g The genotype call at a site for a single sample. * @return Returns the called alleles at this site. 
*/ - private def convertAlleles(g: Genotype): java.util.List[Allele] = { + private def convertAlleles(g: Genotype): java.util.List[HtsjdkAllele] = { var alleles = g.getAlleles - if (alleles == null) return Collections.emptyList[Allele] + if (alleles == null) return Collections.emptyList[HtsjdkAllele] else g.getAlleles.map { - case GenotypeAllele.NO_CALL | GenotypeAllele.OTHER_ALT => Allele.NO_CALL - case GenotypeAllele.REF => Allele.create(g.getVariant.getReferenceAllele, true) - case GenotypeAllele.ALT => Allele.create(g.getVariant.getAlternateAllele) + case Allele.NO_CALL | Allele.OTHER_ALT => HtsjdkAllele.NO_CALL + case Allele.REF => HtsjdkAllele.create(g.getReferenceAllele, true) + case Allele.ALT => HtsjdkAllele.create(g.getAlternateAllele) } } @@ -274,8 +239,7 @@ object VariantContextConverter { stringency: ValidationStringency, conf: Configuration): VariantContextConverter = { new VariantContextConverter(headerLines, - stringency, - getNestAnnotationInGenotypesProperty(conf)) + stringency) } } @@ -291,8 +255,7 @@ object VariantContextConverter { */ class VariantContextConverter( headerLines: Seq[VCFHeaderLine], - stringency: ValidationStringency, - setNestedAnnotationInGenotype: Boolean) extends Serializable with Logging { + stringency: ValidationStringency) extends Serializable with Logging { import VariantContextConverter._ // format fns gatk --> bdg, extract fns bdg --> gatk @@ -300,6 +263,18 @@ class VariantContextConverter( private val variantExtractFn = makeVariantExtractFn(headerLines) private val genotypeFormatFn = makeGenotypeFormatFn(headerLines) private val genotypeExtractFn = makeGenotypeExtractFn(headerLines) + private val gvcfBlocks = extractGvcfBlocks(headerLines) + + private val gvcfBlockRegex = """minGQ=([0-9+]).*maxGQ=([0-9]+).*""".r + + private def extractGvcfBlocks(headerLines: Seq[VCFHeaderLine]): Seq[(Int, Int)] = { + headerLines + .find(_.getKey() == "##GVCFBlock") + .map(_.getValue() match { + case gvcfBlockRegex(minGQ, maxGQ) => 
(minGQ.toInt, maxGQ.toInt) + }) + .toSeq + } /** * Converts a Scala float to a Java float. @@ -317,15 +292,6 @@ class VariantContextConverter( */ private def jDouble(f: Double): java.lang.Double = f - private def genotypeVariant(coreVariant: Variant, - fullVariant: Variant): Variant = { - if (setNestedAnnotationInGenotype) { - fullVariant - } else { - coreVariant - } - } - /** * Converts a GATK variant context into one or more ADAM variant context(s). * @@ -338,10 +304,9 @@ class VariantContextConverter( try { vc.getAlternateAlleles.toList match { case List(NON_REF_ALLELE) | List() => { - val (coreVariant, variant) = variantFormatFn(vc, None, 0, false) - val v = genotypeVariant(coreVariant, variant) + val variant = variantFormatFn(vc, None, 0, false) val genotypes = vc.getGenotypes.map(g => { - genotypeFormatFn(g, v, NON_REF_ALLELE, 0, Some(1), false) + genotypeFormatFn(g, variant, NON_REF_ALLELE, 0, Some(1), false) }) return Seq(ADAMVariantContext(variant, genotypes)) } @@ -350,10 +315,9 @@ class VariantContextConverter( allele.isNonReference, "Assertion failed when converting: " + vc.toString ) - val (coreVariant, variant) = variantFormatFn(vc, Some(allele.getDisplayString), 0, false) - val v = genotypeVariant(coreVariant, variant) + val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0, false) val genotypes = vc.getGenotypes.map(g => { - genotypeFormatFn(g, v, allele, 1, None, false) + genotypeFormatFn(g, variant, allele, 1, None, false) }) return Seq(ADAMVariantContext(variant, genotypes)) } @@ -362,10 +326,9 @@ class VariantContextConverter( allele.isNonReference, "Assertion failed when converting: " + vc.toString ) - val (coreVariant, variant) = variantFormatFn(vc, Some(allele.getDisplayString), 0, false) - val v = genotypeVariant(coreVariant, variant) + val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0, false) val genotypes = vc.getGenotypes.map(g => { - genotypeFormatFn(g, v, allele, 1, Some(2), false) + genotypeFormatFn(g, 
variant, allele, 1, Some(2), false) }) return Seq(ADAMVariantContext(variant, genotypes)) } @@ -392,13 +355,12 @@ class VariantContextConverter( // variant annotations only contain values for alternate alleles so // we need to subtract one from real index val variantIdx = idx - 1 - val (coreVariant, variant) = variantFormatFn(vc, + val variant = variantFormatFn(vc, Some(allele.getDisplayString), variantIdx, true) - val v = genotypeVariant(coreVariant, variant) val genotypes = vc.getGenotypes.map(g => { - genotypeFormatFn(g, v, allele, idx, referenceModelIndex, true) + genotypeFormatFn(g, variant, allele, idx, referenceModelIndex, true) }) ADAMVariantContext(variant, genotypes) }) @@ -857,200 +819,412 @@ class VariantContextConverter( // htsjdk --> genotype format functions - private[converters] def formatAllelicDepth(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { + private def tryAndCatchStringCast[T](attr: java.lang.Object, + tryFn: (java.lang.Object) => T, + catchFn: (String) => T): T = { + try { + tryFn(attr) + } catch { + case cce: ClassCastException => { + // is this a string? if so, parse... 
+ if (attr.getClass.isAssignableFrom(classOf[String])) { + catchFn(attr.asInstanceOf[String]) + } else { + throw cce + } + } + case t: Throwable => throw t + } + } + + private[converters] def formatFilters(g: HtsjdkGenotype, + gb: Genotype.Builder, + idx: Int, + indices: Array[Int]): Genotype.Builder = { + // see https://github.com/samtools/htsjdk/issues/741 + val gtFiltersWereApplied = true + if (gtFiltersWereApplied) { + val filtersWereApplied = gb.setFiltersApplied(true) + if (g.isFiltered) { + filtersWereApplied.setFiltersPassed(false) + .setFiltersFailed(g.getFilters.split(";").toList) + } else { + filtersWereApplied.setFiltersPassed(true) + } + } else { + gb.setFiltersApplied(false) + } + } + + private[converters] def formatPhaseSet(g: HtsjdkGenotype, + gb: Genotype.Builder, + gIdx: Int, + gIndices: Array[Int]): Genotype.Builder = { + if (g.isPhased) { + gb.setPhased(true) + + Option(g.getExtendedAttribute("PS")) + .map(attr => { + tryAndCatchStringCast(attr, attribute => { + gb.setPhaseSet(attribute.asInstanceOf[java.lang.Integer]) + }, attribute => { + gb.setPhaseSet(attribute.toInt) + }) + }) + } + gb + } + + private val genotypeFormatFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable( + formatFilters(_, _, _, _), + formatPhaseSet(_, _, _, _) + ) + + // genotype --> htsjdk extract functions + + private[converters] def extractFilters(g: Genotype, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(g.getFiltersApplied) + .filter(ft => ft) + .map(applied => { + Option(g.getFiltersPassed).map(passed => { + if (passed) { + gb.filters("PASS") + } else { + val failedFilters = g.getFiltersFailed + require(failedFilters.nonEmpty, + "Genotype marked as filtered, but no failed filters listed in %s.".format(g)) + gb.filters(failedFilters.mkString(";")) + } + }).getOrElse({ + throw new IllegalArgumentException("Genotype filters were applied but filters passed is null in %s.".format(g)) + }) + }).getOrElse(gb.unfiltered()) 
+ } + + private[converters] def extractPhaseSet(g: Genotype, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(g.getPhased) + .filter(p => p) + .map(p => { + Option(g.getPhaseSet).map(gb.attribute("PS", _)) + gb.phased(true) + }).getOrElse(gb.phased(false)) + } + + private val genotypeExtractFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable( + extractFilters(_, _), + extractPhaseSet(_, _) + ) + + // htsjdk --> genotype annotation format functions + + private[converters] def formatReadDepths(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + gIdx: Int, + gIndices: Array[Int]): GenotypeAnnotation.Builder = { // AD is an array type field if (g.hasAD && gIdx < g.getAD.size) { val ad = g.getAD - gb.setReferenceReadDepth(ad(0)) + gab.setReferenceReadDepth(ad(0)) .setAlternateReadDepth(ad(gIdx)) } else { - gb + gab } } - private[converters] def formatReadDepth(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { - if (g.hasDP) { - gb.setReadDepth(g.getDP) - } else { - gb - } + private[converters] def formatForwardReadDepths(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + gIdx: Int, + gIndices: Array[Int]): GenotypeAnnotation.Builder = { + Option(g.getExtendedAttribute("ADF")) + .map(attr => { + val adf = toIntArray(attr) + try { + gab.setReferenceForwardReadDepth(adf(0)) + .setAlternateForwardReadDepth(adf(gIdx)) + } catch { + case _: ArrayIndexOutOfBoundsException => { + log.warn("Ran into Array Out of Bounds when accessing index %s for field ADF of genotype %s.".format(gIdx, g)) + gab + } + } + }).getOrElse(gab) } - private[converters] def formatMinReadDepth(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { - Option(g.getExtendedAttribute("MIN_DP", null)) + private[converters] def formatReverseReadDepths(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + gIdx: Int, + gIndices: Array[Int]): GenotypeAnnotation.Builder = { + 
Option(g.getExtendedAttribute("ADR")) .map(attr => { - tryAndCatchStringCast(attr, attribute => { - gb.setMinReadDepth(attribute.asInstanceOf[java.lang.Integer]) - }, attribute => { - gb.setMinReadDepth(attribute.toInt) - }) - }).getOrElse(gb) + val adr = toIntArray(attr) + try { + gab.setReferenceReverseReadDepth(adr(0)) + .setAlternateReverseReadDepth(adr(gIdx)) + } catch { + case _: ArrayIndexOutOfBoundsException => { + log.warn("Ran into Array Out of Bounds when accessing index %s for field ADR of genotype %s.".format(gIdx, g)) + gab + } + } + }).getOrElse(gab) } - private[converters] def formatGenotypeQuality(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { - if (g.hasGQ) { - gb.setGenotypeQuality(g.getGQ) + private[converters] def formatReadDepth(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + if (g.hasDP) { + gab.setReadDepth(g.getDP) } else { - gb + gab } } - private[converters] def formatGenotypeLikelihoods(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { + private[converters] def formatLikelihoods(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + // GL Number=G Type=Float + // Convert GL from log10 scale to log scale, optionally convert PL from phred scale to log scale + if (g.hasPL) { val pl = g.getPL try { - val likelihoods = gIndices.map(idx => { + val likelihoods = indices.map(idx => { jDouble(PhredUtils.phredToLogProbability(pl(idx))) }).toList - gb.setGenotypeLikelihoods(likelihoods) + gab.setLikelihoods(likelihoods) } catch { case _: ArrayIndexOutOfBoundsException => { - log.warn("Ran into Array Out of Bounds when accessing indices %s of genotype %s.".format(gIndices.mkString(","), g)) - gb + log.warn("Ran into Array Out of Bounds when accessing indices %s for field PL of genotype 
%s.".format(indices.mkString(","), g)) + gab } } } else { - gb + gab } } - private[converters] def formatNonRefGenotypeLikelihoods(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIndices: Array[Int]): Genotype.Builder = { + private[converters] def formatNonReferenceLikelihoods(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + + /* + + This looks correct, with the exception that nonReferenceLikelihoods shouldn't go into + a new VCF key, but that nonReferenceLikelihoods encodes the GL's for the symbolic non-ref + (*) alt in a gVCF line. + + Handling this gets a bit complex, but the way to think of it is that if alternateAllele is + non-null, we get a VCF line with two alts (alternateAllele, and *). In the diploid case, we'd + get GL's: + + likelihoods(0), likelihoods(1), likelihoods(2), nonRefLikelihoods(1), nonRefLikelihoods(2), 0?. + + I'm not entirely sure what should go in the last spot, which would be the GT=1/2 likelihood, + which I feel is not well defined. If alternateAllele is null, then we get a VCF line with just * + as an alt and with GL=nonRefLikelihoods. + + */ if (g.hasPL) { val pl = g.getPL - gb.setNonReferenceLikelihoods(gIndices.map(idx => { + gab.setNonReferenceLikelihoods(indices.map(idx => { jDouble(PhredUtils.phredToLogProbability(pl(idx))) }).toList) } else { - gb + gab } } - private def tryAndCatchStringCast[T](attr: java.lang.Object, - tryFn: (java.lang.Object) => T, - catchFn: (String) => T): T = { - try { - tryFn(attr) - } catch { - case cce: ClassCastException => { + private[converters] def formatPriors(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { - // is this a string? if so, parse... 
- if (attr.getClass.isAssignableFrom(classOf[String])) { - catchFn(attr.asInstanceOf[String]) - } else { - throw cce + Option(g.getExtendedAttribute("priors")) + .map(attr => { + val p = toFloatArray(attr) + try { + val priors = indices.map(idx => { + jFloat(p(idx)) + }).toList + gab.setPriors(priors) + } catch { + case _: ArrayIndexOutOfBoundsException => { + log.warn("Ran into Array Out of Bounds when accessing indices %s for field priors of genotype %s.".format(indices.mkString(","), g)) + gab + } } - } - case t: Throwable => throw t + }).getOrElse(gab) + } + + private[converters] def formatPosteriors(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + + Option(g.getExtendedAttribute("GP")) + .map(attr => { + val gp = toFloatArray(attr) + try { + val posteriors = indices.map(idx => { + // todo: convert GP from log10 to log scale + jFloat(gp(idx)) + }).toList + gab.setPosteriors(posteriors) + } catch { + case _: ArrayIndexOutOfBoundsException => { + log.warn("Ran into Array Out of Bounds when accessing indices %s for field GP of genotype %s.".format(indices.mkString(","), g)) + gab + } + } + }).getOrElse(gab) + } + + private[converters] def formatQuality(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + if (g.hasGQ) { + val gq = g.getGQ + gab.setQuality(gq) + + gvcfBlocks.foreach(kv => { + val minGQ = kv._1 + val maxGQ = kv._2 + if (gq >= minGQ && gq < maxGQ) { + gab.setMinimumQuality(minGQ) + gab.setMaximumQuality(maxGQ) + } + }) } + gab } - private[converters] def formatStrandBiasComponents(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { - Option(g.getExtendedAttribute("SB")) + private[converters] def formatRmsMappingQuality(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + + 
Option(g.getExtendedAttribute("MQ")) .map(attr => { - tryAndCatchStringCast( - attr, - attribute => { - gb.setStrandBiasComponents(attribute.asInstanceOf[Array[java.lang.Integer]].toSeq) - }, attribute => { - val components = attribute.split(",") - require(components.size == 4, - "Strand bias components must have 4 entries. Saw %s in %s.".format( - attr, g)) + tryAndCatchStringCast(attr, attribute => { + gab.setRmsMappingQuality(attribute.asInstanceOf[java.lang.Integer]) + }, attribute => { + gab.setRmsMappingQuality(attribute.toInt) + }) + }).getOrElse(gab) + } - gb.setStrandBiasComponents(components.map(e => e.toInt: java.lang.Integer) - .toSeq) - }) - }).getOrElse(gb) + private[converters] def formatPhaseSetQuality(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + + Option(g.getExtendedAttribute("PQ")) + .map(attr => { + tryAndCatchStringCast(attr, attribute => { + gab.setPhaseSetQuality(attribute.asInstanceOf[java.lang.Integer]) + }, attribute => { + gab.setPhaseSetQuality(attribute.toInt) + }) + }).getOrElse(gab) } - private[converters] def formatPhaseInfo(g: HtsjdkGenotype, - gb: Genotype.Builder, - gIdx: Int, - gIndices: Array[Int]): Genotype.Builder = { - if (g.isPhased) { - gb.setPhased(true) + private[converters] def formatMinimumReadDepth(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { - Option(g.getExtendedAttribute(VCFConstants.PHASE_SET_KEY)) - .map(attr => { - tryAndCatchStringCast(attr, attribute => { - gb.setPhaseSetId(attribute.asInstanceOf[java.lang.Integer]) - }, attribute => { - gb.setPhaseSetId(attribute.toInt) - }) + Option(g.getExtendedAttribute("MIN_DP", null)) + .map(attr => { + tryAndCatchStringCast(attr, attribute => { + gab.setMinimumReadDepth(attribute.asInstanceOf[java.lang.Integer]) + }, attribute => { + gab.setMinimumReadDepth(attribute.toInt) }) + }).getOrElse(gab) + } - 
Option(g.getExtendedAttribute(VCFConstants.PHASE_QUALITY_KEY)) - .map(attr => { - tryAndCatchStringCast(attr, attribute => { - gb.setPhaseQuality(attribute.asInstanceOf[java.lang.Integer]) - }, attribute => { - gb.setPhaseQuality(attribute.toInt) - }) + private[converters] def formatFisherStrandBiasPValue(g: HtsjdkGenotype, + gab: GenotypeAnnotation.Builder, + idx: Int, + indices: Array[Int]): GenotypeAnnotation.Builder = { + + Option(g.getExtendedAttribute("FS")) + .map(attr => { + tryAndCatchStringCast(attr, attribute => { + gab.setFisherStrandBiasPValue(attribute.asInstanceOf[java.lang.Float]) + }, attribute => { + gab.setFisherStrandBiasPValue(toFloat(attribute)) }) - } - gb + }).getOrElse(gab) } - private val genotypeFormatFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable( - formatAllelicDepth(_, _, _, _), + private val genotypeAnnotationFormatFns: Iterable[(HtsjdkGenotype, GenotypeAnnotation.Builder, Int, Array[Int]) => GenotypeAnnotation.Builder] = Iterable( + formatReadDepths(_, _, _, _), + formatForwardReadDepths(_, _, _, _), + formatReverseReadDepths(_, _, _, _), formatReadDepth(_, _, _, _), - formatMinReadDepth(_, _, _, _), - formatGenotypeQuality(_, _, _, _), - formatGenotypeLikelihoods(_, _, _, _), - formatStrandBiasComponents(_, _, _, _), - formatPhaseInfo(_, _, _, _) + formatLikelihoods(_, _, _, _), + formatNonReferenceLikelihoods(_, _, _, _), + formatPriors(_, _, _, _), + formatPosteriors(_, _, _, _), + formatQuality(_, _, _, _), + formatRmsMappingQuality(_, _, _, _), + formatPhaseSetQuality(_, _, _, _), + formatMinimumReadDepth(_, _, _, _), + formatFisherStrandBiasPValue(_, _, _, _) ) - // genotype --> htsjdk extract functions + // genotype annotation --> htsjdk extract functions - private[converters] def extractAllelicDepth(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - (Option(g.getReferenceReadDepth), Option(g.getAlternateReadDepth)) match { + private[converters] def 
extractReadDepths(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + + (Option(ga.getReferenceReadDepth), Option(ga.getAlternateReadDepth)) match { case (Some(ref), Some(alt)) => gb.AD(Array(ref, alt)) case (Some(_), None) => { - throw new IllegalArgumentException("Had reference depth but no alternate depth in %s.".format(g)) + throw new IllegalArgumentException("Had reference depth but no alternate depth in %s.".format(ga)) } case (None, Some(_)) => { - throw new IllegalArgumentException("Had alternate depth but no reference depth in %s.".format(g)) + throw new IllegalArgumentException("Had alternate depth but no reference depth in %s.".format(ga)) } case _ => gb.noAD } } - private[converters] def extractReadDepth(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(g.getReadDepth).fold(gb.noDP)(dp => gb.DP(dp)) + private[converters] def extractForwardReadDepths(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + + (Option(ga.getReferenceForwardReadDepth), Option(ga.getAlternateForwardReadDepth)) match { + case (Some(ref), Some(alt)) => gb.attribute("ADF", ImmutableList.of(ref, alt)) + case (None, Some(_)) => throw new IllegalArgumentException("Forward read depth specified without reference forward read depth") + case (Some(_), None) => throw new IllegalArgumentException("Reference forward read depth specified without forward read depth") + case _ => + } + gb } - private[converters] def extractMinReadDepth(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(g.getMinReadDepth).fold(gb)(minDp => gb.attribute("MIN_DP", minDp)) + private[converters] def extractReverseReadDepths(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + + (Option(ga.getReferenceReverseReadDepth), Option(ga.getAlternateReverseReadDepth)) match { + case (Some(ref), Some(alt)) => gb.attribute("ADR", ImmutableList.of(ref, alt)) + case (None, Some(_)) => throw new IllegalArgumentException("Reverse read depth 
specified without reference reverse read depth") + case (Some(_), None) => throw new IllegalArgumentException("Reference reverse read depth specified without reverse read depth") + case _ => + } + gb } - private[converters] def extractGenotypeQuality(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(g.getGenotypeQuality).fold(gb.noGQ)(gq => gb.GQ(gq)) + private[converters] def extractReadDepth(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(ga.getReadDepth).fold(gb.noDP)(gb.DP(_)) } private[converters] def numPls(copyNumber: Int): Int = { @@ -1089,6 +1263,7 @@ class VariantContextConverter( array } + /* private[converters] def extractGenotypeLikelihoods(g: Genotype, gb: GenotypeBuilder): GenotypeBuilder = { val gls = g.getGenotypeLikelihoods @@ -1113,167 +1288,68 @@ class VariantContextConverter( gb.PL(nonRefPls(gls, nls)) } } + */ - private[converters] def extractStrandBiasComponents(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - - val components = g.getStrandBiasComponents - - if (components.isEmpty) { - gb - } else { - require(components.size == 4, - "Illegal strand bias components length. Must be empty or 4. 
In:\n%s".format(g)) - gb.attribute("SB", components.map(i => i: Int).toArray) - } + private[converters] def extractLikelihoods(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + gb } - private[converters] def extractPhaseInfo(g: Genotype, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(g.getPhased) - .filter(p => p) - .map(p => { - val setFns: Iterable[Option[(GenotypeBuilder => GenotypeBuilder)]] = Iterable( - Option(g.getPhaseSetId).map(ps => { - (b: GenotypeBuilder) => b.attribute("PS", ps) - }), - Option(g.getPhaseQuality).map(pq => { - (b: GenotypeBuilder) => b.attribute("PQ", pq) - })) - - setFns.flatten - .foldLeft(gb.phased(true))((b, fn) => fn(b)) - }).getOrElse(gb.phased(false)) + private[converters] def extractNonReferenceLikelihoods(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + gb } - private val genotypeExtractFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable( - extractAllelicDepth(_, _), - extractReadDepth(_, _), - extractMinReadDepth(_, _), - extractGenotypeQuality(_, _), - extractGenotypeLikelihoods(_, _), - extractStrandBiasComponents(_, _), - extractPhaseInfo(_, _) - ) - - // htsjdk --> genotype annotation format functions - - private[converters] def formatFilters(g: HtsjdkGenotype, - vcab: VariantCallingAnnotations.Builder, - idx: Int, - indices: Array[Int]): VariantCallingAnnotations.Builder = { - // see https://github.com/samtools/htsjdk/issues/741 - val gtFiltersWereApplied = true - if (gtFiltersWereApplied) { - val filtersWereApplied = vcab.setFiltersApplied(true) - if (g.isFiltered) { - filtersWereApplied.setFiltersPassed(false) - .setFiltersFailed(g.getFilters.split(";").toList) - } else { - filtersWereApplied.setFiltersPassed(true) - } - } else { - vcab.setFiltersApplied(false) - } + private[converters] def extractPriors(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(ga.getPriors).fold(gb)(gb.attribute("priors", _)) } - private[converters] 
def formatFisherStrandBias(g: HtsjdkGenotype, - vcab: VariantCallingAnnotations.Builder, - idx: Int, - indices: Array[Int]): VariantCallingAnnotations.Builder = { - Option(g.getExtendedAttribute("FS")) - .map(attr => { - tryAndCatchStringCast(attr, attribute => { - vcab.setFisherStrandBiasPValue(attribute.asInstanceOf[java.lang.Float]) - }, attribute => { - vcab.setFisherStrandBiasPValue(toFloat(attribute)) - }) - }).getOrElse(vcab) + private[converters] def extractPosteriors(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + // todo: convert back from log scale to log10 scale + Option(ga.getPosteriors).fold(gb)(gb.attribute("GP", _)) } - private[converters] def formatRmsMapQ(g: HtsjdkGenotype, - vcab: VariantCallingAnnotations.Builder, - idx: Int, - indices: Array[Int]): VariantCallingAnnotations.Builder = { - Option(g.getExtendedAttribute("MQ")) - .map(attr => { - tryAndCatchStringCast(attr, attribute => { - vcab.setRmsMapQ(attribute.asInstanceOf[java.lang.Float]) - }, attribute => { - vcab.setRmsMapQ(toFloat(attribute)) - }) - }).getOrElse(vcab) + private[converters] def extractQuality(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(ga.getQuality).fold(gb.noGQ)(gb.GQ(_)) } - private[converters] def formatMapQ0(g: HtsjdkGenotype, - vcab: VariantCallingAnnotations.Builder, - idx: Int, - indices: Array[Int]): VariantCallingAnnotations.Builder = { - Option(g.getExtendedAttribute("MQ0")) - .map(attr => { - tryAndCatchStringCast(attr, attribute => { - vcab.setMapq0Reads(attribute.asInstanceOf[java.lang.Integer]) - }, attribute => { - vcab.setMapq0Reads(attribute.toInt) - }) - }).getOrElse(vcab) + private[converters] def extractRmsMappingQuality(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(ga.getRmsMappingQuality).fold(gb)(gb.attribute("MQ", _)) } - private val genotypeAnnotationFormatFns: Iterable[(HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => 
VariantCallingAnnotations.Builder] = Iterable( - formatFilters(_, _, _, _), - formatFisherStrandBias(_, _, _, _), - formatRmsMapQ(_, _, _, _), - formatMapQ0(_, _, _, _) - ) - - // genotype annotation --> htsjdk extract functions - - private[converters] def extractFilters(vca: VariantCallingAnnotations, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(vca.getFiltersApplied) - .filter(ft => ft) - .map(applied => { - Option(vca.getFiltersPassed).map(passed => { - if (passed) { - gb.filters("PASS") - } else { - val failedFilters = vca.getFiltersFailed - require(failedFilters.nonEmpty, - "Genotype marked as filtered, but no failed filters listed in %s.".format(vca)) - gb.filters(failedFilters.mkString(";")) - } - }).getOrElse({ - throw new IllegalArgumentException("Filters were applied but filters passed is null in %s.".format(vca)) - }) - }).getOrElse(gb.unfiltered()) + private[converters] def extractPhaseSetQuality(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + Option(ga.getPhaseSetQuality).fold(gb)(gb.attribute("PQ", _)) } - private[converters] def extractFisherStrandBias(vca: VariantCallingAnnotations, + private[converters] def extractMinimumReadDepth(ga: GenotypeAnnotation, gb: GenotypeBuilder): GenotypeBuilder = { - Option(vca.getFisherStrandBiasPValue).map(fs => { - gb.attribute("FS", fs) - }).getOrElse(gb) - } - - private[converters] def extractRmsMapQ(vca: VariantCallingAnnotations, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(vca.getRmsMapQ).map(mq => { - gb.attribute("MQ", mq) - }).getOrElse(gb) + Option(ga.getMinimumReadDepth).fold(gb)(gb.attribute("MIN_DP", _)) } - private[converters] def extractMapQ0(vca: VariantCallingAnnotations, - gb: GenotypeBuilder): GenotypeBuilder = { - Option(vca.getMapq0Reads).map(mq0 => { - gb.attribute("MQ0", mq0) - }).getOrElse(gb) + private[converters] def extractFisherStrandBiasPValue(ga: GenotypeAnnotation, + gb: GenotypeBuilder): GenotypeBuilder = { + 
Option(ga.getFisherStrandBiasPValue).fold(gb)(gb.attribute("FS", _)) } - private val genotypeAnnotationExtractFns: Iterable[(VariantCallingAnnotations, GenotypeBuilder) => GenotypeBuilder] = Iterable( - extractFilters(_, _), - extractFisherStrandBias(_, _), - extractRmsMapQ(_, _), - extractMapQ0(_, _) + private val genotypeAnnotationExtractFns: Iterable[(GenotypeAnnotation, GenotypeBuilder) => GenotypeBuilder] = Iterable( + extractReadDepths(_, _), + extractForwardReadDepths(_, _), + extractReverseReadDepths(_, _), + extractReadDepth(_, _), + extractLikelihoods(_, _), + extractNonReferenceLikelihoods(_, _), + extractPriors(_, _), + extractPosteriors(_, _), + extractQuality(_, _), + extractRmsMappingQuality(_, _), + extractPhaseSetQuality(_, _), + extractMinimumReadDepth(_, _), + extractFisherStrandBiasPValue(_, _) ) // safe type conversions @@ -1639,7 +1715,7 @@ class VariantContextConverter( private def makeVariantFormatFn( headerLines: Seq[VCFHeaderLine], - stringency: ValidationStringency = ValidationStringency.STRICT): (HtsjdkVariantContext, Option[String], Int, Boolean) => (Variant, Variant) = { + stringency: ValidationStringency = ValidationStringency.STRICT): (HtsjdkVariantContext, Option[String], Int, Boolean) => Variant = { val attributeFns: Iterable[(HtsjdkVariantContext, Int, Array[Int]) => Option[(String, String)]] = headerLines .flatMap(hl => hl match { @@ -1675,7 +1751,7 @@ class VariantContextConverter( def convert(vc: HtsjdkVariantContext, alt: Option[String], alleleIdx: Int, - wasSplit: Boolean): (Variant, Variant) = { + wasSplit: Boolean): Variant = { // create the builder val variantBuilder = Variant.newBuilder @@ -1686,7 +1762,7 @@ class VariantContextConverter( // was this split? 
if (wasSplit) { - variantBuilder.setSplitFromMultiAllelic(true) + variantBuilder.setSplitFromMultiallelic(true) } alt.foreach(variantBuilder.setAlternateAllele(_)) @@ -1713,7 +1789,7 @@ class VariantContextConverter( } }) - val variant = variantBuilder.build + val variant = variantBuilder.build() val variantAnnotationBuilder = VariantAnnotation.newBuilder val boundAnnotationFns: Iterable[VariantAnnotation.Builder => VariantAnnotation.Builder] = variantAnnotationFormatFns @@ -1752,14 +1828,14 @@ class VariantContextConverter( } variantBuilder.setAnnotation(convertedAnnotationWithAttrs.build) - (variant, variantBuilder.build) + variantBuilder.build } convert(_, _, _, _) } private def makeGenotypeFormatFn( - headerLines: Seq[VCFHeaderLine]): (HtsjdkGenotype, Variant, Allele, Int, Option[Int], Boolean) => Genotype = { + headerLines: Seq[VCFHeaderLine]): (HtsjdkGenotype, Variant, HtsjdkAllele, Int, Option[Int], Boolean) => Genotype = { val attributeFns: Iterable[(HtsjdkGenotype, Int, Array[Int]) => Option[(String, String)]] = headerLines .flatMap(hl => hl match { @@ -1783,33 +1859,34 @@ class VariantContextConverter( def convert(g: HtsjdkGenotype, variant: Variant, - allele: Allele, + allele: HtsjdkAllele, alleleIdx: Int, nonRefIndex: Option[Int], wasSplit: Boolean): Genotype = { // create the builder val builder = Genotype.newBuilder() - .setVariant(variant) .setReferenceName(variant.getReferenceName) .setStart(variant.getStart) .setEnd(variant.getEnd) + .setReferenceAllele(variant.getReferenceAllele) + .setAlternateAllele(variant.getAlternateAllele) .setSampleId(g.getSampleName) .setAlleles(g.getAlleles.map(gtAllele => { if (gtAllele.isReference) { - GenotypeAllele.REF + Allele.REF } else if (gtAllele.isNoCall) { - GenotypeAllele.NO_CALL + Allele.NO_CALL } else if (gtAllele.equals(allele, true)) { - GenotypeAllele.ALT + Allele.ALT } else { - GenotypeAllele.OTHER_ALT + Allele.OTHER_ALT } })) // was this split? 
if (wasSplit) { - builder.setSplitFromMultiAllelic(true) + builder.setSplitFromMultiallelic(true) } // get array indices @@ -1842,26 +1919,17 @@ class VariantContextConverter( } }) - // if we have a non-ref allele, fold and build - val coreWithOptNonRefs = nonRefIndex.fold(convertedCore)(nonRefAllele => { - - // non-ref pl indices - val nrIndices = GenotypeLikelihoods.getPLIndecesOfAlleles(0, nonRefAllele) - - formatNonRefGenotypeLikelihoods(g, convertedCore, nrIndices) - }) - - val vcAnns = VariantCallingAnnotations.newBuilder + val gAnns = GenotypeAnnotation.newBuilder // bind the annotation conversion functions and fold - val boundAnnotationFns: Iterable[VariantCallingAnnotations.Builder => VariantCallingAnnotations.Builder] = genotypeAnnotationFormatFns + val boundAnnotationFns: Iterable[GenotypeAnnotation.Builder => GenotypeAnnotation.Builder] = genotypeAnnotationFormatFns .map(fn => { - fn(g, _: VariantCallingAnnotations.Builder, alleleIdx, indices) + fn(g, _: GenotypeAnnotation.Builder, alleleIdx, indices) }) - val convertedAnnotations = boundAnnotationFns.foldLeft(vcAnns)( - (vcab: VariantCallingAnnotations.Builder, fn) => { + val convertedAnnotations = boundAnnotationFns.foldLeft(gAnns)( + (gab: GenotypeAnnotation.Builder, fn) => { try { - fn(vcab) + fn(gab) } catch { case t: Throwable => { if (stringency == ValidationStringency.STRICT) { @@ -1871,7 +1939,7 @@ class VariantContextConverter( log.warn("Converting genotype annotation field in %s with function %s failed: %s".format( g, fn, t)) } - vcab + gab } } } @@ -1889,8 +1957,8 @@ class VariantContextConverter( } // build the annotations and attach - val gtWithAnnotations = coreWithOptNonRefs - .setVariantCallingAnnotations(convertedAnnotationsWithAttrs.build) + val gtWithAnnotations = convertedCore + .setAnnotation(convertedAnnotationsWithAttrs.build) // build and return gtWithAnnotations.build() @@ -2180,7 +2248,7 @@ class VariantContextConverter( def convert(vc: ADAMVariantContext): 
HtsjdkVariantContext = { val v = vc.variant.variant val hasNonRefAlleles = vc.genotypes - .exists(_.getNonReferenceLikelihoods.length != 0) + .exists(_.getAnnotation.getNonReferenceLikelihoods.length != 0) val builder = new VariantContextBuilder() .chr(v.getReferenceName) .start(v.getStart + 1) @@ -2307,21 +2375,21 @@ class VariantContextConverter( }) // convert the annotations if they exist - val gtWithAnnotations = Option(g.getVariantCallingAnnotations) - .fold(convertedCore)(vca => { + val gtWithAnnotations = Option(g.getAnnotation) + .fold(convertedCore)(ga => { // bind the annotation conversion functions and fold val convertedAnnotations = genotypeAnnotationExtractFns.foldLeft(convertedCore)( (gb: GenotypeBuilder, fn) => { try { - fn(vca, gb) + fn(ga, gb) } catch { case t: Throwable => { if (stringency == ValidationStringency.STRICT) { throw t } else { if (stringency == ValidationStringency.LENIENT) { - log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, vca, t)) + log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, ga, t)) } gb @@ -2331,7 +2399,7 @@ class VariantContextConverter( }) // get the attribute map - val attributes: Map[String, String] = vca.getAttributes.toMap + val attributes: Map[String, String] = ga.getAttributes.toMap // apply the attribute converters and return attributeFns.foldLeft(convertedAnnotations)((gb: GenotypeBuilder, fn) => { @@ -2347,7 +2415,7 @@ class VariantContextConverter( throw t } else { if (stringency == ValidationStringency.LENIENT) { - log.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, vca, t)) + log.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, ga, t)) } gb diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala index af02c4b077..2e76688244 100644 --- 
a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala @@ -90,24 +90,11 @@ object ReferencePosition extends Serializable { * @return The reference position of this genotype. */ def apply(genotype: Genotype): ReferencePosition = { - val referenceNameSet = Seq(Option(genotype.getReferenceName), Option(genotype.getVariant.getReferenceName)) - .flatten - .toSet - val startSet = Seq(Option(genotype.getStart), Option(genotype.getVariant.getStart)) - .flatten - .toSet - require(referenceNameSet.nonEmpty, "Genotype has no reference name: %s".format(genotype)) - require(referenceNameSet.size == 1, "Genotype has multiple reference names: %s, %s".format( - referenceNameSet, genotype)) - require(startSet.nonEmpty, "Genotype has no start: %s".format(genotype)) - require(startSet.size == 1, "Genotype has multiple starts: %s, %s".format( - startSet, genotype)) - // see ADAM-1959, VCF 0 = telomere - if (startSet.head != -1) { - new ReferencePosition(referenceNameSet.head, startSet.head) + if (genotype.getStart != -1) { + new ReferencePosition(genotype.getReferenceName, genotype.getStart) } else { - new ReferencePosition(referenceNameSet.head, 0L) + new ReferencePosition(genotype.getReferenceName, 0L) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala index 153103c013..8cf0abcaf7 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala @@ -29,34 +29,35 @@ import org.bdgenomics.formats.avro.{ Genotype, Variant, VariantAnnotation } object VariantContext { /** - * Constructs an VariantContext from an Variant + * Create a new variant context from a variant. 
* - * @param v Variant which is used to construct the ReferencePosition - * @return VariantContext corresponding to the Variant + * @param v Variant to create a variant context from. + * @return Return a new variant context created from the specified variant. */ def apply(v: Variant): VariantContext = { new VariantContext(ReferencePosition(v), RichVariant(v), Iterable.empty) } /** - * Constructs an VariantContext from an Variant and Seq[Genotype] - * and DatabaseVariantAnnotation + * Create a new variant context from a variant and one or more genotypes. * - * @param v Variant which is used to construct the ReferencePosition - * @param genotypes Seq[Genotype] - * @return VariantContext corresponding to the Variant + * @param v Variant to create a variant context from. + * @param genotypes One or more genotypes. + * @return Return a new variant context created from the specified variant + * and genotypes. */ def apply(v: Variant, genotypes: Iterable[Genotype]): VariantContext = { new VariantContext(ReferencePosition(v), RichVariant(v), genotypes) } /** - * Builds a variant context off of a set of genotypes. Builds variants from the genotypes. + * Create a new variant context from a set of genotypes. * * @note Genotypes must be at the same position. * - * @param genotypes List of genotypes to build variant context from. - * @return A variant context corresponding to the variants and genotypes at this site. + * @param genotypes One or more genotypes to create a variant context from. + * @return Return a new variant context created from the specified set of + * genotypes. */ def buildFromGenotypes(genotypes: Seq[Genotype]): VariantContext = { val position = ReferencePosition(genotypes.head) @@ -65,7 +66,15 @@ object VariantContext { "Genotypes do not all have the same position." 
) - val variant = genotypes.head.getVariant + val g = genotypes.head + val variant = Variant.newBuilder() + .setReferenceName(g.getReferenceName) + .setStart(g.getStart) + .setEnd(g.getEnd) + .setSplitFromMultiallelic(g.getSplitFromMultiallelic) + .setReferenceAllele(g.getReferenceAllele) + .setAlternateAllele(g.getAlternateAllele) + .build() new VariantContext(position, RichVariant(variant), genotypes) } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala index b6a0def2fa..2dc6478fc6 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala @@ -49,7 +49,7 @@ import org.bdgenomics.adam.sql.{ import org.bdgenomics.utils.interval.array.{ IntervalArray, IntervalArraySerializer } import org.bdgenomics.formats.avro.{ Genotype, - GenotypeAllele, + Allele, Sample, Variant, VariantAnnotation @@ -204,38 +204,24 @@ case class DatasetBoundGenotypeDataset private[rdd] ( copy(samples = newSamples.toSeq) } - override def copyVariantEndToAttribute(): GenotypeDataset = { - def copyEnd(g: GenotypeProduct): GenotypeProduct = { - val variant = g.variant.getOrElse(VariantProduct()) - val annotation = variant.annotation.getOrElse(VariantAnnotationProduct()) - val attributes = annotation.attributes + ("END" -> g.end.toString) - val annotationCopy = annotation.copy(attributes = attributes) - val variantCopy = variant.copy(annotation = Some(annotationCopy)) - g.copy(variant = Some(variantCopy)) - } - val sqlContext = SQLContext.getOrCreate(rdd.context) - import sqlContext.implicits._ - transformDataset(dataset => dataset.map(copyEnd)) - } - override def filterToFiltersPassed(): GenotypeDataset = { - transformDataset(dataset => dataset.filter(dataset.col("variantCallingAnnotations.filtersPassed"))) + transformDataset(dataset => 
dataset.filter(dataset.col("filtersPassed"))) } override def filterByQuality(minimumQuality: Double): GenotypeDataset = { - transformDataset(dataset => dataset.filter(dataset.col("genotypeQuality") >= minimumQuality)) + transformDataset(dataset => dataset.filter(dataset.col("annotation.quality") >= minimumQuality)) } override def filterByReadDepth(minimumReadDepth: Int): GenotypeDataset = { - transformDataset(dataset => dataset.filter(dataset.col("readDepth") >= minimumReadDepth)) + transformDataset(dataset => dataset.filter(dataset.col("annotation.readDepth") >= minimumReadDepth)) } override def filterByAlternateReadDepth(minimumAlternateReadDepth: Int): GenotypeDataset = { - transformDataset(dataset => dataset.filter(dataset.col("alternateReadDepth") >= minimumAlternateReadDepth)) + transformDataset(dataset => dataset.filter(dataset.col("annotation.alternateReadDepth") >= minimumAlternateReadDepth)) } override def filterByReferenceReadDepth(minimumReferenceReadDepth: Int): GenotypeDataset = { - transformDataset(dataset => dataset.filter(dataset.col("referenceReadDepth") >= minimumReferenceReadDepth)) + transformDataset(dataset => dataset.filter(dataset.col("annotation.referenceReadDepth") >= minimumReferenceReadDepth)) } override def filterToSample(sampleId: String) = { @@ -375,7 +361,12 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno val sqlContext = SQLContext.getOrCreate(rdd.context) import sqlContext.implicits._ - val notDedupedVariants = dataset.select($"variant.*") + val notDedupedVariants = dataset.select($"referenceName", + $"start", + $"end", + $"splitFromMultiallelic", + $"referenceAllele", + $"alternateAllele") .as[VariantProduct] val maybeDedupedVariants = if (dedupe) { @@ -385,6 +376,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno notDedupedVariants.dropDuplicates("referenceName", "start", "end", + "splitFromMultiallelic", "referenceAllele", "alternateAllele") } else { @@ 
-394,32 +386,13 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno VariantDataset(maybeDedupedVariants, sequences, headerLines) } - /** - * Copy variant end to a variant attribute (VCF INFO field "END"). - * - * @return GenotypeDataset with variant end copied to a variant attribute. - */ - def copyVariantEndToAttribute(): GenotypeDataset = { - def copyEnd(g: Genotype): Genotype = { - val variant = Option(g.variant).getOrElse(new Variant()) - val annotation = Option(variant.annotation).getOrElse(new VariantAnnotation()) - val attributes = new java.util.HashMap[String, String]() - Option(annotation.attributes).map(attributes.putAll(_)) - attributes.put("END", g.end.toString) - val annotationCopy = VariantAnnotation.newBuilder(annotation).setAttributes(attributes).build() - val variantCopy = Variant.newBuilder(variant).setAnnotation(annotationCopy).build() - Genotype.newBuilder(g).setVariant(variantCopy).build() - } - transform(rdd => rdd.map(copyEnd)) - } - /** * Filter this GenotypeDataset to genotype filters passed (VCF FORMAT field "FT" value PASS). * * @return GenotypeDataset filtered to genotype filters passed. */ def filterToFiltersPassed(): GenotypeDataset = { - transform(rdd => rdd.filter(g => Option(g.getVariantCallingAnnotations).exists(_.getFiltersPassed))) + transform(rdd => rdd.filter(_.getFiltersPassed)) } /** @@ -429,7 +402,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * @return GenotypeDataset filtered by quality. */ def filterByQuality(minimumQuality: Double): GenotypeDataset = { - transform(rdd => rdd.filter(g => Option(g.getGenotypeQuality).exists(_ >= minimumQuality))) + transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getQuality).exists(_ >= minimumQuality)))) } /** @@ -439,7 +412,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * @return GenotypeDataset filtered by read depth. 
*/ def filterByReadDepth(minimumReadDepth: Int): GenotypeDataset = { - transform(rdd => rdd.filter(g => Option(g.getReadDepth).exists(_ >= minimumReadDepth))) + transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getReadDepth).exists(_ >= minimumReadDepth)))) } /** @@ -449,7 +422,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * @return GenotypeDataset filtered by alternate read depth. */ def filterByAlternateReadDepth(minimumAlternateReadDepth: Int): GenotypeDataset = { - transform(rdd => rdd.filter(g => Option(g.getAlternateReadDepth).exists(_ >= minimumAlternateReadDepth))) + transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getAlternateReadDepth).exists(_ >= minimumAlternateReadDepth)))) } /** @@ -459,14 +432,14 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * @return GenotypeDataset filtered by reference read depth. */ def filterByReferenceReadDepth(minimumReferenceReadDepth: Int): GenotypeDataset = { - transform(rdd => rdd.filter(g => Option(g.getReferenceReadDepth).exists(_ >= minimumReferenceReadDepth))) + transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getReferenceReadDepth).exists(_ >= minimumReferenceReadDepth)))) } /** * Filter this GenotypeDataset by sample to those that match the specified sample. * * @param sampleId Sample to filter by. - * return GenotypeDataset filtered by sample. + * @return GenotypeDataset filtered by sample. */ def filterToSample(sampleId: String): GenotypeDataset = { transform(rdd => rdd.filter(g => Option(g.getSampleId).exists(_ == sampleId))) @@ -476,7 +449,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * (Java-specific) Filter this GenotypeDataset by sample to those that match the specified samples. * * @param sampleIds List of samples to filter by. - * return GenotypeDataset filtered by one or more samples. 
+ * @return GenotypeDataset filtered by one or more samples. */ def filterToSamples(sampleIds: java.util.List[String]): GenotypeDataset = { filterToSamples(asScalaBuffer(sampleIds)) @@ -486,7 +459,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * (Scala-specific) Filter this GenotypeDataset by sample to those that match the specified samples. * * @param sampleIds Sequence of samples to filter by. - * return GenotypeDataset filtered by one or more samples. + * @return GenotypeDataset filtered by one or more samples. */ def filterToSamples(sampleIds: Seq[String]): GenotypeDataset = { transform(rdd => rdd.filter(g => Option(g.getSampleId).exists(sampleIds.contains(_)))) @@ -498,7 +471,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno * @return GenotypeDataset filtered to remove genotypes containing NO_CALL alleles. */ def filterNoCalls(): GenotypeDataset = { - transform(rdd => rdd.filter(g => !g.getAlleles.contains(GenotypeAllele.NO_CALL))) + transform(rdd => rdd.filter(g => !g.getAlleles.contains(Allele.NO_CALL))) } /** diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala index deb7ace160..e7ba929920 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala @@ -50,8 +50,6 @@ case class VCFOutFormatter( val stringency: ValidationStringency, val optHeaderLines: Option[CollectionAccumulator[VCFHeaderLine]]) extends OutFormatter[VariantContext] with Logging { - private val nestAnn = VariantContextConverter.getNestAnnotationInGenotypesProperty(conf) - /** * OutFormatter that reads streaming VCF. Defaults to ValidationStringency.LENIENT. * Java-friendly no-arg constructor. 
@@ -110,9 +108,7 @@ case class VCFOutFormatter( optHeaderLines.map(accumulator => lines.foreach(line => accumulator.add(line))) // make converter - val converter = new VariantContextConverter(lines, - stringency, - nestAnn) + val converter = new VariantContextConverter(lines, stringency) @tailrec def convertIterator(iter: AsciiLineReaderIterator, records: ListBuffer[VariantContext] = ListBuffer.empty): Iterator[VariantContext] = { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala index ffa700608c..c99958d5bd 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala @@ -186,7 +186,7 @@ case class DatasetBoundVariantDataset private[rdd] ( } override def filterByQuality(minimumQuality: Double): VariantDataset = { - transformDataset(dataset => dataset.filter(!dataset.col("splitFromMultiAllelic") && dataset.col("quality") >= minimumQuality)) + transformDataset(dataset => dataset.filter(!dataset.col("splitFromMultiallelic") && dataset.col("quality") >= minimumQuality)) } override def filterByReadDepth(minimumReadDepth: Int): VariantDataset = { @@ -318,18 +318,18 @@ sealed abstract class VariantDataset extends AvroGenomicDataset[Variant, Variant /** * Filter this VariantDataset by quality (VCF column 6 "QUAL"). Variants split - * for multi-allelic sites will also be filtered out. + * for multiallelic sites will also be filtered out. * * @param minimumQuality Minimum quality to filter by, inclusive. * @return VariantDataset filtered by quality. 
*/ def filterByQuality(minimumQuality: Double): VariantDataset = { - transform(rdd => rdd.filter(v => !(Option(v.getSplitFromMultiAllelic).exists(_ == true)) && Option(v.getQuality).exists(_ >= minimumQuality))) + transform(rdd => rdd.filter(v => !(Option(v.getSplitFromMultiallelic).exists(_ == true)) && Option(v.getQuality).exists(_ >= minimumQuality))) } /** * Filter this VariantDataset by total read depth (VCF INFO reserved key AD, Number=R, - * split for multi-allelic sites into single integer values for the reference allele + * split for multiallelic sites into single integer values for the reference allele * (filterByReferenceReadDepth) and the alternate allele (this method)). * * @param minimumReadDepth Minimum total read depth to filter by, inclusive. @@ -341,7 +341,7 @@ sealed abstract class VariantDataset extends AvroGenomicDataset[Variant, Variant /** * Filter this VariantDataset by reference total read depth (VCF INFO reserved key AD, Number=R, - * split for multi-allelic sites into single integer values for the alternate allele + * split for multiallelic sites into single integer values for the alternate allele * (filterByReadDepth) and the reference allele (this method)). * * @param minimumReferenceReadDepth Minimum reference total read depth to filter by, inclusive. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala index 511ed74f62..0ad3da5e34 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala @@ -31,10 +31,13 @@ object RichVariant { * @return Returns a rich variant representing the variant that was genotyped. 
*/ def genotypeToRichVariant(genotype: Genotype): RichVariant = { - val variant = Variant.newBuilder(genotype.variant) + val variant = Variant.newBuilder() .setReferenceName(genotype.getReferenceName) .setStart(genotype.getStart) .setEnd(genotype.getEnd) + .setSplitFromMultiallelic(genotype.getSplitFromMultiallelic) + .setReferenceAllele(genotype.getReferenceAllele) + .setAlternateAllele(genotype.getAlternateAllele) .build() RichVariant(variant) } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala index 697cb971fb..2a977e2801 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala @@ -233,6 +233,7 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging { new org.bdgenomics.adam.util.TwoBitFileSerializer) // org.bdgenomics.formats.avro + kryo.register(classOf[org.bdgenomics.formats.avro.Allele]) kryo.register(classOf[org.bdgenomics.formats.avro.AlignmentRecord], new AvroSerializer[org.bdgenomics.formats.avro.AlignmentRecord]) kryo.register(classOf[org.bdgenomics.formats.avro.Dbxref], @@ -243,8 +244,8 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging { new AvroSerializer[org.bdgenomics.formats.avro.Fragment]) kryo.register(classOf[org.bdgenomics.formats.avro.Genotype], new AvroSerializer[org.bdgenomics.formats.avro.Genotype]) - kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeAllele]) - kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeType]) + kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeAnnotation], + new AvroSerializer[org.bdgenomics.formats.avro.GenotypeAnnotation]) kryo.register(classOf[org.bdgenomics.formats.avro.NucleotideContigFragment], new AvroSerializer[org.bdgenomics.formats.avro.NucleotideContigFragment]) 
kryo.register(classOf[org.bdgenomics.formats.avro.OntologyTerm], @@ -271,8 +272,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging { kryo.register(classOf[org.bdgenomics.formats.avro.VariantAnnotation], new AvroSerializer[org.bdgenomics.formats.avro.VariantAnnotation]) kryo.register(classOf[org.bdgenomics.formats.avro.VariantAnnotationMessage]) - kryo.register(classOf[org.bdgenomics.formats.avro.VariantCallingAnnotations], - new AvroSerializer[org.bdgenomics.formats.avro.VariantCallingAnnotations]) // org.codehaus.jackson.node kryo.register(classOf[org.codehaus.jackson.node.NullNode]) @@ -312,12 +311,13 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging { kryo.register(classOf[scala.Array[org.apache.spark.sql.catalyst.InternalRow]]) kryo.register(classOf[scala.Array[org.apache.spark.sql.types.StructField]]) kryo.register(classOf[scala.Array[org.apache.spark.sql.types.StructType]]) + kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Allele]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.AlignmentRecord]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Dbxref]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Feature]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Fragment]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Genotype]]) - kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.GenotypeAllele]]) + kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.GenotypeAnnotation]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.OntologyTerm]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.NucleotideContigFragment]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Read]]) @@ -330,7 +330,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging { kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Variant]]) 
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantAnnotation]]) kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantAnnotationMessage]]) - kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantCallingAnnotations]]) kryo.register(classOf[scala.Array[org.bdgenomics.adam.algorithms.consensus.Consensus]]) kryo.register(classOf[scala.Array[org.bdgenomics.adam.models.Coverage]]) kryo.register(classOf[scala.Array[org.bdgenomics.adam.models.ReferencePosition]]) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala index 746e9f6272..87a510893f 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala @@ -179,11 +179,11 @@ object ADAMShell { g.getReferenceName(), g.getStart().toString, g.getEnd().toString, - g.getVariant().getReferenceAllele(), - g.getVariant().getAlternateAllele(), + g.getReferenceAllele(), + g.getAlternateAllele(), g.getAlleles().toString, g.getSampleId() - ) ++ keys.map(key => Option(g.getVariantCallingAnnotations().getAttributes().get(key)).getOrElse(""))).toArray + ) ++ keys.map(key => Option(g.getAnnotation().getAttributes().get(key)).getOrElse(""))).toArray println("\nGenotype Format Fields\n" + new ASCIITable(header, rows).toString) } @@ -218,13 +218,13 @@ object ADAMShell { g.getReferenceName(), g.getStart().toString, g.getEnd().toString, - g.getVariant().getReferenceAllele(), - g.getVariant().getAlternateAllele(), + g.getReferenceAllele(), + g.getAlternateAllele(), g.getAlleles().toString, g.getSampleId(), - g.getVariantCallingAnnotations().getFiltersApplied().toString, - g.getVariantCallingAnnotations().getFiltersPassed().toString, - g.getVariantCallingAnnotations().getFiltersFailed().toString + g.getFiltersApplied().toString, + g.getFiltersPassed().toString, + g.getFiltersFailed().toString )).toArray 
println("\nGenotype Filters\n" + new ASCIITable(header, rows).toString) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala index 78e27ede33..9fac68beab 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala @@ -22,7 +22,7 @@ import com.google.common.collect.ImmutableMap import htsjdk.samtools.ValidationStringency import htsjdk.variant.utils.SAMSequenceDictionaryExtractor import htsjdk.variant.variantcontext.{ - Allele, + Allele => HtsjdkAllele, Genotype => HtsjdkGenotype, GenotypeBuilder, GenotypeType, @@ -55,30 +55,30 @@ class VariantContextConverterSuite extends ADAMFunSuite { val lenient = ValidationStringency.LENIENT val converter = new VariantContextConverter(DefaultHeaderLines.allHeaderLines, - lenient, false) + lenient) val strictConverter = new VariantContextConverter(DefaultHeaderLines.allHeaderLines, - ValidationStringency.STRICT, false) + ValidationStringency.STRICT) def htsjdkSNVBuilder: VariantContextBuilder = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("T"))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T"))) .start(1L) .stop(1L) .chr("1") def htsjdkMultiAllelicSNVBuilder: VariantContextBuilder = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("T"), Allele.create("G"))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T"), HtsjdkAllele.create("G"))) .start(1L) .stop(1L) .chr("1") def htsjdkRefSNV: VariantContextBuilder = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("", false))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("", false))) .start(1L) .stop(1L) .chr("1") def htsjdkCNVBuilder: 
VariantContextBuilder = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("", false))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("", false))) .start(10L) .stop(20L) .chr("1") @@ -106,7 +106,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("Convert somatic htsjdk site-only SNV to ADAM") { val vcb: VariantContextBuilder = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("T"))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T"))) .start(1L) .stop(1L) .chr("1") @@ -151,7 +151,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { val adamGTs = adamVCs.flatMap(_.genotypes) assert(adamGTs.length === 1) val adamGT = adamGTs.head - assert(adamGT.getAlleles.sameElements(List(GenotypeAllele.REF, GenotypeAllele.ALT))) + assert(adamGT.getAlleles.sameElements(List(Allele.REF, Allele.ALT))) assert(adamGT.getPhaseSetId === 1) assert(adamGT.getPhaseQuality === 50) } @@ -195,18 +195,18 @@ class VariantContextConverterSuite extends ADAMFunSuite { val adamVCs = converter.convert(vcb.make) val adamGT = adamVCs.flatMap(_.genotypes).head // htsjdk does not distinguish between filters not applied and filters passed in Genotype - assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) - assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true) - assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty) + assert(adamGT.getAnnotation.getFiltersApplied === true) + assert(adamGT.getAnnotation.getFiltersPassed === true) + assert(adamGT.getAnnotation.getFiltersFailed.isEmpty) } { // PASSing gb.filter("PASS") vcb.genotypes(gb.make) val adamVCs = converter.convert(vcb.make) val adamGT = adamVCs.flatMap(_.genotypes).head - assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) - assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true) - 
assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty) + assert(adamGT.getAnnotation.getFiltersApplied === true) + assert(adamGT.getAnnotation.getFiltersPassed === true) + assert(adamGT.getAnnotation.getFiltersFailed.isEmpty) } { // not PASSing gb.filter("LowMQ") @@ -214,9 +214,9 @@ class VariantContextConverterSuite extends ADAMFunSuite { val adamVCs = converter.convert(vcb.make) val adamGT = adamVCs.flatMap(_.genotypes).head - assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true) - assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === false) - assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.sameElements(List("LowMQ"))) + assert(adamGT.getAnnotation.getFiltersApplied === true) + assert(adamGT.getAnnotation.getFiltersPassed === false) + assert(adamGT.getAnnotation.getFiltersFailed.sameElements(List("LowMQ"))) } } @@ -230,8 +230,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(htsjdkVC.getContig === "1") assert(htsjdkVC.getStart === 1) assert(htsjdkVC.getEnd === 1) - assert(htsjdkVC.getReference === Allele.create("A", true)) - assert(htsjdkVC.getAlternateAlleles.sameElements(List(Allele.create("T")))) + assert(htsjdkVC.getReference === HtsjdkAllele.create("A", true)) + assert(htsjdkVC.getAlternateAlleles.sameElements(List(HtsjdkAllele.create("T")))) assert(!htsjdkVC.hasLog10PError) assert(!htsjdkVC.hasID) assert(!htsjdkVC.filtersWereApplied) @@ -240,13 +240,17 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("Convert ADAM SNV w/ genotypes to htsjdk") { val variant = adamSNVBuilder().build val genotype = Genotype.newBuilder - .setVariant(variant) + .setStart(variant.getStart) + .setEnd(variant.getEnd) + .setReferenceAllele(variant.getReferenceAllele) + .setAlternateAllele(variant.getAlternateAllele) + .setSplitFromMultiallelic(variant.getSplitFromMultiallelic) .setSampleId("NA12878") .setStrandBiasComponents(List(0, 2, 4, 6).map(i => i: java.lang.Integer)) - 
.setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT)) - .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder() + .setAlleles(List(Allele.REF, Allele.ALT)) + .setAnnotation(GenotypeAnnotation.newBuilder() .setFisherStrandBiasPValue(3.0f) - .setRmsMapQ(0.0f) + .setRmsMappingQuality(0.0f) .setMapq0Reads(5) .build) .build @@ -274,11 +278,15 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("Convert ADAM SNV w/ genotypes but bad SB to htsjdk with strict validation") { val variant = adamSNVBuilder().build val genotype = Genotype.newBuilder - .setVariant(variant) + .setStart(variant.getStart) + .setEnd(variant.getEnd) + .setReferenceAllele(variant.getReferenceAllele) + .setAlternateAllele(variant.getAlternateAllele) + .setSplitFromMultiallelic(variant.getSplitFromMultiallelic) .setSampleId("NA12878") .setStrandBiasComponents(List(0, 2).map(i => i: java.lang.Integer)) - .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT)) - .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder + .setAlleles(List(Allele.REF, Allele.ALT)) + .setAnnotation(GenotypeAnnotation.newBuilder() .setFisherStrandBiasPValue(3.0f) - .setRmsMapQ(0.0f) + .setRmsMappingQuality(0.0f) .setMapq0Reads(5) @@ -293,13 +301,17 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("Convert ADAM SNV w/ genotypes but bad SB to htsjdk with lenient validation") { val variant = adamSNVBuilder().build val genotype = Genotype.newBuilder - .setVariant(variant) + .setStart(variant.getStart) + .setEnd(variant.getEnd) + .setReferenceAllele(variant.getReferenceAllele) + .setAlternateAllele(variant.getAlternateAllele) + .setSplitFromMultiallelic(variant.getSplitFromMultiallelic) .setSampleId("NA12878") .setStrandBiasComponents(List(0, 2).map(i => i: java.lang.Integer)) - .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT)) - .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder + .setAlleles(List(Allele.REF, Allele.ALT)) + .setAnnotation(GenotypeAnnotation.newBuilder()
.setFisherStrandBiasPValue(3.0f) - .setRmsMapQ(0.0f) + .setRmsMappingQuality(0.0f) .setMapq0Reads(5) .build) .build @@ -325,7 +337,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("Convert htsjdk multi-allelic SNVs to ADAM and back to htsjdk") { - val gb = new GenotypeBuilder("NA12878", List(Allele.create("T"), Allele.create("G"))) + val gb = new GenotypeBuilder("NA12878", List(HtsjdkAllele.create("T"), HtsjdkAllele.create("G"))) gb.AD(Array(4, 2, 3)).PL(Array(59, 0, 181, 1, 66, 102)) val vcb = htsjdkMultiAllelicSNVBuilder @@ -344,14 +356,14 @@ val adamGT1 = adamVCs(0).genotypes.head val adamGT2 = adamVCs(1).genotypes.head - assert(adamGT1.getAlleles.sameElements(List(GenotypeAllele.ALT, GenotypeAllele.OTHER_ALT))) + assert(adamGT1.getAlleles.sameElements(List(Allele.ALT, Allele.OTHER_ALT))) assert(adamGT1.getAlternateReadDepth === 2) assert(adamGT1.getGenotypeLikelihoods .map(d => d: scala.Double) .map(PhredUtils.logProbabilityToPhred) .sameElements(List(59, 0, 181))) - assert(adamGT2.getAlleles.sameElements(List(GenotypeAllele.OTHER_ALT, GenotypeAllele.ALT))) + assert(adamGT2.getAlleles.sameElements(List(Allele.OTHER_ALT, Allele.ALT))) assert(adamGT2.getAlternateReadDepth === 3) assert(adamGT2.getGenotypeLikelihoods .map(d => d: scala.Double) @@ -377,7 +389,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("Convert gVCF reference records to ADAM") { - val gb = new GenotypeBuilder("NA12878", List(Allele.create("A", true), Allele.create("A", true))) + val gb = new GenotypeBuilder("NA12878", List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("A", true))) gb.PL(Array(0, 1, 2)).DP(44).attribute("MIN_DP", 38) val vcb = htsjdkRefSNV @@ -390,7 +402,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamGTs.length === 1) val adamGT = adamGTs.head - assert(adamGT.getVariant.getAlternateAllele === null) + assert(adamGT.getAlternateAllele === null) - 
assert(adamGT.getAlleles.sameElements(List(GenotypeAllele.REF, GenotypeAllele.REF))) + assert(adamGT.getAlleles.sameElements(List(Allele.REF, Allele.REF))) assert(adamGT.getMinReadDepth === 38) assert(adamGT.getGenotypeLikelihoods.isEmpty) assert(adamGT.getNonReferenceLikelihoods @@ -719,11 +731,11 @@ class VariantContextConverterSuite extends ADAMFunSuite { def buildVca( objMap: Map[String, java.lang.Object], - extractor: (HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => VariantCallingAnnotations.Builder, - fns: Iterable[GenotypeBuilder => GenotypeBuilder] = Iterable.empty): VariantCallingAnnotations = { + extractor: (HtsjdkGenotype, GenotypeAnnotation.Builder, Int, Array[Int]) => GenotypeAnnotation.Builder, + fns: Iterable[GenotypeBuilder => GenotypeBuilder] = Iterable.empty): GenotypeAnnotation = { val htsjdkGenotype = makeGenotype(objMap, fns) extractor(htsjdkGenotype, - VariantCallingAnnotations.newBuilder, + GenotypeAnnotation.newBuilder, 0, Array(0, 1, 2)).build } @@ -796,18 +808,18 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("no rms mapping quality going htsjdk->adam") { val vca = buildVca(Map.empty, - converter.formatRmsMapQ, + converter.formatRmsMappingQuality, fns = Iterable.empty) - assert(vca.getRmsMapQ === null) + assert(vca.getRmsMappingQuality === null) } test("extract rms mapping quality going htsjdk->adam") { val vca = buildVca(Map(("MQ" -> (40.0f: java.lang.Float).asInstanceOf[java.lang.Object])), - converter.formatRmsMapQ, + converter.formatRmsMappingQuality, fns = Iterable.empty) - assert(vca.getRmsMapQ > 39.9f && vca.getRmsMapQ < 40.1f) + assert(vca.getRmsMappingQuality > 39.9f && vca.getRmsMappingQuality < 40.1f) } test("no mq0 going htsjdk->adam") { @@ -1040,7 +1052,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(g.getExtendedAttribute("PQ").asInstanceOf[java.lang.Integer] === 10) } - def emptyVca = VariantCallingAnnotations.newBuilder.build + def emptyVca = 
GenotypeAnnotation.newBuilder.build test("no filter info going adam->htsjdk") { val g = converter.extractFilters(emptyVca, newGb) @@ -1052,7 +1064,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("if filters applied, must set passed/failed going adam->htsjdk") { intercept[IllegalArgumentException] { - val g = converter.extractFilters(VariantCallingAnnotations.newBuilder + val g = converter.extractFilters(GenotypeAnnotation.newBuilder .setFiltersApplied(true) .build, newGb) .make @@ -1060,7 +1072,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("filters passed going adam->htsjdk") { - val g = converter.extractFilters(VariantCallingAnnotations.newBuilder + val g = converter.extractFilters(GenotypeAnnotation.newBuilder .setFiltersApplied(true) .setFiltersPassed(true) .build, newGb) @@ -1074,7 +1086,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { test("if filters failed, must set filters failed going adam->htsjdk") { intercept[IllegalArgumentException] { - val g = converter.extractFilters(VariantCallingAnnotations.newBuilder + val g = converter.extractFilters(GenotypeAnnotation.newBuilder .setFiltersApplied(true) .setFiltersPassed(false) .build, newGb) @@ -1083,7 +1095,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("single filter failed going adam->htsjdk") { - val g = converter.extractFilters(VariantCallingAnnotations.newBuilder + val g = converter.extractFilters(GenotypeAnnotation.newBuilder .setFiltersApplied(true) .setFiltersPassed(false) .setFiltersFailed(Seq("lowmq")) @@ -1095,7 +1107,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("multiple filters failed going adam->htsjdk") { - val g = converter.extractFilters(VariantCallingAnnotations.newBuilder + val g = converter.extractFilters(GenotypeAnnotation.newBuilder .setFiltersApplied(true) .setFiltersPassed(false) .setFiltersFailed(Seq("lowmq", "lowdp")) @@ -1114,7 +1126,7 @@ class VariantContextConverterSuite 
extends ADAMFunSuite { } test("extract fisher strand bias going adam->htsjdk") { - val g = converter.extractFisherStrandBias(VariantCallingAnnotations.newBuilder + val g = converter.extractFisherStrandBias(GenotypeAnnotation.newBuilder .setFisherStrandBiasPValue(20.0f) .build, newGb) .make @@ -1132,7 +1144,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("extract rms mapping quality going adam->htsjdk") { - val g = converter.extractRmsMapQ(VariantCallingAnnotations.newBuilder + val g = converter.extractRmsMapQ(GenotypeAnnotation.newBuilder - .setRmsMapQ(40.0f) + .setRmsMappingQuality(40.0f) .build, newGb) .make @@ -1150,7 +1162,7 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("extract mapping quality 0 reads going adam->htsjdk") { - val g = converter.extractMapQ0(VariantCallingAnnotations.newBuilder + val g = converter.extractMapQ0(GenotypeAnnotation.newBuilder .setMapq0Reads(5) .build, newGb) .make @@ -2250,11 +2262,11 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=0 Type=Flag adam->htsjdk not supported") { - val vca = VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("FLAG", "true")) .build val g = Genotype.newBuilder - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2271,12 +2283,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=1 Type=Integer adam->htsjdk") { - val vca = VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("ONE_INT", "42")) .build val g = Genotype.newBuilder .setSampleId("sample") - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2296,12 +2308,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=4 Type=Integer adam->htsjdk") { - val vca = 
VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("FOUR_INTS", "5,10,15,20")) .build val g = Genotype.newBuilder .setSampleId("sample") - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2326,12 +2338,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=A Type=Integer adam->htsjdk") { - val vca = VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("A_INT", "42")) .build val g = Genotype.newBuilder .setSampleId("sample") - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2353,12 +2365,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=R Type=Integer adam->htsjdk") { - val vca = VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("R_INT", "5,10")) .build val g = Genotype.newBuilder .setSampleId("sample") - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2381,12 +2393,12 @@ class VariantContextConverterSuite extends ADAMFunSuite { } test("VCF FORMAT attribute Number=R Type=String adam->htsjdk") { - val vca = VariantCallingAnnotations.newBuilder + val vca = GenotypeAnnotation.newBuilder .setAttributes(ImmutableMap.of("R_STRING", "foo,bar")) .build val g = Genotype.newBuilder .setSampleId("sample") - .setVariantCallingAnnotations(vca) + .setAnnotation(vca) .build val adamVc = ADAMVariantContext(v, Some(g)) @@ -2448,8 +2460,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("ONE_INT")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("ONE_INT") === 
"42") + assert(adamGt.getAnnotation.getAttributes.containsKey("ONE_INT")) + assert(adamGt.getAnnotation.getAttributes.get("ONE_INT") === "42") } test("VCF FORMAT attribute Number=4 Type=Integer htsjdk->adam") { @@ -2471,8 +2483,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("FOUR_INTS")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("FOUR_INTS") === "5,10,15,20") + assert(adamGt.getAnnotation.getAttributes.containsKey("FOUR_INTS")) + assert(adamGt.getAnnotation.getAttributes.get("FOUR_INTS") === "5,10,15,20") } test("VCF FORMAT attribute Number=4 Type=Float htsjdk->adam") { @@ -2494,8 +2506,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("FOUR_FLOATS")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("FOUR_FLOATS") === "5.0,10.1,15.2,20.3") + assert(adamGt.getAnnotation.getAttributes.containsKey("FOUR_FLOATS")) + assert(adamGt.getAnnotation.getAttributes.get("FOUR_FLOATS") === "5.0,10.1,15.2,20.3") } test("VCF FORMAT attribute Number=A Type=Integer htsjdk->adam") { @@ -2517,8 +2529,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("A_INT")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("A_INT") === "10") + assert(adamGt.getAnnotation.getAttributes.containsKey("A_INT")) + assert(adamGt.getAnnotation.getAttributes.get("A_INT") === "10") } test("VCF FORMAT attribute Number=R Type=Integer htsjdk->adam") { @@ -2540,8 +2552,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - 
assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("R_INT")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("R_INT") === "5,10") + assert(adamGt.getAnnotation.getAttributes.containsKey("R_INT")) + assert(adamGt.getAnnotation.getAttributes.get("R_INT") === "5,10") } test("VCF FORMAT attribute Number=R Type=String htsjdk->adam") { @@ -2563,8 +2575,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("R_STRING")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("R_STRING") === "foo,bar") + assert(adamGt.getAnnotation.getAttributes.containsKey("R_STRING")) + assert(adamGt.getAnnotation.getAttributes.get("R_STRING") === "foo,bar") } test("VCF FORMAT attribute Number=G Type=String htsjdk->adam") { @@ -2586,8 +2598,8 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(adamVc.genotypes.size === 1) val adamGt = adamVc.genotypes.head - assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("STRING_G")) - assert(adamGt.getVariantCallingAnnotations.getAttributes.get("STRING_G") === "foo,bar,baz") + assert(adamGt.getAnnotation.getAttributes.containsKey("STRING_G")) + assert(adamGt.getAnnotation.getAttributes.get("STRING_G") === "foo,bar,baz") } sparkTest("respect end position for symbolic alts") { diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index a99637400e..c99703ea6a 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -19,7 +19,7 @@ package org.bdgenomics.adam.models import htsjdk.samtools.ValidationStringency import htsjdk.variant.variantcontext.{ - Allele, + Allele => HtsjdkAllele, 
GenotypeBuilder, VariantContextBuilder } @@ -675,7 +675,7 @@ class ReferenceRegionSuite extends FunSuite { ValidationStringency.LENIENT, false) val vcb = new VariantContextBuilder() - .alleles(List(Allele.create("A", true), Allele.create("T"))) + .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T"))) .start(1L) .stop(1L) .chr("1") diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 406c952871..ae3b9e9617 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -209,7 +209,7 @@ class ADAMContextSuite extends ADAMFunSuite { assert(vc.genotypes.size === 3) val gt = vc.genotypes.head - assert(gt.getVariantCallingAnnotations != null) + assert(gt.getAnnotation != null) assert(gt.getReadDepth === 20) } @@ -244,7 +244,7 @@ class ADAMContextSuite extends ADAMFunSuite { assert(vcs.rdd.filter(_.variant.variant.getAnnotation().getAttributes().containsKey("NS")) .count === 7) assert(vcs.rdd.flatMap(_.genotypes) - .filter(_.getVariantCallingAnnotations().getAttributes().containsKey("HQ")) + .filter(_.getAnnotation().getAttributes().containsKey("HQ")) .count === 0) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala index 0553384ba2..a9cd10ce6e 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala @@ -79,7 +79,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite { val g0 = Genotype.newBuilder().setVariant(v0) .setSampleId("NA12878") - .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT)) + .setAlleles(List(Allele.REF, Allele.ALT)) .build 
VariantContextDataset(sc.parallelize(List( @@ -97,7 +97,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite { v.getAnnotation.getAttributes().containsKey("MQA") }).count === 1) assert(vc1.toGenotypes.rdd.filter(gt => { - gt.getVariantCallingAnnotations().getAttributes().containsKey("MQA") + gt.getAnnotation().getAttributes().containsKey("MQA") }).count === 1) assert(vc1.rdd.count === 6) } @@ -186,7 +186,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite { assert(alternateReadDepths.forall(rd => (rd == 0 || rd == null))) // ADALL should be zeros or null after splitting ref=GAAGAAAGAAAGA alt=GAAGAAAGA,GAAGA,G ADALL 0,0,0 - val netAlleleDepths = filtered.map(_.getVariantCallingAnnotations.getAttributes.get("ADALL")).collect() + val netAlleleDepths = filtered.map(_.getAnnotation.getAttributes.get("ADALL")).collect() assert(netAlleleDepths.forall(adall => (adall == "0,0" || adall == ""))) } @@ -200,9 +200,9 @@ class VariantContextDatasetSuite extends ADAMFunSuite { assert(variant.getAnnotation.getAttributes.get("BaseQRankSum") === "-Infinity") val genotype = vcs.toGenotypes().rdd.filter(_.getStart == 14396L).first() - assert(genotype.getVariantCallingAnnotations.getRmsMapQ === Float.NegativeInfinity) + assert(genotype.getAnnotation.getRmsMappingQuality === Float.NegativeInfinity) // +Inf FORMAT value --> Infinity after conversion - assert(genotype.getVariantCallingAnnotations.getAttributes.get("float") === "Infinity") + assert(genotype.getAnnotation.getAttributes.get("float") === "Infinity") } sparkTest("support VCFs with `nan` instead of `NaN` float values") { @@ -214,8 +214,8 @@ class VariantContextDatasetSuite extends ADAMFunSuite { assert(variant.getAnnotation.getAttributes.get("ClippingRankSum") === "NaN") val genotype = vcs.toGenotypes().rdd.filter(_.getStart == 14396L).first() - assert(genotype.getVariantCallingAnnotations.getRmsMapQ.isNaN) - assert(genotype.getVariantCallingAnnotations.getAttributes.get("float") === "NaN") + 
assert(genotype.getAnnotation.getRmsMappingQuality.isNaN) assert(genotype.getAnnotation.getAttributes.get("float") === "NaN") } sparkTest("don't lose any variants when piping as VCF") { @@ -268,7 +268,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite { // check for freebayes-specific VCF FORMAT keys val genotype = pipedRdd.toGenotypes.rdd.first for (freebayesFormatKey <- Seq("RO", "QR", "AO", "QA")) { - assert(genotype.getVariantCallingAnnotations.getAttributes.containsKey(freebayesFormatKey)) + assert(genotype.getAnnotation.getAttributes.containsKey(freebayesFormatKey)) } // retrieve accumulated VCF header lines diff --git a/pom.xml b/pom.xml index 121d9acc42..dea305bec6 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ 2.7.5 7.9.2 1.7.25 - 0.12.0 + 0.12.1-SNAPSHOT 0.2.13 2.16.1 1.1.1