diff --git a/adam-core/pom.xml b/adam-core/pom.xml
index 9bdcd7f0e4..0e1281aae4 100644
--- a/adam-core/pom.xml
+++ b/adam-core/pom.xml
@@ -106,6 +106,7 @@
org.bdgenomics.formats.avro.Feature
org.bdgenomics.formats.avro.Fragment
org.bdgenomics.formats.avro.Genotype
+ org.bdgenomics.formats.avro.GenotypeAnnotation
org.bdgenomics.formats.avro.NucleotideContigFragment
org.bdgenomics.formats.avro.OntologyTerm
org.bdgenomics.formats.avro.ProcessingStep
@@ -113,7 +114,6 @@
org.bdgenomics.formats.avro.TranscriptEffect
org.bdgenomics.formats.avro.Variant
org.bdgenomics.formats.avro.VariantAnnotation
- org.bdgenomics.formats.avro.VariantCallingAnnotations
${project.basedir}/target/generated-sources/src/main/scala/org/bdgenomics/adam/sql/Schemas.scala
compile
@@ -134,6 +134,7 @@
org.bdgenomics.formats.avro.Feature
org.bdgenomics.formats.avro.Fragment
org.bdgenomics.formats.avro.Genotype
+ org.bdgenomics.formats.avro.GenotypeAnnotation
org.bdgenomics.formats.avro.NucleotideContigFragment
org.bdgenomics.formats.avro.OntologyTerm
org.bdgenomics.formats.avro.Read
@@ -145,7 +146,6 @@
org.bdgenomics.formats.avro.TranscriptEffect
org.bdgenomics.formats.avro.Variant
org.bdgenomics.formats.avro.VariantAnnotation
- org.bdgenomics.formats.avro.VariantCallingAnnotations
${project.basedir}/target/generated-sources/src/main/scala/org/bdgenomics/adam/projections/Enums.scala
compile
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala
index 96905fe62c..74a71f5ae4 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala
@@ -21,7 +21,7 @@ import com.google.common.base.Splitter
import com.google.common.collect.ImmutableList
import htsjdk.samtools.ValidationStringency
import htsjdk.variant.variantcontext.{
- Allele,
+ Allele => HtsjdkAllele,
Genotype => HtsjdkGenotype,
GenotypeBuilder,
GenotypesContext,
@@ -55,58 +55,23 @@ import scala.collection.mutable.{ Buffer, HashMap }
/**
* Object for converting between htsjdk and ADAM VariantContexts.
- *
- * Handles Variant, Genotype, Allele, and various genotype annotation
- * conversions. Does not handle Variant annotations. Genotype annotations are
- * annotations in the VCF GT field while Variant annotations are annotations
- * contained in the VCF INFO field.
*/
object VariantContextConverter {
- /**
- * If set to true, this property will ensure that the variant.annotation field
- * in the Genotype record is populated after conversion from an htsjdk
- * VariantContext. By default, this property is false.
- */
- val nestAnnotationInGenotypesProperty = "org.bdgenomics.adam.converters.VariantContextConverter.NEST_ANN_IN_GENOTYPES"
-
- /**
- * Sets the value of the nest annotation in genotypes property.
- *
- * @param conf Hadoop configuration to set the property in.
- * @param populateNestedAnn If true, the nested field is populated.
- */
- def setNestAnnotationInGenotypesProperty(conf: Configuration,
- populateNestedAnn: Boolean) {
- conf.setBoolean(nestAnnotationInGenotypesProperty, populateNestedAnn)
- }
-
- /**
- * Gets the value of the nest annotation in genotypes property.
- *
- * @param conf Hadoop configuration to set the property in.
- * @return Returns whether or not to nest the variant annotation under each
- * genotype record.
- */
- private[adam] def getNestAnnotationInGenotypesProperty(
- conf: Configuration): Boolean = {
- conf.getBoolean(nestAnnotationInGenotypesProperty, false)
- }
-
/**
* Representation for an unknown non-ref/symbolic allele in VCF.
*/
- private val NON_REF_ALLELE = Allele.create("", false /* !Reference */ )
+ private val NON_REF_ALLELE = HtsjdkAllele.create("", false /* !Reference */ )
/**
- * The index in the Avro genotype record for the splitFromMultiAllelec field.
+   * The index in the Avro genotype record for the splitFromMultiallelic field.
*
* This field is true if the VCF site was not biallelic.
*/
- private lazy val splitFromMultiAllelicField = Genotype.SCHEMA$.getField("splitFromMultiAllelic")
+ private lazy val splitFromMultiallelicField = Genotype.SCHEMA$.getField("splitFromMultiallelic")
/**
- * One conversion method for each way of representing an Allele
+ * Convert a htsjdk allele to an Avro allele.
*
* An htsjdk Allele can represent a reference or alternate allele call, or a
* site where no call could be made. If the allele is an alternate allele, we
@@ -117,11 +82,11 @@ object VariantContextConverter {
* @param allele The allele we are converting.
* @return The Avro representation for this allele.
*/
- private def convertAllele(vc: HtsjdkVariantContext, allele: Allele): GenotypeAllele = {
- if (allele.isNoCall) GenotypeAllele.NO_CALL
- else if (allele.isReference) GenotypeAllele.REF
- else if (allele == NON_REF_ALLELE || !vc.hasAlternateAllele(allele)) GenotypeAllele.OTHER_ALT
- else GenotypeAllele.ALT
+ private def convertAllele(vc: HtsjdkVariantContext, allele: HtsjdkAllele): Allele = {
+ if (allele.isNoCall) Allele.NO_CALL
+ else if (allele.isReference) Allele.REF
+ else if (allele == NON_REF_ALLELE || !vc.hasAlternateAllele(allele)) Allele.OTHER_ALT
+ else Allele.ALT
}
/**
@@ -133,17 +98,17 @@ object VariantContextConverter {
* @return If the allele is defined, returns a wrapped allele. Else, returns
* a None.
*/
- private def convertAlleleOpt(allele: String, isRef: Boolean = false): Option[Allele] = {
+ private def convertAlleleOpt(allele: String, isRef: Boolean = false): Option[HtsjdkAllele] = {
if (allele == null) {
None
} else {
- Some(Allele.create(allele, isRef))
+ Some(HtsjdkAllele.create(allele, isRef))
}
}
- private val OPT_NON_REF = Some(Allele.create("", false))
+ private val OPT_NON_REF = Some(HtsjdkAllele.create("", false))
- private def optNonRef(v: Variant, hasNonRefModel: Boolean): Option[Allele] = {
+ private def optNonRef(v: Variant, hasNonRefModel: Boolean): Option[HtsjdkAllele] = {
if (hasNonRefModel || v.getAlternateAllele == null) {
OPT_NON_REF
} else {
@@ -160,7 +125,7 @@ object VariantContextConverter {
* @return Returns a Java collection representing the reference allele and any
* alternate allele at the site.
*/
- private def convertAlleles(v: Variant, hasNonRefModel: Boolean): java.util.Collection[Allele] = {
+ private def convertAlleles(v: Variant, hasNonRefModel: Boolean): java.util.Collection[HtsjdkAllele] = {
val asSeq = Seq(convertAlleleOpt(v.getReferenceAllele, true),
convertAlleleOpt(v.getAlternateAllele),
optNonRef(v, hasNonRefModel)).flatten
@@ -178,13 +143,13 @@ object VariantContextConverter {
* @param g The genotype call at a site for a single sample.
* @return Returns the called alleles at this site.
*/
- private def convertAlleles(g: Genotype): java.util.List[Allele] = {
+ private def convertAlleles(g: Genotype): java.util.List[HtsjdkAllele] = {
var alleles = g.getAlleles
- if (alleles == null) return Collections.emptyList[Allele]
+ if (alleles == null) return Collections.emptyList[HtsjdkAllele]
else g.getAlleles.map {
- case GenotypeAllele.NO_CALL | GenotypeAllele.OTHER_ALT => Allele.NO_CALL
- case GenotypeAllele.REF => Allele.create(g.getVariant.getReferenceAllele, true)
- case GenotypeAllele.ALT => Allele.create(g.getVariant.getAlternateAllele)
+ case Allele.NO_CALL | Allele.OTHER_ALT => HtsjdkAllele.NO_CALL
+ case Allele.REF => HtsjdkAllele.create(g.getReferenceAllele, true)
+ case Allele.ALT => HtsjdkAllele.create(g.getAlternateAllele)
}
}
@@ -274,8 +239,7 @@ object VariantContextConverter {
stringency: ValidationStringency,
conf: Configuration): VariantContextConverter = {
new VariantContextConverter(headerLines,
- stringency,
- getNestAnnotationInGenotypesProperty(conf))
+ stringency)
}
}
@@ -291,8 +255,7 @@ object VariantContextConverter {
*/
class VariantContextConverter(
headerLines: Seq[VCFHeaderLine],
- stringency: ValidationStringency,
- setNestedAnnotationInGenotype: Boolean) extends Serializable with Logging {
+ stringency: ValidationStringency) extends Serializable with Logging {
import VariantContextConverter._
// format fns gatk --> bdg, extract fns bdg --> gatk
@@ -300,6 +263,18 @@ class VariantContextConverter(
private val variantExtractFn = makeVariantExtractFn(headerLines)
private val genotypeFormatFn = makeGenotypeFormatFn(headerLines)
private val genotypeExtractFn = makeGenotypeExtractFn(headerLines)
+  private val gvcfBlockRegex = """minGQ=([0-9]+).*maxGQ=([0-9]+).*""".r
+
+  private val gvcfBlocks = extractGvcfBlocks(headerLines)
+
+ private def extractGvcfBlocks(headerLines: Seq[VCFHeaderLine]): Seq[(Int, Int)] = {
+ headerLines
+      .filter(_.getKey().startsWith("GVCFBlock"))
+ .map(_.getValue() match {
+ case gvcfBlockRegex(minGQ, maxGQ) => (minGQ.toInt, maxGQ.toInt)
+ })
+ .toSeq
+ }
/**
* Converts a Scala float to a Java float.
@@ -317,15 +292,6 @@ class VariantContextConverter(
*/
private def jDouble(f: Double): java.lang.Double = f
- private def genotypeVariant(coreVariant: Variant,
- fullVariant: Variant): Variant = {
- if (setNestedAnnotationInGenotype) {
- fullVariant
- } else {
- coreVariant
- }
- }
-
/**
* Converts a GATK variant context into one or more ADAM variant context(s).
*
@@ -338,10 +304,9 @@ class VariantContextConverter(
try {
vc.getAlternateAlleles.toList match {
case List(NON_REF_ALLELE) | List() => {
- val (coreVariant, variant) = variantFormatFn(vc, None, 0, false)
- val v = genotypeVariant(coreVariant, variant)
+ val variant = variantFormatFn(vc, None, 0, false)
val genotypes = vc.getGenotypes.map(g => {
- genotypeFormatFn(g, v, NON_REF_ALLELE, 0, Some(1), false)
+ genotypeFormatFn(g, variant, NON_REF_ALLELE, 0, Some(1), false)
})
return Seq(ADAMVariantContext(variant, genotypes))
}
@@ -350,10 +315,9 @@ class VariantContextConverter(
allele.isNonReference,
"Assertion failed when converting: " + vc.toString
)
- val (coreVariant, variant) = variantFormatFn(vc, Some(allele.getDisplayString), 0, false)
- val v = genotypeVariant(coreVariant, variant)
+ val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0, false)
val genotypes = vc.getGenotypes.map(g => {
- genotypeFormatFn(g, v, allele, 1, None, false)
+ genotypeFormatFn(g, variant, allele, 1, None, false)
})
return Seq(ADAMVariantContext(variant, genotypes))
}
@@ -362,10 +326,9 @@ class VariantContextConverter(
allele.isNonReference,
"Assertion failed when converting: " + vc.toString
)
- val (coreVariant, variant) = variantFormatFn(vc, Some(allele.getDisplayString), 0, false)
- val v = genotypeVariant(coreVariant, variant)
+ val variant = variantFormatFn(vc, Some(allele.getDisplayString), 0, false)
val genotypes = vc.getGenotypes.map(g => {
- genotypeFormatFn(g, v, allele, 1, Some(2), false)
+ genotypeFormatFn(g, variant, allele, 1, Some(2), false)
})
return Seq(ADAMVariantContext(variant, genotypes))
}
@@ -392,13 +355,12 @@ class VariantContextConverter(
// variant annotations only contain values for alternate alleles so
// we need to subtract one from real index
val variantIdx = idx - 1
- val (coreVariant, variant) = variantFormatFn(vc,
+ val variant = variantFormatFn(vc,
Some(allele.getDisplayString),
variantIdx,
true)
- val v = genotypeVariant(coreVariant, variant)
val genotypes = vc.getGenotypes.map(g => {
- genotypeFormatFn(g, v, allele, idx, referenceModelIndex, true)
+ genotypeFormatFn(g, variant, allele, idx, referenceModelIndex, true)
})
ADAMVariantContext(variant, genotypes)
})
@@ -857,200 +819,412 @@ class VariantContextConverter(
// htsjdk --> genotype format functions
- private[converters] def formatAllelicDepth(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
+ private def tryAndCatchStringCast[T](attr: java.lang.Object,
+ tryFn: (java.lang.Object) => T,
+ catchFn: (String) => T): T = {
+ try {
+ tryFn(attr)
+ } catch {
+ case cce: ClassCastException => {
+ // is this a string? if so, parse...
+ if (attr.getClass.isAssignableFrom(classOf[String])) {
+ catchFn(attr.asInstanceOf[String])
+ } else {
+ throw cce
+ }
+ }
+ case t: Throwable => throw t
+ }
+ }
+
+ private[converters] def formatFilters(g: HtsjdkGenotype,
+ gb: Genotype.Builder,
+ idx: Int,
+ indices: Array[Int]): Genotype.Builder = {
+ // see https://github.com/samtools/htsjdk/issues/741
+ val gtFiltersWereApplied = true
+ if (gtFiltersWereApplied) {
+ val filtersWereApplied = gb.setFiltersApplied(true)
+ if (g.isFiltered) {
+ filtersWereApplied.setFiltersPassed(false)
+ .setFiltersFailed(g.getFilters.split(";").toList)
+ } else {
+ filtersWereApplied.setFiltersPassed(true)
+ }
+ } else {
+ gb.setFiltersApplied(false)
+ }
+ }
+
+ private[converters] def formatPhaseSet(g: HtsjdkGenotype,
+ gb: Genotype.Builder,
+ gIdx: Int,
+ gIndices: Array[Int]): Genotype.Builder = {
+ if (g.isPhased) {
+ gb.setPhased(true)
+
+ Option(g.getExtendedAttribute("PS"))
+ .map(attr => {
+ tryAndCatchStringCast(attr, attribute => {
+ gb.setPhaseSet(attribute.asInstanceOf[java.lang.Integer])
+ }, attribute => {
+ gb.setPhaseSet(attribute.toInt)
+ })
+ })
+ }
+ gb
+ }
+
+ private val genotypeFormatFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable(
+ formatFilters(_, _, _, _),
+ formatPhaseSet(_, _, _, _)
+ )
+
+ // genotype --> htsjdk extract functions
+
+ private[converters] def extractFilters(g: Genotype,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(g.getFiltersApplied)
+ .filter(ft => ft)
+ .map(applied => {
+ Option(g.getFiltersPassed).map(passed => {
+ if (passed) {
+ gb.filters("PASS")
+ } else {
+ val failedFilters = g.getFiltersFailed
+ require(failedFilters.nonEmpty,
+ "Genotype marked as filtered, but no failed filters listed in %s.".format(g))
+ gb.filters(failedFilters.mkString(";"))
+ }
+ }).getOrElse({
+ throw new IllegalArgumentException("Genotype filters were applied but filters passed is null in %s.".format(g))
+ })
+ }).getOrElse(gb.unfiltered())
+ }
+
+ private[converters] def extractPhaseSet(g: Genotype,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(g.getPhased)
+ .filter(p => p)
+ .map(p => {
+ Option(g.getPhaseSet).map(gb.attribute("PS", _))
+ gb.phased(true)
+ }).getOrElse(gb.phased(false))
+ }
+
+ private val genotypeExtractFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable(
+ extractFilters(_, _),
+ extractPhaseSet(_, _)
+ )
+
+ // htsjdk --> genotype annotation format functions
+
+ private[converters] def formatReadDepths(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ gIdx: Int,
+ gIndices: Array[Int]): GenotypeAnnotation.Builder = {
// AD is an array type field
if (g.hasAD && gIdx < g.getAD.size) {
val ad = g.getAD
- gb.setReferenceReadDepth(ad(0))
+ gab.setReferenceReadDepth(ad(0))
.setAlternateReadDepth(ad(gIdx))
} else {
- gb
+ gab
}
}
- private[converters] def formatReadDepth(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
- if (g.hasDP) {
- gb.setReadDepth(g.getDP)
- } else {
- gb
- }
+ private[converters] def formatForwardReadDepths(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ gIdx: Int,
+ gIndices: Array[Int]): GenotypeAnnotation.Builder = {
+ Option(g.getExtendedAttribute("ADF"))
+ .map(attr => {
+ val adf = toIntArray(attr)
+ try {
+ gab.setReferenceForwardReadDepth(adf(0))
+ .setAlternateForwardReadDepth(adf(gIdx))
+ } catch {
+ case _: ArrayIndexOutOfBoundsException => {
+ log.warn("Ran into Array Out of Bounds when accessing index %s for field ADF of genotype %s.".format(gIdx, g))
+ gab
+ }
+ }
+ }).getOrElse(gab)
}
- private[converters] def formatMinReadDepth(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
- Option(g.getExtendedAttribute("MIN_DP", null))
+ private[converters] def formatReverseReadDepths(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ gIdx: Int,
+ gIndices: Array[Int]): GenotypeAnnotation.Builder = {
+ Option(g.getExtendedAttribute("ADR"))
.map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- gb.setMinReadDepth(attribute.asInstanceOf[java.lang.Integer])
- }, attribute => {
- gb.setMinReadDepth(attribute.toInt)
- })
- }).getOrElse(gb)
+ val adr = toIntArray(attr)
+ try {
+ gab.setReferenceReverseReadDepth(adr(0))
+ .setAlternateReverseReadDepth(adr(gIdx))
+ } catch {
+ case _: ArrayIndexOutOfBoundsException => {
+ log.warn("Ran into Array Out of Bounds when accessing index %s for field ADR of genotype %s.".format(gIdx, g))
+ gab
+ }
+ }
+ }).getOrElse(gab)
}
- private[converters] def formatGenotypeQuality(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
- if (g.hasGQ) {
- gb.setGenotypeQuality(g.getGQ)
+ private[converters] def formatReadDepth(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+ if (g.hasDP) {
+ gab.setReadDepth(g.getDP)
} else {
- gb
+ gab
}
}
- private[converters] def formatGenotypeLikelihoods(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
+ private[converters] def formatLikelihoods(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+ // GL Number=G Type=Float
+ // Convert GL from log10 scale to log scale, optionally convert PL from phred scale to log scale
+
if (g.hasPL) {
val pl = g.getPL
try {
- val likelihoods = gIndices.map(idx => {
+ val likelihoods = indices.map(idx => {
jDouble(PhredUtils.phredToLogProbability(pl(idx)))
}).toList
- gb.setGenotypeLikelihoods(likelihoods)
+ gab.setLikelihoods(likelihoods)
} catch {
case _: ArrayIndexOutOfBoundsException => {
- log.warn("Ran into Array Out of Bounds when accessing indices %s of genotype %s.".format(gIndices.mkString(","), g))
- gb
+ log.warn("Ran into Array Out of Bounds when accessing indices %s for field PL of genotype %s.".format(indices.mkString(","), g))
+ gab
}
}
} else {
- gb
+ gab
}
}
- private[converters] def formatNonRefGenotypeLikelihoods(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIndices: Array[Int]): Genotype.Builder = {
+ private[converters] def formatNonReferenceLikelihoods(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+
+ /*
+
+ This looks correct, with the exception that nonReferenceLikelihoods shouldn't go into
+ a new VCF key, but that nonReferenceLikelihoods encodes the GL's for the symbolic non-ref
+ (*) alt in a gVCF line.
+
+ Handling this gets a bit complex, but the way to think of it is that if alternateAllele is
+ non-null, we get a VCF line with two alts (alternateAllele, and *). In the diploid case, we'd
+ get GL's:
+
+ likelihoods(0), likelihoods(1), likelihoods(2), nonRefLikelihoods(1), nonRefLikelihoods(2), 0?.
+
+ I'm not entirely sure what should go in the last spot, which would be the GT=1/2 likelihood,
+ which I feel is not well defined. If alternateAllele is null, then we get a VCF line with just *
+ as an alt and with GL=nonRefLikelihoods.
+
+ */
if (g.hasPL) {
val pl = g.getPL
- gb.setNonReferenceLikelihoods(gIndices.map(idx => {
+ gab.setNonReferenceLikelihoods(indices.map(idx => {
jDouble(PhredUtils.phredToLogProbability(pl(idx)))
}).toList)
} else {
- gb
+ gab
}
}
- private def tryAndCatchStringCast[T](attr: java.lang.Object,
- tryFn: (java.lang.Object) => T,
- catchFn: (String) => T): T = {
- try {
- tryFn(attr)
- } catch {
- case cce: ClassCastException => {
+ private[converters] def formatPriors(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
- // is this a string? if so, parse...
- if (attr.getClass.isAssignableFrom(classOf[String])) {
- catchFn(attr.asInstanceOf[String])
- } else {
- throw cce
+ Option(g.getExtendedAttribute("priors"))
+ .map(attr => {
+ val p = toFloatArray(attr)
+ try {
+ val priors = indices.map(idx => {
+ jFloat(p(idx))
+ }).toList
+ gab.setPriors(priors)
+ } catch {
+ case _: ArrayIndexOutOfBoundsException => {
+ log.warn("Ran into Array Out of Bounds when accessing indices %s for field priors of genotype %s.".format(indices.mkString(","), g))
+ gab
+ }
}
- }
- case t: Throwable => throw t
+ }).getOrElse(gab)
+ }
+
+ private[converters] def formatPosteriors(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+
+ Option(g.getExtendedAttribute("GP"))
+ .map(attr => {
+ val gp = toFloatArray(attr)
+ try {
+ val posteriors = indices.map(idx => {
+ // todo: convert GP from log10 to log scale
+ jFloat(gp(idx))
+ }).toList
+ gab.setPosteriors(posteriors)
+ } catch {
+ case _: ArrayIndexOutOfBoundsException => {
+ log.warn("Ran into Array Out of Bounds when accessing indices %s for field GP of genotype %s.".format(indices.mkString(","), g))
+ gab
+ }
+ }
+ }).getOrElse(gab)
+ }
+
+ private[converters] def formatQuality(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+ if (g.hasGQ) {
+ val gq = g.getGQ
+ gab.setQuality(gq)
+
+ gvcfBlocks.foreach(kv => {
+ val minGQ = kv._1
+ val maxGQ = kv._2
+ if (gq >= minGQ && gq < maxGQ) {
+ gab.setMinimumQuality(minGQ)
+ gab.setMaximumQuality(maxGQ)
+ }
+ })
}
+ gab
}
- private[converters] def formatStrandBiasComponents(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
- Option(g.getExtendedAttribute("SB"))
+ private[converters] def formatRmsMappingQuality(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+
+ Option(g.getExtendedAttribute("MQ"))
.map(attr => {
- tryAndCatchStringCast(
- attr,
- attribute => {
- gb.setStrandBiasComponents(attribute.asInstanceOf[Array[java.lang.Integer]].toSeq)
- }, attribute => {
- val components = attribute.split(",")
- require(components.size == 4,
- "Strand bias components must have 4 entries. Saw %s in %s.".format(
- attr, g))
+ tryAndCatchStringCast(attr, attribute => {
+ gab.setRmsMappingQuality(attribute.asInstanceOf[java.lang.Integer])
+ }, attribute => {
+ gab.setRmsMappingQuality(attribute.toInt)
+ })
+ }).getOrElse(gab)
+ }
- gb.setStrandBiasComponents(components.map(e => e.toInt: java.lang.Integer)
- .toSeq)
- })
- }).getOrElse(gb)
+ private[converters] def formatPhaseSetQuality(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+
+ Option(g.getExtendedAttribute("PQ"))
+ .map(attr => {
+ tryAndCatchStringCast(attr, attribute => {
+ gab.setPhaseSetQuality(attribute.asInstanceOf[java.lang.Integer])
+ }, attribute => {
+ gab.setPhaseSetQuality(attribute.toInt)
+ })
+ }).getOrElse(gab)
}
- private[converters] def formatPhaseInfo(g: HtsjdkGenotype,
- gb: Genotype.Builder,
- gIdx: Int,
- gIndices: Array[Int]): Genotype.Builder = {
- if (g.isPhased) {
- gb.setPhased(true)
+ private[converters] def formatMinimumReadDepth(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
- Option(g.getExtendedAttribute(VCFConstants.PHASE_SET_KEY))
- .map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- gb.setPhaseSetId(attribute.asInstanceOf[java.lang.Integer])
- }, attribute => {
- gb.setPhaseSetId(attribute.toInt)
- })
+ Option(g.getExtendedAttribute("MIN_DP", null))
+ .map(attr => {
+ tryAndCatchStringCast(attr, attribute => {
+ gab.setMinimumReadDepth(attribute.asInstanceOf[java.lang.Integer])
+ }, attribute => {
+ gab.setMinimumReadDepth(attribute.toInt)
})
+ }).getOrElse(gab)
+ }
- Option(g.getExtendedAttribute(VCFConstants.PHASE_QUALITY_KEY))
- .map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- gb.setPhaseQuality(attribute.asInstanceOf[java.lang.Integer])
- }, attribute => {
- gb.setPhaseQuality(attribute.toInt)
- })
+ private[converters] def formatFisherStrandBiasPValue(g: HtsjdkGenotype,
+ gab: GenotypeAnnotation.Builder,
+ idx: Int,
+ indices: Array[Int]): GenotypeAnnotation.Builder = {
+
+ Option(g.getExtendedAttribute("FS"))
+ .map(attr => {
+ tryAndCatchStringCast(attr, attribute => {
+ gab.setFisherStrandBiasPValue(attribute.asInstanceOf[java.lang.Float])
+ }, attribute => {
+ gab.setFisherStrandBiasPValue(toFloat(attribute))
})
- }
- gb
+ }).getOrElse(gab)
}
- private val genotypeFormatFns: Iterable[(HtsjdkGenotype, Genotype.Builder, Int, Array[Int]) => Genotype.Builder] = Iterable(
- formatAllelicDepth(_, _, _, _),
+ private val genotypeAnnotationFormatFns: Iterable[(HtsjdkGenotype, GenotypeAnnotation.Builder, Int, Array[Int]) => GenotypeAnnotation.Builder] = Iterable(
+ formatReadDepths(_, _, _, _),
+ formatForwardReadDepths(_, _, _, _),
+ formatReverseReadDepths(_, _, _, _),
formatReadDepth(_, _, _, _),
- formatMinReadDepth(_, _, _, _),
- formatGenotypeQuality(_, _, _, _),
- formatGenotypeLikelihoods(_, _, _, _),
- formatStrandBiasComponents(_, _, _, _),
- formatPhaseInfo(_, _, _, _)
+ formatLikelihoods(_, _, _, _),
+ formatNonReferenceLikelihoods(_, _, _, _),
+ formatPriors(_, _, _, _),
+ formatPosteriors(_, _, _, _),
+ formatQuality(_, _, _, _),
+ formatRmsMappingQuality(_, _, _, _),
+ formatPhaseSetQuality(_, _, _, _),
+ formatMinimumReadDepth(_, _, _, _),
+ formatFisherStrandBiasPValue(_, _, _, _)
)
- // genotype --> htsjdk extract functions
+ // genotype annotation --> htsjdk extract functions
- private[converters] def extractAllelicDepth(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
- (Option(g.getReferenceReadDepth), Option(g.getAlternateReadDepth)) match {
+ private[converters] def extractReadDepths(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+
+ (Option(ga.getReferenceReadDepth), Option(ga.getAlternateReadDepth)) match {
case (Some(ref), Some(alt)) => gb.AD(Array(ref, alt))
case (Some(_), None) => {
- throw new IllegalArgumentException("Had reference depth but no alternate depth in %s.".format(g))
+ throw new IllegalArgumentException("Had reference depth but no alternate depth in %s.".format(ga))
}
case (None, Some(_)) => {
- throw new IllegalArgumentException("Had alternate depth but no reference depth in %s.".format(g))
+ throw new IllegalArgumentException("Had alternate depth but no reference depth in %s.".format(ga))
}
case _ => gb.noAD
}
}
- private[converters] def extractReadDepth(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(g.getReadDepth).fold(gb.noDP)(dp => gb.DP(dp))
+ private[converters] def extractForwardReadDepths(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+
+ (Option(ga.getReferenceForwardReadDepth), Option(ga.getAlternateForwardReadDepth)) match {
+ case (Some(ref), Some(alt)) => gb.attribute("ADF", ImmutableList.of(ref, alt))
+ case (None, Some(_)) => throw new IllegalArgumentException("Forward read depth specified without reference forward read depth")
+ case (Some(_), None) => throw new IllegalArgumentException("Reference forward read depth specified without forward read depth")
+ case _ =>
+ }
+ gb
}
- private[converters] def extractMinReadDepth(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(g.getMinReadDepth).fold(gb)(minDp => gb.attribute("MIN_DP", minDp))
+ private[converters] def extractReverseReadDepths(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+
+ (Option(ga.getReferenceReverseReadDepth), Option(ga.getAlternateReverseReadDepth)) match {
+ case (Some(ref), Some(alt)) => gb.attribute("ADR", ImmutableList.of(ref, alt))
+ case (None, Some(_)) => throw new IllegalArgumentException("Reverse read depth specified without reference reverse read depth")
+ case (Some(_), None) => throw new IllegalArgumentException("Reference reverse read depth specified without reverse read depth")
+ case _ =>
+ }
+ gb
}
- private[converters] def extractGenotypeQuality(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(g.getGenotypeQuality).fold(gb.noGQ)(gq => gb.GQ(gq))
+ private[converters] def extractReadDepth(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getReadDepth).fold(gb.noDP)(gb.DP(_))
}
private[converters] def numPls(copyNumber: Int): Int = {
@@ -1089,6 +1263,7 @@ class VariantContextConverter(
array
}
+ /*
private[converters] def extractGenotypeLikelihoods(g: Genotype,
gb: GenotypeBuilder): GenotypeBuilder = {
val gls = g.getGenotypeLikelihoods
@@ -1113,167 +1288,68 @@ class VariantContextConverter(
gb.PL(nonRefPls(gls, nls))
}
}
+ */
- private[converters] def extractStrandBiasComponents(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
-
- val components = g.getStrandBiasComponents
-
- if (components.isEmpty) {
- gb
- } else {
- require(components.size == 4,
- "Illegal strand bias components length. Must be empty or 4. In:\n%s".format(g))
- gb.attribute("SB", components.map(i => i: Int).toArray)
- }
+ private[converters] def extractLikelihoods(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ gb
}
- private[converters] def extractPhaseInfo(g: Genotype,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(g.getPhased)
- .filter(p => p)
- .map(p => {
- val setFns: Iterable[Option[(GenotypeBuilder => GenotypeBuilder)]] = Iterable(
- Option(g.getPhaseSetId).map(ps => {
- (b: GenotypeBuilder) => b.attribute("PS", ps)
- }),
- Option(g.getPhaseQuality).map(pq => {
- (b: GenotypeBuilder) => b.attribute("PQ", pq)
- }))
-
- setFns.flatten
- .foldLeft(gb.phased(true))((b, fn) => fn(b))
- }).getOrElse(gb.phased(false))
+ private[converters] def extractNonReferenceLikelihoods(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ gb
}
- private val genotypeExtractFns: Iterable[(Genotype, GenotypeBuilder) => GenotypeBuilder] = Iterable(
- extractAllelicDepth(_, _),
- extractReadDepth(_, _),
- extractMinReadDepth(_, _),
- extractGenotypeQuality(_, _),
- extractGenotypeLikelihoods(_, _),
- extractStrandBiasComponents(_, _),
- extractPhaseInfo(_, _)
- )
-
- // htsjdk --> genotype annotation format functions
-
- private[converters] def formatFilters(g: HtsjdkGenotype,
- vcab: VariantCallingAnnotations.Builder,
- idx: Int,
- indices: Array[Int]): VariantCallingAnnotations.Builder = {
- // see https://github.com/samtools/htsjdk/issues/741
- val gtFiltersWereApplied = true
- if (gtFiltersWereApplied) {
- val filtersWereApplied = vcab.setFiltersApplied(true)
- if (g.isFiltered) {
- filtersWereApplied.setFiltersPassed(false)
- .setFiltersFailed(g.getFilters.split(";").toList)
- } else {
- filtersWereApplied.setFiltersPassed(true)
- }
- } else {
- vcab.setFiltersApplied(false)
- }
+ private[converters] def extractPriors(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getPriors).fold(gb)(gb.attribute("priors", _))
}
- private[converters] def formatFisherStrandBias(g: HtsjdkGenotype,
- vcab: VariantCallingAnnotations.Builder,
- idx: Int,
- indices: Array[Int]): VariantCallingAnnotations.Builder = {
- Option(g.getExtendedAttribute("FS"))
- .map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- vcab.setFisherStrandBiasPValue(attribute.asInstanceOf[java.lang.Float])
- }, attribute => {
- vcab.setFisherStrandBiasPValue(toFloat(attribute))
- })
- }).getOrElse(vcab)
+ private[converters] def extractPosteriors(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ // todo: convert back from log scale to log10 scale
+ Option(ga.getPosteriors).fold(gb)(gb.attribute("GP", _))
}
- private[converters] def formatRmsMapQ(g: HtsjdkGenotype,
- vcab: VariantCallingAnnotations.Builder,
- idx: Int,
- indices: Array[Int]): VariantCallingAnnotations.Builder = {
- Option(g.getExtendedAttribute("MQ"))
- .map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- vcab.setRmsMapQ(attribute.asInstanceOf[java.lang.Float])
- }, attribute => {
- vcab.setRmsMapQ(toFloat(attribute))
- })
- }).getOrElse(vcab)
+ private[converters] def extractQuality(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getQuality).fold(gb.noGQ)(gb.GQ(_))
}
- private[converters] def formatMapQ0(g: HtsjdkGenotype,
- vcab: VariantCallingAnnotations.Builder,
- idx: Int,
- indices: Array[Int]): VariantCallingAnnotations.Builder = {
- Option(g.getExtendedAttribute("MQ0"))
- .map(attr => {
- tryAndCatchStringCast(attr, attribute => {
- vcab.setMapq0Reads(attribute.asInstanceOf[java.lang.Integer])
- }, attribute => {
- vcab.setMapq0Reads(attribute.toInt)
- })
- }).getOrElse(vcab)
+ private[converters] def extractRmsMappingQuality(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getRmsMappingQuality).fold(gb)(gb.attribute("MQ", _))
}
- private val genotypeAnnotationFormatFns: Iterable[(HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => VariantCallingAnnotations.Builder] = Iterable(
- formatFilters(_, _, _, _),
- formatFisherStrandBias(_, _, _, _),
- formatRmsMapQ(_, _, _, _),
- formatMapQ0(_, _, _, _)
- )
-
- // genotype annotation --> htsjdk extract functions
-
- private[converters] def extractFilters(vca: VariantCallingAnnotations,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(vca.getFiltersApplied)
- .filter(ft => ft)
- .map(applied => {
- Option(vca.getFiltersPassed).map(passed => {
- if (passed) {
- gb.filters("PASS")
- } else {
- val failedFilters = vca.getFiltersFailed
- require(failedFilters.nonEmpty,
- "Genotype marked as filtered, but no failed filters listed in %s.".format(vca))
- gb.filters(failedFilters.mkString(";"))
- }
- }).getOrElse({
- throw new IllegalArgumentException("Filters were applied but filters passed is null in %s.".format(vca))
- })
- }).getOrElse(gb.unfiltered())
+ private[converters] def extractPhaseSetQuality(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getPhaseSetQuality).fold(gb)(gb.attribute("PQ", _))
}
- private[converters] def extractFisherStrandBias(vca: VariantCallingAnnotations,
+ private[converters] def extractMinimumReadDepth(ga: GenotypeAnnotation,
gb: GenotypeBuilder): GenotypeBuilder = {
- Option(vca.getFisherStrandBiasPValue).map(fs => {
- gb.attribute("FS", fs)
- }).getOrElse(gb)
- }
-
- private[converters] def extractRmsMapQ(vca: VariantCallingAnnotations,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(vca.getRmsMapQ).map(mq => {
- gb.attribute("MQ", mq)
- }).getOrElse(gb)
+ Option(ga.getMinimumReadDepth).fold(gb)(gb.attribute("MIN_DP", _))
}
- private[converters] def extractMapQ0(vca: VariantCallingAnnotations,
- gb: GenotypeBuilder): GenotypeBuilder = {
- Option(vca.getMapq0Reads).map(mq0 => {
- gb.attribute("MQ0", mq0)
- }).getOrElse(gb)
+ private[converters] def extractFisherStrandBiasPValue(ga: GenotypeAnnotation,
+ gb: GenotypeBuilder): GenotypeBuilder = {
+ Option(ga.getFisherStrandBiasPValue).fold(gb)(gb.attribute("FS", _))
}
- private val genotypeAnnotationExtractFns: Iterable[(VariantCallingAnnotations, GenotypeBuilder) => GenotypeBuilder] = Iterable(
- extractFilters(_, _),
- extractFisherStrandBias(_, _),
- extractRmsMapQ(_, _),
- extractMapQ0(_, _)
+ private val genotypeAnnotationExtractFns: Iterable[(GenotypeAnnotation, GenotypeBuilder) => GenotypeBuilder] = Iterable(
+ extractReadDepths(_, _),
+ extractForwardReadDepths(_, _),
+ extractReverseReadDepths(_, _),
+ extractReadDepth(_, _),
+ extractLikelihoods(_, _),
+ extractNonReferenceLikelihoods(_, _),
+ extractPriors(_, _),
+ extractPosteriors(_, _),
+ extractQuality(_, _),
+ extractRmsMappingQuality(_, _),
+ extractPhaseSetQuality(_, _),
+ extractMinimumReadDepth(_, _),
+ extractFisherStrandBiasPValue(_, _)
)
// safe type conversions
@@ -1639,7 +1715,7 @@ class VariantContextConverter(
private def makeVariantFormatFn(
headerLines: Seq[VCFHeaderLine],
- stringency: ValidationStringency = ValidationStringency.STRICT): (HtsjdkVariantContext, Option[String], Int, Boolean) => (Variant, Variant) = {
+ stringency: ValidationStringency = ValidationStringency.STRICT): (HtsjdkVariantContext, Option[String], Int, Boolean) => Variant = {
val attributeFns: Iterable[(HtsjdkVariantContext, Int, Array[Int]) => Option[(String, String)]] = headerLines
.flatMap(hl => hl match {
@@ -1675,7 +1751,7 @@ class VariantContextConverter(
def convert(vc: HtsjdkVariantContext,
alt: Option[String],
alleleIdx: Int,
- wasSplit: Boolean): (Variant, Variant) = {
+ wasSplit: Boolean): Variant = {
// create the builder
val variantBuilder = Variant.newBuilder
@@ -1686,7 +1762,7 @@ class VariantContextConverter(
// was this split?
if (wasSplit) {
- variantBuilder.setSplitFromMultiAllelic(true)
+ variantBuilder.setSplitFromMultiallelic(true)
}
alt.foreach(variantBuilder.setAlternateAllele(_))
@@ -1713,7 +1789,7 @@ class VariantContextConverter(
}
})
- val variant = variantBuilder.build
+ val variant = variantBuilder.build()
val variantAnnotationBuilder = VariantAnnotation.newBuilder
val boundAnnotationFns: Iterable[VariantAnnotation.Builder => VariantAnnotation.Builder] = variantAnnotationFormatFns
@@ -1752,14 +1828,14 @@ class VariantContextConverter(
}
variantBuilder.setAnnotation(convertedAnnotationWithAttrs.build)
- (variant, variantBuilder.build)
+ variantBuilder.build
}
convert(_, _, _, _)
}
private def makeGenotypeFormatFn(
- headerLines: Seq[VCFHeaderLine]): (HtsjdkGenotype, Variant, Allele, Int, Option[Int], Boolean) => Genotype = {
+ headerLines: Seq[VCFHeaderLine]): (HtsjdkGenotype, Variant, HtsjdkAllele, Int, Option[Int], Boolean) => Genotype = {
val attributeFns: Iterable[(HtsjdkGenotype, Int, Array[Int]) => Option[(String, String)]] = headerLines
.flatMap(hl => hl match {
@@ -1783,33 +1859,34 @@ class VariantContextConverter(
def convert(g: HtsjdkGenotype,
variant: Variant,
- allele: Allele,
+ allele: HtsjdkAllele,
alleleIdx: Int,
nonRefIndex: Option[Int],
wasSplit: Boolean): Genotype = {
// create the builder
val builder = Genotype.newBuilder()
- .setVariant(variant)
.setReferenceName(variant.getReferenceName)
.setStart(variant.getStart)
.setEnd(variant.getEnd)
+ .setReferenceAllele(variant.getReferenceAllele)
+ .setAlternateAllele(variant.getAlternateAllele)
.setSampleId(g.getSampleName)
.setAlleles(g.getAlleles.map(gtAllele => {
if (gtAllele.isReference) {
- GenotypeAllele.REF
+ Allele.REF
} else if (gtAllele.isNoCall) {
- GenotypeAllele.NO_CALL
+ Allele.NO_CALL
} else if (gtAllele.equals(allele, true)) {
- GenotypeAllele.ALT
+ Allele.ALT
} else {
- GenotypeAllele.OTHER_ALT
+ Allele.OTHER_ALT
}
}))
// was this split?
if (wasSplit) {
- builder.setSplitFromMultiAllelic(true)
+ builder.setSplitFromMultiallelic(true)
}
// get array indices
@@ -1842,26 +1919,17 @@ class VariantContextConverter(
}
})
- // if we have a non-ref allele, fold and build
- val coreWithOptNonRefs = nonRefIndex.fold(convertedCore)(nonRefAllele => {
-
- // non-ref pl indices
- val nrIndices = GenotypeLikelihoods.getPLIndecesOfAlleles(0, nonRefAllele)
-
- formatNonRefGenotypeLikelihoods(g, convertedCore, nrIndices)
- })
-
- val vcAnns = VariantCallingAnnotations.newBuilder
+ val gAnns = GenotypeAnnotation.newBuilder
// bind the annotation conversion functions and fold
- val boundAnnotationFns: Iterable[VariantCallingAnnotations.Builder => VariantCallingAnnotations.Builder] = genotypeAnnotationFormatFns
+ val boundAnnotationFns: Iterable[GenotypeAnnotation.Builder => GenotypeAnnotation.Builder] = genotypeAnnotationFormatFns
.map(fn => {
- fn(g, _: VariantCallingAnnotations.Builder, alleleIdx, indices)
+ fn(g, _: GenotypeAnnotation.Builder, alleleIdx, indices)
})
- val convertedAnnotations = boundAnnotationFns.foldLeft(vcAnns)(
- (vcab: VariantCallingAnnotations.Builder, fn) => {
+ val convertedAnnotations = boundAnnotationFns.foldLeft(gAnns)(
+ (gab: GenotypeAnnotation.Builder, fn) => {
try {
- fn(vcab)
+ fn(gab)
} catch {
case t: Throwable => {
if (stringency == ValidationStringency.STRICT) {
@@ -1871,7 +1939,7 @@ class VariantContextConverter(
log.warn("Converting genotype annotation field in %s with function %s failed: %s".format(
g, fn, t))
}
- vcab
+ gab
}
}
}
@@ -1889,8 +1957,8 @@ class VariantContextConverter(
}
// build the annotations and attach
- val gtWithAnnotations = coreWithOptNonRefs
- .setVariantCallingAnnotations(convertedAnnotationsWithAttrs.build)
+ val gtWithAnnotations = convertedCore
+ .setAnnotation(convertedAnnotationsWithAttrs.build)
// build and return
gtWithAnnotations.build()
@@ -2180,7 +2248,7 @@ class VariantContextConverter(
def convert(vc: ADAMVariantContext): HtsjdkVariantContext = {
val v = vc.variant.variant
val hasNonRefAlleles = vc.genotypes
- .exists(_.getNonReferenceLikelihoods.length != 0)
+ .exists(_.getAnnotation.getNonReferenceLikelihoods.length != 0)
val builder = new VariantContextBuilder()
.chr(v.getReferenceName)
.start(v.getStart + 1)
@@ -2307,21 +2375,21 @@ class VariantContextConverter(
})
// convert the annotations if they exist
- val gtWithAnnotations = Option(g.getVariantCallingAnnotations)
- .fold(convertedCore)(vca => {
+ val gtWithAnnotations = Option(g.getAnnotation)
+ .fold(convertedCore)(ga => {
// bind the annotation conversion functions and fold
val convertedAnnotations = genotypeAnnotationExtractFns.foldLeft(convertedCore)(
(gb: GenotypeBuilder, fn) => {
try {
- fn(vca, gb)
+ fn(ga, gb)
} catch {
case t: Throwable => {
if (stringency == ValidationStringency.STRICT) {
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
- log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, vca, t))
+ log.warn("Applying annotation extraction function %s to %s failed with %s.".format(fn, ga, t))
}
gb
@@ -2331,7 +2399,7 @@ class VariantContextConverter(
})
// get the attribute map
- val attributes: Map[String, String] = vca.getAttributes.toMap
+ val attributes: Map[String, String] = ga.getAttributes.toMap
// apply the attribute converters and return
attributeFns.foldLeft(convertedAnnotations)((gb: GenotypeBuilder, fn) => {
@@ -2347,7 +2415,7 @@ class VariantContextConverter(
throw t
} else {
if (stringency == ValidationStringency.LENIENT) {
- log.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, vca, t))
+ log.warn("Applying attribute extraction function %s to %s failed with %s.".format(fn, ga, t))
}
gb
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
index af02c4b077..2e76688244 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
@@ -90,24 +90,11 @@ object ReferencePosition extends Serializable {
* @return The reference position of this genotype.
*/
def apply(genotype: Genotype): ReferencePosition = {
- val referenceNameSet = Seq(Option(genotype.getReferenceName), Option(genotype.getVariant.getReferenceName))
- .flatten
- .toSet
- val startSet = Seq(Option(genotype.getStart), Option(genotype.getVariant.getStart))
- .flatten
- .toSet
- require(referenceNameSet.nonEmpty, "Genotype has no reference name: %s".format(genotype))
- require(referenceNameSet.size == 1, "Genotype has multiple reference names: %s, %s".format(
- referenceNameSet, genotype))
- require(startSet.nonEmpty, "Genotype has no start: %s".format(genotype))
- require(startSet.size == 1, "Genotype has multiple starts: %s, %s".format(
- startSet, genotype))
-
// see ADAM-1959, VCF 0 = telomere
- if (startSet.head != -1) {
- new ReferencePosition(referenceNameSet.head, startSet.head)
+ if (genotype.getStart != -1) {
+ new ReferencePosition(genotype.getReferenceName, genotype.getStart)
} else {
- new ReferencePosition(referenceNameSet.head, 0L)
+ new ReferencePosition(genotype.getReferenceName, 0L)
}
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
index 153103c013..8cf0abcaf7 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
@@ -29,34 +29,35 @@ import org.bdgenomics.formats.avro.{ Genotype, Variant, VariantAnnotation }
object VariantContext {
/**
- * Constructs an VariantContext from an Variant
+ * Create a new variant context from a variant.
*
- * @param v Variant which is used to construct the ReferencePosition
- * @return VariantContext corresponding to the Variant
+ * @param v Variant to create a variant context from.
+ * @return Return a new variant context created from the specified variant.
*/
def apply(v: Variant): VariantContext = {
new VariantContext(ReferencePosition(v), RichVariant(v), Iterable.empty)
}
/**
- * Constructs an VariantContext from an Variant and Seq[Genotype]
- * and DatabaseVariantAnnotation
+ * Create a new variant context from a variant and one or more genotypes.
*
- * @param v Variant which is used to construct the ReferencePosition
- * @param genotypes Seq[Genotype]
- * @return VariantContext corresponding to the Variant
+ * @param v Variant to create a variant context from.
+ * @param genotypes One or more genotypes.
+ * @return Return a new variant context created from the specified variant
+ * and genotypes.
*/
def apply(v: Variant, genotypes: Iterable[Genotype]): VariantContext = {
new VariantContext(ReferencePosition(v), RichVariant(v), genotypes)
}
/**
- * Builds a variant context off of a set of genotypes. Builds variants from the genotypes.
+ * Create a new variant context from a set of genotypes.
*
* @note Genotypes must be at the same position.
*
- * @param genotypes List of genotypes to build variant context from.
- * @return A variant context corresponding to the variants and genotypes at this site.
+ * @param genotypes One or more genotypes to create a variant context from.
+ * @return Return a new variant context created from the specified set of
+ * genotypes.
*/
def buildFromGenotypes(genotypes: Seq[Genotype]): VariantContext = {
val position = ReferencePosition(genotypes.head)
@@ -65,7 +66,15 @@ object VariantContext {
"Genotypes do not all have the same position."
)
- val variant = genotypes.head.getVariant
+ val g = genotypes.head
+ val variant = Variant.newBuilder()
+ .setReferenceName(g.getReferenceName)
+ .setStart(g.getStart)
+ .setEnd(g.getEnd)
+ .setSplitFromMultiallelic(g.getSplitFromMultiallelic)
+ .setReferenceAllele(g.getReferenceAllele)
+ .setAlternateAllele(g.getAlternateAllele)
+ .build()
new VariantContext(position, RichVariant(variant), genotypes)
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala
index b6a0def2fa..2dc6478fc6 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/GenotypeDataset.scala
@@ -49,7 +49,7 @@ import org.bdgenomics.adam.sql.{
import org.bdgenomics.utils.interval.array.{ IntervalArray, IntervalArraySerializer }
import org.bdgenomics.formats.avro.{
Genotype,
- GenotypeAllele,
+ Allele,
Sample,
Variant,
VariantAnnotation
@@ -204,38 +204,24 @@ case class DatasetBoundGenotypeDataset private[rdd] (
copy(samples = newSamples.toSeq)
}
- override def copyVariantEndToAttribute(): GenotypeDataset = {
- def copyEnd(g: GenotypeProduct): GenotypeProduct = {
- val variant = g.variant.getOrElse(VariantProduct())
- val annotation = variant.annotation.getOrElse(VariantAnnotationProduct())
- val attributes = annotation.attributes + ("END" -> g.end.toString)
- val annotationCopy = annotation.copy(attributes = attributes)
- val variantCopy = variant.copy(annotation = Some(annotationCopy))
- g.copy(variant = Some(variantCopy))
- }
- val sqlContext = SQLContext.getOrCreate(rdd.context)
- import sqlContext.implicits._
- transformDataset(dataset => dataset.map(copyEnd))
- }
-
override def filterToFiltersPassed(): GenotypeDataset = {
- transformDataset(dataset => dataset.filter(dataset.col("variantCallingAnnotations.filtersPassed")))
+ transformDataset(dataset => dataset.filter(dataset.col("filtersPassed")))
}
override def filterByQuality(minimumQuality: Double): GenotypeDataset = {
- transformDataset(dataset => dataset.filter(dataset.col("genotypeQuality") >= minimumQuality))
+ transformDataset(dataset => dataset.filter(dataset.col("annotation.quality") >= minimumQuality))
}
override def filterByReadDepth(minimumReadDepth: Int): GenotypeDataset = {
- transformDataset(dataset => dataset.filter(dataset.col("readDepth") >= minimumReadDepth))
+ transformDataset(dataset => dataset.filter(dataset.col("annotation.readDepth") >= minimumReadDepth))
}
override def filterByAlternateReadDepth(minimumAlternateReadDepth: Int): GenotypeDataset = {
- transformDataset(dataset => dataset.filter(dataset.col("alternateReadDepth") >= minimumAlternateReadDepth))
+ transformDataset(dataset => dataset.filter(dataset.col("annotation.alternateReadDepth") >= minimumAlternateReadDepth))
}
override def filterByReferenceReadDepth(minimumReferenceReadDepth: Int): GenotypeDataset = {
- transformDataset(dataset => dataset.filter(dataset.col("referenceReadDepth") >= minimumReferenceReadDepth))
+ transformDataset(dataset => dataset.filter(dataset.col("annotation.referenceReadDepth") >= minimumReferenceReadDepth))
}
override def filterToSample(sampleId: String) = {
@@ -375,7 +361,12 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
val sqlContext = SQLContext.getOrCreate(rdd.context)
import sqlContext.implicits._
- val notDedupedVariants = dataset.select($"variant.*")
+ val notDedupedVariants = dataset.select($"referenceName",
+ $"start",
+ $"end",
+ $"splitFromMultiallelic",
+ $"referenceAllele",
+ $"alternateAllele")
.as[VariantProduct]
val maybeDedupedVariants = if (dedupe) {
@@ -385,6 +376,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
notDedupedVariants.dropDuplicates("referenceName",
"start",
"end",
+ "splitFromMultiallelic",
"referenceAllele",
"alternateAllele")
} else {
@@ -394,32 +386,13 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
VariantDataset(maybeDedupedVariants, sequences, headerLines)
}
- /**
- * Copy variant end to a variant attribute (VCF INFO field "END").
- *
- * @return GenotypeDataset with variant end copied to a variant attribute.
- */
- def copyVariantEndToAttribute(): GenotypeDataset = {
- def copyEnd(g: Genotype): Genotype = {
- val variant = Option(g.variant).getOrElse(new Variant())
- val annotation = Option(variant.annotation).getOrElse(new VariantAnnotation())
- val attributes = new java.util.HashMap[String, String]()
- Option(annotation.attributes).map(attributes.putAll(_))
- attributes.put("END", g.end.toString)
- val annotationCopy = VariantAnnotation.newBuilder(annotation).setAttributes(attributes).build()
- val variantCopy = Variant.newBuilder(variant).setAnnotation(annotationCopy).build()
- Genotype.newBuilder(g).setVariant(variantCopy).build()
- }
- transform(rdd => rdd.map(copyEnd))
- }
-
/**
* Filter this GenotypeDataset to genotype filters passed (VCF FORMAT field "FT" value PASS).
*
* @return GenotypeDataset filtered to genotype filters passed.
*/
def filterToFiltersPassed(): GenotypeDataset = {
- transform(rdd => rdd.filter(g => Option(g.getVariantCallingAnnotations).exists(_.getFiltersPassed)))
+ transform(rdd => rdd.filter(_.getFiltersPassed))
}
/**
@@ -429,7 +402,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* @return GenotypeDataset filtered by quality.
*/
def filterByQuality(minimumQuality: Double): GenotypeDataset = {
- transform(rdd => rdd.filter(g => Option(g.getGenotypeQuality).exists(_ >= minimumQuality)))
+ transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getQuality).exists(_ >= minimumQuality))))
}
/**
@@ -439,7 +412,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* @return GenotypeDataset filtered by read depth.
*/
def filterByReadDepth(minimumReadDepth: Int): GenotypeDataset = {
- transform(rdd => rdd.filter(g => Option(g.getReadDepth).exists(_ >= minimumReadDepth)))
+ transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getReadDepth).exists(_ >= minimumReadDepth))))
}
/**
@@ -449,7 +422,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* @return GenotypeDataset filtered by alternate read depth.
*/
def filterByAlternateReadDepth(minimumAlternateReadDepth: Int): GenotypeDataset = {
- transform(rdd => rdd.filter(g => Option(g.getAlternateReadDepth).exists(_ >= minimumAlternateReadDepth)))
+ transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getAlternateReadDepth).exists(_ >= minimumAlternateReadDepth))))
}
/**
@@ -459,14 +432,14 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* @return GenotypeDataset filtered by reference read depth.
*/
def filterByReferenceReadDepth(minimumReferenceReadDepth: Int): GenotypeDataset = {
- transform(rdd => rdd.filter(g => Option(g.getReferenceReadDepth).exists(_ >= minimumReferenceReadDepth)))
+ transform(rdd => rdd.filter(g => Option(g.getAnnotation).exists(a => Option(a.getReferenceReadDepth).exists(_ >= minimumReferenceReadDepth))))
}
/**
* Filter this GenotypeDataset by sample to those that match the specified sample.
*
* @param sampleId Sample to filter by.
- * return GenotypeDataset filtered by sample.
+ * @return GenotypeDataset filtered by sample.
*/
def filterToSample(sampleId: String): GenotypeDataset = {
transform(rdd => rdd.filter(g => Option(g.getSampleId).exists(_ == sampleId)))
@@ -476,7 +449,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* (Java-specific) Filter this GenotypeDataset by sample to those that match the specified samples.
*
* @param sampleIds List of samples to filter by.
- * return GenotypeDataset filtered by one or more samples.
+ * @return GenotypeDataset filtered by one or more samples.
*/
def filterToSamples(sampleIds: java.util.List[String]): GenotypeDataset = {
filterToSamples(asScalaBuffer(sampleIds))
@@ -486,7 +459,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* (Scala-specific) Filter this GenotypeDataset by sample to those that match the specified samples.
*
* @param sampleIds Sequence of samples to filter by.
- * return GenotypeDataset filtered by one or more samples.
+ * @return GenotypeDataset filtered by one or more samples.
*/
def filterToSamples(sampleIds: Seq[String]): GenotypeDataset = {
transform(rdd => rdd.filter(g => Option(g.getSampleId).exists(sampleIds.contains(_))))
@@ -498,7 +471,7 @@ sealed abstract class GenotypeDataset extends MultisampleAvroGenomicDataset[Geno
* @return GenotypeDataset filtered to remove genotypes containing NO_CALL alleles.
*/
def filterNoCalls(): GenotypeDataset = {
- transform(rdd => rdd.filter(g => !g.getAlleles.contains(GenotypeAllele.NO_CALL)))
+ transform(rdd => rdd.filter(g => !g.getAlleles.contains(Allele.NO_CALL)))
}
/**
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala
index deb7ace160..e7ba929920 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala
@@ -50,8 +50,6 @@ case class VCFOutFormatter(
val stringency: ValidationStringency,
val optHeaderLines: Option[CollectionAccumulator[VCFHeaderLine]]) extends OutFormatter[VariantContext] with Logging {
- private val nestAnn = VariantContextConverter.getNestAnnotationInGenotypesProperty(conf)
-
/**
* OutFormatter that reads streaming VCF. Defaults to ValidationStringency.LENIENT.
* Java-friendly no-arg constructor.
@@ -110,9 +108,7 @@ case class VCFOutFormatter(
optHeaderLines.map(accumulator => lines.foreach(line => accumulator.add(line)))
// make converter
- val converter = new VariantContextConverter(lines,
- stringency,
- nestAnn)
+ val converter = new VariantContextConverter(lines, stringency)
@tailrec def convertIterator(iter: AsciiLineReaderIterator,
records: ListBuffer[VariantContext] = ListBuffer.empty): Iterator[VariantContext] = {
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala
index ffa700608c..c99958d5bd 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VariantDataset.scala
@@ -186,7 +186,7 @@ case class DatasetBoundVariantDataset private[rdd] (
}
override def filterByQuality(minimumQuality: Double): VariantDataset = {
- transformDataset(dataset => dataset.filter(!dataset.col("splitFromMultiAllelic") && dataset.col("quality") >= minimumQuality))
+ transformDataset(dataset => dataset.filter(!dataset.col("splitFromMultiallelic") && dataset.col("quality") >= minimumQuality))
}
override def filterByReadDepth(minimumReadDepth: Int): VariantDataset = {
@@ -318,18 +318,18 @@ sealed abstract class VariantDataset extends AvroGenomicDataset[Variant, Variant
/**
* Filter this VariantDataset by quality (VCF column 6 "QUAL"). Variants split
- * for multi-allelic sites will also be filtered out.
+ * for multiallelic sites will also be filtered out.
*
* @param minimumQuality Minimum quality to filter by, inclusive.
* @return VariantDataset filtered by quality.
*/
def filterByQuality(minimumQuality: Double): VariantDataset = {
- transform(rdd => rdd.filter(v => !(Option(v.getSplitFromMultiAllelic).exists(_ == true)) && Option(v.getQuality).exists(_ >= minimumQuality)))
+ transform(rdd => rdd.filter(v => !(Option(v.getSplitFromMultiallelic).exists(_ == true)) && Option(v.getQuality).exists(_ >= minimumQuality)))
}
/**
* Filter this VariantDataset by total read depth (VCF INFO reserved key AD, Number=R,
- * split for multi-allelic sites into single integer values for the reference allele
+ * split for multiallelic sites into single integer values for the reference allele
* (filterByReferenceReadDepth) and the alternate allele (this method)).
*
* @param minimumReadDepth Minimum total read depth to filter by, inclusive.
@@ -341,7 +341,7 @@ sealed abstract class VariantDataset extends AvroGenomicDataset[Variant, Variant
/**
* Filter this VariantDataset by reference total read depth (VCF INFO reserved key AD, Number=R,
- * split for multi-allelic sites into single integer values for the alternate allele
+ * split for multiallelic sites into single integer values for the alternate allele
* (filterByReadDepth) and the reference allele (this method)).
*
* @param minimumReferenceReadDepth Minimum reference total read depth to filter by, inclusive.
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala
index 511ed74f62..0ad3da5e34 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichVariant.scala
@@ -31,10 +31,13 @@ object RichVariant {
* @return Returns a rich variant representing the variant that was genotyped.
*/
def genotypeToRichVariant(genotype: Genotype): RichVariant = {
- val variant = Variant.newBuilder(genotype.variant)
+ val variant = Variant.newBuilder()
.setReferenceName(genotype.getReferenceName)
.setStart(genotype.getStart)
.setEnd(genotype.getEnd)
+ .setSplitFromMultiallelic(genotype.getSplitFromMultiallelic)
+ .setReferenceAllele(genotype.getReferenceAllele)
+ .setAlternateAllele(genotype.getAlternateAllele)
.build()
RichVariant(variant)
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
index 697cb971fb..2a977e2801 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
@@ -233,6 +233,7 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
new org.bdgenomics.adam.util.TwoBitFileSerializer)
// org.bdgenomics.formats.avro
+ kryo.register(classOf[org.bdgenomics.formats.avro.Allele])
kryo.register(classOf[org.bdgenomics.formats.avro.AlignmentRecord],
new AvroSerializer[org.bdgenomics.formats.avro.AlignmentRecord])
kryo.register(classOf[org.bdgenomics.formats.avro.Dbxref],
@@ -243,8 +244,8 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
new AvroSerializer[org.bdgenomics.formats.avro.Fragment])
kryo.register(classOf[org.bdgenomics.formats.avro.Genotype],
new AvroSerializer[org.bdgenomics.formats.avro.Genotype])
- kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeAllele])
- kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeType])
+ kryo.register(classOf[org.bdgenomics.formats.avro.GenotypeAnnotation],
+ new AvroSerializer[org.bdgenomics.formats.avro.GenotypeAnnotation])
kryo.register(classOf[org.bdgenomics.formats.avro.NucleotideContigFragment],
new AvroSerializer[org.bdgenomics.formats.avro.NucleotideContigFragment])
kryo.register(classOf[org.bdgenomics.formats.avro.OntologyTerm],
@@ -271,8 +272,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
kryo.register(classOf[org.bdgenomics.formats.avro.VariantAnnotation],
new AvroSerializer[org.bdgenomics.formats.avro.VariantAnnotation])
kryo.register(classOf[org.bdgenomics.formats.avro.VariantAnnotationMessage])
- kryo.register(classOf[org.bdgenomics.formats.avro.VariantCallingAnnotations],
- new AvroSerializer[org.bdgenomics.formats.avro.VariantCallingAnnotations])
// org.codehaus.jackson.node
kryo.register(classOf[org.codehaus.jackson.node.NullNode])
@@ -312,12 +311,13 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
kryo.register(classOf[scala.Array[org.apache.spark.sql.catalyst.InternalRow]])
kryo.register(classOf[scala.Array[org.apache.spark.sql.types.StructField]])
kryo.register(classOf[scala.Array[org.apache.spark.sql.types.StructType]])
+ kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Allele]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.AlignmentRecord]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Dbxref]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Feature]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Fragment]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Genotype]])
- kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.GenotypeAllele]])
+ kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.GenotypeAnnotation]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.OntologyTerm]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.NucleotideContigFragment]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Read]])
@@ -330,7 +330,6 @@ class ADAMKryoRegistrator extends KryoRegistrator with Logging {
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.Variant]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantAnnotation]])
kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantAnnotationMessage]])
- kryo.register(classOf[scala.Array[org.bdgenomics.formats.avro.VariantCallingAnnotations]])
kryo.register(classOf[scala.Array[org.bdgenomics.adam.algorithms.consensus.Consensus]])
kryo.register(classOf[scala.Array[org.bdgenomics.adam.models.Coverage]])
kryo.register(classOf[scala.Array[org.bdgenomics.adam.models.ReferencePosition]])
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala
index 746e9f6272..87a510893f 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMShell.scala
@@ -179,11 +179,11 @@ object ADAMShell {
g.getReferenceName(),
g.getStart().toString,
g.getEnd().toString,
- g.getVariant().getReferenceAllele(),
- g.getVariant().getAlternateAllele(),
+ g.getReferenceAllele(),
+ g.getAlternateAllele(),
g.getAlleles().toString,
g.getSampleId()
- ) ++ keys.map(key => Option(g.getVariantCallingAnnotations().getAttributes().get(key)).getOrElse(""))).toArray
+ ) ++ keys.map(key => Option(g.getAnnotation().getAttributes().get(key)).getOrElse(""))).toArray
println("\nGenotype Format Fields\n" + new ASCIITable(header, rows).toString)
}
@@ -218,13 +218,13 @@ object ADAMShell {
g.getReferenceName(),
g.getStart().toString,
g.getEnd().toString,
- g.getVariant().getReferenceAllele(),
- g.getVariant().getAlternateAllele(),
+ g.getReferenceAllele(),
+ g.getAlternateAllele(),
g.getAlleles().toString,
g.getSampleId(),
- g.getVariantCallingAnnotations().getFiltersApplied().toString,
- g.getVariantCallingAnnotations().getFiltersPassed().toString,
- g.getVariantCallingAnnotations().getFiltersFailed().toString
+ g.getFiltersApplied().toString,
+ g.getFiltersPassed().toString,
+ g.getFiltersFailed().toString
)).toArray
println("\nGenotype Filters\n" + new ASCIITable(header, rows).toString)
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala
index 78e27ede33..9fac68beab 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala
@@ -22,7 +22,7 @@ import com.google.common.collect.ImmutableMap
import htsjdk.samtools.ValidationStringency
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor
import htsjdk.variant.variantcontext.{
- Allele,
+ Allele => HtsjdkAllele,
Genotype => HtsjdkGenotype,
GenotypeBuilder,
GenotypeType,
@@ -55,30 +55,30 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val lenient = ValidationStringency.LENIENT
val converter = new VariantContextConverter(DefaultHeaderLines.allHeaderLines,
- lenient, false)
+ lenient)
val strictConverter = new VariantContextConverter(DefaultHeaderLines.allHeaderLines,
- ValidationStringency.STRICT, false)
+ ValidationStringency.STRICT)
def htsjdkSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("T")))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T")))
.start(1L)
.stop(1L)
.chr("1")
def htsjdkMultiAllelicSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("T"), Allele.create("G")))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T"), HtsjdkAllele.create("G")))
.start(1L)
.stop(1L)
.chr("1")
def htsjdkRefSNV: VariantContextBuilder = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("", false)))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("", false)))
.start(1L)
.stop(1L)
.chr("1")
def htsjdkCNVBuilder: VariantContextBuilder = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("", false)))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("", false)))
.start(10L)
.stop(20L)
.chr("1")
@@ -106,7 +106,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("Convert somatic htsjdk site-only SNV to ADAM") {
val vcb: VariantContextBuilder = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("T")))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T")))
.start(1L)
.stop(1L)
.chr("1")
@@ -151,7 +151,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val adamGTs = adamVCs.flatMap(_.genotypes)
assert(adamGTs.length === 1)
val adamGT = adamGTs.head
- assert(adamGT.getAlleles.sameElements(List(GenotypeAllele.REF, GenotypeAllele.ALT)))
+ assert(adamGT.getAlleles.sameElements(List(Allele.REF, Allele.ALT)))
assert(adamGT.getPhaseSetId === 1)
assert(adamGT.getPhaseQuality === 50)
}
@@ -195,18 +195,18 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
// htsjdk does not distinguish between filters not applied and filters passed in Genotype
- assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
- assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true)
- assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty)
+ assert(adamGT.getAnnotation.getFiltersApplied === true)
+ assert(adamGT.getAnnotation.getFiltersPassed === true)
+ assert(adamGT.getAnnotation.getFiltersFailed.isEmpty)
}
{ // PASSing
gb.filter("PASS")
vcb.genotypes(gb.make)
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
- assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
- assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === true)
- assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.isEmpty)
+ assert(adamGT.getAnnotation.getFiltersApplied === true)
+ assert(adamGT.getAnnotation.getFiltersPassed === true)
+ assert(adamGT.getAnnotation.getFiltersFailed.isEmpty)
}
{ // not PASSing
gb.filter("LowMQ")
@@ -214,9 +214,9 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val adamVCs = converter.convert(vcb.make)
val adamGT = adamVCs.flatMap(_.genotypes).head
- assert(adamGT.getVariantCallingAnnotations.getFiltersApplied === true)
- assert(adamGT.getVariantCallingAnnotations.getFiltersPassed === false)
- assert(adamGT.getVariantCallingAnnotations.getFiltersFailed.sameElements(List("LowMQ")))
+ assert(adamGT.getAnnotation.getFiltersApplied === true)
+ assert(adamGT.getAnnotation.getFiltersPassed === false)
+ assert(adamGT.getAnnotation.getFiltersFailed.sameElements(List("LowMQ")))
}
}
@@ -230,8 +230,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(htsjdkVC.getContig === "1")
assert(htsjdkVC.getStart === 1)
assert(htsjdkVC.getEnd === 1)
- assert(htsjdkVC.getReference === Allele.create("A", true))
- assert(htsjdkVC.getAlternateAlleles.sameElements(List(Allele.create("T"))))
+ assert(htsjdkVC.getReference === HtsjdkAllele.create("A", true))
+ assert(htsjdkVC.getAlternateAlleles.sameElements(List(HtsjdkAllele.create("T"))))
assert(!htsjdkVC.hasLog10PError)
assert(!htsjdkVC.hasID)
assert(!htsjdkVC.filtersWereApplied)
@@ -240,13 +240,17 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("Convert ADAM SNV w/ genotypes to htsjdk") {
val variant = adamSNVBuilder().build
val genotype = Genotype.newBuilder
- .setVariant(variant)
+ .setStart(variant.getStart)
+ .setEnd(variant.getEnd)
+ .setReferenceAllele(variant.getReferenceAllele)
+ .setAlternateAllele(variant.getAlternateAllele)
+ .setSplitFromMultiallelic(variant.getSplitFromMultiallelic)
.setSampleId("NA12878")
.setStrandBiasComponents(List(0, 2, 4, 6).map(i => i: java.lang.Integer))
- .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT))
- .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder()
+ .setAlleles(List(Allele.REF, Allele.ALT))
+ .setAnnotation(GenotypeAnnotation.newBuilder()
.setFisherStrandBiasPValue(3.0f)
- .setRmsMapQ(0.0f)
+ .setRmsMappingQuality(0.0f)
.setMapq0Reads(5)
.build)
.build
@@ -274,11 +278,15 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("Convert ADAM SNV w/ genotypes but bad SB to htsjdk with strict validation") {
val variant = adamSNVBuilder().build
val genotype = Genotype.newBuilder
- .setVariant(variant)
+ .setStart(variant.getStart)
+ .setEnd(variant.getEnd)
+ .setReferenceAllele(variant.getReferenceAllele)
+ .setAlternateAllele(variant.getAlternateAllele)
+ .setSplitFromMultiallelic(variant.getSplitFromMultiallelic)
.setSampleId("NA12878")
.setStrandBiasComponents(List(0, 2).map(i => i: java.lang.Integer))
- .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT))
- .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder()
+ .setAlleles(List(Allele.REF, Allele.ALT))
+ .setAnnotation(GenotypeAnnotation.newBuilder()
.setFisherStrandBiasPValue(3.0f)
- .setRmsMapQ(0.0f)
+ .setRmsMappingQuality(0.0f)
.setMapq0Reads(5)
@@ -293,13 +301,17 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("Convert ADAM SNV w/ genotypes but bad SB to htsjdk with lenient validation") {
val variant = adamSNVBuilder().build
val genotype = Genotype.newBuilder
- .setVariant(variant)
+ .setStart(variant.getStart)
+ .setEnd(variant.getEnd)
+ .setReferenceAllele(variant.getReferenceAllele)
+ .setAlternateAllele(variant.getAlternateAllele)
+ .setSplitFromMultiallelic(variant.getSplitFromMultiallelic)
.setSampleId("NA12878")
.setStrandBiasComponents(List(0, 2).map(i => i: java.lang.Integer))
- .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT))
- .setVariantCallingAnnotations(VariantCallingAnnotations.newBuilder()
+ .setAlleles(List(Allele.REF, Allele.ALT))
+ .setAnnotation(GenotypeAnnotation.newBuilder()
.setFisherStrandBiasPValue(3.0f)
- .setRmsMapQ(0.0f)
+ .setRmsMappingQuality(0.0f)
.setMapq0Reads(5)
.build)
.build
@@ -325,7 +337,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("Convert htsjdk multi-allelic SNVs to ADAM and back to htsjdk") {
- val gb = new GenotypeBuilder("NA12878", List(Allele.create("T"), Allele.create("G")))
+ val gb = new GenotypeBuilder("NA12878", List(HtsjdkAllele.create("T"), HtsjdkAllele.create("G")))
gb.AD(Array(4, 2, 3)).PL(Array(59, 0, 181, 1, 66, 102))
val vcb = htsjdkMultiAllelicSNVBuilder
@@ -344,14 +356,14 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val adamGT1 = adamVCs(0).genotypes.head
val adamGT2 = adamVCs(1).genotypes.head
- assert(adamGT1.getAlleles.sameElements(List(GenotypeAllele.ALT, GenotypeAllele.OTHER_ALT)))
+ assert(adamGT1.getAlleles.sameElements(List(Allele.ALT, Allele.OTHER_ALT)))
assert(adamGT1.getAlternateReadDepth === 2)
assert(adamGT1.getGenotypeLikelihoods
.map(d => d: scala.Double)
.map(PhredUtils.logProbabilityToPhred)
.sameElements(List(59, 0, 181)))
- assert(adamGT2.getAlleles.sameElements(List(GenotypeAllele.OTHER_ALT, GenotypeAllele.ALT)))
+ assert(adamGT2.getAlleles.sameElements(List(Allele.OTHER_ALT, Allele.ALT)))
assert(adamGT2.getAlternateReadDepth === 3)
assert(adamGT2.getGenotypeLikelihoods
.map(d => d: scala.Double)
@@ -377,7 +389,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("Convert gVCF reference records to ADAM") {
- val gb = new GenotypeBuilder("NA12878", List(Allele.create("A", true), Allele.create("A", true)))
+ val gb = new GenotypeBuilder("NA12878", List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("A", true)))
gb.PL(Array(0, 1, 2)).DP(44).attribute("MIN_DP", 38)
val vcb = htsjdkRefSNV
@@ -390,7 +402,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGTs.length === 1)
val adamGT = adamGTs.head
- assert(adamGT.getVariant.getAlternateAllele === null)
+ assert(adamGT.getAlternateAllele === null)
- assert(adamGT.getAlleles.sameElements(List(GenotypeAllele.REF, GenotypeAllele.REF)))
+ assert(adamGT.getAlleles.sameElements(List(Allele.REF, Allele.REF)))
assert(adamGT.getMinReadDepth === 38)
assert(adamGT.getGenotypeLikelihoods.isEmpty)
assert(adamGT.getNonReferenceLikelihoods
@@ -719,11 +731,11 @@ class VariantContextConverterSuite extends ADAMFunSuite {
def buildVca(
objMap: Map[String, java.lang.Object],
- extractor: (HtsjdkGenotype, VariantCallingAnnotations.Builder, Int, Array[Int]) => VariantCallingAnnotations.Builder,
- fns: Iterable[GenotypeBuilder => GenotypeBuilder] = Iterable.empty): VariantCallingAnnotations = {
+ extractor: (HtsjdkGenotype, GenotypeAnnotation.Builder, Int, Array[Int]) => GenotypeAnnotation.Builder,
+ fns: Iterable[GenotypeBuilder => GenotypeBuilder] = Iterable.empty): GenotypeAnnotation = {
val htsjdkGenotype = makeGenotype(objMap, fns)
extractor(htsjdkGenotype,
- VariantCallingAnnotations.newBuilder,
+ GenotypeAnnotation.newBuilder,
0,
Array(0, 1, 2)).build
}
@@ -796,18 +808,18 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("no rms mapping quality going htsjdk->adam") {
val vca = buildVca(Map.empty,
- converter.formatRmsMapQ,
+ converter.formatRmsMappingQuality,
fns = Iterable.empty)
- assert(vca.getRmsMapQ === null)
+ assert(vca.getRmsMappingQuality === null)
}
test("extract rms mapping quality going htsjdk->adam") {
val vca = buildVca(Map(("MQ" -> (40.0f: java.lang.Float).asInstanceOf[java.lang.Object])),
- converter.formatRmsMapQ,
+ converter.formatRmsMappingQuality,
fns = Iterable.empty)
- assert(vca.getRmsMapQ > 39.9f && vca.getRmsMapQ < 40.1f)
+ assert(vca.getRmsMappingQuality > 39.9f && vca.getRmsMappingQuality < 40.1f)
}
test("no mq0 going htsjdk->adam") {
@@ -1040,7 +1052,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(g.getExtendedAttribute("PQ").asInstanceOf[java.lang.Integer] === 10)
}
- def emptyVca = VariantCallingAnnotations.newBuilder.build
+ def emptyVca = GenotypeAnnotation.newBuilder.build
test("no filter info going adam->htsjdk") {
val g = converter.extractFilters(emptyVca, newGb)
@@ -1052,7 +1064,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("if filters applied, must set passed/failed going adam->htsjdk") {
intercept[IllegalArgumentException] {
- val g = converter.extractFilters(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFilters(GenotypeAnnotation.newBuilder
.setFiltersApplied(true)
.build, newGb)
.make
@@ -1060,7 +1072,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("filters passed going adam->htsjdk") {
- val g = converter.extractFilters(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFilters(GenotypeAnnotation.newBuilder
.setFiltersApplied(true)
.setFiltersPassed(true)
.build, newGb)
@@ -1074,7 +1086,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
test("if filters failed, must set filters failed going adam->htsjdk") {
intercept[IllegalArgumentException] {
- val g = converter.extractFilters(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFilters(GenotypeAnnotation.newBuilder
.setFiltersApplied(true)
.setFiltersPassed(false)
.build, newGb)
@@ -1083,7 +1095,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("single filter failed going adam->htsjdk") {
- val g = converter.extractFilters(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFilters(GenotypeAnnotation.newBuilder
.setFiltersApplied(true)
.setFiltersPassed(false)
.setFiltersFailed(Seq("lowmq"))
@@ -1095,7 +1107,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("multiple filters failed going adam->htsjdk") {
- val g = converter.extractFilters(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFilters(GenotypeAnnotation.newBuilder
.setFiltersApplied(true)
.setFiltersPassed(false)
.setFiltersFailed(Seq("lowmq", "lowdp"))
@@ -1114,7 +1126,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("extract fisher strand bias going adam->htsjdk") {
- val g = converter.extractFisherStrandBias(VariantCallingAnnotations.newBuilder
+ val g = converter.extractFisherStrandBias(GenotypeAnnotation.newBuilder
.setFisherStrandBiasPValue(20.0f)
.build, newGb)
.make
@@ -1132,7 +1144,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("extract rms mapping quality going adam->htsjdk") {
- val g = converter.extractRmsMapQ(VariantCallingAnnotations.newBuilder
- .setRmsMapQ(40.0f)
+ val g = converter.extractRmsMappingQuality(GenotypeAnnotation.newBuilder
+ .setRmsMappingQuality(40.0f)
.build, newGb)
.make
@@ -1150,7 +1162,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("extract mapping quality 0 reads going adam->htsjdk") {
- val g = converter.extractMapQ0(VariantCallingAnnotations.newBuilder
+ val g = converter.extractMapQ0(GenotypeAnnotation.newBuilder
.setMapq0Reads(5)
.build, newGb)
.make
@@ -2250,11 +2262,11 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=0 Type=Flag adam->htsjdk not supported") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("FLAG", "true"))
.build
val g = Genotype.newBuilder
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2271,12 +2283,12 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=1 Type=Integer adam->htsjdk") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("ONE_INT", "42"))
.build
val g = Genotype.newBuilder
.setSampleId("sample")
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2296,12 +2308,12 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=4 Type=Integer adam->htsjdk") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("FOUR_INTS", "5,10,15,20"))
.build
val g = Genotype.newBuilder
.setSampleId("sample")
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2326,12 +2338,12 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=A Type=Integer adam->htsjdk") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("A_INT", "42"))
.build
val g = Genotype.newBuilder
.setSampleId("sample")
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2353,12 +2365,12 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=R Type=Integer adam->htsjdk") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("R_INT", "5,10"))
.build
val g = Genotype.newBuilder
.setSampleId("sample")
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2381,12 +2393,12 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
test("VCF FORMAT attribute Number=R Type=String adam->htsjdk") {
- val vca = VariantCallingAnnotations.newBuilder
+ val vca = GenotypeAnnotation.newBuilder
.setAttributes(ImmutableMap.of("R_STRING", "foo,bar"))
.build
val g = Genotype.newBuilder
.setSampleId("sample")
- .setVariantCallingAnnotations(vca)
+ .setAnnotation(vca)
.build
val adamVc = ADAMVariantContext(v, Some(g))
@@ -2448,8 +2460,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("ONE_INT"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("ONE_INT") === "42")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("ONE_INT"))
+ assert(adamGt.getAnnotation.getAttributes.get("ONE_INT") === "42")
}
test("VCF FORMAT attribute Number=4 Type=Integer htsjdk->adam") {
@@ -2471,8 +2483,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("FOUR_INTS"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("FOUR_INTS") === "5,10,15,20")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("FOUR_INTS"))
+ assert(adamGt.getAnnotation.getAttributes.get("FOUR_INTS") === "5,10,15,20")
}
test("VCF FORMAT attribute Number=4 Type=Float htsjdk->adam") {
@@ -2494,8 +2506,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("FOUR_FLOATS"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("FOUR_FLOATS") === "5.0,10.1,15.2,20.3")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("FOUR_FLOATS"))
+ assert(adamGt.getAnnotation.getAttributes.get("FOUR_FLOATS") === "5.0,10.1,15.2,20.3")
}
test("VCF FORMAT attribute Number=A Type=Integer htsjdk->adam") {
@@ -2517,8 +2529,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("A_INT"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("A_INT") === "10")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("A_INT"))
+ assert(adamGt.getAnnotation.getAttributes.get("A_INT") === "10")
}
test("VCF FORMAT attribute Number=R Type=Integer htsjdk->adam") {
@@ -2540,8 +2552,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("R_INT"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("R_INT") === "5,10")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("R_INT"))
+ assert(adamGt.getAnnotation.getAttributes.get("R_INT") === "5,10")
}
test("VCF FORMAT attribute Number=R Type=String htsjdk->adam") {
@@ -2563,8 +2575,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("R_STRING"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("R_STRING") === "foo,bar")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("R_STRING"))
+ assert(adamGt.getAnnotation.getAttributes.get("R_STRING") === "foo,bar")
}
test("VCF FORMAT attribute Number=G Type=String htsjdk->adam") {
@@ -2586,8 +2598,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamVc.genotypes.size === 1)
val adamGt = adamVc.genotypes.head
- assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("STRING_G"))
- assert(adamGt.getVariantCallingAnnotations.getAttributes.get("STRING_G") === "foo,bar,baz")
+ assert(adamGt.getAnnotation.getAttributes.containsKey("STRING_G"))
+ assert(adamGt.getAnnotation.getAttributes.get("STRING_G") === "foo,bar,baz")
}
sparkTest("respect end position for symbolic alts") {
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
index a99637400e..c99703ea6a 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
@@ -19,7 +19,7 @@ package org.bdgenomics.adam.models
import htsjdk.samtools.ValidationStringency
import htsjdk.variant.variantcontext.{
- Allele,
+ Allele => HtsjdkAllele,
GenotypeBuilder,
VariantContextBuilder
}
@@ -675,7 +675,7 @@ class ReferenceRegionSuite extends FunSuite {
- ValidationStringency.LENIENT,
- false)
+ ValidationStringency.LENIENT)
val vcb = new VariantContextBuilder()
- .alleles(List(Allele.create("A", true), Allele.create("T")))
+ .alleles(List(HtsjdkAllele.create("A", true), HtsjdkAllele.create("T")))
.start(1L)
.stop(1L)
.chr("1")
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
index 406c952871..ae3b9e9617 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
@@ -209,7 +209,7 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(vc.genotypes.size === 3)
val gt = vc.genotypes.head
- assert(gt.getVariantCallingAnnotations != null)
+ assert(gt.getAnnotation != null)
assert(gt.getReadDepth === 20)
}
@@ -244,7 +244,7 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(vcs.rdd.filter(_.variant.variant.getAnnotation().getAttributes().containsKey("NS"))
.count === 7)
assert(vcs.rdd.flatMap(_.genotypes)
- .filter(_.getVariantCallingAnnotations().getAttributes().containsKey("HQ"))
+ .filter(_.getAnnotation().getAttributes().containsKey("HQ"))
.count === 0)
}
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala
index 0553384ba2..a9cd10ce6e 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextDatasetSuite.scala
@@ -79,7 +79,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
- val g0 = Genotype.newBuilder().setVariant(v0)
+ val g0 = Genotype.newBuilder()
+ .setStart(v0.getStart)
+ .setEnd(v0.getEnd)
+ .setReferenceAllele(v0.getReferenceAllele)
+ .setAlternateAllele(v0.getAlternateAllele)
.setSampleId("NA12878")
- .setAlleles(List(GenotypeAllele.REF, GenotypeAllele.ALT))
+ .setAlleles(List(Allele.REF, Allele.ALT))
.build
VariantContextDataset(sc.parallelize(List(
@@ -97,7 +97,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
v.getAnnotation.getAttributes().containsKey("MQA")
}).count === 1)
assert(vc1.toGenotypes.rdd.filter(gt => {
- gt.getVariantCallingAnnotations().getAttributes().containsKey("MQA")
+ gt.getAnnotation().getAttributes().containsKey("MQA")
}).count === 1)
assert(vc1.rdd.count === 6)
}
@@ -186,7 +186,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
assert(alternateReadDepths.forall(rd => (rd == 0 || rd == null)))
// ADALL should be zeros or null after splitting ref=GAAGAAAGAAAGA alt=GAAGAAAGA,GAAGA,G ADALL 0,0,0
- val netAlleleDepths = filtered.map(_.getVariantCallingAnnotations.getAttributes.get("ADALL")).collect()
+ val netAlleleDepths = filtered.map(_.getAnnotation.getAttributes.get("ADALL")).collect()
assert(netAlleleDepths.forall(adall => (adall == "0,0" || adall == "")))
}
@@ -200,9 +200,9 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
assert(variant.getAnnotation.getAttributes.get("BaseQRankSum") === "-Infinity")
val genotype = vcs.toGenotypes().rdd.filter(_.getStart == 14396L).first()
- assert(genotype.getVariantCallingAnnotations.getRmsMapQ === Float.NegativeInfinity)
+ assert(genotype.getAnnotation.getRmsMappingQuality === Float.NegativeInfinity)
// +Inf FORMAT value --> Infinity after conversion
- assert(genotype.getVariantCallingAnnotations.getAttributes.get("float") === "Infinity")
+ assert(genotype.getAnnotation.getAttributes.get("float") === "Infinity")
}
sparkTest("support VCFs with `nan` instead of `NaN` float values") {
@@ -214,8 +214,8 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
assert(variant.getAnnotation.getAttributes.get("ClippingRankSum") === "NaN")
val genotype = vcs.toGenotypes().rdd.filter(_.getStart == 14396L).first()
- assert(genotype.getVariantCallingAnnotations.getRmsMapQ.isNaN)
- assert(genotype.getVariantCallingAnnotations.getAttributes.get("float") === "NaN")
+ assert(genotype.getAnnotation.getRmsMappingQuality.isNaN)
+ assert(genotype.getAnnotation.getAttributes.get("float") === "NaN")
}
sparkTest("don't lose any variants when piping as VCF") {
@@ -268,7 +268,7 @@ class VariantContextDatasetSuite extends ADAMFunSuite {
// check for freebayes-specific VCF FORMAT keys
val genotype = pipedRdd.toGenotypes.rdd.first
for (freebayesFormatKey <- Seq("RO", "QR", "AO", "QA")) {
- assert(genotype.getVariantCallingAnnotations.getAttributes.containsKey(freebayesFormatKey))
+ assert(genotype.getAnnotation.getAttributes.containsKey(freebayesFormatKey))
}
// retrieve accumulated VCF header lines
diff --git a/pom.xml b/pom.xml
index 121d9acc42..dea305bec6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
2.7.5
7.9.2
1.7.25
- 0.12.0
+ 0.12.1-SNAPSHOT
0.2.13
2.16.1
1.1.1