From 5328a8fb773176ef384e35ba4e86f39f0d9d6a41 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Mon, 26 Mar 2018 20:59:46 -0500 Subject: [PATCH] Support validation stringency in out formatters. --- .../adam/rdd/feature/BEDOutFormatter.scala | 7 +++-- .../adam/rdd/feature/GFF3OutFormatter.scala | 7 +++-- .../adam/rdd/feature/GTFOutFormatter.scala | 7 +++-- .../adam/rdd/variant/VCFOutFormatter.scala | 31 ++++++++++++++++--- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/BEDOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/BEDOutFormatter.scala index 763e224c77..39e7a32544 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/BEDOutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/BEDOutFormatter.scala @@ -30,10 +30,13 @@ import scala.collection.mutable.ListBuffer /** * OutFormatter that reads streaming BED format. + * + * @param stringency Validation stringency. Defaults to ValidationStringency.STRICT. */ -case class BEDOutFormatter() extends OutFormatter[Feature] { +case class BEDOutFormatter( + val stringency: ValidationStringency = ValidationStringency.STRICT) extends OutFormatter[Feature] { + val bedParser = new BEDParser - val stringency = ValidationStringency.STRICT /** * Reads features from an input stream in BED format. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GFF3OutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GFF3OutFormatter.scala index 654a5e825f..36730117e4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GFF3OutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GFF3OutFormatter.scala @@ -30,10 +30,13 @@ import scala.collection.mutable.ListBuffer /** * OutFormatter that reads streaming GFF3 format. + * + * @param stringency Validation stringency. Defaults to ValidationStringency.STRICT. 
*/ -case class GFF3OutFormatter() extends OutFormatter[Feature] { +case class GFF3OutFormatter( + val stringency: ValidationStringency = ValidationStringency.STRICT) extends OutFormatter[Feature] { + val gff3Parser = new GFF3Parser - val stringency = ValidationStringency.STRICT /** * Reads features from an input stream in GFF3 format. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GTFOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GTFOutFormatter.scala index fd504879d6..4031e9b529 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GTFOutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/GTFOutFormatter.scala @@ -30,10 +30,13 @@ import scala.collection.mutable.ListBuffer /** * OutFormatter that reads streaming GTF format. + * + * @param stringency Validation stringency. Defaults to ValidationStringency.STRICT. */ -case class GTFOutFormatter() extends OutFormatter[Feature] { +case class GTFOutFormatter( + val stringency: ValidationStringency = ValidationStringency.STRICT) extends OutFormatter[Feature] { + val gtfParser = new GTFParser - val stringency = ValidationStringency.STRICT /** * Reads features from an input stream in GTF format. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala index 0ed7d32fa5..deb7ace160 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variant/VCFOutFormatter.scala @@ -42,20 +42,23 @@ import scala.collection.mutable.ListBuffer * OutFormatter that reads streaming VCF. * * @param conf Hadoop configuration. + * @param stringency Validation stringency. * @param optHeaderLines Optional accumulator for VCF header lines. 
*/ case class VCFOutFormatter( @transient conf: Configuration, + val stringency: ValidationStringency, val optHeaderLines: Option[CollectionAccumulator[VCFHeaderLine]]) extends OutFormatter[VariantContext] with Logging { private val nestAnn = VariantContextConverter.getNestAnnotationInGenotypesProperty(conf) /** - * OutFormatter that reads streaming VCF. Java-friendly no-arg constructor. + * OutFormatter that reads streaming VCF. Defaults to ValidationStringency.LENIENT. + * Java-friendly no-arg constructor. * * @param conf Hadoop configuration. */ - def this(conf: Configuration) = this(conf, None) + def this(conf: Configuration) = this(conf, ValidationStringency.LENIENT, None) /** * OutFormatter that reads streaming VCF. Java-friendly constructor. @@ -63,7 +66,25 @@ case class VCFOutFormatter( * @param conf Hadoop configuration. - * @param acc Accumulator for VCF header lines. + * @param stringency Validation stringency. */ - def this(conf: Configuration, acc: CollectionAccumulator[VCFHeaderLine]) = this(conf, Some(acc)) + def this(conf: Configuration, stringency: ValidationStringency) = this(conf, stringency, None) + + /** + * OutFormatter that reads streaming VCF. Defaults to ValidationStringency.LENIENT. + * Java-friendly constructor. + * + * @param conf Hadoop configuration. + * @param acc Accumulator for VCF header lines. + */ + def this(conf: Configuration, acc: CollectionAccumulator[VCFHeaderLine]) = this(conf, ValidationStringency.LENIENT, Some(acc)) + + /** + * OutFormatter that reads streaming VCF. Defaults to ValidationStringency.LENIENT. + * Java-friendly constructor. + * + * @param conf Hadoop configuration. + * @param optHeaderLines Optional accumulator for VCF header lines. + */ + def this(conf: Configuration, optHeaderLines: Option[CollectionAccumulator[VCFHeaderLine]]) = this(conf, ValidationStringency.LENIENT, optHeaderLines) /** * Reads VariantContexts from an input stream. Autodetects VCF format. 
@@ -83,14 +104,14 @@ case class VCFOutFormatter( val header = codec.readActualHeader(lri).asInstanceOf[VCFHeader] // merge header lines with our supported header lines - val lines = cleanAndMixInSupportedLines(headerLines(header), ValidationStringency.LENIENT, log) + val lines = cleanAndMixInSupportedLines(headerLines(header), stringency, log) // accumulate header lines if desired optHeaderLines.map(accumulator => lines.foreach(line => accumulator.add(line))) // make converter val converter = new VariantContextConverter(lines, - ValidationStringency.LENIENT, + stringency, nestAnn) @tailrec def convertIterator(iter: AsciiLineReaderIterator,