Skip to content

Commit

Permalink
[ADAM-1939] Allow validation stringency to waive off FLAG arrays.
Browse files Browse the repository at this point in the history
Resolves #1939.
  • Loading branch information
Frank Austin Nothaft authored and heuermh committed Mar 7, 2018
1 parent 7d07c71 commit bc8c6f4
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class VariantContextConverter(
import VariantContextConverter._

// format fns gatk --> bdg, extract fns bdg --> gatk
private val variantFormatFn = makeVariantFormatFn(headerLines)
private val variantFormatFn = makeVariantFormatFn(headerLines, stringency)
private val variantExtractFn = makeVariantExtractFn(headerLines)
private val genotypeFormatFn = makeGenotypeFormatFn(headerLines)
private val genotypeExtractFn = makeGenotypeExtractFn(headerLines)
Expand Down Expand Up @@ -1636,7 +1636,8 @@ class VariantContextConverter(
}

private def makeVariantFormatFn(
headerLines: Seq[VCFHeaderLine]): (HtsjdkVariantContext, Option[String], Int, Boolean) => (Variant, Variant) = {
headerLines: Seq[VCFHeaderLine],
stringency: ValidationStringency = ValidationStringency.STRICT): (HtsjdkVariantContext, Option[String], Int, Boolean) => (Variant, Variant) = {

val attributeFns: Iterable[(HtsjdkVariantContext, Int, Array[Int]) => Option[(String, String)]] = headerLines
.flatMap(hl => hl match {
Expand All @@ -1648,7 +1649,20 @@ class VariantContextConverter(
if (DefaultHeaderLines.infoHeaderLines
.find(_.getID == key)
.isEmpty) {
lineToVariantContextExtractor(il)
// Build the extractor for this INFO header line, applying the configured
// validation stringency if construction fails:
//   STRICT  -> rethrow the failure
//   LENIENT -> log a warning, then drop the field (None)
//   other   -> drop the field (None) without logging
try {
  lineToVariantContextExtractor(il)
} catch {
  // Only recover from non-fatal failures. Fatal JVM conditions
  // (OutOfMemoryError, InterruptedException, LinkageError, control
  // throwables, ...) must propagate regardless of stringency instead of
  // being silently converted into a dropped field.
  case scala.util.control.NonFatal(t) => {
    if (stringency == ValidationStringency.STRICT) {
      throw t
    } else {
      if (stringency == ValidationStringency.LENIENT) {
        log.warn("Saw invalid info field %s. Ignoring...".format(t))
      }
      None
    }
  }
}
} else {
None
}
Expand Down
5 changes: 5 additions & 0 deletions adam-core/src/test/resources/invalid/small.INFO_flag.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
##fileformat=VCFv4.1
##INFO=<ID=ABADFLAG,Number=.,Type=Flag,Description="A no good, very bad flag.">
##contig=<ID=1,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.800;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(last.getName === "gn|BPY2C;ccds|CCDS44030;ens|ENSG00000185894;vega|OTTHUMG00000045199")
}

// Loads the invalid/small.INFO_flag.vcf fixture with LENIENT validation
// stringency and checks that exactly one record is produced.
sparkTest("can read a small .vcf file with a validation issue") {
  val invalidVcfPath = testFile("invalid/small.INFO_flag.vcf")
  val variantContexts = sc.loadVcf(
    invalidVcfPath,
    stringency = ValidationStringency.LENIENT)

  // the load is expected to succeed and yield a single variant context
  assert(variantContexts.rdd.count === 1)
}

sparkTest("can read a small .vcf file") {
val path = testFile("small.vcf")

Expand Down

0 comments on commit bc8c6f4

Please sign in to comment.