From 4d2191e4fe02625a6bf95824b0e3d643973e3072 Mon Sep 17 00:00:00 2001 From: Frank Austin Nothaft Date: Thu, 27 Oct 2016 08:10:45 -0700 Subject: [PATCH] [ADAM-1227] Fix NPE when computing coverage on RDDs with unmapped reads. Resolves #1227. Filters out unmapped reads before counting coverage. --- .../org/bdgenomics/adam/cli/Reads2Coverage.scala | 2 +- .../adam/rdd/read/AlignmentRecordRDD.scala | 12 +++++++++++- .../adam/rdd/read/AlignmentRecordRDDSuite.scala | 11 +++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Reads2Coverage.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Reads2Coverage.scala index d1d1f2ca2c..7ff5771286 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Reads2Coverage.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Reads2Coverage.scala @@ -64,7 +64,7 @@ class Reads2Coverage(protected val args: Reads2CoverageArgs) extends BDGSparkCom def run(sc: SparkContext): Unit = { - val proj = Projection(contigName, start, end, cigar) + val proj = Projection(readMapped, contigName, start, end, cigar) // If saving strand specific coverage, require that only one direction is specified require(!(args.onlyNegativeStrands && args.onlyPositiveStrands), diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala index 1fc514c4dd..cb819b7954 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala @@ -98,7 +98,17 @@ sealed trait AlignmentRecordRDD extends AvroReadGroupGenomicRDD[AlignmentRecord, def toCoverage(collapse: Boolean = true): CoverageRDD = { val covCounts = rdd.rdd - .flatMap(r => { + .filter(r => { + val readMapped = r.getReadMapped + + // validate alignment fields + if (readMapped) { + require(r.getStart != null && r.getEnd != null && r.getContigName != null, + "Read was mapped but was missing alignment start/end/contig (%s).".format(r)) + } + + readMapped + }).flatMap(r => { val t: List[Long] = List.range(r.getStart, r.getEnd) t.map(n => (ReferenceRegion(r.getContigName, n, n + 1), 1)) }).reduceByKey(_ + _) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala index c15ac5d559..1e94be6fb5 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala @@ -85,6 +85,17 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite { assert(expectedSortedReads === mapped) } + sparkTest("coverage does not fail on unmapped reads") { + val inputPath = testFile("unmapped.sam") + val reads: AlignmentRecordRDD = sc.loadAlignments(inputPath) + .transform(rdd => { + rdd.filter(!_.getReadMapped) + }) + + val coverage = reads.toCoverage() + assert(coverage.rdd.count === 0) + } + sparkTest("computes coverage") { val inputPath = testFile("artificial.sam") val reads: AlignmentRecordRDD = sc.loadAlignments(inputPath)