Skip to content

Commit

Permalink
[ADAM-1469] Don't filter on whether reads have mismatches during realignment
Browse files Browse the repository at this point in the history

Resolves #1469. Provides approximately a 1% bump in both INDEL calling precision
and recall.
  • Loading branch information
fnothaft authored and heuermh committed Apr 3, 2017
1 parent 15aa5cd commit 8df3aa2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,6 @@ private[read] class RealignIndels(
val (targetIdx, _) = target.get
val startTime = System.nanoTime()
// bootstrap realigned read set with the reads that need to be realigned
val (realignedReads, readsToRealign) = reads.partition(r => r.mdTag.exists(!_.hasMismatches))

// get reference from reads
val refStart = reads.map(_.getStart).min
Expand All @@ -299,7 +298,7 @@ private[read] class RealignIndels(

// preprocess reads and get consensus
val readsToClean = consensusModel.preprocessReadsForRealignment(
readsToRealign,
reads,
reference,
refRegion
).zipWithIndex
Expand All @@ -318,7 +317,7 @@ private[read] class RealignIndels(
observedConsensus
}

val finalReads = if (readsToClean.size > 0 && consensus.size > 0) {
val finalReads = if (reads.size > 0 && consensus.size > 0) {

// do not check realigned reads - they must match
val mismatchQualities = ComputingOriginalScores.time {
Expand Down Expand Up @@ -443,7 +442,7 @@ private[read] class RealignIndels(
log.info("On " + refRegion + ", realigned " + realignedReadCount + " reads to " +
bestConsensus + " due to LOD improvement of " + lodImprovement)

cleanedReads ++ realignedReads
cleanedReads
}
} else {
log.info("On " + refRegion + ", skipping realignment due to insufficient LOD improvement (" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ class RealignIndelsSuite extends ADAMFunSuite {
.collect()

val movedReads = result.filter(pair => pair._1 != pair._2)
assert(movedReads.size === 22)
assert(movedReads.size === 41)
val read = movedReads.map(_._2)
.filter(_.getReadName === "H06HDADXX130110:1:1114:19044:27806")
.head
Expand Down Expand Up @@ -552,7 +552,7 @@ class RealignIndelsSuite extends ADAMFunSuite {
val realignedReads = rdd.realignIndels(lodThreshold = 0.0)
.rdd
.collect
assert(realignedReads.count(_.getMapq >= 50) === 6)
assert(realignedReads.count(_.getMapq >= 50) === 7)
val realignedExtRead = realignedReads.filter(_.getMapq == 50).head
assert(realignedExtRead.getStart === 8L)
assert(realignedExtRead.getEnd === 14L)
Expand Down

0 comments on commit 8df3aa2

Please sign in to comment.