Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc. INDEL realigner bugfixes #1382

Merged
merged 3 commits into from
Feb 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,13 @@ object ConsensusGenerator {
* Provides a generator to extract consensuses from a known set of INDELs.
*
* @param rdd The previously called INDEL variants.
* @param flankSize The number of bases to flank each known INDEL by. Default
* is 0 bases.
* @return A consensus generator that looks at previously called INDELs.
*/
def fromKnownIndels(rdd: VariantRDD): ConsensusGenerator = {
new ConsensusGeneratorFromKnowns(rdd.rdd)
def fromKnownIndels(rdd: VariantRDD,
flankSize: Int = 0): ConsensusGenerator = {
new ConsensusGeneratorFromKnowns(rdd.rdd, flankSize)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.read.realignment.IndelRealignmentTarget
import org.bdgenomics.adam.rich.RichAlignmentRecord
import org.bdgenomics.formats.avro.Variant
import scala.math.max
import scala.transient

/**
Expand All @@ -36,7 +37,8 @@ import scala.transient
* @param file Path to file containing variants.
* @param sc Spark context to use.
*/
private[adam] class ConsensusGeneratorFromKnowns(rdd: RDD[Variant]) extends ConsensusGenerator {
private[adam] class ConsensusGeneratorFromKnowns(rdd: RDD[Variant],
val flankSize: Int) extends ConsensusGenerator {

private val indelTable = rdd.context.broadcast(IndelTable(rdd))

Expand All @@ -50,7 +52,10 @@ private[adam] class ConsensusGeneratorFromKnowns(rdd: RDD[Variant]) extends Cons

Some(rdd.filter(v => v.getReferenceAllele.length != v.getAlternateAllele.length)
.map(v => ReferenceRegion(v))
.map(r => new IndelRealignmentTarget(Some(r), r)))
.map(r => {
new IndelRealignmentTarget(Some(r),
ReferenceRegion(r.referenceName, max(0L, r.start - flankSize), r.end + flankSize))
}))
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ private[read] class RealignIndels(

// loop over all consensuses and evaluate
consensus.foreach(c => {

// generate a reference sequence from the consensus
val consensusSequence = c.insertIntoReference(reference, refRegion)

Expand All @@ -294,7 +295,7 @@ private[read] class RealignIndels(

// if the read's mismatch quality improves over the original alignment, save
// its alignment in the consensus sequence, else store -1
if (qual < originalQual) {
if (qual <= originalQual) {
(r, (qual, pos))
} else {
(r, (originalQual, -1))
Expand Down Expand Up @@ -359,23 +360,41 @@ private[read] class RealignIndels(
// compensate the end
builder.setEnd(refStart + remapping + r.getSequence.length + endPenalty)

val startLength = bestConsensus.index.start - (refStart + remapping)
val adjustedIdElement = if (endLength < 0) {
new CigarElement(idElement.getLength + endLength.toInt, idElement.getOperator)
} else if (startLength < 0) {
new CigarElement(idElement.getLength + startLength.toInt, idElement.getOperator)
} else {
idElement
}

val cigarElements = List[CigarElement](
new CigarElement((bestConsensus.index.start - (refStart + remapping)).toInt, CigarOperator.M),
idElement,
new CigarElement(startLength.toInt, CigarOperator.M),
adjustedIdElement,
new CigarElement(endLength.toInt, CigarOperator.M)
)
).filter(_.getLength > 0)

new Cigar(cigarElements)
}

// update mdtag and cigar
builder.setMismatchingPositions(MdTag.moveAlignment(r, newCigar, reference.drop(remapping), refStart + remapping).toString())
builder.setOldPosition(r.getStart())
builder.setOldCigar(r.getCigar())
builder.setCigar(newCigar.toString)
new RichAlignmentRecord(builder.build())
} else
try {
builder.setMismatchingPositions(MdTag.moveAlignment(r, newCigar, reference.drop(remapping), refStart + remapping).toString())
builder.setOldPosition(r.getStart())
builder.setOldCigar(r.getCigar())
builder.setCigar(newCigar.toString)
new RichAlignmentRecord(builder.build())
} catch {
case t: Throwable => {
log.warn("Caught %s when trying to move alignment to %s for %s.".format(
t, newCigar, r))
new RichAlignmentRecord(r)
}
}
} else {
new RichAlignmentRecord(builder.build())
}
})

log.info("On " + refRegion + ", realigned " + realignedReadCount + " reads to " +
Expand Down