New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BQSR updates #213

Merged
merged 7 commits into from Apr 22, 2014
Jump to file or symbol
Failed to load files and symbols.
+21 −20
Diff settings

Always

Just for now

Viewing a subset of changes. View all

style and naming fixes

  • Loading branch information...
jey committed Apr 8, 2014
commit 0e109e9ca5cf99e12d19fc2742afb10923e4dbb4
@@ -71,7 +71,7 @@ class BaseQualityRecalibration(
!knownSnps.value.isMasked(residue)
def observe(read: DecadentRead): Seq[(CovariateKey, Residue)] =
covariates(read).zip(read.sequence).
covariates(read).zip(read.residues).
filter { case (key, residue) => shouldIncludeResidue(residue) }
input.filter(shouldIncludeRead).flatMap(observe)
@@ -106,10 +106,10 @@ class BaseQualityRecalibration(
(if (read.record.getSecondOfPair) "2" else "")
val readLengths =
input.map(read => (readId(read), read.sequence.length)).collectAsMap
input.map(read => (readId(read), read.residues.length)).collectAsMap
val visited = dataset.
map { case (key, residue) => (readId(residue.read), Seq(residue.position)) }.
map { case (key, residue) => (readId(residue.read), Seq(residue.offset)) }.
reduceByKeyLocally((left, right) => left ++ right)
val outf = new java.io.File(filename)
@@ -96,12 +96,12 @@ class CovariateSpace(val extras: IndexedSeq[Covariate]) extends Serializable {
val extraVals = extras.map(cov => {
val result = cov(read)
// Each covariate must return a value per Residue
assert(result.size == read.sequence.size)
assert(result.size == read.residues.size)
result
})
// Construct the CovariateKeys
read.sequence.zipWithIndex.map {
read.residues.zipWithIndex.map {
case (residue, residueIdx) =>
val residueExtras = extraVals.map(_(residueIdx))
new CovariateKey(read.readGroup, residue.quality, residueExtras)
@@ -22,7 +22,7 @@ import org.bdgenomics.adam.rich.DecadentRead._
// TODO: should inherit from something like AbstractCovariate[(DNABase, DNABase)]
class DinucCovariate extends AbstractCovariate[(Char, Char)] {
def compute(read: DecadentRead): Seq[Option[(Char, Char)]] = {
val sequence = read.sequence.map(_.base)
val sequence = read.residues.map(_.base)
if (read.isNegativeRead) {
/* Use the reverse-complement of the sequence to get back the original
* sequence as it was read by the sequencing machine. The sequencer
@@ -37,7 +37,7 @@ class Recalibrator(val table: RecalibrationTable, val minAcceptableQuality: Qual
}
def computeQual(read: DecadentRead): Seq[QualityScore] = {
val origQuals = read.sequence.map(_.quality)
val origQuals = read.residues.map(_.quality)
val newQuals = table(read)
origQuals.zip(newQuals).map {
case (origQ, newQ) =>
@@ -72,19 +72,20 @@ class DecadentRead(val record: RichADAMRecord) extends Logging {
require(record.referencePositions.length == record.getSequence.length)
/**
* A "residue" is an individual monomer of a polymeric chain, such as DNA.
* In biochemistry and molecular biology, a "residue" refers to a specific
* monomer within a polymeric chain, such as DNA.
*/
class Residue private[DecadentRead] (val position: Int) {
class Residue private[DecadentRead] (val offset: Int) {
def read = DecadentRead.this
/**
* Nucleotide at this position.
* Nucleotide at this offset.
*
* TODO: Return values of meaningful type, e.g. `DNABase' or `Deoxyribonucleotide'.
* TODO: Return values of meaningful type, e.g. `DNABase'.
*/
def base: Char = read.baseSequence(position)
def base: Char = read.baseSequence(offset)
def quality = QualityScore(record.qualityScores(position))
def quality = QualityScore(record.qualityScores(offset))
def isRegularBase: Boolean = base match {
case 'A' => true
@@ -96,17 +97,17 @@ class DecadentRead(val record: RichADAMRecord) extends Logging {
}
def isMismatch(includeInsertions: Boolean = true): Boolean =
assumingAligned(record.isMismatchAtReadOffset(position).getOrElse(includeInsertions))
assumingAligned(record.isMismatchAtReadOffset(offset).getOrElse(includeInsertions))
def isSNP: Boolean = isMismatch(false)
def isInsertion: Boolean =
assumingAligned(record.isMismatchAtReadOffset(position).isEmpty)
assumingAligned(record.isMismatchAtReadOffset(offset).isEmpty)
def referenceLocationOption: Option[ReferenceLocation] =
assumingAligned(
record.readOffsetToReferencePosition(position).
map(refOffset => new ReferenceLocation(record.getReferenceName.toString, refOffset)))
def referenceLocationOption: Option[ReferenceLocation] = assumingAligned {
record.readOffsetToReferencePosition(offset).
map(refOffset => new ReferenceLocation(record.getReferenceName.toString, refOffset))
}
def referenceLocation: ReferenceLocation =
referenceLocationOption.getOrElse(
@@ -117,7 +118,7 @@ class DecadentRead(val record: RichADAMRecord) extends Logging {
private lazy val baseSequence: String = record.getSequence.toString
lazy val sequence: IndexedSeq[Residue] = Range(0, baseSequence.length).map(new Residue(_))
lazy val residues: IndexedSeq[Residue] = Range(0, baseSequence.length).map(new Residue(_))
def name: String = record.getReadName
ProTip! Use n and p to navigate between commits in a pull request.