Skip to content

Commit

Permalink
[ADAM-461] Fix ReferenceRegion and ReferencePosition impl
Browse files Browse the repository at this point in the history
This commit mainly does 3 things:

1. Fixes impl of `ReferenceRegion.width`.

2. Removes `Option`s from `ReferencePosition`.

3. Corrected impl of liftOver.  However, it now has different semantics: in all cases, the list of regions must be provided in genome order.

4. Simplifies structure of case classes.

Fixes #461.
  • Loading branch information
laserson committed Nov 17, 2014
1 parent 087378b commit 4ec66c7
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 140 deletions.
Expand Up @@ -27,15 +27,15 @@ object ReferencePositionWithOrientation {

def apply(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
if (record.getReadMapped) {
Some(new ReferencePositionWithOrientation(ReferencePosition(record), record.getReadNegativeStrand))
Some(new ReferencePositionWithOrientation(ReferencePosition(record).get, record.getReadNegativeStrand))
} else {
None
}
}

def fivePrime(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
if (record.getReadMapped) {
Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record), record.getReadNegativeStrand))
Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record).get, record.getReadNegativeStrand))
} else {
None
}
Expand All @@ -57,7 +57,7 @@ object ReferencePositionWithOrientation {
*/
def liftOverToReference(idx: Long,
mapping: Seq[ReferenceRegionWithOrientation]): ReferencePositionWithOrientation = {
val negativeStrand = mapping.headOption.fold(false)(_.negativeStrand)
val negativeStrand = mapping.head.negativeStrand

@tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
idx: Long): ReferencePositionWithOrientation = {
Expand All @@ -81,7 +81,7 @@ object ReferencePositionWithOrientation {
}

// return
ReferencePositionWithOrientation(Some(ReferencePosition(chr, pos)),
ReferencePositionWithOrientation(ReferencePosition(chr, pos),
negativeStrand)
} else {
// recurse
Expand All @@ -91,11 +91,15 @@ object ReferencePositionWithOrientation {
}

// call out to helper
liftOverHelper(mapping.toIterator, idx)
if (negativeStrand) {
liftOverHelper(mapping.reverseIterator, idx)
} else {
liftOverHelper(mapping.toIterator, idx)
}
}
}

case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
case class ReferencePositionWithOrientation(refPos: ReferencePosition,
negativeStrand: Boolean)
extends Ordered[ReferencePositionWithOrientation] {

Expand All @@ -113,8 +117,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
* is out of the range of the mappings given.
*/
def liftOverFromReference(mapping: Seq[ReferenceRegionWithOrientation]): Long = {
assert(refPos.isDefined, "Cannot lift an undefined position.")
val pos = refPos.get.pos
val pos = refPos.pos

@tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
idx: Long = 0L): Long = {
Expand Down Expand Up @@ -148,16 +151,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
}

override def compare(that: ReferencePositionWithOrientation): Int = {
if (refPos.isEmpty && that.refPos.isEmpty) {
return 0
}
if (refPos.isEmpty) {
return -1
}
if (that.refPos.isEmpty) {
return 1
}
val posCompare = refPos.get.compare(that.refPos.get)
val posCompare = refPos.compare(that.refPos)
if (posCompare != 0) {
posCompare
} else {
Expand Down Expand Up @@ -282,25 +276,13 @@ class ReferencePositionWithOrientationSerializer extends Serializer[ReferencePos

def write(kryo: Kryo, output: Output, obj: ReferencePositionWithOrientation) = {
output.writeBoolean(obj.negativeStrand)
obj.refPos match {
case None =>
output.writeBoolean(false)
case Some(refPos) =>
output.writeBoolean(true)
referencePositionSerializer.write(kryo, output, refPos)
}
referencePositionSerializer.write(kryo, output, obj.refPos)
}

def read(kryo: Kryo, input: Input, klazz: Class[ReferencePositionWithOrientation]): ReferencePositionWithOrientation = {
val negativeStrand = input.readBoolean()
val hasPos = input.readBoolean()
hasPos match {
case false =>
new ReferencePositionWithOrientation(None, negativeStrand)
case true =>
val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition])
new ReferencePositionWithOrientation(Some(referencePosition), negativeStrand)
}
val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition])
new ReferencePositionWithOrientation(referencePosition, negativeStrand)
}
}

Expand Down
Expand Up @@ -26,83 +26,63 @@ import scala.math.{ max, min }
object ReferenceRegionWithOrientation {

/**
* Builds an oriented reference region from a given reference region
* and an orientation parameter.
* Builds an oriented reference region from the individual parameters
*
* @param region Unstranded reference region.
* @param negativeStrand True if this region should be placed on the negative
* strand, else it will be on the positive strand.
* @return Returns an oriented reference region.
* @param referenceName The name of the sequence (chromosome) in the reference genome
* @param start The 0-based residue-coordinate for the start of the region
* @param end The 0-based residue-coordinate for the first residue <i>after</i> the start
* which is <i>not</i> in the region -- i.e. [start, end) define a 0-based
* half-open interval.
* @param negativeStrand Boolean flag as to whether the region is on the forward or
* reverse strand of the reference region.
*/
def apply(region: ReferenceRegion,
def apply(referenceName: String,
start: Long,
end: Long,
negativeStrand: Boolean): ReferenceRegionWithOrientation = {
ReferenceRegionWithOrientation(region.referenceName,
region.start,
region.end,
negativeStrand)
ReferenceRegionWithOrientation(ReferenceRegion(referenceName, start, end), negativeStrand)
}
}

/**
* Represents a contiguous region of the reference genome with strand information.
*
* @param referenceName The name of the sequence (chromosome) in the reference genome
* @param start The 0-based residue-coordinate for the start of the region
* @param end The 0-based residue-coordinate for the first residue <i>after</i> the start
* which is <i>not</i> in the region -- i.e. [start, end) define a 0-based
* half-open interval.
* @param region The genomic locus as a ReferenceRegion
* @param negativeStrand Boolean flag as to whether the region is on the forward or
* reverse strand of the reference region.
*/
case class ReferenceRegionWithOrientation(referenceName: String,
start: Long,
end: Long,
case class ReferenceRegionWithOrientation(region: ReferenceRegion,
negativeStrand: Boolean) extends Ordered[ReferenceRegionWithOrientation] {

assert(end >= 0)
assert(start >= 0)

def width: Long = end - start - 1 // need minus 1 for open end
def width: Long = region.width

def contains(other: ReferencePositionWithOrientation): Boolean = {
other.refPos.fold(false)(rp => referenceName == rp.referenceName &&
negativeStrand == other.negativeStrand &&
start <= rp.pos && end > rp.pos)
negativeStrand == other.negativeStrand && region.contains(other.refPos)
}

def contains(other: ReferenceRegionWithOrientation): Boolean = {
referenceName == other.referenceName && negativeStrand == other.negativeStrand &&
start <= other.start && end >= other.end
region.contains(other.region) && negativeStrand == other.negativeStrand
}

def overlaps(other: ReferenceRegionWithOrientation): Boolean = {
referenceName == other.referenceName && negativeStrand == other.negativeStrand &&
((start >= other.start && start <= other.end) || (end >= other.start && end <= other.end))
region.overlaps(other.region) && negativeStrand == other.negativeStrand
}

def compare(that: ReferenceRegionWithOrientation): Int =
if (referenceName != that.referenceName) {
referenceName.compareTo(that.referenceName)
} else if (negativeStrand != that.negativeStrand) {
negativeStrand.compareTo(that.negativeStrand)
def compare(that: ReferenceRegionWithOrientation): Int = {
val regionCompare = region.compare(that.region)
if (regionCompare != 0) {
regionCompare
} else {
if (negativeStrand) {
// invert comparison if on negative strand
if (start != that.start)
-start.compareTo(that.start)
else
-end.compareTo(that.end)
} else {
if (start != that.start)
start.compareTo(that.start)
else
end.compareTo(that.end)
}
negativeStrand.compare(that.negativeStrand)
}

def toReferenceRegion: ReferenceRegion = {
ReferenceRegion(referenceName, start, end)
}

def toReferenceRegion: ReferenceRegion = region

def referenceName: String = region.referenceName

def start: Long = region.start

def end: Long = region.end
}

object ReferenceRegion {
Expand Down Expand Up @@ -167,9 +147,9 @@ object ReferenceRegion {
case class ReferenceRegion(referenceName: String, start: Long, end: Long) extends Ordered[ReferenceRegion] with Interval {

assert(start >= 0)
assert(end >= start)
assert(end > start)

def width: Long = end - start - 1
def width: Long = end - start

/**
* Merges two reference regions that are contiguous.
Expand Down
Expand Up @@ -30,7 +30,7 @@ class IndelTableSuite extends SparkFunSuite {
}

test("check for indels in a contig that doesn't exist") {
assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 0L)).length === 0)
assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 1L)).length === 0)
}

test("check for indels in a region without known indels") {
Expand Down
Expand Up @@ -145,50 +145,43 @@ class ReferencePositionSuite extends FunSuite {
}

test("liftOverToReference works with a multi-block alignment on the forward strand") {
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = false),
ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = false),
ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = false))
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = false),
ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = false),
ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = false))

val p0 = ReferencePositionWithOrientation.liftOverToReference(0, exons)
assert(p0.refPos.isDefined)
assert(p0.refPos.get.referenceName === "1")
assert(p0.refPos.get.pos === 100)
assert(p0.refPos.referenceName === "1")
assert(p0.refPos.pos === 100)

val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
assert(p1.refPos.isDefined)
assert(p1.refPos.get.referenceName === "1")
assert(p1.refPos.get.pos === 150)
assert(p1.refPos.referenceName === "1")
assert(p1.refPos.pos === 150)

val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
assert(p2.refPos.isDefined)
assert(p2.refPos.get.referenceName === "1")
assert(p2.refPos.get.pos === 350)
assert(p2.refPos.referenceName === "1")
assert(p2.refPos.pos === 350)

val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
assert(p3.refPos.isDefined)
assert(p3.refPos.get.referenceName === "1")
assert(p3.refPos.get.pos === 550)
assert(p3.refPos.referenceName === "1")
assert(p3.refPos.pos === 550)
}

test("liftOverToReference works with a multi-block alignment on the reverse strand") {
val exons = Seq(ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = true),
ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = true),
ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = true))
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = true),
ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = true),
ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = true))

val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
assert(p1.refPos.isDefined)
assert(p1.refPos.get.referenceName === "1")
assert(p1.refPos.get.pos === 550)
assert(p1.refPos.referenceName === "1")
assert(p1.refPos.pos === 549)

val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
assert(p2.refPos.isDefined)
assert(p2.refPos.get.referenceName === "1")
assert(p2.refPos.get.pos === 350)
assert(p2.refPos.referenceName === "1")
assert(p2.refPos.pos === 349)

val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
assert(p3.refPos.isDefined)
assert(p3.refPos.get.referenceName === "1")
assert(p3.refPos.get.pos === 150)
assert(p3.refPos.referenceName === "1")
assert(p3.refPos.pos === 149)
}

test("lift over between two transcripts on the forward strand") {
Expand All @@ -200,9 +193,8 @@ class ReferencePositionSuite extends FunSuite {
// check forward strand
val pos = ReferencePositionWithOrientation.liftOverToReference(60, t1)

assert(pos.refPos.isDefined)
assert(pos.refPos.get.referenceName === "chr0")
assert(pos.refPos.get.pos === 60L)
assert(pos.refPos.referenceName === "chr0")
assert(pos.refPos.pos === 60L)
assert(!pos.negativeStrand)

val idx = pos.liftOverFromReference(t2)
Expand All @@ -217,16 +209,15 @@ class ReferencePositionSuite extends FunSuite {
ReferenceRegionWithOrientation("chr0", 50L, 101L, negativeStrand = true))

// check reverse strand
val idx = ReferencePositionWithOrientation(Some(ReferencePosition("chr0", 190L)), negativeStrand = true)
val idx = ReferencePositionWithOrientation(ReferencePosition("chr0", 190L), negativeStrand = true)
.liftOverFromReference(t2)

assert(idx === 11L)

val pos = ReferencePositionWithOrientation.liftOverToReference(idx, t1)

assert(pos.refPos.isDefined)
assert(pos.refPos.get.referenceName === "chr0")
assert(pos.refPos.get.pos === 189L)
assert(pos.refPos.referenceName === "chr0")
assert(pos.refPos.pos === 189L)
assert(pos.negativeStrand)
}
}

0 comments on commit 4ec66c7

Please sign in to comment.