Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ADAM-461] Fix ReferenceRegion and ReferencePosition impl #469

Merged
merged 1 commit into from Nov 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -27,15 +27,15 @@ object ReferencePositionWithOrientation {

def apply(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
if (record.getReadMapped) {
Some(new ReferencePositionWithOrientation(ReferencePosition(record), record.getReadNegativeStrand))
Some(new ReferencePositionWithOrientation(ReferencePosition(record).get, record.getReadNegativeStrand))
} else {
None
}
}

def fivePrime(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
if (record.getReadMapped) {
Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record), record.getReadNegativeStrand))
Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record).get, record.getReadNegativeStrand))
} else {
None
}
Expand All @@ -57,7 +57,7 @@ object ReferencePositionWithOrientation {
*/
def liftOverToReference(idx: Long,
mapping: Seq[ReferenceRegionWithOrientation]): ReferencePositionWithOrientation = {
val negativeStrand = mapping.headOption.fold(false)(_.negativeStrand)
val negativeStrand = mapping.head.negativeStrand

@tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
idx: Long): ReferencePositionWithOrientation = {
Expand All @@ -81,7 +81,7 @@ object ReferencePositionWithOrientation {
}

// return
ReferencePositionWithOrientation(Some(ReferencePosition(chr, pos)),
ReferencePositionWithOrientation(ReferencePosition(chr, pos),
negativeStrand)
} else {
// recurse
Expand All @@ -91,11 +91,15 @@ object ReferencePositionWithOrientation {
}

// call out to helper
liftOverHelper(mapping.toIterator, idx)
if (negativeStrand) {
liftOverHelper(mapping.reverseIterator, idx)
} else {
liftOverHelper(mapping.toIterator, idx)
}
}
}

case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
case class ReferencePositionWithOrientation(refPos: ReferencePosition,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1; this shouldn't have been an option.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 (did I write this code originally? if so, it was me being even more of a Scala-n00b than I am now.)

negativeStrand: Boolean)
extends Ordered[ReferencePositionWithOrientation] {

Expand All @@ -113,8 +117,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
* is out of the range of the mappings given.
*/
def liftOverFromReference(mapping: Seq[ReferenceRegionWithOrientation]): Long = {
assert(refPos.isDefined, "Cannot lift an undefined position.")
val pos = refPos.get.pos
val pos = refPos.pos

@tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
idx: Long = 0L): Long = {
Expand Down Expand Up @@ -148,16 +151,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition],
}

override def compare(that: ReferencePositionWithOrientation): Int = {
if (refPos.isEmpty && that.refPos.isEmpty) {
return 0
}
if (refPos.isEmpty) {
return -1
}
if (that.refPos.isEmpty) {
return 1
}
val posCompare = refPos.get.compare(that.refPos.get)
val posCompare = refPos.compare(that.refPos)
if (posCompare != 0) {
posCompare
} else {
Expand Down Expand Up @@ -282,25 +276,13 @@ class ReferencePositionWithOrientationSerializer extends Serializer[ReferencePos

def write(kryo: Kryo, output: Output, obj: ReferencePositionWithOrientation) = {
output.writeBoolean(obj.negativeStrand)
obj.refPos match {
case None =>
output.writeBoolean(false)
case Some(refPos) =>
output.writeBoolean(true)
referencePositionSerializer.write(kryo, output, refPos)
}
referencePositionSerializer.write(kryo, output, obj.refPos)
}

def read(kryo: Kryo, input: Input, klazz: Class[ReferencePositionWithOrientation]): ReferencePositionWithOrientation = {
val negativeStrand = input.readBoolean()
val hasPos = input.readBoolean()
hasPos match {
case false =>
new ReferencePositionWithOrientation(None, negativeStrand)
case true =>
val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition])
new ReferencePositionWithOrientation(Some(referencePosition), negativeStrand)
}
val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition])
new ReferencePositionWithOrientation(referencePosition, negativeStrand)
}
}

Expand Down
Expand Up @@ -26,83 +26,63 @@ import scala.math.{ max, min }
object ReferenceRegionWithOrientation {

/**
* Builds an oriented reference region from a given reference region
* and an orientation parameter.
* Builds an oriented reference region from the individual parameters
*
* @param region Unstranded reference region.
* @param negativeStrand True if this region should be placed on the negative
* strand, else it will be on the positive strand.
* @return Returns an oriented reference region.
* @param referenceName The name of the sequence (chromosome) in the reference genome
* @param start The 0-based residue-coordinate for the start of the region
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@massie and I were talking about this in IRC, we really need to explain somewhere not in code what our coordinate conventions are. I mean, I like the comments, but we also need non-code versions of the same comments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strongly agree with that.

Note, btw, that in my mental model, region.start should always be <
region.end.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed -- the convention of "end < start" to designate negative-strand regions is messed up

* @param end The 0-based residue-coordinate for the first residue <i>after</i> the start
* which is <i>not</i> in the region -- i.e. [start, end) define a 0-based
* half-open interval.
* @param negativeStrand Boolean flag as to whether the region is on the forward or
* reverse strand of the reference region.
*/
def apply(region: ReferenceRegion,
def apply(referenceName: String,
start: Long,
end: Long,
negativeStrand: Boolean): ReferenceRegionWithOrientation = {
ReferenceRegionWithOrientation(region.referenceName,
region.start,
region.end,
negativeStrand)
ReferenceRegionWithOrientation(ReferenceRegion(referenceName, start, end), negativeStrand)
}
}

/**
* Represents a contiguous region of the reference genome with strand information.
*
* @param referenceName The name of the sequence (chromosome) in the reference genome
* @param start The 0-based residue-coordinate for the start of the region
* @param end The 0-based residue-coordinate for the first residue <i>after</i> the start
* which is <i>not</i> in the region -- i.e. [start, end) define a 0-based
* half-open interval.
* @param region The genomic locus as a ReferenceRegion
* @param negativeStrand Boolean flag as to whether the region is on the forward or
* reverse strand of the reference region.
*/
case class ReferenceRegionWithOrientation(referenceName: String,
start: Long,
end: Long,
case class ReferenceRegionWithOrientation(region: ReferenceRegion,
negativeStrand: Boolean) extends Ordered[ReferenceRegionWithOrientation] {

assert(end >= 0)
assert(start >= 0)

def width: Long = end - start - 1 // need minus 1 for open end
def width: Long = region.width

def contains(other: ReferencePositionWithOrientation): Boolean = {
other.refPos.fold(false)(rp => referenceName == rp.referenceName &&
negativeStrand == other.negativeStrand &&
start <= rp.pos && end > rp.pos)
negativeStrand == other.negativeStrand && region.contains(other.refPos)
}

def contains(other: ReferenceRegionWithOrientation): Boolean = {
referenceName == other.referenceName && negativeStrand == other.negativeStrand &&
start <= other.start && end >= other.end
region.contains(other.region) && negativeStrand == other.negativeStrand
}

def overlaps(other: ReferenceRegionWithOrientation): Boolean = {
referenceName == other.referenceName && negativeStrand == other.negativeStrand &&
((start >= other.start && start <= other.end) || (end >= other.start && end <= other.end))
region.overlaps(other.region) && negativeStrand == other.negativeStrand
}

def compare(that: ReferenceRegionWithOrientation): Int =
if (referenceName != that.referenceName) {
referenceName.compareTo(that.referenceName)
} else if (negativeStrand != that.negativeStrand) {
negativeStrand.compareTo(that.negativeStrand)
def compare(that: ReferenceRegionWithOrientation): Int = {
val regionCompare = region.compare(that.region)
if (regionCompare != 0) {
regionCompare
} else {
if (negativeStrand) {
// invert comparison if on negative strand
if (start != that.start)
-start.compareTo(that.start)
else
-end.compareTo(that.end)
} else {
if (start != that.start)
start.compareTo(that.start)
else
end.compareTo(that.end)
}
negativeStrand.compare(that.negativeStrand)
}

def toReferenceRegion: ReferenceRegion = {
ReferenceRegion(referenceName, start, end)
}

def toReferenceRegion: ReferenceRegion = region

def referenceName: String = region.referenceName

def start: Long = region.start

def end: Long = region.end
}

object ReferenceRegion {
Expand Down Expand Up @@ -167,9 +147,9 @@ object ReferenceRegion {
case class ReferenceRegion(referenceName: String, start: Long, end: Long) extends Ordered[ReferenceRegion] with Interval {

assert(start >= 0)
assert(end >= start)
assert(end > start)

def width: Long = end - start - 1
def width: Long = end - start

/**
* Merges two reference regions that are contiguous.
Expand Down
Expand Up @@ -30,7 +30,7 @@ class IndelTableSuite extends SparkFunSuite {
}

test("check for indels in a contig that doesn't exist") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why'd this test get dropped?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exactly is this testing? Does it make sense to instantiate an empty ReferenceRegion?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The important thing this is testing is that no indels exist on a contig ("0") that doesn't exist. You are correct though; it shouldn't be empty.

assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 0L)).length === 0)
assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 1L)).length === 0)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, test is back, and passes now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent. Thank you for catching the empty region!

}

test("check for indels in a region without known indels") {
Expand Down
Expand Up @@ -145,50 +145,43 @@ class ReferencePositionSuite extends FunSuite {
}

test("liftOverToReference works with a multi-block alignment on the forward strand") {
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = false),
ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = false),
ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = false))
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = false),
ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = false),
ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = false))

val p0 = ReferencePositionWithOrientation.liftOverToReference(0, exons)
assert(p0.refPos.isDefined)
assert(p0.refPos.get.referenceName === "1")
assert(p0.refPos.get.pos === 100)
assert(p0.refPos.referenceName === "1")
assert(p0.refPos.pos === 100)

val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
assert(p1.refPos.isDefined)
assert(p1.refPos.get.referenceName === "1")
assert(p1.refPos.get.pos === 150)
assert(p1.refPos.referenceName === "1")
assert(p1.refPos.pos === 150)

val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
assert(p2.refPos.isDefined)
assert(p2.refPos.get.referenceName === "1")
assert(p2.refPos.get.pos === 350)
assert(p2.refPos.referenceName === "1")
assert(p2.refPos.pos === 350)

val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
assert(p3.refPos.isDefined)
assert(p3.refPos.get.referenceName === "1")
assert(p3.refPos.get.pos === 550)
assert(p3.refPos.referenceName === "1")
assert(p3.refPos.pos === 550)
}

test("liftOverToReference works with a multi-block alignment on the reverse strand") {
val exons = Seq(ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = true),
ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = true),
ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = true))
val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = true),
ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = true),
ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = true))

val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
assert(p1.refPos.isDefined)
assert(p1.refPos.get.referenceName === "1")
assert(p1.refPos.get.pos === 550)
assert(p1.refPos.referenceName === "1")
assert(p1.refPos.pos === 549)

val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
assert(p2.refPos.isDefined)
assert(p2.refPos.get.referenceName === "1")
assert(p2.refPos.get.pos === 350)
assert(p2.refPos.referenceName === "1")
assert(p2.refPos.pos === 349)

val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
assert(p3.refPos.isDefined)
assert(p3.refPos.get.referenceName === "1")
assert(p3.refPos.get.pos === 150)
assert(p3.refPos.referenceName === "1")
assert(p3.refPos.pos === 149)
}

test("lift over between two transcripts on the forward strand") {
Expand All @@ -200,9 +193,8 @@ class ReferencePositionSuite extends FunSuite {
// check forward strand
val pos = ReferencePositionWithOrientation.liftOverToReference(60, t1)

assert(pos.refPos.isDefined)
assert(pos.refPos.get.referenceName === "chr0")
assert(pos.refPos.get.pos === 60L)
assert(pos.refPos.referenceName === "chr0")
assert(pos.refPos.pos === 60L)
assert(!pos.negativeStrand)

val idx = pos.liftOverFromReference(t2)
Expand All @@ -217,16 +209,15 @@ class ReferencePositionSuite extends FunSuite {
ReferenceRegionWithOrientation("chr0", 50L, 101L, negativeStrand = true))

// check reverse strand
val idx = ReferencePositionWithOrientation(Some(ReferencePosition("chr0", 190L)), negativeStrand = true)
val idx = ReferencePositionWithOrientation(ReferencePosition("chr0", 190L), negativeStrand = true)
.liftOverFromReference(t2)

assert(idx === 11L)

val pos = ReferencePositionWithOrientation.liftOverToReference(idx, t1)

assert(pos.refPos.isDefined)
assert(pos.refPos.get.referenceName === "chr0")
assert(pos.refPos.get.pos === 189L)
assert(pos.refPos.referenceName === "chr0")
assert(pos.refPos.pos === 189L)
assert(pos.negativeStrand)
}
}