From 4ec66c789e3c55cb5f087f39486ce381001b7489 Mon Sep 17 00:00:00 2001 From: Uri Laserson Date: Fri, 7 Nov 2014 16:57:57 -0500 Subject: [PATCH] [ADAM-461] Fix ReferenceRegion and ReferencePosition impl This commit mainly does 3 things: 1. Fixes impl of `ReferenceRegion.width`. 2. Removes `Option`s from `ReferencePosition`. 3. Corrected impl of liftOver. However, it now has different semantics: in all cases, the list of regions must be provided in genome order. 4. Simplifies structure of case classes. Fixes #461. --- .../adam/models/ReferencePosition.scala | 48 ++++------- .../adam/models/ReferenceRegion.scala | 86 +++++++------------ .../adam/models/IndelTableSuite.scala | 2 +- .../adam/models/ReferencePositionSuite.scala | 59 ++++++------- .../adam/models/ReferenceRegionSuite.scala | 29 +++---- 5 files changed, 84 insertions(+), 140 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala index 34af9f1ec8..794215be3b 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala @@ -27,7 +27,7 @@ object ReferencePositionWithOrientation { def apply(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = { if (record.getReadMapped) { - Some(new ReferencePositionWithOrientation(ReferencePosition(record), record.getReadNegativeStrand)) + Some(new ReferencePositionWithOrientation(ReferencePosition(record).get, record.getReadNegativeStrand)) } else { None } @@ -35,7 +35,7 @@ object ReferencePositionWithOrientation { def fivePrime(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = { if (record.getReadMapped) { - Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record), record.getReadNegativeStrand)) + Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record).get, record.getReadNegativeStrand)) } else { None } @@ -57,7 +57,7 @@ object ReferencePositionWithOrientation { */ def liftOverToReference(idx: Long, mapping: Seq[ReferenceRegionWithOrientation]): ReferencePositionWithOrientation = { - val negativeStrand = mapping.headOption.fold(false)(_.negativeStrand) + val negativeStrand = mapping.head.negativeStrand @tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation], idx: Long): ReferencePositionWithOrientation = { @@ -81,7 +81,7 @@ object ReferencePositionWithOrientation { } // return - ReferencePositionWithOrientation(Some(ReferencePosition(chr, pos)), + ReferencePositionWithOrientation(ReferencePosition(chr, pos), negativeStrand) } else { // recurse @@ -91,11 +91,15 @@ object ReferencePositionWithOrientation { } // call out to helper - liftOverHelper(mapping.toIterator, idx) + if (negativeStrand) { + liftOverHelper(mapping.reverseIterator, idx) + } else { + liftOverHelper(mapping.toIterator, idx) + } } } -case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition], +case class ReferencePositionWithOrientation(refPos: ReferencePosition, negativeStrand: Boolean) extends Ordered[ReferencePositionWithOrientation] { @@ -113,8 +117,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition], * is out of the range of the mappings given. */ def liftOverFromReference(mapping: Seq[ReferenceRegionWithOrientation]): Long = { - assert(refPos.isDefined, "Cannot lift an undefined position.") - val pos = refPos.get.pos + val pos = refPos.pos @tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation], idx: Long = 0L): Long = { @@ -148,16 +151,7 @@ case class ReferencePositionWithOrientation(refPos: Option[ReferencePosition], } override def compare(that: ReferencePositionWithOrientation): Int = { - if (refPos.isEmpty && that.refPos.isEmpty) { - return 0 - } - if (refPos.isEmpty) { - return -1 - } - if (that.refPos.isEmpty) { - return 1 - } - val posCompare = refPos.get.compare(that.refPos.get) + val posCompare = refPos.compare(that.refPos) if (posCompare != 0) { posCompare } else { @@ -282,25 +276,13 @@ class ReferencePositionWithOrientationSerializer extends Serializer[ReferencePos def write(kryo: Kryo, output: Output, obj: ReferencePositionWithOrientation) = { output.writeBoolean(obj.negativeStrand) - obj.refPos match { - case None => - output.writeBoolean(false) - case Some(refPos) => - output.writeBoolean(true) - referencePositionSerializer.write(kryo, output, refPos) - } + referencePositionSerializer.write(kryo, output, obj.refPos) } def read(kryo: Kryo, input: Input, klazz: Class[ReferencePositionWithOrientation]): ReferencePositionWithOrientation = { val negativeStrand = input.readBoolean() - val hasPos = input.readBoolean() - hasPos match { - case false => - new ReferencePositionWithOrientation(None, negativeStrand) - case true => - val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition]) - new ReferencePositionWithOrientation(Some(referencePosition), negativeStrand) - } + val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition]) + new ReferencePositionWithOrientation(referencePosition, negativeStrand) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala index ecc43f74e7..892a7cfdd1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala @@ -26,83 +26,63 @@ import scala.math.{ max, min } object ReferenceRegionWithOrientation { /** - * Builds an oriented reference region from a given reference region - * and an orientation parameter. + * Builds an oriented reference region from the individual parameters * - * @param region Unstranded reference region. - * @param negativeStrand True if this region should be placed on the negative - * strand, else it will be on the positive strand. - * @return Returns an oriented reference region. + * @param referenceName The name of the sequence (chromosome) in the reference genome + * @param start The 0-based residue-coordinate for the start of the region + * @param end The 0-based residue-coordinate for the first residue after the start + * which is not in the region -- i.e. [start, end) define a 0-based + * half-open interval. + * @param negativeStrand Boolean flag as to whether the region is on the forward or + * reverse strand of the reference region. */ - def apply(region: ReferenceRegion, + def apply(referenceName: String, + start: Long, + end: Long, negativeStrand: Boolean): ReferenceRegionWithOrientation = { - ReferenceRegionWithOrientation(region.referenceName, - region.start, - region.end, - negativeStrand) + ReferenceRegionWithOrientation(ReferenceRegion(referenceName, start, end), negativeStrand) } } /** * Represents a contiguous region of the reference genome with strand information. * - * @param referenceName The name of the sequence (chromosome) in the reference genome - * @param start The 0-based residue-coordinate for the start of the region - * @param end The 0-based residue-coordinate for the first residue after the start - * which is not in the region -- i.e. [start, end) define a 0-based - * half-open interval. + * @param region The genomic locus as a ReferenceRegion * @param negativeStrand Boolean flag as to whether the region is on the forward or * reverse strand of the reference region. */ -case class ReferenceRegionWithOrientation(referenceName: String, - start: Long, - end: Long, +case class ReferenceRegionWithOrientation(region: ReferenceRegion, negativeStrand: Boolean) extends Ordered[ReferenceRegionWithOrientation] { - - assert(end >= 0) - assert(start >= 0) - - def width: Long = end - start - 1 // need minus 1 for open end + def width: Long = region.width def contains(other: ReferencePositionWithOrientation): Boolean = { - other.refPos.fold(false)(rp => referenceName == rp.referenceName && - negativeStrand == other.negativeStrand && - start <= rp.pos && end > rp.pos) + negativeStrand == other.negativeStrand && region.contains(other.refPos) } def contains(other: ReferenceRegionWithOrientation): Boolean = { - referenceName == other.referenceName && negativeStrand == other.negativeStrand && - start <= other.start && end >= other.end + region.contains(other.region) && negativeStrand == other.negativeStrand } def overlaps(other: ReferenceRegionWithOrientation): Boolean = { - referenceName == other.referenceName && negativeStrand == other.negativeStrand && - ((start >= other.start && start <= other.end) || (end >= other.start && end <= other.end)) + region.overlaps(other.region) && negativeStrand == other.negativeStrand } - def compare(that: ReferenceRegionWithOrientation): Int = - if (referenceName != that.referenceName) { - referenceName.compareTo(that.referenceName) - } else if (negativeStrand != that.negativeStrand) { - negativeStrand.compareTo(that.negativeStrand) + def compare(that: ReferenceRegionWithOrientation): Int = { + val regionCompare = region.compare(that.region) + if (regionCompare != 0) { + regionCompare } else { - if (negativeStrand) { - // invert comparison if on negative strand - if (start != that.start) - -start.compareTo(that.start) - else - -end.compareTo(that.end) - } else { - if (start != that.start) - start.compareTo(that.start) - else - end.compareTo(that.end) - } + negativeStrand.compare(that.negativeStrand) } - - def toReferenceRegion: ReferenceRegion = { - ReferenceRegion(referenceName, start, end) } + + def toReferenceRegion: ReferenceRegion = region + + def referenceName: String = region.referenceName + + def start: Long = region.start + + def end: Long = region.end } object ReferenceRegion { @@ -167,9 +147,9 @@ object ReferenceRegion { case class ReferenceRegion(referenceName: String, start: Long, end: Long) extends Ordered[ReferenceRegion] with Interval { assert(start >= 0) - assert(end >= start) + assert(end > start) - def width: Long = end - start - 1 + def width: Long = end - start /** * Merges two reference regions that are contiguous. diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala index 1ebcc33568..af09f69e2c 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala @@ -30,7 +30,7 @@ class IndelTableSuite extends SparkFunSuite { } test("check for indels in a contig that doesn't exist") { - assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 0L)).length === 0) + assert(indelTable.getIndelsInRegion(ReferenceRegion("0", 0L, 1L)).length === 0) } test("check for indels in a region without known indels") { diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala index 6a2dd915cd..9305b4d831 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala @@ -145,50 +145,43 @@ class ReferencePositionSuite extends FunSuite { } test("liftOverToReference works with a multi-block alignment on the forward strand") { - val exons = Seq(ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = false), - ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = false), - ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = false)) + val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = false), + ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = false), + ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = false)) val p0 = ReferencePositionWithOrientation.liftOverToReference(0, exons) - assert(p0.refPos.isDefined) - assert(p0.refPos.get.referenceName === "1") - assert(p0.refPos.get.pos === 100) + assert(p0.refPos.referenceName === "1") + assert(p0.refPos.pos === 100) val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons) - assert(p1.refPos.isDefined) - assert(p1.refPos.get.referenceName === "1") - assert(p1.refPos.get.pos === 150) + assert(p1.refPos.referenceName === "1") + assert(p1.refPos.pos === 150) val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons) - assert(p2.refPos.isDefined) - assert(p2.refPos.get.referenceName === "1") - assert(p2.refPos.get.pos === 350) + assert(p2.refPos.referenceName === "1") + assert(p2.refPos.pos === 350) val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons) - assert(p3.refPos.isDefined) - assert(p3.refPos.get.referenceName === "1") - assert(p3.refPos.get.pos === 550) + assert(p3.refPos.referenceName === "1") + assert(p3.refPos.pos === 550) } test("liftOverToReference works with a multi-block alignment on the reverse strand") { - val exons = Seq(ReferenceRegionWithOrientation("1", 500, 601, negativeStrand = true), - ReferenceRegionWithOrientation("1", 300, 401, negativeStrand = true), - ReferenceRegionWithOrientation("1", 100, 201, negativeStrand = true)) + val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = true), + ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = true), + ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = true)) val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons) - assert(p1.refPos.isDefined) - assert(p1.refPos.get.referenceName === "1") - assert(p1.refPos.get.pos === 550) + assert(p1.refPos.referenceName === "1") + assert(p1.refPos.pos === 549) val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons) - assert(p2.refPos.isDefined) - assert(p2.refPos.get.referenceName === "1") - assert(p2.refPos.get.pos === 350) + assert(p2.refPos.referenceName === "1") + assert(p2.refPos.pos === 349) val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons) - assert(p3.refPos.isDefined) - assert(p3.refPos.get.referenceName === "1") - assert(p3.refPos.get.pos === 150) + assert(p3.refPos.referenceName === "1") + assert(p3.refPos.pos === 149) } test("lift over between two transcripts on the forward strand") { @@ -200,9 +193,8 @@ class ReferencePositionSuite extends FunSuite { // check forward strand val pos = ReferencePositionWithOrientation.liftOverToReference(60, t1) - assert(pos.refPos.isDefined) - assert(pos.refPos.get.referenceName === "chr0") - assert(pos.refPos.get.pos === 60L) + assert(pos.refPos.referenceName === "chr0") + assert(pos.refPos.pos === 60L) assert(!pos.negativeStrand) val idx = pos.liftOverFromReference(t2) @@ -217,16 +209,15 @@ class ReferencePositionSuite extends FunSuite { ReferenceRegionWithOrientation("chr0", 50L, 101L, negativeStrand = true)) // check reverse strand - val idx = ReferencePositionWithOrientation(Some(ReferencePosition("chr0", 190L)), negativeStrand = true) + val idx = ReferencePositionWithOrientation(ReferencePosition("chr0", 190L), negativeStrand = true) .liftOverFromReference(t2) assert(idx === 11L) val pos = ReferencePositionWithOrientation.liftOverToReference(idx, t1) - assert(pos.refPos.isDefined) - assert(pos.refPos.get.referenceName === "chr0") - assert(pos.refPos.get.pos === 189L) + assert(pos.refPos.referenceName === "chr0") + assert(pos.refPos.pos === 189L) assert(pos.negativeStrand) } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index a625222ca9..0c86a8676c 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -138,7 +138,7 @@ class ReferenceRegionSuite extends FunSuite { .setEnd(6L) .setContig(Contig.newBuilder .setContigName("chr1") - .setContigLength(10) + .setContigLength(10L) .build) .build() @@ -265,24 +265,19 @@ class ReferenceRegionSuite extends FunSuite { test("comparison tests for oriented reference region vs position") { assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr1", 10L)), negativeStrand = false))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 10L), negativeStrand = false))) assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = true) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr1", 17L)), negativeStrand = true))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 17L), negativeStrand = true))) assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = false) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr1", 17L)), negativeStrand = true))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 17L), negativeStrand = true))) assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = true) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr1", 10L)), negativeStrand = false))) - - assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = false) - .contains(ReferencePositionWithOrientation(None.asInstanceOf[Option[ReferencePosition]], negativeStrand = true))) - assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = true) - .contains(ReferencePositionWithOrientation(None.asInstanceOf[Option[ReferencePosition]], negativeStrand = false))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 10L), negativeStrand = false))) assert(!ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr2", 10L)), negativeStrand = false))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr2", 10L), negativeStrand = false))) assert(!ReferenceRegionWithOrientation("chr1", 20L, 50L, negativeStrand = true) - .contains(ReferencePositionWithOrientation(Some(ReferencePosition("chr1", 100L)), negativeStrand = true))) + .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 100L), negativeStrand = true))) } test("overlap tests for oriented reference region") { @@ -291,10 +286,6 @@ class ReferenceRegionSuite extends FunSuite { assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = true) .overlaps(ReferenceRegionWithOrientation("chr1", 5L, 15L, negativeStrand = true))) - val rrf = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 12L, 22L), negativeStrand = false) - val rrr = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 8L, 8L), negativeStrand = true) - assert(!rrf.overlaps(rrr)) - assert(!ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false) .overlaps(ReferenceRegionWithOrientation("chr2", 10L, 20L, negativeStrand = false))) assert(!ReferenceRegionWithOrientation("chr1", 20L, 50L, negativeStrand = true) @@ -302,8 +293,8 @@ class ReferenceRegionSuite extends FunSuite { } test("check the width of a reference region") { - assert(ReferenceRegion("chr1", 100, 201).width === 100) - assert(ReferenceRegionWithOrientation("chr2", 200, 401, negativeStrand = false).width === 200) - assert(ReferenceRegionWithOrientation("chr3", 399, 1000, negativeStrand = true).width === 600) + assert(ReferenceRegion("chr1", 100, 201).width === 101) + assert(ReferenceRegionWithOrientation("chr2", 200, 401, negativeStrand = false).width === 201) + assert(ReferenceRegionWithOrientation("chr3", 399, 1000, negativeStrand = true).width === 601) } }