From 6855c05269f2c4998ae00d8fd8618c8cea11916c Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Mon, 10 Apr 2017 10:01:37 -0700 Subject: [PATCH] Adding threshold logic to overlaps and covers --- .../adam/models/ReferenceRegion.scala | 46 ++++++++++++---- .../adam/models/ReferenceRegionSuite.scala | 53 +++++++++++++++++-- 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala index 53e53237cf..71ca59d04c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala @@ -325,7 +325,7 @@ case class ReferenceRegion( extends Comparable[ReferenceRegion] with Interval[ReferenceRegion] { - assert(start >= 0 && end >= start, + require(start >= 0 && end >= start, "Failed when trying to create region %s %d %d on %s strand.".format( referenceName, start, end, strand)) @@ -338,7 +338,7 @@ case class ReferenceRegion( /** * Merges two reference regions that are contiguous. * - * @throws AssertionError Thrown if regions are not overlapping or adjacent. + * @throws IllegalArgumentException Thrown if regions are not overlapping or adjacent. * * @param other Other region to merge with this region. * @return The merger of both unions. @@ -346,14 +346,14 @@ case class ReferenceRegion( * @see hull */ def merge(other: ReferenceRegion): ReferenceRegion = { - assert(overlaps(other) || isAdjacent(other), "Cannot merge two regions that do not overlap or are not adjacent") + require(overlaps(other) || isAdjacent(other), "Cannot merge two regions that do not overlap or are not adjacent") merge(other, 1L) } /** * Merges two reference regions that are within a threshold of each other. - * - * @throws AssertionError Thrown if regions are no within the distance threshold. + * + * @throws IllegalArgumentException Thrown if regions are no within the distance threshold. * * @param other Other region to merge with this region. * @return The merger of both unions. @@ -361,7 +361,8 @@ case class ReferenceRegion( * @see hull */ def merge(other: ReferenceRegion, distanceThreshold: Long): ReferenceRegion = { - assert(isNearby(other, distanceThreshold), "Cannot merge two regions that do not meet the distance threshold") + require(isNearby(other, distanceThreshold), "Cannot merge two regions that do not meet the distance threshold") + require(distanceThreshold > 0, "Distance must be non-negative number") hull(other) } @@ -372,7 +373,7 @@ case class ReferenceRegion( * @return A smaller reference region. */ def intersection(other: ReferenceRegion): ReferenceRegion = { - assert(overlaps(other), "Cannot calculate the intersection of non-overlapping regions.") + require(overlaps(other), "Cannot calculate the intersection of non-overlapping regions.") ReferenceRegion(referenceName, max(start, other.start), min(end, other.end)) } @@ -380,7 +381,7 @@ case class ReferenceRegion( * Creates a region corresponding to the convex hull of two regions. Has no preconditions about the adjacency or * overlap of two regions. However, regions must be in the same reference space. * - * @throws AssertionError Thrown if regions are in different reference spaces. + * @throws IllegalArgumentException Thrown if regions are in different reference spaces. * * @param other Other region to compute hull of with this region. * @return The convex hull of both unions. @@ -388,8 +389,8 @@ case class ReferenceRegion( * @see merge */ def hull(other: ReferenceRegion): ReferenceRegion = { - assert(strand == other.strand, "Cannot compute convex hull of differently oriented regions.") - assert(referenceName == other.referenceName, "Cannot compute convex hull of regions on different references.") + require(strand == other.strand, "Cannot compute convex hull of differently oriented regions.") + require(referenceName == other.referenceName, "Cannot compute convex hull of regions on different references.") ReferenceRegion(referenceName, min(start, other.start), max(end, other.end)) } @@ -408,7 +409,7 @@ case class ReferenceRegion( /** * Returns whether two regions are nearby. * - * Nearby regions may overlap, and have a thresholded distance between the + * Nearby regions may overlap, and have a thresholded distance between the * start/end. * * @param other Region to compare against. @@ -507,6 +508,18 @@ case class ReferenceRegion( end > other.start && start < other.end } + /** + * Checks if our region overlaps (wholly or partially) another region, + * independent of strand. + * + * @param other The region to compare against. + * @param threshold The threshold within which the region must match. + * @return True if any section of the two regions overlap. + */ + def covers(other: ReferenceRegion, threshold: Long): Boolean = { + covers(other) || isNearby(other, threshold) + } + /** * Checks if our region overlaps (wholly or partially) another region. * @@ -518,6 +531,17 @@ case class ReferenceRegion( covers(other) } + /** + * Checks if our region overlaps (wholly or partially) another region. + * + * @param other The region to compare against. + * @param threshold The threshold within which the region must match. + * @return True if any section of the two regions overlap. + */ + def overlaps(other: ReferenceRegion, threshold: Long): Boolean = { + overlaps(other) || isNearby(other, threshold) + } + /** * Compares between two regions using the RegionOrdering. * diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index 56bcfbd0d6..dede0cd2d5 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -50,7 +50,7 @@ class ReferenceRegionSuite extends FunSuite { } test("merge") { - intercept[AssertionError] { + intercept[IllegalArgumentException] { region("chr0", 10, 100).merge(region("chr1", 10, 100)) } @@ -64,9 +64,26 @@ class ReferenceRegionSuite extends FunSuite { assert(r1.merge(r1) === r1) assert(r1.merge(r2) === r12) assert(r1.merge(r3) === r13) + + val r4 = region("chr0", 1, 100) + val r5 = region("chr0", 102, 202) + val r6 = region("chr0", 2, 5) + + val r45 = region("chr0", 1, 202) + + assert(r4.merge(r5, 3) === r45) + + intercept[IllegalArgumentException] { + r4.merge(r5, -1) + } + + intercept[IllegalArgumentException] { + r4.merge(r5, 1L) + } + } - test("overlaps") { + test("overlaps and covers") { // contained assert(region("chr0", 10, 100).overlaps(region("chr0", 20, 50))) @@ -101,6 +118,12 @@ class ReferenceRegionSuite extends FunSuite { // different sequences assert(!region("chr0", 10, 100).overlaps(region("chr1", 50, 200))) assert(!region("chr0", 10, 100).covers(region("chr1", 50, 200))) + + // thresholded + assert(region("chr0", 1, 100).overlaps(region("chr0", 101, 201), 2)) + assert(region("chr0", 1, 100).covers(region("chr0", 101, 201), 2)) + assert(!region("chr0", 1, 100).overlaps(region("chr0", 101, 201), 1)) + assert(!region("chr0", 1, 100).covers(region("chr0", 101, 201), 1)) } test("distance(: ReferenceRegion)") { @@ -255,11 +278,31 @@ class ReferenceRegionSuite extends FunSuite { assert(!r1.isAdjacent(r2)) - intercept[AssertionError] { + intercept[IllegalArgumentException] { r1.merge(r2) } } + test("validate that nearby regions can be merged") { + val r1 = region("chr1", 0L, 5L) + val r2 = region("chr1", 7L, 10L) + + assert(r1.isNearby(r2, 3)) + + assert(r1.merge(r2, 3) == region("chr1", 0L, 10L)) + } + + test("validate that non-nearby regions cannot be merged") { + val r1 = region("chr1", 0L, 5L) + val r2 = region("chr1", 7L, 10L) + + assert(!r1.isNearby(r2, 2L)) + + intercept[IllegalArgumentException] { + r1.merge(r2, 2) + } + } + test("compute convex hull of two sets") { val r1 = region("chr1", 0L, 5L) val r2 = region("chr1", 7L, 10L) @@ -299,10 +342,10 @@ class ReferenceRegionSuite extends FunSuite { } test("intersection fails on non-overlapping regions") { - intercept[AssertionError] { + intercept[IllegalArgumentException] { ReferenceRegion("chr1", 1L, 10L).intersection(ReferenceRegion("chr1", 11L, 20L)) } - intercept[AssertionError] { + intercept[IllegalArgumentException] { ReferenceRegion("chr1", 1L, 10L).intersection(ReferenceRegion("chr2", 1L, 10L)) } }