Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from devin-petersohn/intersection
Intersection implementation
- Loading branch information
Showing
9 changed files
with
190 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 0 additions & 18 deletions
18
lime-core/src/main/scala/org/bdgenomics/lime/set_theory/Intersect.scala
This file was deleted.
Oops, something went wrong.
83 changes: 83 additions & 0 deletions
83
lime-core/src/main/scala/org/bdgenomics/lime/set_theory/Intersection.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package org.bdgenomics.lime.set_theory | ||
|
||
import org.apache.spark.rdd.RDD | ||
import org.bdgenomics.adam.models.ReferenceRegion | ||
import scala.collection.mutable.ListBuffer | ||
import scala.reflect.ClassTag | ||
|
||
/**
 * Shared definition of the intersection operation between two collections
 * keyed by [[ReferenceRegion]].
 *
 * Supplies the region-level operation (`primitive`) and the overlap test
 * (`condition`); concrete subclasses provide `threshold`.
 *
 * @tparam T Type of the values paired with the left-hand regions.
 * @tparam U Type of the values paired with the right-hand regions.
 */
sealed abstract class Intersection[T: ClassTag, U: ClassTag] extends SetTheoryBetweenCollections[T, U, T, U] {

  /** Smallest overlap that `condition` accepts. */
  val threshold: Long

  /**
   * Intersects two regions.
   *
   * @param currRegion The region the intersection is taken on.
   * @param tempRegion The region to intersect with.
   * @param minimumOverlap Forwarded unchanged to `ReferenceRegion.intersection`.
   * @return The result of `currRegion.intersection(tempRegion, minimumOverlap)`.
   */
  def primitive(currRegion: ReferenceRegion,
                tempRegion: ReferenceRegion,
                minimumOverlap: Long = 0L): ReferenceRegion =
    currRegion.intersection(tempRegion, minimumOverlap)

  /**
   * Tests whether two regions overlap by at least `threshold`.
   *
   * NOTE(review): `minimumOverlap` is accepted but never read here; the
   * comparison is made against `threshold` only. Confirm this is intended.
   *
   * @param firstRegion The region `overlapsBy` is called on.
   * @param secondRegion The region passed to `overlapsBy`.
   * @param minimumOverlap Unused by this implementation.
   * @return True when `overlapsBy` yields a value that is at least `threshold`.
   */
  def condition(firstRegion: ReferenceRegion,
                secondRegion: ReferenceRegion,
                minimumOverlap: Long = 0L): Boolean = {
    val overlap = firstRegion.overlapsBy(secondRegion)
    overlap.exists(width => width >= threshold)
  }
}
|
||
/**
 * Intersection of two region-keyed RDDs, computed one partition pair at a
 * time by sweeping the left and right iterators together.
 *
 * NOTE(review): the sweep only advances forward through both iterators, so it
 * presumably expects both sides to be sorted by region and co-partitioned by
 * the caller; confirm against the code that builds the inputs.
 *
 * @param leftRdd Left-hand records; every output record is keyed off one of these.
 * @param rightRdd Right-hand records that are buffered and matched against the left.
 * @param partitionMap Not read anywhere in this class.
 * @param threshold Minimum overlap required for a pair to be emitted.
 * @tparam T Type of the values paired with the left-hand regions.
 * @tparam U Type of the values paired with the right-hand regions.
 */
case class DistributedIntersection[T: ClassTag, U: ClassTag](leftRdd: RDD[(ReferenceRegion, T)],
                                                             rightRdd: RDD[(ReferenceRegion, U)],
                                                             partitionMap: Array[Option[(ReferenceRegion, ReferenceRegion)]],
                                                             threshold: Long = 0L) extends Intersection[T, U] {

  /**
   * Zips matching partitions of the two RDDs and sweeps each pair.
   *
   * @return One record per accepted (left, right) pair, keyed by the
   *         intersection of the two regions.
   */
  def compute(): RDD[(ReferenceRegion, (T, U))] = {
    leftRdd.zipPartitions(rightRdd)(sweep)
  }

  /**
   * Per-partition entry point handed to `zipPartitions`.
   *
   * @param leftIter Left-hand records of one partition.
   * @param rightIter Right-hand records of the matching partition.
   * @return The intersections found within this partition pair.
   */
  private def sweep(leftIter: Iterator[(ReferenceRegion, T)],
                    rightIter: Iterator[(ReferenceRegion, U)]): Iterator[(ReferenceRegion, (T, U))] = {

    makeIterator(leftIter.buffered, rightIter.buffered)
  }

  /**
   * Walks the left iterator and, for every left record, refreshes a buffer of
   * candidate right records before emitting the hits for that record.
   *
   * @param left Buffered left-hand records.
   * @param right Buffered right-hand records.
   * @return A lazy iterator over the accepted pairs.
   */
  private def makeIterator(left: BufferedIterator[(ReferenceRegion, T)],
                           right: BufferedIterator[(ReferenceRegion, U)]): Iterator[(ReferenceRegion, (T, U))] = {

    // The candidate buffer is scoped to this sweep so that no mutable state is
    // held on the case class instance.
    val cache = ListBuffer.empty[(ReferenceRegion, U)]

    // Pulls right-hand records into the buffer while the next one sorts at or
    // before `until`, or covers it.
    def advanceCache(until: ReferenceRegion): Unit = {
      while (right.hasNext && (right.head._1.compareTo(until) <= 0 ||
        right.head._1.covers(until))) {

        cache += right.next
      }
    }

    // Drops the leading run of buffered records that sort before `to` and do
    // not cover it.
    def pruneCache(to: ReferenceRegion): Unit = {
      val firstKept = cache.indexWhere(f => f._1.compareTo(to) >= 0 || f._1.covers(to))
      // indexWhere returns -1 when no buffered record should be kept; in that
      // case the whole buffer is stale and must be dropped, not left in place.
      val staleCount = if (firstKept < 0) cache.length else firstKept
      cache.trimStart(staleCount)
    }

    left.flatMap(f => {
      val (currentRegion, _) = f
      advanceCache(currentRegion)
      pruneCache(currentRegion)
      processHits(f, cache)
    })
  }

  /**
   * Pairs one left record with every buffered right record that passes the
   * overlap test.
   *
   * @param current The left record being processed.
   * @param cache Buffered right-hand candidates for this record.
   * @return The accepted pairs, each keyed by the intersection of the two regions.
   */
  private def processHits(current: (ReferenceRegion, T),
                          cache: ListBuffer[(ReferenceRegion, U)]): Iterator[(ReferenceRegion, (T, U))] = {

    val (currentRegion, currentValue) = current
    // Hits are materialised before returning because the buffer is mutated
    // again when the next left record is processed.
    cache.filter(g => condition(g._1, currentRegion))
      .map(g => {
        (primitive(currentRegion, g._1, threshold), (currentValue, g._2))
      }).iterator
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
chr1 28735 29810 CpG:_116 | ||
chr1 29800 29820 CpG:_30 | ||
chr1 29815 29830 CpG:_29 | ||
chr1 29825 29840 CpG:_84 | ||
chr1 29835 29850 CpG:_99 | ||
chr1 29845 29860 CpG:_94 | ||
chr1 29855 29870 CpG:_171 | ||
chr1 29865 29880 CpG:_60 | ||
chr1 29875 29890 CpG:_115 | ||
chr1 29885 29900 CpG:_28 | ||
chr1 29895 29910 CpG:_24 | ||
chr1 29905 29920 CpG:_50 | ||
chr1 29915 29930 CpG:_83 | ||
chr1 29925 29940 CpG:_153 | ||
chr1 29935 29950 CpG:_16 | ||
chr1 29945 29960 CpG:_257 | ||
chr1 29955 29970 CpG:_178 | ||
chr1 29965 29980 CpG:_246 | ||
chr1 29975 29990 CpG:_18 | ||
chr1 29985 30000 CpG:_615 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters