Skip to content

Commit

Permalink
Merge pull request #14 from ryan-williams/deps
Browse files Browse the repository at this point in the history
upgrade reference, test deps
  • Loading branch information
ryan-williams committed Mar 13, 2017
2 parents 640b783 + ca52ba3 commit b211dfa
Show file tree
Hide file tree
Showing 17 changed files with 70 additions and 60 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Expand Up @@ -5,7 +5,6 @@ jdk:
- oraclejdk8

scala:
- 2.10.6
- 2.11.8

script: sbt ++$TRAVIS_SCALA_VERSION clean test
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
@@ -1,7 +1,7 @@

organization := "org.hammerlab.genomics"
name := "loci"
version := "1.5.2"
version := "1.5.3"

addSparkDeps

Expand Down
2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -1 +1 @@
addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.6.3")
addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.7.1")
Expand Up @@ -8,8 +8,8 @@ object VariantContext {
Some(
(
vc.getContig,
vc.getStart - 1L,
vc.getEnd.toLong
Locus(vc.getStart - 1L),
Locus(vc.getEnd.toLong)
)
)
}
Expand Up @@ -37,7 +37,7 @@ case class Contig[T](name: ContigName, private val rangeMap: RangeMap[Locus, T])
def getAll(start: Locus, end: Locus): Set[T] = getAll(start, end, halfWindowSize = 0)
def getAll(start: Locus, end: Locus, halfWindowSize: Int): Set[T] =
rangeMap
.subRangeMap(lociRange(start.locus - halfWindowSize, end.locus + halfWindowSize))
.subRangeMap(lociRange(start - halfWindowSize, end + halfWindowSize))
.asMapOfRanges
.values
.toSet
Expand Down
Expand Up @@ -38,12 +38,12 @@ object ParsedLociRange {
case "none" | "" =>
None
case contigAndLoci(name, startStr, endStrOpt) =>
val start = startStr.toLong
val start = Locus(startStr.toLong)
val endOpt: Option[Locus] =
Option(endStrOpt) match {
case Some("") => None
case Some(s) => Some(s.toLong)
case None => Some(start + 1)
case Some(s) => Some(Locus(s.toLong))
case None => Some(start.next)
}

Some(LociRange(name, start, endOpt))
Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/org/hammerlab/genomics/loci/set/Contig.scala
Expand Up @@ -25,8 +25,8 @@ case class Contig(var name: ContigName, private var rangeSet: RangeSet[Locus]) e
for {
i <- 0 until num
} {
val start = in.readLong()
val end = in.readLong()
val start = Locus(in.readLong())
val end = Locus(in.readLong())
val range = lociRange(start, end)
rangeSet.add(range)
}
Expand Down Expand Up @@ -65,7 +65,7 @@ case class Contig(var name: ContigName, private var rangeSet: RangeSet[Locus]) e
def iterator = new LociIterator(ranges.iterator.buffered)

/** Number of loci on this contig. */
def count: NumLoci = ranges.map(_.length: Long).sum
def count: NumLoci = ranges.map(_.length).sum

/** Returns whether a given genomic region overlaps with any loci on this contig. */
def intersects(start: Locus, end: Locus): Boolean = !rangeSet.subRangeSet(lociRange(start, end)).isEmpty
Expand All @@ -75,7 +75,7 @@ case class Contig(var name: ContigName, private var rangeSet: RangeSet[Locus]) e
*
* Used by LociSet.take.
*/
private[set] def take(numToTake: Long): (Contig, Contig) = {
private[set] def take(numToTake: NumLoci): (Contig, Contig) = {
val firstRanges = ArrayBuffer[Interval]()
val secondRanges = ArrayBuffer[Interval]()

Expand Down
Expand Up @@ -3,6 +3,7 @@ package org.hammerlab.genomics.loci.set
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer ⇒ KryoSerializer }
import com.google.common.collect.{ TreeRangeSet, Range ⇒ JRange }
import JRange.closedOpen
import org.hammerlab.genomics.reference.{ ContigName, Interval, Locus }

// We serialize a LociSet simply by writing out its constituent Contigs.
Expand All @@ -12,7 +13,7 @@ class ContigSerializer extends KryoSerializer[Contig] {
kryo.writeObject(output, obj.name)
output.writeInt(obj.ranges.length)
for {
Interval(start, end) <- obj.ranges
Interval(start, end) ← obj.ranges
} {
output.writeLong(start.locus)
output.writeLong(end.locus)
Expand All @@ -23,8 +24,13 @@ class ContigSerializer extends KryoSerializer[Contig] {
val name = kryo.readObject(input, classOf[ContigName])
val length = input.readInt()
val treeRangeSet = TreeRangeSet.create[Locus]()
val ranges = (0 until length).foreach { _ =>
treeRangeSet.add(JRange.closedOpen[Locus](input.readLong(), input.readLong()))
val ranges = (0 until length).foreach { _ ⇒
treeRangeSet.add(
closedOpen(
Locus(input.readLong()),
Locus(input.readLong())
)
)
}
Contig(name, treeRangeSet)
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala
Expand Up @@ -147,9 +147,9 @@ object LociSet {
.map(Contig(_))
)

def apply(ranges: ParsedLoci, contigLengths: ContigLengths): LociSet =
def apply(loci: ParsedLoci, contigLengths: ContigLengths): LociSet =
LociSet(
ranges match {
loci match {
case All =>
for {
(contig, length) <- contigLengths
Expand Down
Expand Up @@ -24,7 +24,7 @@ class SkippableLociIteratorSuite
)

test("no skips") {
strs.toList should ===(
strs.toSeq should ===(
List(
10 → "a",
11 → "b",
Expand Down Expand Up @@ -76,8 +76,8 @@ class SkippableLociIteratorSuite
Interval(50, 51)
).buffered
)
).toList ===
List(
).toSeq ===
Seq(
10 → "a",
30 → "e",
31 → "f",
Expand Down
Expand Up @@ -2,13 +2,13 @@ package org.hammerlab.genomics.loci.map

import com.google.common.collect.{ ImmutableRangeMap, Range }
import org.hammerlab.genomics.reference.Locus
import org.hammerlab.genomics.reference.test.ClearContigNames
import org.hammerlab.test.Suite
import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigNameConversions }
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.genomics.reference.test.ContigNameConversions._
import org.hammerlab.test.Suite

class ContigSuite
extends Suite
with ContigNameConversions
with ClearContigNames {

test("empty") {
Expand Down
Expand Up @@ -3,11 +3,12 @@ package org.hammerlab.genomics.loci.map
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.Interval
import org.hammerlab.genomics.reference.test.LociConversions.intToLocus
import org.hammerlab.genomics.reference.test.ContigNameConversions.toSeq
import org.hammerlab.genomics.reference.test.ContigNameConversions
import org.hammerlab.test.Suite

class LociMapSuite
extends Suite
with ContigNameConversions
with LociSetUtil {

test("properties of empty LociMap") {
Expand Down
Expand Up @@ -35,31 +35,31 @@ class SerializerSuite
testSerde("empty")()(9, 0, 0)

testSerde("1")(
("chr1", 100L, 200L, "A")
("chr1", 100, 200, "A")
)(
40, 1, 100
)

testSerde("2")(
("chr1", 100L, 200L, "A"),
("chr1", 400L, 500L, "B")
("chr1", 100, 200, "A"),
("chr1", 400, 500, "B")
)(
59, 2, 200
)

testSerde("3")(
("chr1", 100L, 200L, "A"),
("chr1", 400L, 500L, "B"),
("chr1", 600L, 700L, "C")
("chr1", 100, 200, "A"),
("chr1", 400, 500, "B"),
("chr1", 600, 700, "C")
)(
78, 3, 300
)

testSerde("4")(
("chr1", 100L, 200L, "A"),
("chr1", 400L, 500L, "B"),
("chr1", 600L, 700L, "C"),
("chr1", 700L, 800L, "A")
("chr1", 100, 200, "A"),
("chr1", 400, 500, "B"),
("chr1", 600, 700, "C"),
("chr1", 700, 800, "A")
)(
97, 4, 400
)
Expand Down
Expand Up @@ -20,7 +20,7 @@ class ParsedLociSuite
lociSet(
ParsedLoci.fromArgs(
lociStrOpt = None,
lociFileOpt = Some(File("truth.chr20.vcf").path),
lociFileOpt = Some(File("truth.chr20.vcf")),
conf
).get
)
Expand Down
Expand Up @@ -10,7 +10,7 @@ class LociIteratorSuite extends Suite {
def loci(intervals: (Int, Int)*): LociIterator =
new LociIterator(
(for {
(start, end) <- intervals
(start, end) ← intervals
} yield
Interval(start, end)
)
Expand All @@ -19,17 +19,17 @@ class LociIteratorSuite extends Suite {
)

test("simple") {
loci(100 -> 110).toList should ===(100 until 110)
loci(100 → 110).toSeq should ===(100 until 110)
}

test("skipTo") {
val it = loci(100 -> 110)
it.skipTo(103)
it.head should ===(103)
it.toList should ===(103 until 110)
it.toSeq should ===(103 until 110)
}

test("intervals") {
loci(100 -> 110, 120 -> 130).toList should ===((100 until 110) ++ (120 until 130))
loci(100 → 110, 120 → 130).toSeq should ===((100 until 110) ++ (120 until 130))
}
}
28 changes: 16 additions & 12 deletions src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala
Expand Up @@ -2,24 +2,25 @@ package org.hammerlab.genomics.loci.set

import org.hammerlab.genomics.loci.parsing.ParsedLoci
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigLengthsUtil }
import org.hammerlab.genomics.reference.test.ContigNameConversions.toArray
import org.hammerlab.genomics.reference.test.LociConversions.{ intToLocus, toSeq }
import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Locus, NumLoci }
import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigLengthsUtil, LenientContigNameConversions }
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Locus, NumLoci, PermissiveRegistrar }
import org.hammerlab.spark.test.suite.KryoSparkSuite

import scala.collection.mutable

class LociSetSuite
extends KryoSparkSuite(classOf[Registrar])
with LociSetUtil
with LenientContigNameConversions
with ClearContigNames
with ContigLengthsUtil {

import org.hammerlab.genomics.reference.ContigName.Normalization.Lenient

// "loci set invariants" collects some LociSets
register(classOf[mutable.WrappedArray.ofRef[_]])
register(
classOf[mutable.WrappedArray.ofRef[_]],
PermissiveRegistrar
)

def makeLociSet(str: String, lengths: (ContigName, NumLoci)*): LociSet =
LociSet(ParsedLoci(str), lengths.toMap)
Expand Down Expand Up @@ -160,17 +161,20 @@ class LociSetSuite
iter1.next() should ===(2)
iter1.next() should ===(10)
iter1.next() should ===(11)
iter1.skipTo(6000000000L) // will hang if it steps through each locus.
iter1.next() should ===(6000000000L)
iter1.next() should ===(6000000001L)

val sixBillion = Locus(6000000000L)
iter1.skipTo(sixBillion) // will hang if it steps through each locus.
iter1.next() should ===(sixBillion)
iter1.next() should ===(sixBillion.next)
iter1.hasNext should ===(true)

val hundredBillion = Locus(100000000000L)
val iter2 = set("chr1").iterator
iter2.skipTo(100000000000L)
iter2.skipTo(hundredBillion)
iter2.hasNext should ===(false)

val iter3 = set("chr1").iterator
iter3.skipTo(100000000000L - 1)
iter3.skipTo(hundredBillion.prev)
iter3.hasNext should ===(true)
iter3.next() should ===(100000000000L - 1)
iter3.hasNext should ===(false)
Expand Down
Expand Up @@ -5,22 +5,22 @@ import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream,
import org.apache.spark.broadcast.Broadcast
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.ContigName.Factory
import org.hammerlab.genomics.reference.Locus
import org.hammerlab.genomics.reference.test.ClearContigNames
import org.hammerlab.genomics.reference.test.LociConversions.{ intToLocus, toArray, toSeq }
import org.hammerlab.genomics.reference.{ Locus, PermissiveRegistrar }
import org.hammerlab.genomics.reference.test.{ ClearContigNames, LenientContigNameConversions }
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.spark.test.suite.{ KryoSparkSuite, SparkSerialization }

import scala.collection.mutable

class SerializerSuite
extends KryoSparkSuite(classOf[Registrar], referenceTracking = true)
with SparkSerialization
with LenientContigNameConversions
with ClearContigNames
with LociSetUtil
with Serializable {

import Helpers._
import org.hammerlab.genomics.reference.ContigName.Normalization.Lenient

// "a closure that includes a LociSet" parallelizes some Range[Long]s.
register(
Expand All @@ -30,11 +30,12 @@ class SerializerSuite
// "make an RDD[LociSet] and an RDD[Contig]" collects some Strings.
classOf[Array[String]],

classOf[mutable.WrappedArray.ofRef[_]]
classOf[mutable.WrappedArray.ofRef[_]],

PermissiveRegistrar
)

test("make an RDD[LociSet]") {
import org.hammerlab.genomics.reference.ContigName.Normalization.Lenient
val sets =
List[LociSet](
"",
Expand All @@ -52,7 +53,6 @@ class SerializerSuite
}

test("make an RDD[LociSet], and an RDD[Contig]") {
import org.hammerlab.genomics.reference.ContigName.Normalization.Lenient
val sets =
List[LociSet](
"",
Expand All @@ -77,7 +77,7 @@ class SerializerSuite
test("a closure that includes a LociSet") {
val set: LociSet = "chr21:100-200,chr20:0-10,chr20:8-15,chr20:100-120,empty:10-10"
val setBC = sc.broadcast(set)
val rdd = sc.parallelize[Locus](0 until 1000)
val rdd = sc.parallelize[Locus]((0 until 1000).toSeq)
val result = rdd.filter(lociSetFilterTask(setBC)).collect
result should ===(100 until 200)
}
Expand Down

0 comments on commit b211dfa

Please sign in to comment.