Skip to content

Commit

Permalink
use ContigName factories
Browse files Browse the repository at this point in the history
  • Loading branch information
ryan-williams committed Feb 5, 2017
1 parent b8ac49d commit 73ac093
Show file tree
Hide file tree
Showing 20 changed files with 96 additions and 76 deletions.
2 changes: 1 addition & 1 deletion build.sbt
@@ -1,7 +1,7 @@

organization := "org.hammerlab.genomics"
name := "loci"
version := "1.5.0"
version := "1.5.1-SNAPSHOT"

addSparkDeps

Expand Down
2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -1 +1 @@
addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.5.6")
addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.5.8-SNAPSHOT")
Expand Up @@ -2,14 +2,14 @@ package org.hammerlab.genomics.loci.map

import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer => KryoSerializer }
import org.hammerlab.genomics.reference.Locus
import org.hammerlab.genomics.reference.{ ContigName, Locus }

/**
* We serialize a Contig as its name, the number of ranges, and the ranges themselves (two longs and a value each).
*/
class ContigSerializer[T] extends KryoSerializer[Contig[T]] {
def write(kryo: Kryo, output: Output, obj: Contig[T]) = {
output.writeString(obj.name.name)
kryo.writeObject(output, obj.name)
output.writeLong(obj.asMap.size)
obj.asMap.foreach {
case (range, value) =>
Expand All @@ -21,7 +21,7 @@ class ContigSerializer[T] extends KryoSerializer[Contig[T]] {

def read(kryo: Kryo, input: Input, klass: Class[Contig[T]]): Contig[T] = {
val builder = LociMap.newBuilder[T]
val contig = input.readString()
val contig = kryo.readObject(input, classOf[ContigName])
val count = input.readLong()
(0L until count).foreach { _ =>
val start = input.readLong()
Expand Down
Expand Up @@ -2,9 +2,12 @@ package org.hammerlab.genomics.loci.map

import com.esotericsoftware.kryo.Kryo
import org.apache.spark.serializer.KryoRegistrator
import org.hammerlab.genomics.reference

class Registrar extends KryoRegistrator {
override def registerClasses(kryo: Kryo): Unit = {
new reference.Registrar().registerClasses(kryo)

kryo.register(classOf[LociMap[_]], new Serializer)
kryo.register(classOf[Array[LociMap[_]]])
kryo.register(classOf[Contig[_]], new ContigSerializer)
Expand Down
@@ -1,7 +1,7 @@
package org.hammerlab.genomics.loci.map

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer => KryoSerializer}
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer => KryoSerializer }

/**
* We serialize a LociMap simply by writing out all of its Contigs.
Expand Down
@@ -1,6 +1,7 @@
package org.hammerlab.genomics.loci.parsing

import org.hammerlab.genomics.reference.{ContigName, Locus}
import org.hammerlab.genomics.reference.ContigName.Factory
import org.hammerlab.genomics.reference.{ ContigName, Locus }

/**
* Representation of a genomic range as parsed from a cmdline-flag or file.
Expand Down Expand Up @@ -30,7 +31,7 @@ object ParsedLociRange {
* "chr1:10000": just chr1, position 10000; equivalent to "chr1:10000-10001".
* "chr1:10000-": chr1, from position 10000 to the end of chr1.
*/
def apply(lociRangeStr: String): Option[ParsedLociRange] =
def apply(lociRangeStr: String)(implicit factory: Factory): Option[ParsedLociRange] =
lociRangeStr.replaceAll("\\s", "") match {
case "all" =>
Some(AllRange)
Expand Down
Expand Up @@ -7,6 +7,7 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.hammerlab.genomics.loci.VariantContext
import org.hammerlab.genomics.loci.args.LociArgs
import org.hammerlab.genomics.reference.ContigName.Factory

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
Expand Down Expand Up @@ -35,11 +36,9 @@ import scala.io.Source
sealed trait ParsedLoci extends Any

object ParsedLoci {
val empty = apply("")
def apply(lociStrs: String)(implicit factory: Factory): ParsedLoci = apply(Iterator(lociStrs))

def apply(lociStrs: String): ParsedLoci = apply(Iterator(lociStrs))

def apply(lines: Iterator[String]): ParsedLoci = {
def apply(lines: Iterator[String])(implicit factory: Factory): ParsedLoci = {
val lociRanges = ArrayBuffer[LociRange]()
for {
lociStrs <- lines
Expand All @@ -65,7 +64,7 @@ object ParsedLoci {

def fromArgs(lociStrOpt: Option[String],
lociFileOpt: Option[String],
hadoopConfiguration: Configuration): Option[ParsedLoci] =
hadoopConfiguration: Configuration)(implicit factory: Factory): Option[ParsedLoci] =
(lociStrOpt, lociFileOpt) match {
case (Some(lociStr), _) => Some(ParsedLoci(lociStr))
case (_, Some(lociFile)) => Some(loadFromFile(lociFile, hadoopConfiguration))
Expand All @@ -81,7 +80,7 @@ object ParsedLoci {
* "chrX:5-10,chr12:10-20", etc. Whitespace is ignored.
* @return parsed loci
*/
private def loadFromFile(lociFile: String, hadoopConfiguration: Configuration): ParsedLoci =
private def loadFromFile(lociFile: String, hadoopConfiguration: Configuration)(implicit factory: Factory): ParsedLoci =
if (lociFile.endsWith(".vcf")) {
LociRanges.fromVCF(lociFile)
} else if (lociFile.endsWith(".loci") || lociFile.endsWith(".txt")) {
Expand Down
Expand Up @@ -3,12 +3,13 @@ package org.hammerlab.genomics.loci.set
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer => KryoSerializer }
import com.google.common.collect.{ TreeRangeSet, Range => JRange }
import org.hammerlab.genomics.reference.{ Interval, Locus }
import org.hammerlab.genomics.reference.{ ContigName, Interval, Locus }

// We serialize a LociSet simply by writing out its constituent Contigs.
class ContigSerializer extends KryoSerializer[Contig] {

def write(kryo: Kryo, output: Output, obj: Contig) = {
output.writeString(obj.name.name)
kryo.writeObject(output, obj.name)
output.writeInt(obj.ranges.length)
for {
Interval(start, end) <- obj.ranges
Expand All @@ -19,12 +20,12 @@ class ContigSerializer extends KryoSerializer[Contig] {
}

def read(kryo: Kryo, input: Input, klass: Class[Contig]): Contig = {
val name = input.readString()
val name = kryo.readObject(input, classOf[ContigName])
val length = input.readInt()
val treeRangeSet = TreeRangeSet.create[Locus]()
val ranges = (0 until length).foreach(_ => {
val ranges = (0 until length).foreach { _ =>
treeRangeSet.add(JRange.closedOpen[Locus](input.readLong(), input.readLong()))
})
}
Contig(name, treeRangeSet)
}
}
Expand Up @@ -26,7 +26,7 @@ case class LociSet(private val map: SortedMap[ContigName, Contig]) extends Trunc
@transient lazy val contigs = map.values.toArray

/** The number of loci in this LociSet. */
@transient lazy val count: NumLoci = contigs.map(_.count: Long).sum
@transient lazy val count: NumLoci = contigs.map(_.count).sum

def isEmpty = map.isEmpty
def nonEmpty = map.nonEmpty
Expand Down
Expand Up @@ -2,9 +2,12 @@ package org.hammerlab.genomics.loci.set

import com.esotericsoftware.kryo.Kryo
import org.apache.spark.serializer.KryoRegistrator
import org.hammerlab.genomics.reference

class Registrar extends KryoRegistrator {
override def registerClasses(kryo: Kryo): Unit = {
new reference.Registrar().registerClasses(kryo)

kryo.register(classOf[LociSet], new Serializer)
kryo.register(classOf[Array[LociSet]])
kryo.register(classOf[Contig], new ContigSerializer)
Expand Down
@@ -1,7 +1,7 @@
package org.hammerlab.genomics.loci.set

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer => KryoSerializer}
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer => KryoSerializer }

// We just serialize the underlying contigs, which contain their names which are the string keys of LociSet.map.
class Serializer extends KryoSerializer[LociSet] {
Expand Down
@@ -1,13 +1,12 @@
package org.hammerlab.genomics.loci.iterator

import org.hammerlab.genomics.reference.test.LocusUtil._
import org.hammerlab.genomics.reference.test.{ IntervalsUtil, LocusUtil }
import org.hammerlab.genomics.reference.test.IntervalsUtil
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.genomics.reference.{ Interval, Locus }
import org.hammerlab.test.Suite

class SkippableLociIteratorSuite
extends Suite
with LocusUtil
with IntervalsUtil {

def strs =
Expand Down
Expand Up @@ -2,10 +2,15 @@ package org.hammerlab.genomics.loci.map

import com.google.common.collect.{ ImmutableRangeMap, Range }
import org.hammerlab.genomics.reference.Locus
import org.hammerlab.genomics.reference.test.ClearContigNames
import org.hammerlab.test.Suite
import org.hammerlab.genomics.reference.test.LocusUtil._
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.genomics.reference.test.ContigNameConversions._

class ContigSuite
extends Suite
with ClearContigNames {

class ContigSuite extends Suite {
test("empty") {
val contigMap = new Contig("chr1", ImmutableRangeMap.builder[Locus, String]().build())

Expand Down
@@ -1,15 +1,13 @@
package org.hammerlab.genomics.loci.map

import org.hammerlab.genomics.loci.set.LociSet
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.test.{ ContigNameUtil, LocusUtil }
import org.hammerlab.genomics.reference.{ ContigName, Interval }
import org.hammerlab.genomics.reference.Interval
import org.hammerlab.genomics.reference.test.LociConversions.intToLocus
import org.hammerlab.genomics.reference.test.ContigNameConversions.toSeq
import org.hammerlab.test.Suite

class LociMapSuite
extends Suite
with LocusUtil
with ContigNameUtil
with LociSetUtil {

test("properties of empty LociMap") {
Expand Down
@@ -1,13 +1,12 @@
package org.hammerlab.genomics.loci.map

import org.hammerlab.genomics.reference.test.LocusUtil
import org.hammerlab.genomics.reference.test.LociConversions.intToLocus
import org.hammerlab.genomics.reference.{ ContigName, Locus }
import org.hammerlab.spark.test.suite.{ KryoSparkSuite, SparkSerialization }

class SerializerSuite
extends KryoSparkSuite(classOf[Registrar])
with SparkSerialization
with LocusUtil {
with SparkSerialization {

def testSerde(
name: String
Expand Down
Expand Up @@ -2,12 +2,13 @@ package org.hammerlab.genomics.loci.parsing

import org.apache.hadoop.conf.Configuration
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.test.LocusUtil
import org.hammerlab.genomics.reference.test.ClearContigNames
import org.hammerlab.genomics.reference.test.LociConversions.intToLocus
import org.hammerlab.test.Suite

class ParsedLociSuite
extends Suite
with LocusUtil
with ClearContigNames
with LociSetUtil {

val conf = new Configuration
Expand Down
Expand Up @@ -2,9 +2,8 @@ package org.hammerlab.genomics.loci.set

import org.hammerlab.genomics.loci.iterator.LociIterator
import org.hammerlab.genomics.reference.Interval
import org.hammerlab.genomics.reference.test.LocusUtil._
import org.hammerlab.genomics.reference.test.LociConversions._
import org.hammerlab.test.Suite
import org.hammerlab.test.implicits.toList

class LociIteratorSuite extends Suite {

Expand Down
38 changes: 22 additions & 16 deletions src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala
Expand Up @@ -2,21 +2,24 @@ package org.hammerlab.genomics.loci.set

import org.hammerlab.genomics.loci.parsing.ParsedLoci
import org.hammerlab.genomics.loci.set.test.LociSetUtil
import org.hammerlab.genomics.reference.test.ContigLengthsUtil._
import org.hammerlab.genomics.reference.test.ContigNameUtil
import org.hammerlab.genomics.reference.test.LocusUtil._
import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigLengthsUtil }
import org.hammerlab.genomics.reference.test.ContigNameConversions.toArray
import org.hammerlab.genomics.reference.test.LociConversions.{ intToLocus, toSeq }
import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Locus, NumLoci }
import org.hammerlab.spark.test.suite.KryoSparkSuite

import scala.collection.mutable

class LociSetSuite
extends KryoSparkSuite(classOf[Registrar])
with ContigNameUtil
with LociSetUtil {
with LociSetUtil
with ClearContigNames
with ContigLengthsUtil {

import org.hammerlab.genomics.reference.ContigName.Normalization.Lenient

// "loci set invariants" collects some LociSets
kryoRegister(classOf[mutable.WrappedArray.ofRef[_]])
register(classOf[mutable.WrappedArray.ofRef[_]])

def makeLociSet(str: String, lengths: (ContigName, NumLoci)*): LociSet =
LociSet(ParsedLoci(str), lengths.toMap)
Expand Down Expand Up @@ -65,7 +68,7 @@ class LociSetSuite
set("chr21").intersects(90, 100) should ===(false)
set("chr21").intersects(90, 101) should ===(true)
set("chr21").intersects(90, 95) should ===(false)
set("chr21").iterator.toSeq should ===((100 until 200))
set("chr21").iterator.toSeq should ===(100 until 200)
}

test("single loci parsing") {
Expand All @@ -77,15 +80,17 @@ class LociSetSuite
}

test("loci set invariants") {
val sets = List(
"",
"empty:20-20,empty2:30-30",
"20:100-200",
"with_dots.and_underscores..2:100-200",
"21:300-400",
"X:5-17,X:19-22,Y:50-60",
"chr21:100-200,chr20:0-10,chr20:8-15,chr20:100-120"
).map(lociSet(_))
val sets =
List(
"",
"empty:20-20,empty2:30-30",
"20:100-200",
"with_dots.and_underscores..2:100-200",
"21:300-400",
"X:5-17,X:19-22,Y:50-60",
"chr21:100-200,chr20:0-10,chr20:8-15,chr20:100-120"
)
.map(lociSet)

def checkInvariants(set: LociSet): Unit = {
set should not be null
Expand All @@ -103,6 +108,7 @@ class LociSetSuite
result should ===(set)
}
}

sets.foreach(checkInvariants)
}

Expand Down

0 comments on commit 73ac093

Please sign in to comment.