diff --git a/build.sbt b/build.sbt index 57912d0..8b15e18 100644 --- a/build.sbt +++ b/build.sbt @@ -1,22 +1,21 @@ - organization := "org.hammerlab.genomics" name := "loci" -version := "2.0.1" +r"2.0.1" addSparkDeps -deps ++= Seq( +dep( args4j, - args4s % "1.3.0", + args4s % "1.3.0", htsjdk, - iterators % "1.3.0", - paths % "1.2.0", + iterators % "1.3.0", + paths % "1.2.0", + reference % "1.4.0" + testtest, scalautils, + spark_util % "2.0.1", string_utils % "1.2.0" ) -compileAndTestDeps += reference % "1.4.0" - // Shade Guava due to use of RangeSet classes from 16.0.1 that don't exist in Spark/Hadoop's Guava 11.0.2. shadedDeps += guava diff --git a/project/build.properties b/project/build.properties index 27e88aa..394cb75 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.13.13 +sbt.version=1.0.4 diff --git a/project/plugins.sbt b/project/plugins.sbt index c022d7b..dbb345f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.hammerlab" % "sbt-parent" % "3.1.0") +addSbtPlugin("org.hammerlab" % "sbt-parent" % "4.0.0-SNAPSHOT") diff --git a/src/main/scala/org/hammerlab/genomics/loci/map/Contig.scala b/src/main/scala/org/hammerlab/genomics/loci/map/Contig.scala index e37bad6..7f2ce39 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/map/Contig.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/map/Contig.scala @@ -121,4 +121,8 @@ object Contig { } def lociRange(start: Locus, end: Locus): JRange[Locus] = JRange.closedOpen[Locus](start, end) + + import org.hammerlab.kryo._ + implicit val serializer = new ContigSerializer[Nothing] + implicit val alsoRegister = AlsoRegister[Contig[Nothing]](cls[ContigName]) } diff --git a/src/main/scala/org/hammerlab/genomics/loci/map/LociMap.scala b/src/main/scala/org/hammerlab/genomics/loci/map/LociMap.scala index 3e1be4a..a2febb0 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/map/LociMap.scala +++ 
b/src/main/scala/org/hammerlab/genomics/loci/map/LociMap.scala @@ -94,4 +94,9 @@ object LociMap { contigs.map(contig => contig.name -> contig).toSeq: _* ) ) + + import com.esotericsoftware.kryo + import org.hammerlab.kryo._ + implicit val serializer: kryo.Serializer[LociMap[Nothing]] = new Serializer[Nothing] + implicit val alsoRegister = AlsoRegister[LociMap[Nothing]](arr[Contig[Nothing]]) } diff --git a/src/main/scala/org/hammerlab/genomics/loci/map/Registrar.scala b/src/main/scala/org/hammerlab/genomics/loci/map/Registrar.scala deleted file mode 100644 index 87f937b..0000000 --- a/src/main/scala/org/hammerlab/genomics/loci/map/Registrar.scala +++ /dev/null @@ -1,16 +0,0 @@ -package org.hammerlab.genomics.loci.map - -import com.esotericsoftware.kryo.Kryo -import org.apache.spark.serializer.KryoRegistrator -import org.hammerlab.genomics.reference - -class Registrar extends KryoRegistrator { - override def registerClasses(kryo: Kryo): Unit = { - new reference.Registrar().registerClasses(kryo) - - kryo.register(classOf[LociMap[_]], new Serializer) - kryo.register(classOf[Array[LociMap[_]]]) - kryo.register(classOf[Contig[_]], new ContigSerializer) - kryo.register(classOf[Array[Contig[_]]]) - } -} diff --git a/src/main/scala/org/hammerlab/genomics/loci/map/Serializer.scala b/src/main/scala/org/hammerlab/genomics/loci/map/Serializer.scala index fabee82..4cf54ac 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/map/Serializer.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/map/Serializer.scala @@ -1,22 +1,23 @@ package org.hammerlab.genomics.loci.map +import com.esotericsoftware.kryo +import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{ Input, Output } -import com.esotericsoftware.kryo.{ Kryo, Serializer ⇒ KryoSerializer } /** * We serialize a LociMap simply by writing out all of its Contigs. 
*/ -class Serializer[T] extends KryoSerializer[LociMap[T]] { +class Serializer[T] extends kryo.Serializer[LociMap[T]] { def write(kryo: Kryo, output: Output, obj: LociMap[T]) = { output.writeLong(obj.contigs.size) - obj.contigs.foreach(contig => + obj.contigs.foreach(contig ⇒ kryo.writeObject(output, contig) ) } def read(kryo: Kryo, input: Input, klass: Class[LociMap[T]]): LociMap[T] = { val count: Long = input.readLong() - val contigs = (0L until count).map(i => + val contigs = (0L until count).map(i ⇒ kryo.readObject(input, classOf[Contig[T]]) ) LociMap.fromContigs(contigs) diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/Contig.scala b/src/main/scala/org/hammerlab/genomics/loci/set/Contig.scala index f1312ac..ee48774 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/set/Contig.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/set/Contig.scala @@ -113,4 +113,8 @@ object Contig { ) def lociRange(start: Locus, end: Locus): JRange[Locus] = closedOpen[Locus](start, end) + + import org.hammerlab.kryo._ + implicit val serializer = new ContigSerializer + implicit val alsoRegister = AlsoRegister[Contig](cls[ContigName]) } diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/ContigSerializer.scala b/src/main/scala/org/hammerlab/genomics/loci/set/ContigSerializer.scala index ad1309e..b1da687 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/set/ContigSerializer.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/set/ContigSerializer.scala @@ -6,7 +6,7 @@ import com.google.common.collect.{ TreeRangeSet, Range ⇒ JRange } import JRange.closedOpen import org.hammerlab.genomics.reference.{ ContigName, Interval, Locus } -// We serialize a LociSet simply by writing out its constituent Contigs. +/** Kryo serializer for a single [[Contig]]; a [[LociSet]] is serialized by writing out its [[Contig]]s. 
*/ class ContigSerializer extends KryoSerializer[Contig] { def write(kryo: Kryo, output: Output, obj: Contig) = { diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala b/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala index 79f36f6..6c47f32 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala @@ -1,5 +1,7 @@ package org.hammerlab.genomics.loci.set +import com.esotericsoftware.kryo.io.{ Input, Output } +import com.esotericsoftware.kryo.{ Kryo, Serializer } import htsjdk.samtools.util.{ Interval ⇒ HTSJDKInterval } import org.hammerlab.genomics.loci.parsing.{ All, LociRange, LociRanges, ParsedLoci } import org.hammerlab.genomics.reference.ContigName.Factory @@ -175,4 +177,20 @@ object LociSet { } } ) + + // We serialize only the underlying contigs; each contig carries its name, and those names are the string keys of LociSet.map. + implicit val serializer = + new Serializer[LociSet] { + def write(kryo: Kryo, output: Output, obj: LociSet) = { + kryo.writeObject(output, obj.contigs) + } + + def read(kryo: Kryo, input: Input, klass: Class[LociSet]): LociSet = { + val contigs = kryo.readObject(input, classOf[Array[Contig]]) + LociSet.fromContigs(contigs) + } + } + + import org.hammerlab.kryo._ + implicit val alsoRegister = AlsoRegister[LociSet](arr[Contig]) } diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/Registrar.scala b/src/main/scala/org/hammerlab/genomics/loci/set/Registrar.scala deleted file mode 100644 index d083e1f..0000000 --- a/src/main/scala/org/hammerlab/genomics/loci/set/Registrar.scala +++ /dev/null @@ -1,16 +0,0 @@ -package org.hammerlab.genomics.loci.set - -import com.esotericsoftware.kryo.Kryo -import org.apache.spark.serializer.KryoRegistrator -import org.hammerlab.genomics.reference - -class Registrar extends KryoRegistrator { - override def registerClasses(kryo: Kryo): Unit = { - new reference.Registrar().registerClasses(kryo) - - 
kryo.register(classOf[LociSet], new Serializer) - kryo.register(classOf[Array[LociSet]]) - kryo.register(classOf[Contig], new ContigSerializer) - kryo.register(classOf[Array[Contig]]) - } -} diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/Serializer.scala b/src/main/scala/org/hammerlab/genomics/loci/set/Serializer.scala index ae0eefa..c79056c 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/set/Serializer.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/set/Serializer.scala @@ -3,14 +3,3 @@ package org.hammerlab.genomics.loci.set import com.esotericsoftware.kryo.io.{ Input, Output } import com.esotericsoftware.kryo.{ Kryo, Serializer ⇒ KryoSerializer } -// We just serialize the underlying contigs, which contain their names which are the string keys of LociSet.map. -class Serializer extends KryoSerializer[LociSet] { - def write(kryo: Kryo, output: Output, obj: LociSet) = { - kryo.writeObject(output, obj.contigs) - } - - def read(kryo: Kryo, input: Input, klass: Class[LociSet]): LociSet = { - val contigs = kryo.readObject(input, classOf[Array[Contig]]) - LociSet.fromContigs(contigs) - } -} diff --git a/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala index ea8b254..bd84f5d 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala @@ -6,10 +6,14 @@ import org.hammerlab.genomics.reference.{ ContigName, Locus } import org.hammerlab.spark.test.suite.{ KryoSparkSuite, SparkSerialization } class SerializerSuite - extends KryoSparkSuite(classOf[Registrar]) + extends KryoSparkSuite with SparkSerialization with ClearContigNames { + register( + classOf[LociMap[Nothing]] + ) + def check( ranges: (ContigName, Locus, Locus, String)* )( diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala 
b/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala index d2db6ae..5f16518 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala @@ -6,11 +6,12 @@ import org.hammerlab.genomics.reference.test.LociConversions._ import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigLengthsUtil, ContigNameConversions } import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Locus, NumLoci } import org.hammerlab.spark.test.suite.KryoSparkSuite +import org.hammerlab.kryo._ import scala.collection.mutable class LociSetSuite - extends KryoSparkSuite(classOf[Registrar]) + extends KryoSparkSuite with LociSetUtil with ContigNameConversions with ClearContigNames @@ -18,6 +19,7 @@ class LociSetSuite // "loci set invariants" collects some LociSets register( + arr[LociSet], classOf[mutable.WrappedArray.ofRef[_]] ) diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala index 842917f..0b86377 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala @@ -8,12 +8,13 @@ import org.hammerlab.genomics.reference.ContigName.Factory import org.hammerlab.genomics.reference.test.LociConversions._ import org.hammerlab.genomics.reference.test.{ ClearContigNames, LenientContigNameConversions } import org.hammerlab.genomics.reference.{ Locus, PermissiveRegistrar } +import org.hammerlab.kryo._ import org.hammerlab.spark.test.suite.{ KryoSparkSuite, SparkSerialization } import scala.collection.mutable class SerializerSuite - extends KryoSparkSuite(classOf[Registrar], referenceTracking = true) + extends KryoSparkSuite(referenceTracking = true) with SparkSerialization with LenientContigNameConversions with ClearContigNames @@ -23,14 +24,16 @@ class SerializerSuite import Helpers._ 
register( + arr[LociSet], + // "a closure that includes a LociSet" parallelizes some Range[Long]s. - classOf[Range], - classOf[Array[Locus]], + cls[Range], + cls[Array[Locus]], // "make an RDD[LociSet] and an RDD[Contig]" collects some Strings. - classOf[Array[String]], + cls[Array[String]], - classOf[mutable.WrappedArray.ofRef[_]], + cls[mutable.WrappedArray.ofRef[_]], PermissiveRegistrar )