diff --git a/build.sbt b/build.sbt index 519cf12..40aeb03 100644 --- a/build.sbt +++ b/build.sbt @@ -1,20 +1,18 @@ -group("org.hammerlab.genomics") -name := "loci" -v"2.0.4" +subgroup("genomics", "loci") +v"2.1.0" -addSparkDeps +spark import genomics.reference dep( - args4j, - args4s % "1.3.1", + case_app, htsjdk, - iterators % "2.1.0", + iterators % "2.2.0", paths % "1.5.0", reference % "1.4.3" + testtest, scalautils, - spark_util % "2.0.3", + spark_util % "2.0.4", string_utils % "1.2.0" ) @@ -22,7 +20,7 @@ dep( shadedDeps += guava // Rename shaded Guava classes -shadeRenames += "com.google.common.**" → "org.hammerlab.guava.@1" +shadeRenames += "com.google.common.**" → "org.hammerlab.guava.@1" shadeRenames += "com.google.thirdparty.**" → "org.hammerlab.guava.@1" // Publish JAR that includes shaded Guava. diff --git a/project/build.properties b/project/build.properties index 31334bb..7c81737 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.1.1 +sbt.version=1.1.5 diff --git a/project/plugins.sbt b/project/plugins.sbt index 93d8288..0f14428 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.hammerlab.sbt" % "base" % "4.4.2") +addSbtPlugin("org.hammerlab.sbt" % "base" % "4.6.1") diff --git a/src/main/scala/org/hammerlab/genomics/loci/Args.scala b/src/main/scala/org/hammerlab/genomics/loci/Args.scala new file mode 100644 index 0000000..7e3e012 --- /dev/null +++ b/src/main/scala/org/hammerlab/genomics/loci/Args.scala @@ -0,0 +1,31 @@ +package org.hammerlab.genomics.loci + +import caseapp.{ HelpMessage ⇒ M } +import org.hammerlab.genomics.loci.parsing.ParsedLoci +import org.hammerlab.genomics.loci.parsing.ParsedLoci.loadFromPath +import org.hammerlab.genomics.reference.ContigName.Factory +import org.hammerlab.paths.Path + +/** Arguments for accepting a set of loci to restrict variant-calling to. */ +case class Args( + @M("If set, loci to include. Either 'all' or 'contig[:start[-end]],contig[:start[-end]],…'") + loci: Option[String] = None, + + @M("Path to file giving loci to include") + lociFile: Option[Path] = None +) { + def parse( + implicit + factory: Factory + ): Option[ParsedLoci] = + (loci, lociFile) match { + case (Some(str), Some(file)) ⇒ + throw new IllegalArgumentException( + "Specify a loci string (--loci) xor file (--loci-file)" + ) + case (Some(str), _) => Some(ParsedLoci(str)) + case (_, Some(path)) => Some(loadFromPath(path)) + case _ => + None + } +} diff --git a/src/main/scala/org/hammerlab/genomics/loci/args/LociArgs.scala b/src/main/scala/org/hammerlab/genomics/loci/args/LociArgs.scala deleted file mode 100644 index b69be4d..0000000 --- a/src/main/scala/org/hammerlab/genomics/loci/args/LociArgs.scala +++ /dev/null @@ -1,25 +0,0 @@ -package org.hammerlab.genomics.loci.args - -import org.hammerlab.args4s.{ PathOptionHandler, StringOptionHandler } -import org.hammerlab.paths.Path -import org.kohsuke.args4j.{ Option ⇒ Args4jOption } - -/** Arguments for accepting a set of loci to restrict variant-calling to. */ -trait LociArgs - extends LociInput { - @Args4jOption( - name = "--loci", - usage = "If set, loci to include. Either 'all' or 'contig[:start[-end]],contig[:start[-end]],…'", - forbids = Array("--loci-file"), - handler = classOf[StringOptionHandler] - ) - var lociStrOpt: Option[String] = None - - @Args4jOption( - name = "--loci-file", - usage = "Path to file giving loci to include.", - forbids = Array("--loci"), - handler = classOf[PathOptionHandler] - ) - var lociFileOpt: Option[Path] = None -} diff --git a/src/main/scala/org/hammerlab/genomics/loci/args/LociInput.scala b/src/main/scala/org/hammerlab/genomics/loci/args/LociInput.scala deleted file mode 100644 index 27824b6..0000000 --- a/src/main/scala/org/hammerlab/genomics/loci/args/LociInput.scala +++ /dev/null @@ -1,8 +0,0 @@ -package org.hammerlab.genomics.loci.args - -import org.hammerlab.paths.Path - -trait LociInput { - def lociStrOpt: Option[String] - def lociFileOpt: Option[Path] -} diff --git a/src/main/scala/org/hammerlab/genomics/loci/parsing/ParsedLoci.scala b/src/main/scala/org/hammerlab/genomics/loci/parsing/ParsedLoci.scala index 4678555..69469ff 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/parsing/ParsedLoci.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/parsing/ParsedLoci.scala @@ -2,7 +2,6 @@ package org.hammerlab.genomics.loci.parsing import htsjdk.variant.vcf.VCFFileReader import org.hammerlab.genomics.loci.VariantContext -import org.hammerlab.genomics.loci.args.LociArgs import org.hammerlab.genomics.reference.ContigName.Factory import org.hammerlab.paths.Path @@ -20,7 +19,7 @@ import scala.collection.mutable.ArrayBuffer * The two implementations are: * * - [[All]]: sentinel value representing all loci on all contigs. - * - [[LociRanges]]: a sequence of [[LociRange]]s denoting (possibly open-ended) genomic-intervals. + * - [[LociRanges]]: a sequence of [[Range]]s denoting (possibly open-ended) genomic-intervals. * * Examples: * @@ -35,41 +34,21 @@ object ParsedLoci { def apply(lociStrs: String)(implicit factory: Factory): ParsedLoci = apply(Iterator(lociStrs)) def apply(lines: Iterator[String])(implicit factory: Factory): ParsedLoci = { - val lociRanges = ArrayBuffer[LociRange]() + val lociRanges = ArrayBuffer[Range]() for { lociStrs ← lines lociStr ← lociStrs.replaceAll("\\s", "").split(",") - lociRange ← ParsedLociRange(lociStr) + lociRange ← ParsedRange(lociStr) } { lociRange match { - case AllRange ⇒ return All - case lociRange: LociRange ⇒ + case ParsedRange.All ⇒ return All + case lociRange: Range ⇒ lociRanges += lociRange } } LociRanges(lociRanges) } - /** - * Parse string representations of loci ranges, either from one string (lociOpt) or a file with one range per line - * (lociFileOpt), and return a [[ParsedLoci]] encapsulating the result. The latter can then be converted into a - * [[org.hammerlab.genomics.loci.set.LociSet]] when contig-lengths are available / have been parsed from read-sets. - */ - def apply(args: LociArgs): Option[ParsedLoci] = - apply( - args.lociStrOpt, - args.lociFileOpt - ) - - def apply(lociStrOpt: Option[String], - lociFileOpt: Option[Path])(implicit factory: Factory): Option[ParsedLoci] = - (lociStrOpt, lociFileOpt) match { - case (Some(lociStr), _) => Some(ParsedLoci(lociStr)) - case (_, Some(lociPath)) => Some(loadFromPath(lociPath)) - case _ => - None - } - /** * Parse loci from the specified file. * @@ -78,7 +57,7 @@ object ParsedLoci { * "chrX:5-10,chr12-10-20", etc. Whitespace is ignored. * @return parsed loci */ - private def loadFromPath(path: Path)(implicit factory: Factory): ParsedLoci = + def loadFromPath(path: Path)(implicit factory: Factory): ParsedLoci = path.extension match { case "vcf" ⇒ LociRanges.fromVCF(path) case "loci" | "txt" ⇒ ParsedLoci(path.lines) @@ -94,10 +73,10 @@ object ParsedLoci { */ case object All extends ParsedLoci -case class LociRanges(ranges: Iterable[LociRange]) extends AnyVal with ParsedLoci +case class LociRanges(ranges: Iterable[Range]) extends AnyVal with ParsedLoci object LociRanges { - def apply(range: LociRange): LociRanges = apply(Iterable(range)) + def apply(range: Range): LociRanges = apply(Iterable(range)) def fromVCF(path: Path): LociRanges = apply( @@ -106,7 +85,7 @@ object LociRanges { new VCFFileReader(path.toFile, false) .map { case VariantContext(contigName, start, end) => - LociRange(contigName, start, end) + Range(contigName, start, end) } ) } diff --git a/src/main/scala/org/hammerlab/genomics/loci/parsing/LociRange.scala b/src/main/scala/org/hammerlab/genomics/loci/parsing/Range.scala similarity index 71% rename from src/main/scala/org/hammerlab/genomics/loci/parsing/LociRange.scala rename to src/main/scala/org/hammerlab/genomics/loci/parsing/Range.scala index 952df77..476844d 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/parsing/LociRange.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/parsing/Range.scala @@ -14,9 +14,9 @@ import org.hammerlab.genomics.reference.{ ContigName, Locus } * - all * - none */ -sealed trait ParsedLociRange +sealed trait ParsedRange -object ParsedLociRange { +object ParsedRange { val contigAndLoci = """^([\pL\pN._]+):(\pN+)(?:-(\pN*))?$""".r val contigOnly = """^([\pL\pN._]+)""".r @@ -31,10 +31,10 @@ object ParsedLociRange { * "chr1:10000": just chr1, position 10000; equivalent to "chr1:10000-10001". * "chr1:10000-": chr1, from position 10000 to the end of chr1. */ - def apply(lociRangeStr: String)(implicit factory: Factory): Option[ParsedLociRange] = + def apply(lociRangeStr: String)(implicit factory: Factory): Option[ParsedRange] = lociRangeStr.replaceAll("\\s", "") match { case "all" => - Some(AllRange) + Some(All) case "none" | "" => None case contigAndLoci(name, startStr, endStrOpt) => @@ -46,27 +46,28 @@ object ParsedLociRange { case None => Some(start.next) } - Some(LociRange(name, start, endOpt)) + Some(Range(name, start, endOpt)) case contigOnly(contig) => - Some(LociRange(contig, Locus(0), None)) + Some(Range(contig, Locus(0), None)) case other => throw new IllegalArgumentException(s"Couldn't parse loci range: $other") } -} -case object AllRange extends ParsedLociRange + case object All extends ParsedRange + +} -case class LociRange(contigName: ContigName, - start: Locus, - endOpt: Option[Locus]) - extends ParsedLociRange +case class Range(contigName: ContigName, + start: Locus, + endOpt: Option[Locus]) + extends ParsedRange -object LociRange { - def apply(contigName: ContigName, start: Locus, end: Locus): LociRange = - LociRange(contigName, start, Some(end)) +object Range { + def apply(contigName: ContigName, start: Locus, end: Locus): Range = + Range(contigName, start, Some(end)) - def apply(tuple: (ContigName, Locus, Locus)): LociRange = { + def apply(tuple: (ContigName, Locus, Locus)): Range = { val (contigName, start, end) = tuple - LociRange(contigName, start, Some(end)) + Range(contigName, start, Some(end)) } } diff --git a/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala b/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala index 6c47f32..7e1a620 100644 --- a/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala +++ b/src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala @@ -3,7 +3,7 @@ package org.hammerlab.genomics.loci.set import com.esotericsoftware.kryo.io.{ Input, Output } import com.esotericsoftware.kryo.{ Kryo, Serializer } import htsjdk.samtools.util.{ Interval ⇒ HTSJDKInterval } -import org.hammerlab.genomics.loci.parsing.{ All, LociRange, LociRanges, ParsedLoci } +import org.hammerlab.genomics.loci.parsing.{ All, Range, LociRanges, ParsedLoci } import org.hammerlab.genomics.reference.ContigName.Factory import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Interval, Locus, NumLoci, Region } import org.hammerlab.strings.TruncatedToString @@ -158,7 +158,7 @@ object LociSet { Region(contig, Locus(0), Locus(length)) case LociRanges(ranges) ⇒ for { - LociRange(contigName, start, endOpt) ← ranges + Range(contigName, start, endOpt) ← ranges contigLengthOpt = contigLengths.get(contigName) } yield (endOpt, contigLengthOpt) match { diff --git a/src/test/scala/org/hammerlab/genomics/loci/cmps.scala b/src/test/scala/org/hammerlab/genomics/loci/cmps.scala new file mode 100644 index 0000000..a4aecbc --- /dev/null +++ b/src/test/scala/org/hammerlab/genomics/loci/cmps.scala @@ -0,0 +1,10 @@ +package org.hammerlab.genomics.loci + +import org.hammerlab.cmp.CanEq.Cmp +import org.hammerlab.cmp.Cmp +import org.hammerlab.genomics.reference.{ Interval, Locus } + +trait cmps { + // TODO: move this to reference repo + implicit def cmpInterval(implicit cmp: Cmp[(Locus, Locus)]): Cmp.Aux[Interval, cmp.Diff] = Cmp.by[(Locus, Locus), Interval](i ⇒ (i.start, i.end))(cmp) +} diff --git a/src/test/scala/org/hammerlab/genomics/loci/iterator/SkippableLociIteratorSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/iterator/SkippableLociIteratorSuite.scala index 50041c2..97a00ae 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/iterator/SkippableLociIteratorSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/iterator/SkippableLociIteratorSuite.scala @@ -24,7 +24,8 @@ class SkippableLociIteratorSuite ) test("no skips") { - strs.toSeq should ===( + ==( + strs.toSeq, List( 10 → "a", 11 → "b", @@ -49,34 +50,35 @@ class SkippableLociIteratorSuite test("misc skips") { val it = strs it.skipTo(15) - it.next() should ===(20 → "c") + ==(it.next(), 20 → "c") it.skipTo(30) - it.next() should ===(30 → "e") + ==(it.next(), 30 → "e") intercept[IllegalArgumentException] { it.skipTo(30) } - it.next() should ===(31 → "f") + ==(it.next(), 31 → "f") it.skipTo(32) - it.next() should ===(33 → "g") + ==(it.next(), 33 → "g") it.skipTo(34) - it.next() should ===(34 → "h") + ==(it.next(), 34 → "h") it.skipTo(41) - it.next() should ===(50 → "j") - it.hasNext should ===(false) + ==(it.next(), 50 → "j") + ==(it.hasNext, false) } test("intersect") { - strs.intersect( - new LociIterator( - Iterator( - Interval( 8, 11), - Interval(14, 16), - Interval(30, 35), - Interval(38, 42), - Interval(50, 51) - ).buffered - ) - ).toSeq === + ==( + strs.intersect( + new LociIterator( + Iterator( + Interval( 8, 11), + Interval(14, 16), + Interval(30, 35), + Interval(38, 42), + Interval(50, 51) + ).buffered + ) + ).toSeq, Seq( 10 → "a", 30 → "e", @@ -86,6 +88,7 @@ class SkippableLociIteratorSuite 40 → "i", 50 → "j" ) + ) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/map/ContigSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/map/ContigSuite.scala index 4a6fb20..e6fa3ef 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/map/ContigSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/map/ContigSuite.scala @@ -14,10 +14,10 @@ class ContigSuite test("empty") { val contigMap = new Contig("chr1", ImmutableRangeMap.builder[Locus, String]().build()) - contigMap.get(100) should ===(None) + ==(contigMap.get(100), None) contigMap.getAll(0, 10000) should be(Set()) - contigMap.count should ===(0) - contigMap.toString should ===("") + ==(contigMap.count, 0) + ==(contigMap.toString, "") } test("basic operations") { @@ -36,21 +36,21 @@ class ContigSuite ) contigMap.get(99) should be(None) - contigMap.get(100) should ===(Some("A")) - contigMap.get(199) should ===(Some("A")) - contigMap.get(200) should ===(Some("B")) - contigMap.get(299) should ===(Some("B")) + ==(contigMap.get(100), Some("A")) + ==(contigMap.get(199), Some("A")) + ==(contigMap.get(200), Some("B")) + ==(contigMap.get(299), Some("B")) contigMap.get(300) should be(None) contigMap.getAll(0, 100) should be(Set()) - contigMap.getAll(0, 101) should ===(Set("A")) - contigMap.getAll(199, 200) should ===(Set("A")) - contigMap.getAll(199, 201) should ===(Set("A", "B")) - contigMap.getAll(200, 201) should ===(Set("B")) - contigMap.getAll(0, 10000) should ===(Set("A", "B")) + ==(contigMap.getAll(0, 101), Set("A")) + ==(contigMap.getAll(199, 200), Set("A")) + ==(contigMap.getAll(199, 201), Set("A", "B")) + ==(contigMap.getAll(200, 201), Set("B")) + ==(contigMap.getAll(0, 10000), Set("A", "B")) - contigMap.count should ===(200) - contigMap.toString should ===("chr1:100-200=A,chr1:200-300=B") + ==(contigMap.count, 200) + ==(contigMap.toString, "chr1:100-200=A,chr1:200-300=B") } test("getAll") { @@ -60,7 +60,7 @@ class ContigSuite ("chrM", 8286, 16571, 1) ) - lociMap("chrM").getAll(5, 10) should ===(Set(0)) - lociMap("chrM").getAll(10000, 11000) should ===(Set(1)) + ==(lociMap("chrM").getAll(5, 10), Set(0)) + ==(lociMap("chrM").getAll(10000, 11000), Set(1)) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/map/LociMapSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/map/LociMapSuite.scala index 363b257..6e0302a 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/map/LociMapSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/map/LociMapSuite.scala @@ -10,14 +10,15 @@ class LociMapSuite extends Suite with ContigNameConversions with ClearContigNames - with LociSetUtil { + with LociSetUtil + with cmps { test("properties of empty LociMap") { val emptyMap = LociMap[String]() - emptyMap.count should ===(0) - emptyMap.toString() should ===("") - emptyMap should ===(LociMap[String]()) + ==(emptyMap.count, 0) + ==(emptyMap.toString, "") + ==(emptyMap, LociMap[String]()) } test("basic map operations") { @@ -26,21 +27,22 @@ class LociMapSuite ("chr20", 200, 201, "B") ) - lociMap.count should ===(101) - lociMap.toString should ===("chr1:100-200=A,chr20:200-201=B") - lociMap.contigs.map(_.name) should ===(Seq("chr1", "chr20")) + ==(lociMap.count, 101) + ==(lociMap.toString, "chr1:100-200=A,chr20:200-201=B") + ==(lociMap.contigs.map(_.name), Seq("chr1", "chr20")) lociMap should not equal LociMap[String]() - lociMap.inverse should ===( + ==( + lociMap.inverse, Map( "A" → lociSet("chr1:100-200"), "B" → lociSet("chr20:200-201") ) ) - lociMap("chr1").toString should ===("chr1:100-200=A") - lociMap("chr20").toString should ===("chr20:200-201=B") + ==(lociMap("chr1").toString, "chr1:100-200=A") + ==(lociMap("chr20").toString, "chr20:200-201=B") } test("asInverseMap with repeated values") { @@ -51,15 +53,16 @@ class LociMapSuite ) // asInverseMap stuffs all Loci with the same value into a LociSet. - lociMap.inverse should equal( + ==( + lociMap.inverse, Map( "A" -> lociSet("chr1:100-200,chr2:200-300"), "B" -> lociSet("chr3:400-500") ) ) - lociMap.count should ===(300) - lociMap.toString should ===("chr1:100-200=A,chr2:200-300=A,chr3:400-500=B") + ==(lociMap.count, 300) + ==(lociMap.toString, "chr1:100-200=A,chr2:200-300=A,chr3:400-500=B") } test("range coalescing") { @@ -77,8 +80,8 @@ class LociMapSuite "C" -> lociSet("chr1:150-160") ) - lociMap.count should ===(240) - lociMap.toString should ===("chr1:100-150=A,chr1:150-160=C,chr1:160-240=A,chr1:400-500=B") + ==(lociMap.count, 240) + ==(lociMap.toString, "chr1:100-150=A,chr1:150-160=C,chr1:160-240=A,chr1:400-500=B") } test("spanning equal values merges") { @@ -101,7 +104,7 @@ class LociMapSuite Interval(400, 500) -> "B" ) - map.count should ===(400) + ==(map.count, 400) } test("bridging equal values merges") { @@ -124,6 +127,6 @@ class LociMapSuite Interval(400, 500) -> "B" ) - map.count should ===(400) + ==(map.count, 400) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala index bd84f5d..d38812c 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/map/SerializerSuite.scala @@ -8,7 +8,8 @@ import org.hammerlab.spark.test.suite.{ KryoSparkSuite, SparkSerialization } class SerializerSuite extends KryoSparkSuite with SparkSerialization - with ClearContigNames { + with ClearContigNames + with cmps { register( classOf[LociMap[Nothing]] @@ -23,15 +24,15 @@ class SerializerSuite ) = { val beforeMap = LociMap(ranges: _*) - beforeMap("chr1").asMap.size should ===(numRanges) - beforeMap("chr1").count should ===(count) + ==(beforeMap("chr1").asMap.size, numRanges) + ==(beforeMap("chr1").count, count) val bytes = serialize(beforeMap) - bytes.array.length should ===(expectedBytes) + ==(bytes.array.length, expectedBytes) val afterMap: LociMap[String] = deserialize[LociMap[String]](bytes) - beforeMap should ===(afterMap) + ==(beforeMap, afterMap) } test("empty") { check()(9, 0, 0) } diff --git a/src/test/scala/org/hammerlab/genomics/loci/map/cmps.scala b/src/test/scala/org/hammerlab/genomics/loci/map/cmps.scala new file mode 100644 index 0000000..36eec92 --- /dev/null +++ b/src/test/scala/org/hammerlab/genomics/loci/map/cmps.scala @@ -0,0 +1,11 @@ +package org.hammerlab.genomics.loci.map + +import org.hammerlab.cmp.CanEq.Cmp +import org.hammerlab.cmp.Cmp +import org.hammerlab.genomics.loci.{ map, set } + +trait cmps + extends set.cmps { + implicit def cmpMapContig[T](implicit cmp: Cmp[Map[T, set.Contig]]): Cmp.Aux[map.Contig[T], cmp.Diff] = Cmp.by[Map[T, set.Contig], map.Contig[T]](_.inverse)(cmp) + implicit def lociMapCmp[T](implicit cmp: Cmp[Seq[map.Contig[T]]]): Cmp.Aux[LociMap[T], cmp.Diff] = Cmp.by[Seq[map.Contig[T]], LociMap[T]](_.contigs)(cmp) +} diff --git a/src/test/scala/org/hammerlab/genomics/loci/parsing/ParsedLociSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/parsing/ParsedLociSuite.scala index b1e5eb5..859141e 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/parsing/ParsedLociSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/parsing/ParsedLociSuite.scala @@ -15,12 +15,11 @@ class ParsedLociSuite test("vcf loading") { val loci = lociSet( - ParsedLoci( - lociStrOpt = None, - lociFileOpt = Some(File("truth.chr20.vcf").path) - ).get + ParsedLoci.loadFromPath( + File("truth.chr20.vcf") + ) ) - loci.count should ===(743606) + ==(loci.count, 743606) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/LociIteratorSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/set/LociIteratorSuite.scala index 0459d91..8894a94 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/set/LociIteratorSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/set/LociIteratorSuite.scala @@ -19,17 +19,17 @@ class LociIteratorSuite extends Suite { ) test("simple") { - loci(100 → 110).toSeq should ===(100 until 110) + ==(loci(100 → 110).toSeq, 100 until 110) } test("skipTo") { val it = loci(100 -> 110) it.skipTo(103) - it.head should ===(103) - it.toSeq should ===(103 until 110) + ==(it.head, 103) + ==(it.toSeq, 103 until 110) } test("intervals") { - loci(100 → 110, 120 → 130).toSeq should ===((100 until 110) ++ (120 until 130)) + ==(loci(100 → 110, 120 → 130).toSeq, (100 until 110) ++ (120 until 130)) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala index 5f16518..1fe94a8 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/set/LociSetSuite.scala @@ -5,8 +5,8 @@ import org.hammerlab.genomics.loci.set.test.LociSetUtil import org.hammerlab.genomics.reference.test.LociConversions._ import org.hammerlab.genomics.reference.test.{ ClearContigNames, ContigLengthsUtil, ContigNameConversions } import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Locus, NumLoci } -import org.hammerlab.spark.test.suite.KryoSparkSuite import org.hammerlab.kryo._ +import org.hammerlab.spark.test.suite.KryoSparkSuite import scala.collection.mutable @@ -15,7 +15,8 @@ class LociSetSuite with LociSetUtil with ContigNameConversions with ClearContigNames - with ContigLengthsUtil { + with ContigLengthsUtil + with cmps { // "loci set invariants" collects some LociSets register( @@ -29,56 +30,56 @@ class LociSetSuite test("properties of empty LociSet") { val empty = LociSet() empty.contigs should have size 0 - empty.count should ===(0) - empty should ===(lociSet("")) + ==(empty.count, 0) + ==(empty, lociSet("")) val empty2 = lociSet("empty1:30-30,empty2:40-40") - empty should ===(empty2) + ==(empty, empty2) } test("count, containment, intersection testing of a loci set") { val set = lociSet("chr21:100-200,chr20:0-10,chr20:8-15,chr20:100-120,empty:10-10") - set.contigs.map(_.name) should ===(Seq("chr20", "chr21")) - set.count should ===(135) - set("chr20").contains(110) should ===(true) - set("chr20").contains(100) should ===(true) - set("chr20").contains(99) should ===(false) - set("chr20").contains(120) should ===(false) - set("chr20").contains(119) should ===(true) - set("chr20").count should ===(35) - set("chr20").intersects(0, 5) should ===(true) - set("chr20").intersects(0, 1) should ===(true) - set("chr20").intersects(0, 0) should ===(false) - set("chr20").intersects(7, 8) should ===(true) - set("chr20").intersects(9, 11) should ===(true) - set("chr20").intersects(11, 18) should ===(true) - set("chr20").intersects(18, 19) should ===(false) - set("chr20").intersects(14, 80) should ===(true) - set("chr20").intersects(15, 80) should ===(false) - set("chr20").intersects(120, 130) should ===(false) - set("chr20").intersects(119, 130) should ===(true) - - set("chr21").contains(99) should ===(false) - set("chr21").contains(100) should ===(true) - set("chr21").contains(200) should ===(false) - set("chr21").count should ===(100) - set("chr21").intersects(110, 120) should ===(true) - set("chr21").intersects(90, 120) should ===(true) - set("chr21").intersects(150, 200) should ===(true) - set("chr21").intersects(150, 210) should ===(true) - set("chr21").intersects(200, 210) should ===(false) - set("chr21").intersects(201, 210) should ===(false) - set("chr21").intersects(90, 100) should ===(false) - set("chr21").intersects(90, 101) should ===(true) - set("chr21").intersects(90, 95) should ===(false) - set("chr21").iterator.toSeq should ===(100 until 200) + ==(set.contigs.map(_.name), Seq("chr20", "chr21")) + ==(set.count, 135) + ==(set("chr20").contains(110), true) + ==(set("chr20").contains(100), true) + ==(set("chr20").contains(99), false) + ==(set("chr20").contains(120), false) + ==(set("chr20").contains(119), true) + ==(set("chr20").count, 35) + ==(set("chr20").intersects(0, 5), true) + ==(set("chr20").intersects(0, 1), true) + ==(set("chr20").intersects(0, 0), false) + ==(set("chr20").intersects(7, 8), true) + ==(set("chr20").intersects(9, 11), true) + ==(set("chr20").intersects(11, 18), true) + ==(set("chr20").intersects(18, 19), false) + ==(set("chr20").intersects(14, 80), true) + ==(set("chr20").intersects(15, 80), false) + ==(set("chr20").intersects(120, 130), false) + ==(set("chr20").intersects(119, 130), true) + + ==(set("chr21").contains(99), false) + ==(set("chr21").contains(100), true) + ==(set("chr21").contains(200), false) + ==(set("chr21").count, 100) + ==(set("chr21").intersects(110, 120), true) + ==(set("chr21").intersects(90, 120), true) + ==(set("chr21").intersects(150, 200), true) + ==(set("chr21").intersects(150, 210), true) + ==(set("chr21").intersects(200, 210), false) + ==(set("chr21").intersects(201, 210), false) + ==(set("chr21").intersects(90, 100), false) + ==(set("chr21").intersects(90, 101), true) + ==(set("chr21").intersects(90, 95), false) + ==(set("chr21").iterator.toSeq, 100 until 200) } test("single loci parsing") { val set = lociSet("chr1:10000") - set.count should ===(1) - set("chr1").contains( 9999) should ===(false) - set("chr1").contains(10000) should ===(true) - set("chr1").contains(10001) should ===(false) + ==(set.count, 1) + ==(set("chr1").contains( 9999), false) + ==(set("chr1").contains(10000), true) + ==(set("chr1").contains(10001), false) } test("loci set invariants") { @@ -98,16 +99,16 @@ class LociSetSuite set should not be null set.toString should not be null withClue("invariants for: '%s'".format(set.toString)) { - lociSet(set.toString) should ===(set) - lociSet(set.toString).toString should ===(set.toString) - set should ===(set) + ==(lociSet(set.toString), set) + ==(lociSet(set.toString).toString, set.toString) + ==(set, set) // Test serialization. We hit all sorts of null pointer exceptions here at one point, so we are paranoid about // checking every pointer. val parallelized = sc.parallelize(List(set)) val collected = parallelized.collect() val result = collected(0) - result should ===(set) + ==(result, set) } } @@ -115,70 +116,73 @@ class LociSetSuite } test("loci set parsing with contig lengths") { - makeLociSet( - "chr1,chr2,chr17,chr2:3-5,chr20:10-20", - "chr1" → 10, - "chr2" → 20, - "chr17" → 12, - "chr20" → 5000 + ==( + makeLociSet( + "chr1,chr2,chr17,chr2:3-5,chr20:10-20", + "chr1" → 10, + "chr2" → 20, + "chr17" → 12, + "chr20" → 5000 + ) + .toString, + "chr1:0-10,chr2:0-20,chr17:0-12,chr20:10-20" ) - .toString should ===("chr1:0-10,chr2:0-20,chr17:0-12,chr20:10-20") } test("parse half-open interval") { - makeLociSet("chr1:10000-", "chr1" → 20000).toString should ===("chr1:10000-20000") + ==(makeLociSet("chr1:10000-", "chr1" → 20000).toString, "chr1:10000-20000") } test("loci set single contig iterator basic") { val set = lociSet("chr1:20-25,chr1:15-17,chr1:40-43,chr1:40-42,chr1:5-5,chr2:5-6,chr2:6-7,chr2:2-4") - set("chr1").iterator.toSeq should ===(Seq(15, 16, 20, 21, 22, 23, 24, 40, 41, 42)) - set("chr2").iterator.toSeq should ===(Seq(2, 3, 5, 6)) + ==(set("chr1").iterator.toSeq, Seq(15, 16, 20, 21, 22, 23, 24, 40, 41, 42)) + ==(set("chr2").iterator.toSeq, Seq(2, 3, 5, 6)) val iter1 = set("chr1").iterator - iter1.hasNext should ===(true) - iter1.head should ===(15) - iter1.next() should ===(15) - iter1.head should ===(16) - iter1.next() should ===(16) - iter1.head should ===(20) - iter1.next() should ===(20) - iter1.head should ===(21) + ==(iter1.hasNext, true) + ==(iter1.head, 15) + ==(iter1.next(), 15) + ==(iter1.head, 16) + ==(iter1.next(), 16) + ==(iter1.head, 20) + ==(iter1.next(), 20) + ==(iter1.head, 21) iter1.skipTo(23) - iter1.next() should ===(23) - iter1.head should ===(24) + ==(iter1.next(), 23) + ==(iter1.head, 24) iter1.skipTo(38) - iter1.head should ===(40) - iter1.hasNext should ===(true) + ==(iter1.head, 40) + ==(iter1.hasNext, true) iter1.skipTo(100) - iter1.hasNext should ===(false) + ==(iter1.hasNext, false) } test("loci set single contig iterator: test that skipTo implemented efficiently.") { val set = lociSet("chr1:2-3,chr1:10-15,chr1:100-100000000000") val iter1 = set("chr1").iterator - iter1.hasNext should ===(true) - iter1.head should ===(2) - iter1.next() should ===(2) - iter1.next() should ===(10) - iter1.next() should ===(11) + ==(iter1.hasNext, true) + ==(iter1.head, 2) + ==(iter1.next(), 2) + ==(iter1.next(), 10) + ==(iter1.next(), 11) val sixBillion = Locus(6000000000L) iter1.skipTo(sixBillion) // will hang if it steps through each locus. - iter1.next() should ===(sixBillion) - iter1.next() should ===(sixBillion.next) - iter1.hasNext should ===(true) + ==(iter1.next(), sixBillion) + ==(iter1.next(), sixBillion.next) + ==(iter1.hasNext, true) val hundredBillion = Locus(100000000000L) val iter2 = set("chr1").iterator iter2.skipTo(hundredBillion) - iter2.hasNext should ===(false) + ==(iter2.hasNext, false) val iter3 = set("chr1").iterator iter3.skipTo(hundredBillion.prev) - iter3.hasNext should ===(true) - iter3.next() should ===(100000000000L - 1) - iter3.hasNext should ===(false) + ==(iter3.hasNext, true) + ==(iter3.next(), 100000000000L - 1) + ==(iter3.hasNext, false) } test("take") { diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala b/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala index 83b5ba8..163fd96 100644 --- a/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala +++ b/src/test/scala/org/hammerlab/genomics/loci/set/SerializerSuite.scala @@ -19,7 +19,8 @@ class SerializerSuite with LenientContigNameConversions with ClearContigNames with LociSetUtil - with Serializable { + with Serializable + with cmps { import Helpers._ @@ -52,7 +53,7 @@ class SerializerSuite val rdd = sc.parallelize(sets) val result = rdd.map(_.toString).collect.toSeq - result should ===(sets.map(_.toString)) + ==(result, sets.map(_.toString)) } test("make an RDD[LociSet], and an RDD[Contig]") { @@ -74,7 +75,7 @@ class SerializerSuite .collect .toSeq - result should ===(sets.map(_("20").toString)) + ==(result, sets.map(_("20").toString)) } @@ -83,7 +84,7 @@ class SerializerSuite val setBC = sc.broadcast(set) val rdd = sc.parallelize[Locus]((0 until 1000).toSeq) val result = rdd.filter(filterTask(setBC)).collect - result should ===(100 until 200) + ==(result, 100 until 200) } test("java serialization") { @@ -101,7 +102,7 @@ class SerializerSuite val loci2 = ois.readObject().asInstanceOf[LociSet] - loci2 should ===(loci) + ==(loci2, loci) } } diff --git a/src/test/scala/org/hammerlab/genomics/loci/set/cmps.scala b/src/test/scala/org/hammerlab/genomics/loci/set/cmps.scala new file mode 100644 index 0000000..f3e1ace --- /dev/null +++ b/src/test/scala/org/hammerlab/genomics/loci/set/cmps.scala @@ -0,0 +1,12 @@ +package org.hammerlab.genomics.loci.set + +import org.hammerlab.cmp.CanEq.Cmp +import org.hammerlab.cmp.Cmp +import org.hammerlab.genomics.loci +import org.hammerlab.genomics.reference.Interval + +trait cmps + extends loci.cmps { + implicit def cmpSetContig(implicit cmp: Cmp[Iterator[Interval]]): Cmp.Aux[Contig, cmp.Diff] = Cmp.by[Iterator[Interval], Contig](_.ranges)(cmp) + implicit def lociSetCmp(implicit cmp: Cmp[Seq[Contig]]): Cmp.Aux[LociSet, cmp.Diff] = Cmp.by[Seq[Contig], LociSet](_.contigs)(cmp) +}