Skip to content

Commit

Permalink
Merge pull request #25 from ryan-williams/ug
Browse files Browse the repository at this point in the history
upgrade tests, plugin
  • Loading branch information
ryan-williams committed Jun 15, 2018
2 parents c093fe1 + c3af66b commit 51d88cc
Show file tree
Hide file tree
Showing 20 changed files with 266 additions and 246 deletions.
16 changes: 7 additions & 9 deletions build.sbt
@@ -1,28 +1,26 @@
group("org.hammerlab.genomics")
name := "loci"
v"2.0.4"
subgroup("genomics", "loci")
v"2.1.0"

addSparkDeps
spark

import genomics.reference

dep(
args4j,
args4s % "1.3.1",
case_app,
htsjdk,
iterators % "2.1.0",
iterators % "2.2.0",
paths % "1.5.0",
reference % "1.4.3" + testtest,
scalautils,
spark_util % "2.0.3",
spark_util % "2.0.4",
string_utils % "1.2.0"
)

// Shade Guava due to use of RangeSet classes from 16.0.1 that don't exist in Spark/Hadoop's Guava 11.0.2.
shadedDeps += guava

// Rename shaded Guava classes
shadeRenames += "com.google.common.**" "org.hammerlab.guava.@1"
shadeRenames += "com.google.common.**" "org.hammerlab.guava.@1"
shadeRenames += "com.google.thirdparty.**" "org.hammerlab.guava.@1"

// Publish JAR that includes shaded Guava.
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
sbt.version=1.1.1
sbt.version=1.1.5
2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -1 +1 @@
addSbtPlugin("org.hammerlab.sbt" % "base" % "4.4.2")
addSbtPlugin("org.hammerlab.sbt" % "base" % "4.6.1")
31 changes: 31 additions & 0 deletions src/main/scala/org/hammerlab/genomics/loci/Args.scala
@@ -0,0 +1,31 @@
package org.hammerlab.genomics.loci

import caseapp.{ HelpMessage => M }
import org.hammerlab.genomics.loci.parsing.ParsedLoci
import org.hammerlab.genomics.loci.parsing.ParsedLoci.loadFromPath
import org.hammerlab.genomics.reference.ContigName.Factory
import org.hammerlab.paths.Path

/**
 * Arguments for accepting a set of loci to restrict variant-calling to.
 *
 * Loci may be supplied inline ([[loci]]) or via a file ([[lociFile]]); supplying both is an error.
 *
 * @param loci     optional loci string: either 'all' or a comma-separated list of `contig[:start[-end]]` ranges
 * @param lociFile optional path to a file listing the loci to include
 */
case class Args(
  @M("If set, loci to include. Either 'all' or 'contig[:start[-end]],contig[:start[-end]],…'")
  loci: Option[String] = None,

  @M("Path to file giving loci to include")
  lociFile: Option[Path] = None
) {
  /**
   * Resolve the configured loci, if any.
   *
   * @param factory implicit contig-name factory forwarded to the underlying parsers
   * @return `Some` parsed loci when exactly one of [[loci]] / [[lociFile]] is set, `None` when neither is set
   * @throws IllegalArgumentException if both [[loci]] and [[lociFile]] are set
   */
  def parse(
    implicit
    factory: Factory
  ): Option[ParsedLoci] =
    (loci, lociFile) match {
      // The two sources are mutually exclusive; fail loudly rather than silently preferring one of them.
      case (Some(_), Some(_)) =>
        throw new IllegalArgumentException(
          "Specify a loci string (--loci) xor file (--loci-file)"
        )
      case (Some(str), None) => Some(ParsedLoci(str))
      case (None, Some(path)) => Some(loadFromPath(path))
      case (None, None) => None
    }
}
25 changes: 0 additions & 25 deletions src/main/scala/org/hammerlab/genomics/loci/args/LociArgs.scala

This file was deleted.

This file was deleted.

Expand Up @@ -2,7 +2,6 @@ package org.hammerlab.genomics.loci.parsing

import htsjdk.variant.vcf.VCFFileReader
import org.hammerlab.genomics.loci.VariantContext
import org.hammerlab.genomics.loci.args.LociArgs
import org.hammerlab.genomics.reference.ContigName.Factory
import org.hammerlab.paths.Path

Expand All @@ -20,7 +19,7 @@ import scala.collection.mutable.ArrayBuffer
* The two implementations are:
*
* - [[All]]: sentinel value representing all loci on all contigs.
* - [[LociRanges]]: a sequence of [[LociRange]]s denoting (possibly open-ended) genomic-intervals.
* - [[LociRanges]]: a sequence of [[Range]]s denoting (possibly open-ended) genomic-intervals.
*
* Examples:
*
Expand All @@ -35,41 +34,21 @@ object ParsedLoci {
def apply(lociStrs: String)(implicit factory: Factory): ParsedLoci = apply(Iterator(lociStrs))

def apply(lines: Iterator[String])(implicit factory: Factory): ParsedLoci = {
val lociRanges = ArrayBuffer[LociRange]()
val lociRanges = ArrayBuffer[Range]()
for {
lociStrs lines
lociStr lociStrs.replaceAll("\\s", "").split(",")
lociRange ParsedLociRange(lociStr)
lociRange ParsedRange(lociStr)
} {
lociRange match {
case AllRange return All
case lociRange: LociRange
case ParsedRange.All return All
case lociRange: Range
lociRanges += lociRange
}
}
LociRanges(lociRanges)
}

/**
* Parse string representations of loci ranges, either from one string (lociOpt) or a file with one range per line
* (lociFileOpt), and return a [[ParsedLoci]] encapsulating the result. The latter can then be converted into a
* [[org.hammerlab.genomics.loci.set.LociSet]] when contig-lengths are available / have been parsed from read-sets.
*/
def apply(args: LociArgs): Option[ParsedLoci] =
apply(
args.lociStrOpt,
args.lociFileOpt
)

def apply(lociStrOpt: Option[String],
lociFileOpt: Option[Path])(implicit factory: Factory): Option[ParsedLoci] =
(lociStrOpt, lociFileOpt) match {
case (Some(lociStr), _) => Some(ParsedLoci(lociStr))
case (_, Some(lociPath)) => Some(loadFromPath(lociPath))
case _ =>
None
}

/**
* Parse loci from the specified file.
*
Expand All @@ -78,7 +57,7 @@ object ParsedLoci {
* "chrX:5-10,chr12-10-20", etc. Whitespace is ignored.
* @return parsed loci
*/
private def loadFromPath(path: Path)(implicit factory: Factory): ParsedLoci =
def loadFromPath(path: Path)(implicit factory: Factory): ParsedLoci =
path.extension match {
case "vcf" LociRanges.fromVCF(path)
case "loci" | "txt" ParsedLoci(path.lines)
Expand All @@ -94,10 +73,10 @@ object ParsedLoci {
*/
case object All extends ParsedLoci

case class LociRanges(ranges: Iterable[LociRange]) extends AnyVal with ParsedLoci
case class LociRanges(ranges: Iterable[Range]) extends AnyVal with ParsedLoci

object LociRanges {
def apply(range: LociRange): LociRanges = apply(Iterable(range))
def apply(range: Range): LociRanges = apply(Iterable(range))

def fromVCF(path: Path): LociRanges =
apply(
Expand All @@ -106,7 +85,7 @@ object LociRanges {
new VCFFileReader(path.toFile, false)
.map {
case VariantContext(contigName, start, end) =>
LociRange(contigName, start, end)
Range(contigName, start, end)
}
)
}
Expand Up @@ -14,9 +14,9 @@ import org.hammerlab.genomics.reference.{ ContigName, Locus }
* - all
* - none
*/
sealed trait ParsedLociRange
sealed trait ParsedRange

object ParsedLociRange {
object ParsedRange {

val contigAndLoci = """^([\pL\pN._]+):(\pN+)(?:-(\pN*))?$""".r
val contigOnly = """^([\pL\pN._]+)""".r
Expand All @@ -31,10 +31,10 @@ object ParsedLociRange {
* "chr1:10000": just chr1, position 10000; equivalent to "chr1:10000-10001".
* "chr1:10000-": chr1, from position 10000 to the end of chr1.
*/
def apply(lociRangeStr: String)(implicit factory: Factory): Option[ParsedLociRange] =
def apply(lociRangeStr: String)(implicit factory: Factory): Option[ParsedRange] =
lociRangeStr.replaceAll("\\s", "") match {
case "all" =>
Some(AllRange)
Some(All)
case "none" | "" =>
None
case contigAndLoci(name, startStr, endStrOpt) =>
Expand All @@ -46,27 +46,28 @@ object ParsedLociRange {
case None => Some(start.next)
}

Some(LociRange(name, start, endOpt))
Some(Range(name, start, endOpt))
case contigOnly(contig) =>
Some(LociRange(contig, Locus(0), None))
Some(Range(contig, Locus(0), None))
case other =>
throw new IllegalArgumentException(s"Couldn't parse loci range: $other")
}
}

case object AllRange extends ParsedLociRange
case object All extends ParsedRange

}

case class LociRange(contigName: ContigName,
start: Locus,
endOpt: Option[Locus])
extends ParsedLociRange
case class Range(contigName: ContigName,
start: Locus,
endOpt: Option[Locus])
extends ParsedRange

object LociRange {
def apply(contigName: ContigName, start: Locus, end: Locus): LociRange =
LociRange(contigName, start, Some(end))
object Range {
def apply(contigName: ContigName, start: Locus, end: Locus): Range =
Range(contigName, start, Some(end))

def apply(tuple: (ContigName, Locus, Locus)): LociRange = {
def apply(tuple: (ContigName, Locus, Locus)): Range = {
val (contigName, start, end) = tuple
LociRange(contigName, start, Some(end))
Range(contigName, start, Some(end))
}
}
4 changes: 2 additions & 2 deletions src/main/scala/org/hammerlab/genomics/loci/set/LociSet.scala
Expand Up @@ -3,7 +3,7 @@ package org.hammerlab.genomics.loci.set
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import htsjdk.samtools.util.{ Interval HTSJDKInterval }
import org.hammerlab.genomics.loci.parsing.{ All, LociRange, LociRanges, ParsedLoci }
import org.hammerlab.genomics.loci.parsing.{ All, Range, LociRanges, ParsedLoci }
import org.hammerlab.genomics.reference.ContigName.Factory
import org.hammerlab.genomics.reference.{ ContigLengths, ContigName, Interval, Locus, NumLoci, Region }
import org.hammerlab.strings.TruncatedToString
Expand Down Expand Up @@ -158,7 +158,7 @@ object LociSet {
Region(contig, Locus(0), Locus(length))
case LociRanges(ranges)
for {
LociRange(contigName, start, endOpt) ranges
Range(contigName, start, endOpt) ranges
contigLengthOpt = contigLengths.get(contigName)
} yield
(endOpt, contigLengthOpt) match {
Expand Down
10 changes: 10 additions & 0 deletions src/test/scala/org/hammerlab/genomics/loci/cmps.scala
@@ -0,0 +1,10 @@
package org.hammerlab.genomics.loci

import org.hammerlab.cmp.CanEq.Cmp
import org.hammerlab.cmp.Cmp
import org.hammerlab.genomics.reference.{ Interval, Locus }

/** Comparison instances for loci-related types, mixed into test suites. */
trait cmps {
  // TODO: move this to reference repo

  /**
   * Derives a comparator for `Interval` from one over `(Locus, Locus)` pairs, by projecting each interval to its
   * `(start, end)` pair via `Cmp.by`.
   *
   * @param cmp comparator for `(start, end)` pairs; its `Diff` type becomes the `Diff` type of the result
   */
  implicit def cmpInterval(implicit cmp: Cmp[(Locus, Locus)]): Cmp.Aux[Interval, cmp.Diff] =
    Cmp.by[(Locus, Locus), Interval](i => (i.start, i.end))(cmp)
}
Expand Up @@ -24,7 +24,8 @@ class SkippableLociIteratorSuite
)

test("no skips") {
strs.toSeq should ===(
==(
strs.toSeq,
List(
10 "a",
11 "b",
Expand All @@ -49,34 +50,35 @@ class SkippableLociIteratorSuite
test("misc skips") {
val it = strs
it.skipTo(15)
it.next() should ===(20 "c")
==(it.next(), 20 "c")
it.skipTo(30)
it.next() should ===(30 "e")
==(it.next(), 30 "e")
intercept[IllegalArgumentException] {
it.skipTo(30)
}
it.next() should ===(31 "f")
==(it.next(), 31 "f")
it.skipTo(32)
it.next() should ===(33 "g")
==(it.next(), 33 "g")
it.skipTo(34)
it.next() should ===(34 "h")
==(it.next(), 34 "h")
it.skipTo(41)
it.next() should ===(50 "j")
it.hasNext should ===(false)
==(it.next(), 50 "j")
==(it.hasNext, false)
}

test("intersect") {
strs.intersect(
new LociIterator(
Iterator(
Interval( 8, 11),
Interval(14, 16),
Interval(30, 35),
Interval(38, 42),
Interval(50, 51)
).buffered
)
).toSeq ===
==(
strs.intersect(
new LociIterator(
Iterator(
Interval( 8, 11),
Interval(14, 16),
Interval(30, 35),
Interval(38, 42),
Interval(50, 51)
).buffered
)
).toSeq,
Seq(
10 "a",
30 "e",
Expand All @@ -86,6 +88,7 @@ class SkippableLociIteratorSuite
40 "i",
50 "j"
)
)
}
}

Expand Down

0 comments on commit 51d88cc

Please sign in to comment.