diff --git a/build.sbt b/build.sbt index 4cd6c26..d6eb9bd 100644 --- a/build.sbt +++ b/build.sbt @@ -1,12 +1,7 @@ name := "iterator" -version := "1.2.2" +version := "1.3.0" addScala212 -deps ++= Seq( - libs.value('commons_math), - kryo.value, - "com.chuusai" %% "shapeless" % "2.3.2", - libs.value('spire) -) +deps += spire diff --git a/project/plugins.sbt b/project/plugins.sbt index 1dd7897..f53eab3 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.hammerlab" % "sbt-parent" % "2.0.1") +addSbtPlugin("org.hammerlab" % "sbt-parent" % "3.0.0") diff --git a/src/main/scala/org/hammerlab/iterator/BufferedTakeWhileIterator.scala b/src/main/scala/org/hammerlab/iterator/BufferedTakeWhileIterator.scala deleted file mode 100644 index e3b823f..0000000 --- a/src/main/scala/org/hammerlab/iterator/BufferedTakeWhileIterator.scala +++ /dev/null @@ -1,17 +0,0 @@ -package org.hammerlab.iterator - -case class BufferedTakeWhileIterator[T](it: BufferedIterator[T]) { - def takewhile(fn: T ⇒ Boolean): BufferedIterator[T] = - new SimpleBufferedIterator[T] { - override protected def _advance: Option[T] = - if (it.hasNext && fn(it.head)) - Some(it.next) - else - None - } -} - -object BufferedTakeWhileIterator { - implicit def makeBufferedTakeWhileIterator[T](it: BufferedIterator[T]): BufferedTakeWhileIterator[T] = - BufferedTakeWhileIterator(it) -} diff --git a/src/main/scala/org/hammerlab/iterator/DropEagerIterator.scala b/src/main/scala/org/hammerlab/iterator/DropEagerIterator.scala new file mode 100644 index 0000000..fa1e4cd --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/DropEagerIterator.scala @@ -0,0 +1,17 @@ +package org.hammerlab.iterator + +case class DropEagerIterator[T](it: Iterator[T]) { + def dropEager(n: Int): Iterator[T] = { + var idx = 0 + while (it.hasNext && idx < n) { + it.next + idx += 1 + } + it + } +} + +object DropEagerIterator { + implicit def makeDropEagerIterator[T](it: Iterator[T]): DropEagerIterator[T] = + DropEagerIterator(it) +} diff --git a/src/main/scala/org/hammerlab/iterator/EitherIterator.scala b/src/main/scala/org/hammerlab/iterator/EitherIterator.scala new file mode 100644 index 0000000..c5fecf6 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/EitherIterator.scala @@ -0,0 +1,77 @@ +package org.hammerlab.iterator + +import org.hammerlab.iterator.bulk.BufferedBulkIterator._ +import scala.collection.mutable.ArrayBuffer + +case class EitherIterator[T, U](it: BufferedIterator[Either[T, U]]) { + + def findLeft: Option[T] = + it + .collect { + case Left(t) ⇒ t + } + .buffered + .headOption + + def groupByLeft: BufferedIterator[(T, Iterator[U])] = { + + // Clear out any leading Rights + it + .collectwhile { + case Right(_) ⇒ null + } + .toList + + new SimpleBufferedIterator[(T, BufferedIterator[U])] { + var curLeft: Option[T] = None + var curRights = Iterator[U]().buffered + override protected def _advance: Option[(T, BufferedIterator[U])] = { + + // Clear any unused elements from the previous rights/U's/"values" iterator + curRights.toList + + it + .nextOption + .collect { + case Left(t) ⇒ + curRights = + it + .collectwhile { + case Right(u) ⇒ u + } + + t → curRights + case Right(u) ⇒ + throw new IllegalStateException( + s"nextOption should not be a Right" + ) + } + } + } + } + + def roundUpRight: BufferedIterator[(Seq[T], U)] = + new SimpleBufferedIterator[(Seq[T], U)] { + override protected def _advance: Option[(Seq[T], U)] = { + val lefts = ArrayBuffer[T]() + while (true) { + it.headOption match { + case Some(Left(t)) ⇒ + it.next + lefts += t + case Some(Right(u)) ⇒ + it.next + return Some(lefts → u) + case None ⇒ + return None + } + } + ??? + } + } +} + +object EitherIterator { + implicit def makeEitherIterator[T, U](it: Iterator[Either[T, U]]): EitherIterator[T, U] = + EitherIterator(it.buffered) +} diff --git a/src/main/scala/org/hammerlab/iterator/GroupWithIterator.scala b/src/main/scala/org/hammerlab/iterator/GroupWithIterator.scala index 5931ea9..3ea028a 100644 --- a/src/main/scala/org/hammerlab/iterator/GroupWithIterator.scala +++ b/src/main/scala/org/hammerlab/iterator/GroupWithIterator.scala @@ -1,6 +1,6 @@ package org.hammerlab.iterator -import BufferedTakeWhileIterator._ +import org.hammerlab.iterator.bulk.BufferedBulkIterator._ /** * Group one sorted iterator with another, emitting an iterator of the latter's elements for each of the former's diff --git a/src/main/scala/org/hammerlab/iterator/RangeAccruingIterator.scala b/src/main/scala/org/hammerlab/iterator/RangeAccruingIterator.scala index 9a78de8..f61e775 100644 --- a/src/main/scala/org/hammerlab/iterator/RangeAccruingIterator.scala +++ b/src/main/scala/org/hammerlab/iterator/RangeAccruingIterator.scala @@ -1,5 +1,7 @@ package org.hammerlab.iterator +import scala.Range + /** * Given an [[Iterator]] of [[Int]]s, collapse contiguous "ranges" of integers that are each 1 greater than their * predecessor. @@ -9,7 +11,8 @@ package org.hammerlab.iterator * * See RangeAccruingIteratorTest for more examples. */ -class RangeAccruingIterator(it: Iterator[Int]) extends Iterator[Range] { +class RangeAccruingIterator(it: Iterator[Int]) + extends Iterator[Range] { var anchor = -1 diff --git a/src/main/scala/org/hammerlab/iterator/SliceIterator.scala b/src/main/scala/org/hammerlab/iterator/SliceIterator.scala new file mode 100644 index 0000000..e035a7a --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/SliceIterator.scala @@ -0,0 +1,17 @@ +package org.hammerlab.iterator + +import org.hammerlab.iterator.DropEagerIterator._ + +case class SliceIterator[T](it: Iterator[T]) { + def sliceOpt(start: Option[Int], length: Option[Int]): Iterator[T] = { + start.foreach(it.dropEager) + length.map(it.take).getOrElse(it) + } + def sliceOpt(start: Int, length: Int): Iterator[T] = sliceOpt(Some(start), Some(length)) + def sliceOpt(start: Option[Int], length: Int): Iterator[T] = sliceOpt(start, Some(length)) + def sliceOpt(start: Int, length: Option[Int] = None): Iterator[T] = sliceOpt(Some(start), length) +} + +object SliceIterator { + implicit def makeSliceIterator[T](it: Iterator[T]): SliceIterator[T] = SliceIterator(it) +} diff --git a/src/main/scala/org/hammerlab/iterator/bulk/BufferedBulkIterator.scala b/src/main/scala/org/hammerlab/iterator/bulk/BufferedBulkIterator.scala new file mode 100644 index 0000000..8265b02 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/bulk/BufferedBulkIterator.scala @@ -0,0 +1,39 @@ +package org.hammerlab.iterator.bulk + +import org.hammerlab.iterator.SimpleBufferedIterator + +/** + * Some smarter bulk operations on [[BufferedIterator]]s + */ +case class BufferedBulkIterator[T](it: BufferedIterator[T]) { + def takewhile(fn: T ⇒ Boolean): SimpleBufferedIterator[T] = + new SimpleBufferedIterator[T] { + override protected def _advance: Option[T] = + if (it.hasNext && fn(it.head)) + Some(it.next) + else + None + } + + def dropwhile(fn: T ⇒ Boolean): Unit = + while (it.hasNext && fn(it.head)) + it.next + + def collectwhile[U](pf: PartialFunction[T, U]): BufferedIterator[U] = + new SimpleBufferedIterator[U] { + override protected def _advance: Option[U] = + if (it.hasNext && pf.isDefinedAt(it.head)) + Some( + pf( + it.next + ) + ) + else + None + } +} + +object BufferedBulkIterator { + implicit def makeBufferedBulkWhileIterator[T](it: BufferedIterator[T]): BufferedBulkIterator[T] = + BufferedBulkIterator(it) +} diff --git a/src/main/scala/org/hammerlab/iterator/range/OverlappingRangesIterator.scala b/src/main/scala/org/hammerlab/iterator/range/OverlappingRangesIterator.scala new file mode 100644 index 0000000..d5e288f --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/range/OverlappingRangesIterator.scala @@ -0,0 +1,71 @@ +package org.hammerlab.iterator.range + +import org.hammerlab.iterator.SimpleBufferedIterator + +import scala.collection.mutable + +case class OverlappingRangesIterator[T: Ordering](it: BufferedIterator[Range[T]]) { + + type RangeT = (T, Option[T]) + + val ≤ = implicitly[Ordering[T]].lteq _ + + implicit val orderZippedRangeByEndOpt: Ordering[(Range[T], Int)] = + Ordering + .by[(Range[T], Int), Option[T]](_._1.endOpt) + .reverse + + def joinOverlaps(other: Iterable[Range[T]]): Iterator[(Range[T], Vector[(Range[T], Int)])] = + joinOverlaps(other.iterator.buffered) + + def joinOverlaps(other: BufferedIterator[Range[T]]): Iterator[(Range[T], Vector[(Range[T], Int)])] = { + val queue = mutable.PriorityQueue[(Range[T], Int)]() + + val zippedOther = + other + .zipWithIndex + .buffered + + new SimpleBufferedIterator[(Range[T], Vector[(Range[T], Int)])] { + override protected def _advance: Option[(Range[T], Vector[(Range[T], Int)])] = + it + .nextOption + .map { + elem ⇒ + while (queue.headOption.exists(!_._1.∩(elem))) { + queue.dequeue() + } + + while ( + zippedOther + .headOption + .flatMap(_._1.endOpt) + .exists(≤(_, elem.start)) + ) { + zippedOther.next + } + + while ( + zippedOther + .headOption + .exists(_._1.∩(elem)) + ) { + queue.enqueue(zippedOther.next) + } + + elem → + queue + .toVector + .sortBy(_._2) + } + } + } +} + +object OverlappingRangesIterator { + implicit def makeOverlappingRangesIteratorFromIterable[T: Ordering](it: Iterable[Range[T]]): OverlappingRangesIterator[T] = + OverlappingRangesIterator(it.iterator.buffered) + + implicit def makeOverlappingRangesIterator[T: Ordering](it: Iterator[Range[T]]): OverlappingRangesIterator[T] = + OverlappingRangesIterator(it.buffered) +} diff --git a/src/main/scala/org/hammerlab/iterator/range/Range.scala b/src/main/scala/org/hammerlab/iterator/range/Range.scala new file mode 100644 index 0000000..75dce05 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/range/Range.scala @@ -0,0 +1,42 @@ +package org.hammerlab.iterator.range + +case class Range[T](start: T, endOpt: Option[T]) { + def ∩(right: Range[T])(implicit ord: Ordering[T]): Boolean = { + val ≤ = ord.lteq _ + val Range(rightStart, rightEndOpt) = right + if (≤(start, rightStart)) + !endOpt.exists(≤(_, rightStart)) + else + !rightEndOpt.exists(≤(_, start)) + } + + override def toString: String = + s"[$start,${endOpt.getOrElse("∞")})" +} + +object Range { + def apply[T](start: T, end: T): Range[T] = Range(start, Some(end)) + def apply[T](start: T): Range[T] = Range(start, None) + + implicit def endOptOrdering[T](implicit ord: Ordering[T]): Ordering[Option[T]] = + new Ordering[Option[T]] { + override def compare(x: Option[T], y: Option[T]): Int = + (x, y) match { + case (None, None) ⇒ 0 + case (None, _) ⇒ -1 + case (_, None) ⇒ 1 + case (Some(x), Some(y)) ⇒ ord.compare(x, y) + } + } + + implicit def orderByStartThenEnd[T: Ordering]: Ordering[Range[T]] = { + + implicit val tupleOrdering = + Ordering.Tuple2[T, Option[T]] + + Ordering.by[Range[T], (T, Option[T])] { + case Range(start, endOpt) ⇒ + start → endOpt + } + } +} diff --git a/src/main/scala/org/hammerlab/iterator/Sliding2Iterator.scala b/src/main/scala/org/hammerlab/iterator/sliding/Sliding2Iterator.scala similarity index 94% rename from src/main/scala/org/hammerlab/iterator/Sliding2Iterator.scala rename to src/main/scala/org/hammerlab/iterator/sliding/Sliding2Iterator.scala index 23d6195..9d7fa08 100644 --- a/src/main/scala/org/hammerlab/iterator/Sliding2Iterator.scala +++ b/src/main/scala/org/hammerlab/iterator/sliding/Sliding2Iterator.scala @@ -1,4 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding + +import org.hammerlab.iterator.SimpleBufferedIterator case class Sliding2Iterator[T](it: BufferedIterator[T]) { def sliding2Prev: Iterator[(Option[T], T)] = diff --git a/src/main/scala/org/hammerlab/iterator/Sliding3Iterator.scala b/src/main/scala/org/hammerlab/iterator/sliding/Sliding3Iterator.scala similarity index 94% rename from src/main/scala/org/hammerlab/iterator/Sliding3Iterator.scala rename to src/main/scala/org/hammerlab/iterator/sliding/Sliding3Iterator.scala index a6becb0..60b1f96 100644 --- a/src/main/scala/org/hammerlab/iterator/Sliding3Iterator.scala +++ b/src/main/scala/org/hammerlab/iterator/sliding/Sliding3Iterator.scala @@ -1,4 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding + +import org.hammerlab.iterator.{ NextOptionIterator, SimpleBufferedIterator } /** * Given an [[Iterator[T]]], emit each element sandwiched between its preceding and succeeding elements. diff --git a/src/main/scala/org/hammerlab/iterator/SlidingIterator.scala b/src/main/scala/org/hammerlab/iterator/sliding/SlidingIterator.scala similarity index 91% rename from src/main/scala/org/hammerlab/iterator/SlidingIterator.scala rename to src/main/scala/org/hammerlab/iterator/sliding/SlidingIterator.scala index 79bdfd2..ea4214b 100644 --- a/src/main/scala/org/hammerlab/iterator/SlidingIterator.scala +++ b/src/main/scala/org/hammerlab/iterator/sliding/SlidingIterator.scala @@ -1,4 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding + +import org.hammerlab.iterator.SimpleBufferedIterator import scala.collection.mutable.ArrayBuffer diff --git a/src/main/scala/org/hammerlab/iterator/sorted/EitherZipIterator.scala b/src/main/scala/org/hammerlab/iterator/sorted/EitherZipIterator.scala new file mode 100644 index 0000000..d3c8d30 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/sorted/EitherZipIterator.scala @@ -0,0 +1,52 @@ +package org.hammerlab.iterator.sorted + +import org.hammerlab.iterator.{ HeadOptionIterator, SimpleBufferedIterator } + +case class EitherZipIterator[T](l: BufferedIterator[T]) { + def sortedEitherZip[U, V](other: Iterable[U])( + implicit + ord: Ordering[V], + tv: T ⇒ V, + uv: U ⇒ V + ): SimpleBufferedIterator[Either[T, U]] = + sortedEitherZip(other.iterator) + + def sortedEitherZip[U, V](other: Iterator[U])( + implicit + ord: Ordering[V], + tv: T ⇒ V, + uv: U ⇒ V + ): SimpleBufferedIterator[Either[T, U]] = { + val r = other.buffered + val ≤ = ord.lteq _ + new SimpleBufferedIterator[Either[T, U]] { + override protected def _advance: Option[Either[T, U]] = + (l.headOption, r.headOption) match { + case (Some(t), Some(u)) ⇒ + if (≤(t, u)) { + l.next + Some(Left(t)) + } else { + r.next + Some(Right(u)) + } + case (Some(t), _) ⇒ + l.next + Some(Left(t)) + case (_, Some(u)) ⇒ + r.next + Some(Right(u)) + case _ ⇒ + None + } + } + } +} + +object EitherZipIterator { + implicit def makeEitherZipIterator[T](it: Iterator[T]): EitherZipIterator[T] = + EitherZipIterator(it.buffered) + + implicit def makeEitherZipIteratorFromIterable[T](it: Iterable[T]): EitherZipIterator[T] = + EitherZipIterator(it.iterator.buffered) +} diff --git a/src/main/scala/org/hammerlab/iterator/sorted/OrZipIterator.scala b/src/main/scala/org/hammerlab/iterator/sorted/OrZipIterator.scala new file mode 100644 index 0000000..33bf3d9 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/sorted/OrZipIterator.scala @@ -0,0 +1,59 @@ +package org.hammerlab.iterator.sorted + +import org.hammerlab.iterator.{ HeadOptionIterator, SimpleBufferedIterator } +import org.hammerlab.types.{ Both, LO, Or, RO } + +case class OrZipIterator[T](l: BufferedIterator[T]) { + def sortedOrZip[U, V](other: Iterable[U])( + implicit + ord: Ordering[V], + tv: T ⇒ V, + uv: U ⇒ V + ): SimpleBufferedIterator[Or[T, U]] = + sortedOrZip(other.iterator) + + def sortedOrZip[U, V](other: Iterator[U])( + implicit + ord: Ordering[V], + tv: T ⇒ V, + uv: U ⇒ V + ): SimpleBufferedIterator[Or[T, U]] = { + val r = other.buffered + new SimpleBufferedIterator[Or[T, U]] { + override protected def _advance: Option[Or[T, U]] = + (l.headOption, r.headOption) match { + case (Some(t), Some(u)) ⇒ + Some( + ord.compare(t, u) match { + case 0 ⇒ + l.next + r.next + Both(t, u) + case x if x < 0 ⇒ + l.next + LO(t) + case _ ⇒ + r.next + RO(u) + } + ) + case (Some(t), _) ⇒ + l.next + Some(LO(t)) + case (_, Some(u)) ⇒ + r.next + Some(RO(u)) + case _ ⇒ + None + } + } + } +} + +object OrZipIterator { + implicit def makeOrZipIterator[T](it: Iterator[T]): OrZipIterator[T] = + OrZipIterator(it.buffered) + + implicit def makeOrZipIteratorFromIterable[T](it: Iterable[T]): OrZipIterator[T] = + OrZipIterator(it.iterator.buffered) +} diff --git a/src/main/scala/org/hammerlab/iterator/sorted/ZipIterator.scala b/src/main/scala/org/hammerlab/iterator/sorted/ZipIterator.scala new file mode 100644 index 0000000..ae32c01 --- /dev/null +++ b/src/main/scala/org/hammerlab/iterator/sorted/ZipIterator.scala @@ -0,0 +1,50 @@ +package org.hammerlab.iterator.sorted + +import org.hammerlab.iterator.{ HeadOptionIterator, SimpleBufferedIterator } + +case class ZipIterator[T](l: BufferedIterator[T]) { + def sortedZip[V](other: Iterable[T])( + implicit + ord: Ordering[V], + tv: T ⇒ V + ): SimpleBufferedIterator[T] = + sortedZip[V](other.iterator) + + def sortedZip[V](other: Iterator[T])( + implicit + ord: Ordering[V], + tv: T ⇒ V + ): SimpleBufferedIterator[T] = { + val r = other.buffered + val ≤ = ord.lteq _ + new SimpleBufferedIterator[T] { + override protected def _advance: Option[T] = + (l.headOption, r.headOption) match { + case (Some(t), Some(u)) ⇒ + if (≤(t, u)) { + l.next + Some(t) + } else { + r.next + Some(u) + } + case (Some(t), _) ⇒ + l.next + Some(t) + case (_, Some(u)) ⇒ + r.next + Some(u) + case _ ⇒ + None + } + } + } +} + +object ZipIterator { + implicit def makeZipIterator[T](it: Iterator[T]): ZipIterator[T] = + ZipIterator(it.buffered) + + implicit def makeZipIteratorFromIterable[T](it: Iterable[T]): ZipIterator[T] = + ZipIterator(it.iterator.buffered) +} diff --git a/src/main/scala/org/hammerlab/math/HypergeometricDistribution.scala b/src/main/scala/org/hammerlab/math/HypergeometricDistribution.scala deleted file mode 100644 index 8b90dec..0000000 --- a/src/main/scala/org/hammerlab/math/HypergeometricDistribution.scala +++ /dev/null @@ -1,74 +0,0 @@ -package org.hammerlab.math - -import org.apache.commons.math3.util.FastMath - -import scala.collection.mutable.ArrayBuffer - -/** - * Implementation of a hypergeometric distribution, modeled after - * [[org.apache.commons.math3.distribution.HypergeometricDistribution]], but supporting [[Long]] parameters. - * @param N Population size. - * @param K Number of successes. - * @param n Number to sample. - */ -case class HypergeometricDistribution(N: Long, K: Long, n: Int) { - - // These will be filled with n+1 elements corresponding to the PDF and CDF values for k ∈ [0, n]. - val pdf = ArrayBuffer[Double]() - val cdf = ArrayBuffer[Double]() - - // This will be set to the log of the binomial coefficient C(N, n), which is used multiple times in subsequent - // calculations. - var d = 0.0 - - // logs of k!, for k in [0, n]. - val logBinomPartialSumsLo = ArrayBuffer[Double]() - - // logs of K! / (K - k)!, for k in [0, n]. - val logBinomPartialSumsK = ArrayBuffer[Double]() - - // logs of (N - K)! / (N - K - k)!, for k in [0, n]. - val logBinomPartialSumsNK = ArrayBuffer[Double]() - - // Compute log-arrays described above. - (0 to n).foreach(k ⇒ { - if (k == 0) { - logBinomPartialSumsLo += 0 - logBinomPartialSumsK += 0 - logBinomPartialSumsNK += 0 - } else { - logBinomPartialSumsLo += (logBinomPartialSumsLo(k - 1) + FastMath.log(k)) - logBinomPartialSumsK += (logBinomPartialSumsK(k - 1) + FastMath.log(K + 1 - k)) - logBinomPartialSumsNK += (logBinomPartialSumsNK(k - 1) + FastMath.log(N - K + 1 - k)) - - d += FastMath.log(N + 1 - k) - d -= FastMath.log(k) - } - }) - - // Compute PDF and CDF. - (0 to n).foreach(k ⇒ { - val p1 = logBinomPartialSumsK(k) - logBinomPartialSumsLo(k) - val p2 = logBinomPartialSumsNK(n - k) - logBinomPartialSumsLo(n - k) - val v = FastMath.exp(p1 + p2 - d) - pdf += v - if (k == 0) - cdf += v - else - cdf += (v + cdf(k - 1)) - }) - - // Given a double x in [0, 1], binary-search the CDF to find the greatest integer k such that CDF(k) ≤ x. - def invCDF(x: Double, start: Int = 0, end: Int = n): Int = { - if (start == end) - start - else { - val mid = (start + end) / 2 - val c = cdf(mid) - if (x <= c) - invCDF(x, start, mid) - else - invCDF(x, mid + 1, end) - } - } -} diff --git a/src/main/scala/org/hammerlab/math/Monoid.scala b/src/main/scala/org/hammerlab/math/Monoid.scala deleted file mode 100644 index f895529..0000000 --- a/src/main/scala/org/hammerlab/math/Monoid.scala +++ /dev/null @@ -1,59 +0,0 @@ -package org.hammerlab.math - -import shapeless._ - -/** - * Copied/Adapted from - * https://github.com/milessabin/shapeless/blob/shapeless-2.3.2/examples/src/main/scala/shapeless/examples/monoids.scala - */ - -trait MonoidSyntax[T] { - def |+|(b: T): T -} - -object MonoidSyntax { - implicit def monoidSyntax[T](a: T)(implicit mt: Monoid[T]): MonoidSyntax[T] = new MonoidSyntax[T] { - def |+|(b: T) = mt.append(a, b) - } -} - -trait Monoid[T] { - def zero: T - def append(a: T, b: T): T -} - -object Monoid extends ProductTypeClassCompanion[Monoid] { - def zero[T](implicit mt: Monoid[T]) = mt.zero - - implicit def longMonoid: Monoid[Long] = new Monoid[Long] { - def zero = 0 - def append(a: Long, b: Long) = a + b - } - - implicit def intMonoid: Monoid[Int] = new Monoid[Int] { - override def zero: Int = 0 - override def append(a: Int, b: Int): Int = a + b - } - - implicit def stringMonoid: Monoid[String] = new Monoid[String] { - override def zero: String = "" - override def append(a: String, b: String): String = a + b - } - - object typeClass extends ProductTypeClass[Monoid] { - def emptyProduct = new Monoid[HNil] { - def zero = HNil - def append(a: HNil, b: HNil) = HNil - } - - def product[F, T <: HList](mh: Monoid[F], mt: Monoid[T]) = new Monoid[F :: T] { - def zero = mh.zero :: mt.zero - def append(a: F :: T, b: F :: T) = mh.append(a.head, b.head) :: mt.append(a.tail, b.tail) - } - - def project[F, G](instance: => Monoid[G], to: F => G, from: G => F) = new Monoid[F] { - def zero = from(instance.zero) - def append(a: F, b: F) = from(instance.append(to(a), to(b))) - } - } -} diff --git a/src/main/scala/org/hammerlab/math/PartiallyOrdered.scala b/src/main/scala/org/hammerlab/math/PartiallyOrdered.scala deleted file mode 100644 index 982f6fa..0000000 --- a/src/main/scala/org/hammerlab/math/PartiallyOrdered.scala +++ /dev/null @@ -1,45 +0,0 @@ -package org.hammerlab.math - -/** - * Fork of [[scala.math.PartiallyOrdered]] trait that inherits [[Any]], making it a universal trait suitable for - * inheritance by value-classes. See https://issues.scala-lang.org/browse/SI-10128. - * - * A class for partially ordered data. - * - * Forked from the Scala standard-lib in order to make it a universal trait, for mixing-in to value-classes. - * - * @author Martin Odersky - * @version 1.0, 23/04/2004 - */ -trait PartiallyOrdered[+A] extends Any { - - /** Result of comparing `'''this'''` with operand `that`. - * Returns `None` if operands are not comparable. - * If operands are comparable, returns `Some(x)` where - * - `x < 0` iff `'''this''' < that` - * - `x == 0` iff `'''this''' == that` - * - `x > 0` iff `'''this''' > that` - */ - def tryCompareTo [B >: A : PartiallyOrdered](that: B): Option[Int] - - def < [B >: A : PartiallyOrdered](that: B): Boolean = - this tryCompareTo that match { - case Some(x) if x < 0 ⇒ true - case _ ⇒ false - } - def > [B >: A : PartiallyOrdered](that: B): Boolean = - this tryCompareTo that match { - case Some(x) if x > 0 ⇒ true - case _ ⇒ false - } - def <= [B >: A : PartiallyOrdered](that: B): Boolean = - this tryCompareTo that match { - case Some(x) if x <= 0 ⇒ true - case _ ⇒ false - } - def >= [B >: A : PartiallyOrdered](that: B): Boolean = - this tryCompareTo that match { - case Some(x) if x >= 0 ⇒ true - case _ ⇒ false - } -} diff --git a/src/main/scala/org/hammerlab/math/RoundNumbers.scala b/src/main/scala/org/hammerlab/math/RoundNumbers.scala deleted file mode 100644 index b5f76f2..0000000 --- a/src/main/scala/org/hammerlab/math/RoundNumbers.scala +++ /dev/null @@ -1,48 +0,0 @@ -package org.hammerlab.math - -import org.hammerlab.iterator.SimpleBufferedIterator -import spire.math.Integral -import spire.implicits._ - -/** - * Emit an exponentially-increasing sequence of integers composed of repetitions of `steps` scaled by successive powers - * of `base`. - */ -class RoundNumbers[I: Integral] private(steps: Seq[Int], - base: Int = 10, - limitOpt: Option[I]) - extends SimpleBufferedIterator[I] { - - private var idx = 0 - private var basePow: I = Integral[I].one - - override protected def _advance: Option[I] = { - val n = steps(idx) * basePow - if (limitOpt.exists(_ < n)) - None - else - Some(n) - } - - override protected def postNext(): Unit = { - idx += 1 - if (idx == steps.size) { - idx = 0 - basePow *= base - } - } -} - -/** - * Constructors. - */ -object RoundNumbers { - def apply[I: Integral](steps: Seq[Int], - limit: I, - base: Int = 10): Iterator[I] = - new RoundNumbers(steps, base, Some(limit)) - - def apply(steps: Seq[Int], - base: Int): Iterator[Long] = - new RoundNumbers[Long](steps, base, None) -} diff --git a/src/main/scala/org/hammerlab/math/Steps.scala b/src/main/scala/org/hammerlab/math/Steps.scala deleted file mode 100644 index c5465e3..0000000 --- a/src/main/scala/org/hammerlab/math/Steps.scala +++ /dev/null @@ -1,70 +0,0 @@ -package org.hammerlab.math - -import math.{exp, log, max, min} - -/** - * Some utilities for generating exponential sequences of integers that can be used as e.g. histogram-bucket boundaries. - */ -object Steps { - - /** - * Divide [0, maxDepth] into N geometrically-evenly-spaced steps (of size ≈maxDepth^(1/N)). - * - * Until the k-th step is bigger than k, the whole number k is used in its stead. - */ - def geometricEvenSteps(maxDepth: Int, N: Int = 100): Set[Int] = { - val logMaxDepth = log(maxDepth) - - Set(0) ++ - (for { - i ← 1 until N - } yield - min( - maxDepth, - max( - i, - exp( - (i - 1) * logMaxDepth / (N - 2) - ).toInt - ) - ) - ).toSet - } - - /** - * Produce a set of "round numbers" between 0 and a provided N, inclusive. - * - * Coverage is relatively dense but the total number of sampled/returned integers is still O(log(N)) in the input N; - * specifically, 35 integers are returned in each factor-of-10 window (detailed below). - * - * The absolute difference between consecutive integers is non-decreasing over the entire range and, (after the [0,10] - * interval), no two consecutive integers returned are more than 10% different from one another. - * - * - * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, base case: include all of [0, 10]. - * 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, step by one from 10% to 20% of the next power of 10 (100 here). - * 20, 22, 24, 26, 28, - * 30, 32, 34, 36, 38, - * 40, 42, 44, 46, 48, step by two from 20% to 50% of the next power of 10. - * 50, 55, - * 60, 65, - * 70, 75, - * 80, 85, - * 90, 95, step by five from 50% to 100% of the next power of 10. - * - * …then repeat the [10, 95] portion, multiplied by powers of 10: - * - * 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, this is 10x the "steps by one" section above. - * 200, 220, 240, 260, 280, - * 300, 320, 340, 360, 380, likewise, 10x the "steps by two" from above. - * - * …etc. - */ - def roundNumbers(maxDepth: Int): Set[Int] = - (0 until 10).toSet ++ - RoundNumbers( - (10 until 20) ++ (20 until 50 by 2) ++ (50 until 100 by 5), - maxDepth, - 10 - ).toSet -} diff --git a/src/main/scala/org/hammerlab/math/VarNum.scala b/src/main/scala/org/hammerlab/math/VarNum.scala deleted file mode 100644 index eeb22c1..0000000 --- a/src/main/scala/org/hammerlab/math/VarNum.scala +++ /dev/null @@ -1,92 +0,0 @@ -package org.hammerlab.math - -import java.io.{OutputStream, InputStream} - -/** - * Serialization wrapper for [[Long]]s which burns one bit per byte indicating whether any more bytes follow. - * - * Can utilize less serialized space than naively writing 8-byte [[Long]]s in datasets where absolute values tend to be - * less than 2^48 more often than they are ≥ 2^55. - * - * [[Long]]'s absolute values correspond to the following number of serialized bytes: - * - * [ 0, 2^6): 1 byte - * [ 2^6, 2^13): 2 bytes - * [2^13, 2^20): 3 bytes - * [2^20, 2^27): 4 bytes - * [2^27, 2^34): 5 bytes - * [2^34, 2^41): 6 bytes - * [2^41, 2^48): 7 bytes - * [2^48, 2^55): 8 bytes - * [2^55, 2^63): 9 bytes - * - * The first byte, in addition to its most significant bit indicating whether any more bites follow, uses its - * second-most-significant bit to represent the sign of the [[Long]]. - */ -object VarNum { - def write(output: OutputStream, l: Long): Unit = { - var n = l - var more = true - var total = 0 - while (more) { - if (total == 56) { - output.write(n.toByte) - more = false - } else { - val b = - if (total == 0) { - val byte = - if (n < 0) { - n = -n - (n & 0x3F).toByte | 0x40 - } else { - (n & 0x3F).toByte - } - - n = n >> 6 - byte - } else { - val byte = (n & 0x7F).toByte - n = n >> 7 - byte - } - - total += 7 - more = n > 0 - output.write(b | (if (more) 0x80 else 0).toByte) - } - } - } - - def read(input: InputStream): Long = { - var l = 0L - var bits = 0 - val readBytes = Array[Byte](0) - var negate = false - while (bits < 63) { - input.read(readBytes) - val b = readBytes(0) - if (bits == 55) { - l += ((b & 0xffL) << bits) - bits += 8 - } else { - if (bits == 0) { - negate = (b & 0x40) > 0 - l += (b & 0x3FL) - bits += 6 - } else { - l += (b & 0x7FL) << bits - bits += 7 - } - - if ((b & 0x80) == 0) { - bits = 63 - } - } - } - if (negate) - -l - else - l - } -} diff --git a/src/main/scala/org/hammerlab/math/package.scala b/src/main/scala/org/hammerlab/math/package.scala deleted file mode 100644 index b28f395..0000000 --- a/src/main/scala/org/hammerlab/math/package.scala +++ /dev/null @@ -1,18 +0,0 @@ -package org.hammerlab - -import spire.math.Integral - -package object math { - /** - * Simple helper for rounding-up integer-division - */ - def ceil[N: Integral](numerator: N, denominator: N): N = { - val numeric = implicitly[Integral[N]] - import numeric._ - fromDouble( - scala.math.ceil( - toDouble(numerator) / toDouble(denominator) - ) - ) - } -} diff --git a/src/main/scala/org/hammerlab/stats/Runs.scala b/src/main/scala/org/hammerlab/stats/Runs.scala deleted file mode 100644 index d667d23..0000000 --- a/src/main/scala/org/hammerlab/stats/Runs.scala +++ /dev/null @@ -1,22 +0,0 @@ -package org.hammerlab.stats - -import spire.math.Integral - -/** - * Convenience class wrapping a sequence of key-number pairs, used in run-length-encoding in [[Stats]]. - */ -case class Runs[K, V: Integral](elems: Seq[(K, V)]) { - override def toString: String = - ( - for ((elem, count) ← elems) yield - if (count == 1) - elem.toString - else - s"$elem×$count" - ).mkString(", ") -} - -object Runs { - implicit def runsToSeq[K, V: Integral](runs: Runs[K, V]): Seq[(K, V)] = runs.elems - implicit def seqToRuns[K, V: Integral](elems: Seq[(K, V)]): Runs[K, V] = Runs(elems) -} diff --git a/src/main/scala/org/hammerlab/stats/Samples.scala b/src/main/scala/org/hammerlab/stats/Samples.scala deleted file mode 100644 index 48ddd7c..0000000 --- a/src/main/scala/org/hammerlab/stats/Samples.scala +++ /dev/null @@ -1,42 +0,0 @@ -package org.hammerlab.stats - -import spire.math.Integral -import spire.implicits._ - -/** - * Used by [[Stats]] to wrap some [[Runs]] of elements from the start and end of a dataset. - * @param n total number of elements in the dataset. - * @param first [[Runs]] of elements from the start of the dataset. - * @param numFirst the number of elements represented by the [[Runs]] in [[first]], i.e. the sum of the their values. - * @param last [[Runs]] of elements from the end of the dataset. - * @param numLast the number of elements represented by the [[Runs]] in [[last]], i.e. the sum of the their values. - * @tparam K arbitrary element type - * @tparam V [[Integral]] type, e.g. [[Int]] or [[Long]]. - */ -case class Samples[K, V: Integral](n: V, first: Runs[K, V], numFirst: V, last: Runs[K, V], numLast: V) { - def isEmpty: Boolean = first.isEmpty - def nonEmpty: Boolean = first.nonEmpty - - def removeOverlap(num: V, first: Runs[K, V], last: Runs[K, V]): Runs[K, V] = { - val lastIt = last.iterator.buffered - var dropped = Integral[V].zero - Runs( - first ++ lastIt.dropWhile(t ⇒ { - val (_, count) = t - val drop = dropped < num - dropped += count - drop - }) - ) - } - - override def toString: String = { - val numSampled = numFirst + numLast - val numSkipped = n - numSampled - if (numSkipped > 0) { - s"$first, …, $last" - } else { - removeOverlap(-numSkipped, first, last).toString - } - } -} diff --git a/src/main/scala/org/hammerlab/stats/Stats.scala b/src/main/scala/org/hammerlab/stats/Stats.scala deleted file mode 100644 index 37cb6d6..0000000 --- a/src/main/scala/org/hammerlab/stats/Stats.scala +++ /dev/null @@ -1,466 +0,0 @@ -package org.hammerlab.stats - -import org.hammerlab.iterator.RunLengthIterator._ -import spire.implicits._ -import spire.math.{ Integral, Numeric } - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer -import scala.math.{log10, floor, ceil, abs, sqrt} - -/** - * Wrapper for some computed statistics about a dataset of [[Numeric]] elements. - * - * @param n number of elements in the dataset. - * @param mean mean. - * @param stddev stddev. - * @param mad median absolute deviation (from the median). - * @param samplesOpt "sample" elements; the start and end of the data. - * @param sortedSamplesOpt "sample" elements; the least and greatest elements. If the dataset is already sorted, meaning - * this would be equivalent to [[samplesOpt]], it is omitted. - * @param percentiles selected percentiles of the dataset. - * @tparam K [[Numeric]] element type. TODO(ryan): allow this to be non-[[Numeric]]. - * @tparam V [[Integral]] value type. - */ -case class Stats[K: Numeric, V: Integral](n: V, - mean: Double, - stddev: Double, - mad: Double, - samplesOpt: Option[Samples[K, V]], - sortedSamplesOpt: Option[Samples[K, V]], - percentiles: Seq[(Double, Double)]) { - - def prettyDouble(d: Double): String = - if (floor(d).toInt == ceil(d).toInt) - d.toInt.toString - else - "%.1f".format(d) - - def prettyPercentile(d: Double): String = - if (floor(d).toInt == ceil(d).toInt) - d.toInt.toString - else - d.toString - - override def toString: String = { - if (n == 0) - "(empty)" - else { - val strings = ArrayBuffer[String]() - - strings += - List( - s"num:\t$n", - s"mean:\t${prettyDouble(mean)}", - s"stddev:\t${prettyDouble(stddev)}", - s"mad:\t${prettyDouble(mad)}" - ).mkString(",\t") - - for { - samples ← samplesOpt - if samples.nonEmpty - } { - strings += s"elems:\t$samples" - } - - for { - sortedSamples ← sortedSamplesOpt - if sortedSamples.nonEmpty - } { - strings += s"sorted:\t$sortedSamples" - } - - strings ++= - percentiles.map { - case (k, v) ⇒ - s"${prettyPercentile(k)}:\t${prettyDouble(v)}" - } - - strings.mkString("\n") - } - } -} - -/** - * Helpers for constructing [[Stats]] / computing the statistics that populate a [[Stats]] instance. - */ -object Stats { - - /** - * Construct a [[Stats]] from a sequence of "runs"; elements paired with a count of repetitions. - * - * @param v values. - * @param numToSample highlight this many "runs" of data from the start and end of the data; likewise the least and - * greatest elements (and repetition counts). - * @param onlySampleSorted only highlight the least and greatest elements; omit the first and last - */ - def fromHist[K: Numeric: Ordering, V: Integral](v: Iterable[(K, V)], - numToSample: Int = 10, - onlySampleSorted: Boolean = false): Stats[K, V] = { - - var alreadySorted = true - val hist = mutable.HashMap[K, V]() - var n = Integral[V].zero - - val values = { - val vBuilder = Vector.newBuilder[(K, V)] - var prevOpt: Option[K] = None - for { - (value, num) ← reencode[K, V](v.iterator) - } { - if (alreadySorted) { - if (prevOpt.exists(_ > value)) - alreadySorted = false - else - prevOpt = Some(value) - } - vBuilder += value → num - n += num - hist.update(value, hist.getOrElse(value, Integral[V].zero) + num) - } - - vBuilder.result() - } - - if (values.isEmpty) { - return empty - } - - val sorted = - if (alreadySorted) - values - else - for { - key ← hist.keys.toVector.sorted - } yield - key → hist(key) - - val ps = histPercentiles(n, sorted) - - val median = ps(ps.length / 2)._2 - - val medianDeviationsBuilder = Vector.newBuilder[(Double, V)] - - var sum = 0.0 - var sumSquares = 0.0 - for ((value, num) ← sorted) { - val d = value.toDouble - sum += d * num.toDouble() - sumSquares += d * d * num.toDouble() - medianDeviationsBuilder += abs(d - median) → num - } - - val medianDeviations = medianDeviationsBuilder.result().sortBy(_._1) - - val mad = - getRunPercentiles( - medianDeviations, - Seq( - 50.0 → - ((n.toDouble() - 1) / 2.0) - ) - ) - .head - ._2 - - val mean = sum / n.toDouble() - val stddev = sqrt(sumSquares / n.toDouble() - mean * mean) - - val samplesOpt = - if (alreadySorted || !onlySampleSorted) { - val firstElems = values.take(numToSample) - val numFirstElems = firstElems.map(_._2).reduce(_ + _) - - val lastElems = values.takeRight(numToSample) - val numLastElems = lastElems.map(_._2).reduce(_ + _) - - Some(Samples[K, V](n, firstElems, numFirstElems, lastElems, numLastElems)) - } else - None - - val sortedSamplesOpt = - if (!alreadySorted) { - val leastElems = sorted.take(numToSample) - val numLeastElems = leastElems.map(_._2).reduce(_ + _) - - val greatestElems = sorted.takeRight(numToSample) - val numGreatestElems = greatestElems.map(_._2).reduce(_ + _) - - Some(Samples(n, leastElems, numLeastElems, greatestElems, numGreatestElems)) - } else - None - - Stats( - n, - mean, stddev, mad, - samplesOpt, - sortedSamplesOpt, - ps - ) - } - - /** - * Construct a [[Stats]] instance from input data `v`. - * @param v values. - * @param numToSample highlight this many "runs" of data from the start and end of the data; likewise the least and - * greatest elements (and repetition counts). - * @param onlySampleSorted only highlight the least and greatest elements; omit the first and last - */ - def apply[K: Numeric: Ordering](v: Iterable[K], - numToSample: Int = 10, - onlySampleSorted: Boolean = false): Stats[K, Int] = { - - val vBuilder = Vector.newBuilder[K] - var alreadySorted = true - var prevOpt: Option[K] = None - for (value ← v) { - if (alreadySorted) { - if (prevOpt.exists(_ > value)) - alreadySorted = false - else - prevOpt = Some(value) - } - vBuilder += value - } - - val values = vBuilder.result() - - val n = values.length - - val sorted = - if (alreadySorted) - values - else - values.sorted - - val median = getMedian(sorted) - - val medianDeviationsBuilder = Vector.newBuilder[Double] - - var sum = 0.0 - var sumSquares = 0.0 - for (value ← sorted) { - val d = value.toDouble - sum += d - sumSquares += d * d - medianDeviationsBuilder += abs(d - median) - } - - val medianDeviations = medianDeviationsBuilder.result().sorted - val mad = getMedian(medianDeviations) - - val mean = sum / n - val stddev = sqrt(sumSquares / n - mean * mean) - - val samplesOpt: Option[Samples[K, Int]] = - if (alreadySorted || !onlySampleSorted) { - // Count occurrences of the first N distinct values. - val (firstElems, numFirstElems) = runLengthEncodeWithSum(values.iterator, numToSample) - - // Count occurrences of the last N distinct values. - val (lastElems, numLastElems) = runLengthEncodeWithSum(values.reverseIterator, numToSample, reverse = true) - - Some(Samples(n, firstElems, numFirstElems, lastElems, numLastElems)) - } else - None - - val sortedSamplesOpt: Option[Samples[K, Int]] = - if (!alreadySorted) { - // Count occurrences of the least N distinct values. - val (leastElems, numLeastElems) = runLengthEncodeWithSum[K](sorted.iterator, numToSample) - - // Count occurrences of the greatest N distinct values. - val (greatestElems, numGreatestElems) = runLengthEncodeWithSum(sorted.reverseIterator, numToSample, reverse = true) - - Some(Samples(n, leastElems, numLeastElems, greatestElems, numGreatestElems)) - } else - None - - new Stats( - n, - mean, stddev, mad, - samplesOpt, - sortedSamplesOpt, - percentiles(sorted) - ) - } - - /** - * Construct an empty [[Stats]] instance. - */ - private def empty[K: Numeric, V: Integral]: Stats[K, V] = - new Stats( - n = Integral[V].zero, - mean = 0, - stddev = 0, - mad = 0, - samplesOpt = None, - sortedSamplesOpt = None, - percentiles = Nil - ) - - /** - * Compute percentiles listed in `ps` of the data in `values`; wrapper for implementation below. - */ - private def getRunPercentiles[K: Numeric, V: Integral](values: Seq[(K, V)], - ps: Seq[(Double, Double)]): Vector[(Double, Double)] = - getRunPercentiles( - values - .iterator - .buffered, - ps - .iterator - .buffered - ).toVector - - /** - * Compute percentiles listed in `ps` of the data in `values`. - * - * @param values runs of elements. - * @param percentiles percentiles to compute, specified as tuples where the key is the percentile and the value is the - * index in `values` at which that percentile lies (interpolated to be a fractional amount between - * two indices, where appropriate). - * @return pairs of (percentile, value). - */ - private def getRunPercentiles[K: Numeric, V: Integral](values: BufferedIterator[(K, V)], - percentiles: BufferedIterator[(Double, Double)]): Iterator[(Double, Double)] = - new Iterator[(Double, Double)] { - - var elemsPast = 0.0 - var curK: Option[Double] = None - - override def hasNext: Boolean = percentiles.hasNext - - override def next(): (Double, Double) = { - val (percentile, idx) = percentiles.next() - while(elemsPast <= idx) { - val (k, v) = values.next() - curK = Some(k.toDouble()) - elemsPast += v.toDouble() - } - - val distancePast = elemsPast - idx - - percentile → - ( - if (distancePast < 1) - curK.get * distancePast + values.head._1.toDouble() * (1 - distancePast) - else - curK.get - ) - } - } - - /** - * Compute some relevant percentiles based on the number of elements present. - * @return pairs of (percentile, value). - */ - private def histPercentiles[K: Numeric, V: Integral](N: V, values: IndexedSeq[(K, V)]): Vector[(Double, Double)] = { - val n = N - 1 - val denominators: Iterator[Int] = Iterator(2, 4, 10, 20, 100, 1000, 10000) - - val nd = n.toDouble - val percentileIdxs = - denominators - .takeWhile(d ⇒ d <= n || d == 2) // Always take the median (denominator 2 aka 50th percentile). - .flatMap(d ⇒ { - val loPercentile = 100.0 / d - val hiPercentile = 100.0 - loPercentile - - val loIdx = nd / d - val hiIdx = nd - loIdx - - if (d == 2) - // Median (50th percentile, denominator 2) only emits one tuple. - Iterator(loPercentile → loIdx) - else - // In general, we emit two tuples per "denominator", one on the high side and one on the low. For example, for - // denominator 4, we emit the 25th and 75th percentiles. - Iterator(loPercentile → loIdx, hiPercentile → hiIdx) - }) - .toArray - .sortBy(_._1) - - getRunPercentiles(values, percentileIdxs) - } - - /** - * Compute some relevant percentiles based on the number of elements present. - * @return pairs of (percentile, value). - */ - private def percentiles[T: Numeric](values: IndexedSeq[T]): Vector[(Double, Double)] = { - val n = values.length - 1 - - val denominators: Iterator[Int] = { - lazy val pow10s: Stream[Int] = 100 #:: pow10s.map(_ * 10) - Iterator(2, 4, 10, 20) ++ pow10s.iterator - } - - val nd = n.toDouble - denominators.takeWhile(_ <= n).flatMap(d ⇒ { - val loPercentile = 100.0 / d - val hiPercentile = 100.0 - loPercentile - - val loFrac = nd / d - - val loFloor = floor(loFrac).toInt - val loCeil = ceil(loFrac).toInt - - val hiFloor = n - loFloor - val hiCeil = n - loCeil - - val loRemainder = loFrac - loFloor - val (lo, hi) = - if (loFloor == loCeil) - (values(loFloor).toDouble(), values(hiFloor).toDouble()) - else - ( - values(loFloor).toDouble() * loRemainder + values(loCeil).toDouble() * (1 - loRemainder), - values(hiCeil).toDouble() * loRemainder + values(hiFloor).toDouble() * (1 - loRemainder) - ) - - if (d == 2) - // Median (50th percentile, denominator 2) only emits one tuple. - Iterator(loPercentile → lo) - else - // In general, we emit two tuples per "denominator", one on the high side and one on the low. For example, for - // denominator 4, we emit the 25th and 75th percentiles. - Iterator(loPercentile → lo, hiPercentile → hi) - - }).toVector.sortBy(_._1) - } - - private def getMedian[T: Numeric](sorted: Vector[T]): Double = { - val n = sorted.length - if (n == 0) - -1 - else if (n % 2 == 0) - (sorted(n / 2 - 1) + sorted(n / 2)).toDouble() / 2.0 - else - sorted(n / 2).toDouble() - } - - /** - * Find the first `N` "runs" from the beginning of `it`. If `reverse`, return them in reversed order. - */ - private def runLengthEncodeWithSum[K: Numeric](it: Iterator[K], - N: Int, - reverse: Boolean = false): (Seq[(K, Int)], Int) = { - var sum = 0 - var i = 0 - val runs = ArrayBuffer[(K, Int)]() - val runLengthIterator = it.runLengthEncode() - while (i < N && runLengthIterator.hasNext) { - val (elem, count) = runLengthIterator.next() - - if (reverse) - runs.prepend(elem → count) - else - runs += ((elem, count)) - - sum += count - i += 1 - } - runs → sum - } -} diff --git a/src/main/scala/org/hammerlab/types/Or.scala b/src/main/scala/org/hammerlab/types/Or.scala new file mode 100644 index 0000000..c1b2642 --- /dev/null +++ b/src/main/scala/org/hammerlab/types/Or.scala @@ -0,0 +1,21 @@ +package org.hammerlab.types + +sealed trait Or[+L, +R] + +object Or { + def apply[L, R](l: L, r: Option[R]): Or[L, R] = + r match { + case Some(r) ⇒ Both(l, r) + case None ⇒ LO(l) + } + + def apply[L, R](l: Option[L], r: R): Or[L, R] = + l match { + case Some(l) ⇒ Both(l, r) + case None ⇒ RO(r) + } +} + +final case class LO[+L, +R](l: L) extends Or[L, R] +final case class RO[+L, +R](r: R) extends Or[L, R] +final case class Both[+L, +R](l: L, r: R) extends Or[L, R] diff --git a/src/main/scala/org/hammerlab/types/package.scala b/src/main/scala/org/hammerlab/types/package.scala new file mode 100644 index 0000000..4c28b0c --- /dev/null +++ b/src/main/scala/org/hammerlab/types/package.scala @@ -0,0 +1,11 @@ +package org.hammerlab + +package object types { + implicit class BoolOps(val b: Boolean) extends AnyVal { + def |[A](a: ⇒ A): Option[A] = + if (b) + Some(a) + else + None + } +} diff --git a/src/test/scala/org/hammerlab/iterator/BufferedTakeWhileIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/BufferedTakeWhileIteratorTest.scala deleted file mode 100644 index b4b1c10..0000000 --- a/src/test/scala/org/hammerlab/iterator/BufferedTakeWhileIteratorTest.scala +++ /dev/null @@ -1,25 +0,0 @@ -package org.hammerlab.iterator - -import org.hammerlab.iterator.BufferedTakeWhileIterator._ -import org.hammerlab.test.Suite - -class BufferedTakeWhileIteratorTest - extends Suite { - test("simple") { - val it = TestIterator(1 to 10: _*).buffered - it.takewhile(_ < 5).toList should be(1 to 4) - it.toList should be(5 to 10) - } - - test("take none") { - val it = TestIterator(1 to 10: _*).buffered - it.takewhile(_ < 1).toList should be(Nil) - it.toList should be(1 to 10) - } - - test("take all") { - val it = TestIterator(1 to 10: _*).buffered - it.takewhile(_ < 11).toList should be(1 to 10) - it.toList should be(Nil) - } -} diff --git a/src/test/scala/org/hammerlab/iterator/EitherIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/EitherIteratorTest.scala new file mode 100644 index 0000000..4d8d137 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/EitherIteratorTest.scala @@ -0,0 +1,147 @@ +package org.hammerlab.iterator + +import org.hammerlab.iterator.EitherIterator._ +import org.hammerlab.iterator.EitherIteratorTest._ +import org.hammerlab.test.Suite + +class FindLeftTest + extends Suite { + + implicit def intOpt(n: Int): Option[Int] = Some(n) + + def check(elems: Either[Int, String]*)(expected: Option[Int] = None): Unit = { + eithers(elems).findLeft should be(expected) + } + + test("findleft") { + check(4, 5, "abc", 6)(4) + check("abc", 6, "def", 7)(6) + check("abc", "def", 8)(8) + check()() + check("abc")() + check("abc", "def")() + check(4)(4) + } +} + +class GroupByLeftTest + extends Suite { + + def check(elems: Either[Int, String]*)(expected: (Int, String)*): Unit = + eithers(elems) + .groupByLeft + .map { + case (num, strings) ⇒ + num → + strings.mkString("") + } + .toList should be( + expected + ) + + test("simple") { + check( + 1, "a", "b", "c", + 2, + 3, + 4, "d", + 5 + )( + 1 → "abc", + 2 → "", + 3 → "", + 4 → "d", + 5 → "" + ) + } + + test("rights first and last") { + check( + "a", "b", "c", + 1, "d", "e", + 2, + 3, "f", "g" + )( + 1 → "de", + 2 → "", + 3 → "fg" + ) + } + + test("rights not consumed") { + eithers( + Seq( + 1, "a", "b", "c", + 2, + 3, "d", + 4, "e", "f", + 5 + ) + ) + .groupByLeft + .map { + case (num, strings) ⇒ + num → + strings + .buffered + .headOption + .getOrElse("???") + } + .toList should be( + Seq( + 1 → "a", + 2 → "???", + 3 → "d", + 4 → "e", + 5 → "???" + ) + ) + } +} + +class RoundUpRightTest + extends Suite { + + def check(elems: Either[Int, String]*)(expected: (Seq[Int], String)*): Unit = { + eithers(elems).roundUpRight.toList should be(expected) + } + + test("mixed") { + check( + 1, 2, 3, "abc", + "def", + 4, "ghi", + 5 + )( + Seq(1, 2, 3) → "abc", + Nil → "def", + Seq(4) → "ghi" + ) + } + + test("rights first") { + check( + "abc", + "def", + 1, "ghi", + "jkl", + 2, 3, "mno", + "pqr" + )( + Nil → "abc", + Nil → "def", + Seq(1) → "ghi", + Nil → "jkl", + Seq(2, 3) → "mno", + Nil → "pqr" + ) + } + +} + +object EitherIteratorTest { + implicit def leftInt(n: Int): Left[Int, String] = Left(n) + implicit def rightString(s: String): Right[Int, String] = Right(s) + + def eithers(elems: Seq[Either[Int, String]]): BufferedIterator[Either[Int, String]] = Iterator(elems: _*).buffered +} diff --git a/src/test/scala/org/hammerlab/iterator/GroupWithIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/GroupWithIteratorTest.scala index cca220b..82f279e 100644 --- a/src/test/scala/org/hammerlab/iterator/GroupWithIteratorTest.scala +++ b/src/test/scala/org/hammerlab/iterator/GroupWithIteratorTest.scala @@ -1,7 +1,7 @@ package org.hammerlab.iterator +import org.hammerlab.iterator.GroupWithIterator._ import org.hammerlab.test.Suite -import GroupWithIterator._ class GroupWithIteratorTest extends Suite { @@ -9,9 +9,9 @@ class GroupWithIteratorTest implicit def stringToInt(s: String): Int = augmentString(s).toInt test("mixed") { - TestIterator(2, 4, 6, 8, 10) + Iterator(2, 4, 6, 8, 10) .groupWith[String, Int]( - TestIterator("1", "2", "3", "4", "5", "5", "7", "11") + Iterator("1", "2", "3", "4", "5", "5", "7", "11") ) .toList .map { @@ -29,9 +29,9 @@ class GroupWithIteratorTest } test("left empty") { - TestIterator[Int]() + Iterator[Int]() .groupWith[String, Int]( - TestIterator("1", "2", "3", "4", "5", "5", "7", "11") + Iterator("1", "2", "3", "4", "5", "5", "7", "11") ) .toList .map { diff --git a/src/test/scala/org/hammerlab/iterator/RunLengthIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/RunLengthIteratorTest.scala index c60d842..d794d9f 100644 --- a/src/test/scala/org/hammerlab/iterator/RunLengthIteratorTest.scala +++ b/src/test/scala/org/hammerlab/iterator/RunLengthIteratorTest.scala @@ -70,4 +70,21 @@ class RunLengthIteratorTest extends Suite { } } + { + test("re-encode") { + reencode( + Iterator('a' → 2, 'a' → 1, 'b' → 3, 'a' → 4, 'c' → 1, 'c' → 1, 'c' → 2, 'a' → 1) + ) + .toList should be( + List( + 'a' → 3, + 'b' → 3, + 'a' → 4, + 'c' → 4, + 'a' → 1 + ) + ) + } + } + } diff --git a/src/test/scala/org/hammerlab/iterator/SimpleBufferedIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/SimpleBufferedIteratorTest.scala index e193b20..fd23f17 100644 --- a/src/test/scala/org/hammerlab/iterator/SimpleBufferedIteratorTest.scala +++ b/src/test/scala/org/hammerlab/iterator/SimpleBufferedIteratorTest.scala @@ -1,9 +1,8 @@ package org.hammerlab.iterator +import org.hammerlab.iterator.SimpleBufferedIterator._ import org.hammerlab.test.Suite -import SimpleBufferedIterator._ - class SimpleBufferedIteratorTest extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/SliceIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/SliceIteratorTest.scala new file mode 100644 index 0000000..79bc92a --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/SliceIteratorTest.scala @@ -0,0 +1,19 @@ +package org.hammerlab.iterator + +import org.hammerlab.test.Suite +import SliceIterator._ + +class SliceIteratorTest + extends Suite { + test("10") { + (0 to 9 iterator).sliceOpt(0, 10).toList should be(0 to 9) + (0 to 9 iterator).sliceOpt(0, 1).toList should be(0 to 0) + (0 to 9 iterator).sliceOpt(0, 5).toList should be(0 to 4) + (0 to 9 iterator).sliceOpt(0, 11).toList should be(0 to 9) + (0 to 9 iterator).sliceOpt(2, 10).toList should be(2 to 9) + (0 to 9 iterator).sliceOpt(2, 1).toList should be(2 to 2) + (0 to 9 iterator).sliceOpt(2, 5).toList should be(2 to 6) + (0 to 9 iterator).sliceOpt(2 ).toList should be(2 to 9) + (0 to 9 iterator).sliceOpt(2, 11).toList should be(2 to 9) + } +} diff --git a/src/test/scala/org/hammerlab/iterator/TestIterator.scala b/src/test/scala/org/hammerlab/iterator/TestIterator.scala deleted file mode 100644 index 71e2e35..0000000 --- a/src/test/scala/org/hammerlab/iterator/TestIterator.scala +++ /dev/null @@ -1,20 +0,0 @@ -package org.hammerlab.iterator - -/** - * [[Iterator]]s constructed via [[Iterator.apply]] or [[scala.collection.IndexedSeqLike.iterator]] are tricky about - * what state they are left in after various operations are applied, cf. https://github.com/scala/bug/issues/9274. - * - * This class allows easy creation of vanilla [[Iterator]]s with no funny-business, for testing extended iterator - * functionality like [[BufferedTakeWhileIteratorTest]], which passes erroneously with naively-created [[Iterator]]s. - */ -case class TestIterator[T](elems: T*) - extends Iterator[T] { - var idx = 0 - override def hasNext: Boolean = idx < elems.size - - override def next(): T = { - val elem = elems(idx) - idx += 1 - elem - } -} diff --git a/src/test/scala/org/hammerlab/iterator/bulk/BufferedCollectWhileTest.scala b/src/test/scala/org/hammerlab/iterator/bulk/BufferedCollectWhileTest.scala new file mode 100644 index 0000000..c948ff1 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/bulk/BufferedCollectWhileTest.scala @@ -0,0 +1,52 @@ +package org.hammerlab.iterator.bulk + +import org.hammerlab.iterator.EitherIteratorTest._ +import org.hammerlab.iterator.bulk.BufferedBulkIterator._ +import org.hammerlab.test.Suite + +class BufferedCollectWhileTest + extends Suite { + + def check(elems: Either[Int, String]*)(expectedInts: Int*): Unit = { + eithers(elems) + .collectwhile { + case Left(n) ⇒ n + } + .toList should be( + expectedInts + ) + } + + test("two") { + check( + 4, 5, "abc", 6 + )( + 4, 5 + ) + } + + test("none") { + check( + "abc", 4, 5, 6 + )( + + ) + } + + test("all") { + check( + 4, 5, 6 + )( + 4, 5, 6 + ) + } + + test("empty") { + check( + + )( + + ) + } + +} diff --git a/src/test/scala/org/hammerlab/iterator/bulk/BufferedDropWhileTest.scala b/src/test/scala/org/hammerlab/iterator/bulk/BufferedDropWhileTest.scala new file mode 100644 index 0000000..5e84c03 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/bulk/BufferedDropWhileTest.scala @@ -0,0 +1,85 @@ +package org.hammerlab.iterator.bulk + +import org.hammerlab.iterator.bulk.BufferedBulkIterator._ +import org.hammerlab.test.Suite + +class BufferedDropWhileTest + extends Suite { + + def check(fn: Int ⇒ Boolean, + expectedRemainingElems: Seq[Int])( + implicit elems: Seq[Int] + ): Unit = { + //Seq(1,2,3).iterator.take(3) + val it = Iterator(elems: _*).buffered + it.dropwhile(fn) + it.toList should be(expectedRemainingElems) + } + + { + implicit val elems = 1 to 10 + + test("[1,10] half") { + check( + _ < 5, + 5 to 10 + ) + } + + test("[1,10] none") { + check( + _ < 1, + 1 to 10 + ) + } + + test("[1,10] one") { + check( + _ == 1, + 2 to 10 + ) + } + + test("[1,10] almost all") { + check( + _ < 10, + Seq(10) + ) + } + + test("[1,10] all") { + check( + _ < 11, + Nil + ) + } + } + + test("empty") { + check( + _ ⇒ true, + Nil + )( + Nil + ) + } + + { + implicit val elems = Seq(1) + + test("one, all") { + check( + _ ⇒ true, + Nil + ) + } + + test("one, none") { + check( + _ ⇒ false, + Seq(1) + ) + } + } + +} diff --git a/src/test/scala/org/hammerlab/iterator/bulk/BufferedTakeWhileTest.scala b/src/test/scala/org/hammerlab/iterator/bulk/BufferedTakeWhileTest.scala new file mode 100644 index 0000000..74e0c7b --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/bulk/BufferedTakeWhileTest.scala @@ -0,0 +1,92 @@ +package org.hammerlab.iterator.bulk + +import org.hammerlab.iterator.bulk.BufferedBulkIterator._ +import org.hammerlab.test.Suite + +class BufferedTakeWhileTest + extends Suite { + + def check(fn: Int ⇒ Boolean, + expectedElems: Seq[Int], + expectedRemainingElems: Seq[Int])( + implicit elems: Seq[Int] + ): Unit = { + val it = Iterator(elems: _*).buffered + it.takewhile(fn).toList should be(expectedElems) + it.toList should be(expectedRemainingElems) + } + + { + implicit val elems = 1 to 10 + + test("[1,10] half") { + check( + _ < 5, + 1 to 4, + 5 to 10 + ) + } + + test("[1,10] none") { + check( + _ < 1, + Nil, + 1 to 10 + ) + } + + test("[1,10] one") { + check( + _ == 1, + Seq(1), + 2 to 10 + ) + } + + test("[1,10] almost all") { + check( + _ < 10, + 1 to 9, + Seq(10) + ) + } + + test("[1,10] all") { + check( + _ < 11, + 1 to 10, + Nil + ) + } + } + + test("empty") { + check( + _ ⇒ true, + Nil, + Nil + )( + Nil + ) + } + + { + implicit val elems = Seq(1) + + test("one, all") { + check( + _ ⇒ true, + Seq(1), + Nil + ) + } + + test("one, none") { + check( + _ ⇒ false, + Nil, + Seq(1) + ) + } + } +} diff --git a/src/test/scala/org/hammerlab/iterator/range/OverlappingRangesIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/range/OverlappingRangesIteratorTest.scala new file mode 100644 index 0000000..d007de1 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/range/OverlappingRangesIteratorTest.scala @@ -0,0 +1,114 @@ +package org.hammerlab.iterator.range + +import org.hammerlab.iterator.range.OverlappingRangesIterator._ +import org.hammerlab.test.Suite + +class OverlappingRangesIteratorTest + extends Suite { + + def Ranges(ranges: (Range[Int], Int)*): Seq[(Range[Int], Int)] = ranges + + implicit def intToRange(n: Int): Range[Int] = Range(n, None) + implicit def pairToIntRange(t: (Int, Int)): Range[Int] = Range(t._1, t._2) + implicit def indexedPairToRange(t: ((Int, Int), Int)): (Range[Int], Int) = (t._1, t._2) + + implicit def makeJoinedRangesElem(t: (Int, Seq[Range[Int]])): (Range[Int], Seq[Range[Int]]) = + (t._1: Range[Int]) → t._2 + + implicit def makeHalfOpenJoinedRangesElem(t: ((Int, Int), Seq[Range[Int]])): (Range[Int], Seq[Range[Int]]) = + (t._1: Range[Int]) → t._2 + + def check(left: Range[Int]*)(right: Range[Int]*)(expected: Seq[(Range[Int], Int)]*): Unit = { + left + .iterator + .joinOverlaps( + right + .iterator + .buffered + ) + .toList should be( + left.zip(expected) + ) + } + + test("trivial case") { + check( + 1 → 3 + )( + 1 → 3 + )( + Seq(Range(1, 3) → 0) + ) + } + + test("stable right-side ordering") { + check( + 2 → 4, + 5 → 7, + 8 + )( + 0 → 2, + 1 → 3, + 2 → 5, + 4 → 5, + 4 → 6, + 4 → 9, + 4 → 8, + 6 → 7, + 7 → 11, + 13 → 14, + 13 + )( + Seq(Range(1, 3) → 1, Range(2, 5) → 2), + Seq(Range(4, 6) → 4, Range(4, 9) → 5, Range( 4, 8) → 6, Range( 6, 7) → 7), + Seq(Range(4, 9) → 5, Range(7, 11) → 8, Range(13, 14) → 9, Range(13) → 10) + ) + } + + test("all rights before") { + check( + 5 → 10, + 7 → 9 + )( + 0 → 2, + 0 → 3, + 1 → 5 + )( + Nil, + Nil + ) + } + + test("all rights after") { + check( + 5 → 10, + 7 → 9 + )( + 10 → 12, + 10 → 13, + 11 → 15 + )( + Nil, + Nil + ) + } + + test("fully skipped rights") { + check( + 2 → 4, + 3 → 5, + 10 → 15 + )( + 1 → 2, + 3 → 4, + 5 → 10, + 6 → 9, + 11 → 12, + 15 → 20 + )( + Seq(Range( 3, 4) → 1), + Seq(Range( 3, 4) → 1), + Seq(Range(11, 12) → 4) + ) + } +} diff --git a/src/test/scala/org/hammerlab/iterator/Sliding2OptTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2OptTest.scala similarity index 82% rename from src/test/scala/org/hammerlab/iterator/Sliding2OptTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding2OptTest.scala index 3316899..1513ea8 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding2OptTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2OptTest.scala @@ -1,7 +1,7 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding +import org.hammerlab.iterator.sliding.Sliding2Iterator._ import org.hammerlab.test.Suite -import Sliding2Iterator._ class Sliding2OptTest extends Suite { test("empty") { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding2PadTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2PadTest.scala similarity index 81% rename from src/test/scala/org/hammerlab/iterator/Sliding2PadTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding2PadTest.scala index 4a07e3f..d8199a3 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding2PadTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2PadTest.scala @@ -1,7 +1,7 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding +import org.hammerlab.iterator.sliding.Sliding2Iterator._ import org.hammerlab.test.Suite -import Sliding2Iterator._ class Sliding2PadTest extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding2PrevTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2PrevTest.scala similarity index 82% rename from src/test/scala/org/hammerlab/iterator/Sliding2PrevTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding2PrevTest.scala index 50481c5..63a46b9 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding2PrevTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2PrevTest.scala @@ -1,7 +1,7 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding +import org.hammerlab.iterator.sliding.Sliding2Iterator._ import org.hammerlab.test.Suite -import Sliding2Iterator._ class Sliding2PrevTest extends Suite { test("empty") { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding2Test.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2Test.scala similarity index 79% rename from src/test/scala/org/hammerlab/iterator/Sliding2Test.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding2Test.scala index db0a3f9..9507be1 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding2Test.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding2Test.scala @@ -1,7 +1,7 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding +import org.hammerlab.iterator.sliding.Sliding2Iterator._ import org.hammerlab.test.Suite -import Sliding2Iterator._ class Sliding2Test extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding3NextOptsTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3NextOptsTest.scala similarity index 92% rename from src/test/scala/org/hammerlab/iterator/Sliding3NextOptsTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding3NextOptsTest.scala index 87ce787..1283e36 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding3NextOptsTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3NextOptsTest.scala @@ -1,6 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding -import org.hammerlab.iterator.Sliding3Iterator._ +import org.hammerlab.iterator.sliding.Sliding3Iterator._ import org.hammerlab.test.Suite class Sliding3NextOptsTest extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding3OptTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3OptTest.scala similarity index 92% rename from src/test/scala/org/hammerlab/iterator/Sliding3OptTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding3OptTest.scala index df72a10..7d01e54 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding3OptTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3OptTest.scala @@ -1,6 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding -import org.hammerlab.iterator.Sliding3Iterator._ +import org.hammerlab.iterator.sliding.Sliding3Iterator._ import org.hammerlab.test.Suite class Sliding3OptTest extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/Sliding3Test.scala b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3Test.scala similarity index 89% rename from src/test/scala/org/hammerlab/iterator/Sliding3Test.scala rename to src/test/scala/org/hammerlab/iterator/sliding/Sliding3Test.scala index 0c7801a..3ab293c 100644 --- a/src/test/scala/org/hammerlab/iterator/Sliding3Test.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/Sliding3Test.scala @@ -1,6 +1,6 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding -import org.hammerlab.iterator.Sliding3Iterator._ +import org.hammerlab.iterator.sliding.Sliding3Iterator._ import org.hammerlab.test.Suite class Sliding3Test extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/SlidingIteratorTest.scala b/src/test/scala/org/hammerlab/iterator/sliding/SlidingIteratorTest.scala similarity index 92% rename from src/test/scala/org/hammerlab/iterator/SlidingIteratorTest.scala rename to src/test/scala/org/hammerlab/iterator/sliding/SlidingIteratorTest.scala index 0291979..f37671c 100644 --- a/src/test/scala/org/hammerlab/iterator/SlidingIteratorTest.scala +++ b/src/test/scala/org/hammerlab/iterator/sliding/SlidingIteratorTest.scala @@ -1,7 +1,7 @@ -package org.hammerlab.iterator +package org.hammerlab.iterator.sliding +import org.hammerlab.iterator.sliding.SlidingIterator._ import org.hammerlab.test.Suite -import SlidingIterator._ class SlidingIteratorTest extends Suite { diff --git a/src/test/scala/org/hammerlab/iterator/sorted/ConvertToInt.scala b/src/test/scala/org/hammerlab/iterator/sorted/ConvertToInt.scala new file mode 100644 index 0000000..c18afc0 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/ConvertToInt.scala @@ -0,0 +1,21 @@ +package org.hammerlab.iterator.sorted + +trait VInt { + self: Suite ⇒ + type V = Int + implicit def ord: Ordering[V] = Ordering.Int +} + +trait ConvertToInt + extends VInt { + self: Suite ⇒ + implicit def tv: L ⇒ V + implicit def uv: R ⇒ V +} + +trait IdentityIntConversions + extends ConvertToInt { + self: Suite with IntsTest ⇒ + override implicit val tv: L ⇒ V = identity + override implicit val uv: R ⇒ R = identity +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/IntStringTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/IntStringTest.scala new file mode 100644 index 0000000..dcfc505 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/IntStringTest.scala @@ -0,0 +1,18 @@ +package org.hammerlab.iterator.sorted + +trait IntStringTest + extends WrappedIntString { + self: Suite ⇒ + + override type L = WrappedInt + override type R = String + + override implicit def tv = unwrapWrappedInt + override implicit val uv = strlen _ + + def expected: Seq[Result] + + test("different types") { + check(wrappedInts)(strings)(expected) + } +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/IntsTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/IntsTest.scala new file mode 100644 index 0000000..93e3497 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/IntsTest.scala @@ -0,0 +1,70 @@ +package org.hammerlab.iterator.sorted + +trait IntsTest { + + self: Suite with VInt ⇒ + + type L = Int + type R = Int + + def L(t: L): Result + def R(u: R): Result + + def expected: Map[String, Seq[Result]] + + def test(l: Int*)(r: Int*): Unit = { + def str(n: Seq[Int]): String = + if (n.isEmpty) + "empty" + else + n.mkString(",") + + val name = s"${str(l)} ${str(r)}" + + test(name) { + check(l)(r)(expected(name)) + } + } + + test( + 1, 3, 5 + )( + 2, 4, 6 + ) + + test( + 1, 2, 3 + )( + 4, 5, 6 + ) + + test( + 1, 2, 3 + )( + 1, 2, 3 + ) + + test( + 1, 2, 4, 7, 9 + )( + 1, 3, 5, 6, 7, 8 + ) + + test()() + + test()(1) + + test( + + )( + 1, 10, 100 + ) + + test(1)() + + test( + 1, 10, 100 + )( + + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/StringIntTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/StringIntTest.scala new file mode 100644 index 0000000..874be6a --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/StringIntTest.scala @@ -0,0 +1,18 @@ +package org.hammerlab.iterator.sorted + +trait StringIntTest + extends WrappedIntString { + self: Suite ⇒ + + override type L = String + override type R = WrappedInt + + override implicit def tv = strlen + override implicit val uv = unwrapWrappedInt + + def expected: Seq[Result] + + test("different types") { + check(strings)(wrappedInts)(expected) + } +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/Suite.scala b/src/test/scala/org/hammerlab/iterator/sorted/Suite.scala new file mode 100644 index 0000000..e0206d4 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/Suite.scala @@ -0,0 +1,13 @@ +package org.hammerlab.iterator.sorted + +import org.hammerlab.test + +abstract class Suite + extends test.Suite { + + type L + type R + type Result + + def check(left: Seq[L])(right: Seq[R])(expected: Seq[Result]): Unit +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/WrappedIntString.scala b/src/test/scala/org/hammerlab/iterator/sorted/WrappedIntString.scala new file mode 100644 index 0000000..ba1ed45 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/WrappedIntString.scala @@ -0,0 +1,47 @@ +package org.hammerlab.iterator.sorted + +import scala.collection.immutable.StringOps + +trait WrappedIntString + extends ConvertToInt { + self: Suite ⇒ + + /** + * Workaround [[strlen]] making [[augmentString]] implicit (for accessing + * [[scala.collection.immutable.StringLike.*]]) ambiguous. + */ + implicit class StringMult(val s: String) { + def x(n: Int): String = (s: StringOps) * n + } + + case class WrappedInt(n: Int) + implicit val wrapInt: Int ⇒ WrappedInt = WrappedInt + val unwrapWrappedInt: WrappedInt ⇒ Int = _.n + + val wrappedInts = + Seq[WrappedInt]( + 1, + 2, + 4, + 7, + 10, + 11, + 11, + 15 + ) + + val strings = + Seq( + "", + "a", + "a", + "b" x 2, + "c" x 3, + "e" x 5, + "f" x 6, + "k" x 11, + "n" x 14 + ) + + def strlen(s: String): Int = s.length +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/either/IntStringTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/either/IntStringTest.scala new file mode 100644 index 0000000..41e33fa --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/either/IntStringTest.scala @@ -0,0 +1,28 @@ +package org.hammerlab.iterator.sorted.either + +import org.hammerlab.iterator.sorted + +class IntStringTest + extends Suite + with sorted.IntStringTest { + override def expected: Seq[Either[L, R]] = + Seq( + R(""), + L(1), + R("a"), + R("a"), + L(2), + R("bb"), + R("ccc"), + L(4), + R("eeeee"), + R("ffffff"), + L(7), + L(10), + L(11), + L(11), + R("kkkkkkkkkkk"), + R("nnnnnnnnnnnnnn"), + L(15) + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/either/IntsTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/either/IntsTest.scala new file mode 100644 index 0000000..bb8581e --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/either/IntsTest.scala @@ -0,0 +1,71 @@ +package org.hammerlab.iterator.sorted.either + +import org.hammerlab.iterator.sorted +import org.hammerlab.iterator.sorted.IdentityIntConversions + +class IntsTest + extends Suite + with sorted.IntsTest + with IdentityIntConversions { + + override def expected: Map[String, Seq[Either[Int, Int]]] = + Map( + "1,2,3 4,5,6" → + Seq( + L(1), + L(2), + L(3), + R(4), + R(5), + R(6) + ), + "1,3,5 2,4,6" → + Seq( + L(1), + R(2), + L(3), + R(4), + L(5), + R(6) + ), + "1,2,3 1,2,3" → + Seq( + L(1), + R(1), + L(2), + R(2), + L(3), + R(3) + ), + "1,2,4,7,9 1,3,5,6,7,8" → + Seq( + L(1), + R(1), + L(2), + R(3), + L(4), + R(5), + R(6), + L(7), + R(7), + R(8), + L(9) + ), + "empty empty" → Nil, + "empty 1" → Seq(R(1)), + "empty 1,10,100" → + Seq( + R(1), + R(10), + R(100) + ), + "1 empty" → Seq(L(1)), + "1,10,100 empty" → + Seq( + L(1), + L(10), + L(100) + ) + ) +} + diff --git a/src/test/scala/org/hammerlab/iterator/sorted/either/StringIntTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/either/StringIntTest.scala new file mode 100644 index 0000000..3420aa1 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/either/StringIntTest.scala @@ -0,0 +1,29 @@ +package org.hammerlab.iterator.sorted.either + +import org.hammerlab.iterator.sorted + +class StringIntTest + extends Suite + with sorted.StringIntTest { + + override def expected: Seq[Either[L, R]] = + Seq( + L(""), + L("a"), + L("a"), + R(1), + L("bb"), + R(2), + L("ccc"), + R(4), + L("eeeee"), + L("ffffff"), + R(7), + R(10), + L("kkkkkkkkkkk"), + R(11), + R(11), + L("nnnnnnnnnnnnnn"), + R(15) + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/either/Suite.scala b/src/test/scala/org/hammerlab/iterator/sorted/either/Suite.scala new file mode 100644 index 0000000..7ef4c90 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/either/Suite.scala @@ -0,0 +1,23 @@ +package org.hammerlab.iterator.sorted.either + +import org.hammerlab.iterator.sorted +import org.hammerlab.iterator.sorted.ConvertToInt +import org.hammerlab.iterator.sorted.EitherZipIterator._ + +abstract class Suite + extends sorted.Suite + with ConvertToInt { + + type Result = Either[L, R] + + override def check(left: Seq[L])(right: Seq[R])(expected: Seq[Result]): Unit = { + left + .sortedEitherZip(right) + .toList should be( + expected + ) + } + + def L(l: L): Result = Left(l) + def R(r: R): Result = Right(r) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/or/IntStringTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/or/IntStringTest.scala new file mode 100644 index 0000000..9f6f741 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/or/IntStringTest.scala @@ -0,0 +1,26 @@ +package org.hammerlab.iterator.sorted.or + +import org.hammerlab.iterator.sorted +import org.hammerlab.types.Or + +class IntStringTest + extends Suite + with sorted.IntStringTest { + override def expected: Seq[Or[L, R]] = + Seq( + R(""), + B(1, "a"), + R("a"), + B(2, "bb"), + R("ccc"), + L(4), + R("eeeee"), + R("ffffff"), + L(7), + L(10), + B(11, "kkkkkkkkkkk"), + L(11), + R("nnnnnnnnnnnnnn"), + L(15) + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/or/IntsTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/or/IntsTest.scala new file mode 100644 index 0000000..266361b --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/or/IntsTest.scala @@ -0,0 +1,66 @@ +package org.hammerlab.iterator.sorted.or + +import org.hammerlab.iterator.sorted +import org.hammerlab.iterator.sorted.IdentityIntConversions +import org.hammerlab.types.Or + +class IntsTest + extends Suite + with sorted.IntsTest + with IdentityIntConversions { + + override def expected: Map[String, Seq[Or[Int, Int]]] = + Map( + "1,2,3 4,5,6" → + Seq( + L(1), + L(2), + L(3), + R(4), + R(5), + R(6) + ), + "1,3,5 2,4,6" → + Seq( + L(1), + R(2), + L(3), + R(4), + L(5), + R(6) + ), + "1,2,3 1,2,3" → + Seq( + B(1), + B(2), + B(3) + ), + "1,2,4,7,9 1,3,5,6,7,8" → + Seq( + B(1), + L(2), + R(3), + L(4), + R(5), + R(6), + B(7), + R(8), + L(9) + ), + "empty empty" → Nil, + "empty 1" → Seq(R(1)), + "empty 1,10,100" → + Seq( + R(1), + R(10), + R(100) + ), + "1 empty" → Seq(L(1)), + "1,10,100 empty" → + Seq( + L(1), + L(10), + L(100) + ) + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/or/StringIntTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/or/StringIntTest.scala new file mode 100644 index 0000000..d805821 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/or/StringIntTest.scala @@ -0,0 +1,26 @@ +package org.hammerlab.iterator.sorted.or + +import org.hammerlab.iterator.sorted + +class StringIntTest + extends Suite + with sorted.StringIntTest { + + override def expected: Seq[Result] = + Seq( + L(""), + B("a", 1), + L("a"), + B("bb", 2), + L("ccc"), + R(4), + L("eeeee"), + L("ffffff"), + R(7), + R(10), + B("kkkkkkkkkkk", 11), + R(11), + L("nnnnnnnnnnnnnn"), + R(15) + ) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/or/Suite.scala b/src/test/scala/org/hammerlab/iterator/sorted/or/Suite.scala new file mode 100644 index 0000000..92a0584 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/or/Suite.scala @@ -0,0 +1,29 @@ +package org.hammerlab.iterator.sorted.or + +import org.hammerlab.iterator.sorted +import org.hammerlab.iterator.sorted.ConvertToInt +import org.hammerlab.iterator.sorted.OrZipIterator._ +import org.hammerlab.test.matchers.seqs.SeqMatcher.seqMatch +import org.hammerlab.types.{ Both, LO, Or, RO } + +abstract class Suite + extends sorted.Suite + with ConvertToInt { + + type Result = Or[L, R] + + override def check(left: Seq[L])(right: Seq[R])(expected: Seq[Result]): Unit = { + left + .iterator + .sortedOrZip(right.iterator) + .toList should seqMatch( + expected + ) + } + + def B(l: L)(implicit ev: L =:= R): Result = Both(l, l) + def B(l: L, r: R): Result = Both(l, r) + + def L(l: L): Result = LO(l) + def R(r: R): Result = RO(r) +} diff --git a/src/test/scala/org/hammerlab/iterator/sorted/zip/IntsTest.scala b/src/test/scala/org/hammerlab/iterator/sorted/zip/IntsTest.scala new file mode 100644 index 0000000..59f0564 --- /dev/null +++ b/src/test/scala/org/hammerlab/iterator/sorted/zip/IntsTest.scala @@ -0,0 +1,83 @@ +package org.hammerlab.iterator.sorted.zip + +import org.hammerlab.iterator.sorted +import org.hammerlab.iterator.sorted.VInt +import org.hammerlab.iterator.sorted.ZipIterator._ + +class IntsTest + extends sorted.Suite + with VInt + with sorted.IntsTest { + + override type Result = Int + + override def check(left: Seq[L])(right: Seq[R])(expected: Seq[Int]): Unit = + left + .sortedZip(right) + .toList should be( + expected + ) + + override def L(t: L): Result = t + override def R(u: R): Result = u + + override def expected: Map[String, Seq[Result]] = + Map( + "1,2,3 4,5,6" → + Seq( + L(1), + L(2), + L(3), + R(4), + R(5), + R(6) + ), + "1,3,5 2,4,6" → + Seq( + L(1), + R(2), + L(3), + R(4), + L(5), + R(6) + ), + "1,2,3 1,2,3" → + Seq( + L(1), + R(1), + L(2), + R(2), + L(3), + R(3) + ), + "1,2,4,7,9 1,3,5,6,7,8" → + Seq( + L(1), + R(1), + L(2), + R(3), + L(4), + R(5), + R(6), + L(7), + R(7), + R(8), + L(9) + ), + "empty empty" → Nil, + "empty 1" → Seq(R(1)), + "empty 1,10,100" → + Seq( + R(1), + R(10), + R(100) + ), + "1 empty" → Seq(L(1)), + "1,10,100 empty" → + Seq( + L(1), + L(10), + L(100) + ) + ) +} diff --git a/src/test/scala/org/hammerlab/math/CeilTest.scala b/src/test/scala/org/hammerlab/math/CeilTest.scala deleted file mode 100644 index 3c229cd..0000000 --- a/src/test/scala/org/hammerlab/math/CeilTest.scala +++ /dev/null @@ -1,28 +0,0 @@ -package org.hammerlab.math - -import org.hammerlab.test.Suite -import org.scalactic.TypeCheckedTripleEquals - -class CeilTest - extends Suite - with TypeCheckedTripleEquals { - - test("ints") { - ceil( 0, 20) should ===(0) - ceil(10, 20) should ===(1) - ceil(19, 20) should ===(1) - ceil(20, 20) should ===(1) - ceil(21, 20) should ===(2) - } - - test("longs") { - ceil( 0L, 20L) should ===(0) - ceil(10L, 20L) should ===(1L) - ceil(19L, 20L) should ===(1L) - ceil(20L, 20L) should ===(1L) - ceil(21L, 20L) should ===(2L) - - ceil(1L << 40, 1 << 4) should be(1L << 36) - ceil(1L << 40, 1L << 36) should be(1 << 4) - } -} diff --git a/src/test/scala/org/hammerlab/math/HypergeometricDistributionTest.scala b/src/test/scala/org/hammerlab/math/HypergeometricDistributionTest.scala deleted file mode 100644 index fff6bbb..0000000 --- a/src/test/scala/org/hammerlab/math/HypergeometricDistributionTest.scala +++ /dev/null @@ -1,109 +0,0 @@ -package org.hammerlab.math - -import org.scalactic.Equality -import org.hammerlab.test.Suite - -import org.apache.commons.math3.distribution.{HypergeometricDistribution ⇒ ApacheHyperGeometricDistribution} - -import scala.collection.mutable.ArrayBuffer - -class HypergeometricDistributionTest extends Suite { - - var epsilon = 0.00001 - - implicit val tolerance = - new Equality[Double] { - override def areEqual(a: Double, b: Any): Boolean = - b match { - case d: Double ⇒ a === d +- epsilon - case _ ⇒ false - } - } - - implicit val approxBuffers = - new Equality[ArrayBuffer[Double]] { - override def areEqual(a: ArrayBuffer[Double], b: Any): Boolean = - b match { - case s: ArrayBuffer[Double] ⇒ a.size == s.size && a.zip(s).forall(t ⇒ t._1 === t._2) - case _ ⇒ false - } - } - - def compareToApache(hgd: HypergeometricDistribution): Unit = { - val N = hgd.N.toInt - val K = hgd.K.toInt - val n = hgd.n - - val apache = new ApacheHyperGeometricDistribution(N, K, n) - - hgd.pdf should ===( - ArrayBuffer((0 to n).map(apache.probability): _*) - ) - - hgd.cdf should ===( - ArrayBuffer((0 to n).map(apache.cumulativeProbability): _*) - ) - } - - test("10-4-2") { - val hgd = HypergeometricDistribution(10, 4, 2) - - hgd.pdf should ===( - ArrayBuffer( - 1.0 / 3, - 8.0 / 15, - 2.0 / 15 - ) - ) - - hgd.cdf should ===( - ArrayBuffer( - 1.0 / 3, - 13.0 / 15, - 1 - ) - ) - - List[Double]( - 0, - 1.0 / 3 - epsilon, - 1.0 / 3, - 13.0 / 15 - epsilon, - 13.0 / 15, - 1 - epsilon, - 1 - ).map(hgd.invCDF(_)) should be( - List( - 0, 0, 1, 1, 2, 2, 2 - ) - ) - - compareToApache(hgd) - } - - test("500-100-10") { - val hgd = HypergeometricDistribution(500, 100, 10) - - compareToApache(hgd) - } - - test("5000000000-4000000000-10") { - val hgd = HypergeometricDistribution(5000000000L, 4000000000L, 10) - - hgd.pdf should be( - ArrayBuffer( - 1.0239999631360417E-7, // 0 - 4.0959998894081015E-6, // 1 - 7.372799858073784E-5, // 2 - 7.864319899730114E-4, // 3 - 0.005505023958712356, // 4 - 0.026424115107515793, // 5 - 0.08808038393393967, // 6 - 0.20132659215099705, // 7 - 0.30198988830199097, // 8 - 0.26843545599999896, // 9 - 0.10737418215840859 // 10 - ) - ) - } -} diff --git a/src/test/scala/org/hammerlab/math/MonoidTest.scala b/src/test/scala/org/hammerlab/math/MonoidTest.scala deleted file mode 100644 index 274354a..0000000 --- a/src/test/scala/org/hammerlab/math/MonoidTest.scala +++ /dev/null @@ -1,19 +0,0 @@ -package org.hammerlab.math - -import org.hammerlab.test.Suite -import MonoidSyntax._ -import Monoid.zero - -class MonoidTest - extends Suite { - test("case class") { - case class Foo(a: Int, b: String, c: Long) - - val foo1 = Foo(111, "abc", 123) - val foo2 = Foo(222, "def", 456) - - foo1 |+| foo2 should be(Foo(333, "abcdef", 579)) - foo1 |+| zero[Foo] should be(foo1) - foo2 |+| zero[Foo] should be(foo2) - } -} diff --git a/src/test/scala/org/hammerlab/math/StepsTest.scala b/src/test/scala/org/hammerlab/math/StepsTest.scala deleted file mode 100644 index 3ef6505..0000000 --- a/src/test/scala/org/hammerlab/math/StepsTest.scala +++ /dev/null @@ -1,21 +0,0 @@ -package org.hammerlab.math - -import org.hammerlab.math.Steps._ -import org.hammerlab.test.Suite -import org.hammerlab.test.matchers.seqs.SeqMatcher.seqMatch - -class StepsTest extends Suite { - test("roundNumbers") { - roundNumbers(200).toVector.sorted should seqMatch( - (0 until 20) ++ (20 until 50 by 2) ++ (50 until 100 by 5) ++ (100 to 200 by 10) - ) - } - - test("geometricEvenSteps") { - geometricEvenSteps(1000, 20).toVector.sorted should ===( - Vector( - 0, 1, 2, 3, 4, 5, 6, 9, 14, 21, 31, 46, 68, 99, 146, 215, 316, 464, 681, 999 - ) - ) - } -} diff --git a/src/test/scala/org/hammerlab/math/VarNumTest.scala b/src/test/scala/org/hammerlab/math/VarNumTest.scala deleted file mode 100644 index 65e9f55..0000000 --- a/src/test/scala/org/hammerlab/math/VarNumTest.scala +++ /dev/null @@ -1,133 +0,0 @@ -package org.hammerlab.math - -import java.io.ByteArrayOutputStream - -import com.esotericsoftware.kryo.io.{Input, Output} -import org.hammerlab.test.Suite - -class VarNumTest extends Suite { - - // [0,8] - testBytes(0, List(0)) - testBytes(1, List(1)) - testBytes(2, List(2)) - testBytes(3, List(3)) - testBytes(4, List(4)) - testBytes(5, List(5)) - testBytes(6, List(6)) - testBytes(7, List(7)) - testBytes(8, List(8)) - - // Jump from 1 byte to 2 bytes at 2^6. - testBytes(0x3b, List(0x3b)) - testBytes(0x3f, List(0x3f)) - testBytes(0x40, List(0x80, 0x01)) - testBytes(0x41, List(0x81, 0x01)) - testBytes(0x42, List(0x82, 0x01)) - testBytes(0x43, List(0x83, 0x01)) - testBytes(0x44, List(0x84, 0x01)) - testBytes(0x45, List(0x85, 0x01)) - - // Jump from 2 bytes to 3 bytes at 2^13. - testBytes(0x1fff, List(0xbf, 0x7f)) - testBytes(0x2000, List(0x80, 0x80, 0x01)) - testBytes(0x2001, List(0x81, 0x80, 0x01)) - testBytes(0x2002, List(0x82, 0x80, 0x01)) - testBytes(0x2003, List(0x83, 0x80, 0x01)) - testBytes(0x2004, List(0x84, 0x80, 0x01)) - - // 3 bytes to 4 bytes at 2^20. - testBytes( 0xfffff, List(0xbf, 0xff, 0x7f)) - testBytes(0x100000, List(0x80, 0x80, 0x80, 0x01)) - testBytes(0x100001, List(0x81, 0x80, 0x80, 0x01)) - testBytes(0x100002, List(0x82, 0x80, 0x80, 0x01)) - testBytes(0x100003, List(0x83, 0x80, 0x80, 0x01)) - testBytes(0x100004, List(0x84, 0x80, 0x80, 0x01)) - - // 4 bytes to 5 bytes at 2^27. - testBytes(0x7ffffff, List(0xbf, 0xff, 0xff, 0x7f)) - testBytes(0x8000000, List(0x80, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x8000001, List(0x81, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x8000002, List(0x82, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x8000003, List(0x83, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x8000004, List(0x84, 0x80, 0x80, 0x80, 0x01)) - - // 5 bytes to 6 bytes at 2^34. - testBytes(0x3ffffffffL, List(0xbf, 0xff, 0xff, 0xff, 0x7f)) - testBytes(0x400000000L, List(0x80, 0x80, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x400000001L, List(0x81, 0x80, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x400000002L, List(0x82, 0x80, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x400000003L, List(0x83, 0x80, 0x80, 0x80, 0x80, 0x01)) - testBytes(0x400000004L, List(0x84, 0x80, 0x80, 0x80, 0x80, 0x01)) - - // Largest Long. - testBytes(0x7fffffffffffffffL, List(0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff)) - - // Every 4th Fibonacci number from 21 to 2^63. - testBytes( 21L, List(0x15)) - testBytes( 144L, List(0x90, 0x02)) - testBytes( 987L, List(0x9b, 0x0f)) - testBytes( 6765L, List(0xad, 0x69)) - testBytes( 46368L, List(0xa0, 0xd4, 0x05)) - testBytes( 317811L, List(0xb3, 0xe5, 0x26)) - testBytes( 2178309L, List(0x85, 0xf4, 0x89, 0x02)) - testBytes( 14930352L, List(0xb0, 0xc6, 0x9e, 0x0e)) - testBytes( 102334155L, List(0x8b, 0xfb, 0xcb, 0x61)) - testBytes( 701408733L, List(0x9d, 0x97, 0xf5, 0x9c, 0x05)) - testBytes( 4807526976L, List(0x80, 0xa9, 0xe8, 0xe8, 0x23)) - testBytes( 32951280099L, List(0xa3, 0x87, 0xe5, 0xc0, 0xf5, 0x01)) - testBytes( 225851433717L, List(0xb5, 0x8b, 0xdb, 0xdc, 0x92, 0x0d)) - testBytes( 1548008755920L, List(0x90, 0xcb, 0x98, 0xc8, 0x8d, 0x5a)) - testBytes( 10610209857723L, List(0xbb, 0x82, 0xd1, 0x9c, 0xcc, 0xe9, 0x04)) - testBytes( 72723460248141L, List(0x8d, 0xc9, 0x9e, 0x80, 0x88, 0x89, 0x21)) - testBytes( 498454011879264L, List(0xa0, 0xfd, 0x84, 0xe5, 0xeb, 0xd5, 0xe2, 0x01)) - testBytes( 3416454622906707L, List(0x93, 0xa5, 0x84, 0xc3, 0xea, 0xcf, 0x91, 0x0c)) - testBytes( 23416728348467685L, List(0xa5, 0x87, 0x99, 0xf0, 0xfd, 0xd8, 0x98, 0x53)) - testBytes( 160500643816367088L, List(0xb0, 0x8f, 0xab, 0xce, 0x86, 0x9f, 0x9b, 0xba, 0x04)) - testBytes(1100087778366101931L, List(0xab, 0xe6, 0x94, 0xb4, 0xb0, 0x80, 0xa6, 0xc4, 0x1e)) - testBytes(7540113804746346429L, List(0xbd, 0xbe, 0xe6, 0x9e, 0xcc, 0xe3, 0xee, 0xa3, 0xd1)) - - /** - * Generate test cases that verify that: - * - * - `l` is written to bytes matching `expected` bytes, - * - those `expected` bytes are read back in to a [[Long]] equal to `l`, - * - and then do the same for the additive inverse of `l` with a list of `expected` bytes where the sign bit (0x40 - * in the first byte) is flipped. - */ - def testBytes(l: Long, expected: List[Int]): Unit = { - - test(s"0x${l.toHexString} ($l)") { - run(l, expected) - } - - if (l > 0) { - val negExpected = { - val head = expected.head - if ((head & 0x40) > 0) { - throw new IllegalArgumentException(s"Expected bytes [${expected.mkString(",")}] already have sign bit set negative") - } - (head | 0x40) :: expected.tail - } - - test(s"-0x${l.toHexString} (-$l)") { - run(-l, negExpected) - } - } - } - - def run(l: Long, expected: List[Int]): Unit = { - val baos = new ByteArrayOutputStream() - - val op = new Output(baos) - VarNum.write(op, l) - op.close() - - val bytes = baos.toByteArray - bytes should be(expected.toArray.map(_.toByte)) - - val ip = new Input(bytes) - VarNum.read(ip) should be(l) - ip.close() - } -} diff --git a/src/test/scala/org/hammerlab/stats/StatsHistTest.scala b/src/test/scala/org/hammerlab/stats/StatsHistTest.scala deleted file mode 100644 index 2acda94..0000000 --- a/src/test/scala/org/hammerlab/stats/StatsHistTest.scala +++ /dev/null @@ -1,184 +0,0 @@ -package org.hammerlab.stats - -import org.hammerlab.test.Suite -import spire.implicits._ -import spire.math.Integral - -import scala.util.Random - -/** - * Tests of the [[Stats.fromHist]] API for constructing [[Stats]] instances from "histograms" of elements that each come - * with an associated repetition count, which allows the total number of elements represented to be much larger - * ([[Long]] vs. [[Int]]). - */ -class StatsHistTest extends Suite { - - Random.setSeed(123L) - - def check[V: Integral](input: Seq[(Int, V)], lines: String*): Unit = { - Stats.fromHist(input).toString should be(lines.mkString("\n")) - } - - def check[V: Integral](input: Seq[(Int, V)], numToSample: Int, lines: String*): Unit = { - Stats.fromHist(input, numToSample).toString should be(lines.mkString("\n")) - } - - def check[V: Integral](input: Seq[(Int, V)], numToSample: Int, onlySampleSorted: Boolean, lines: String*): Unit = { - Stats.fromHist(input, numToSample, onlySampleSorted).toString should be(lines.mkString("\n")) - } - - test("empty") { - check( - List[(Int, Int)](), - "(empty)" - ) - } - - test("single") { - check( - List(0 → 1), - "num: 1, mean: 0, stddev: 0, mad: 0", - "elems: 0", - "50: 0" - ) - } - - test("double") { - check( - List(0 → 2), - "num: 2, mean: 0, stddev: 0, mad: 0", - "elems: 0×2", - "50: 0" - ) - } - - test("two singles") { - check( - List(0 → 1, 1 → 1), - "num: 2, mean: 0.5, stddev: 0.5, mad: 0.5", - "elems: 0, 1", - "50: 0.5" - ) - } - - test("three singles") { - check( - List(0 → 1, 5 → 1, 1 → 1), - "num: 3, mean: 2, stddev: 2.2, mad: 1", - "elems: 0, 5, 1", - "sorted: 0, 1, 5", - "50: 1" - ) - } - - test("single double") { - check( - List(0 → 1, 1 → 2), - "num: 3, mean: 0.7, stddev: 0.5, mad: 0", - "elems: 0, 1×2", - "50: 1" - ) - } - - test("1×5 2×4") { - check( - List(1 → 5, 2 → 4), - "num: 9, mean: 1.4, stddev: 0.5, mad: 0", - "elems: 1×5, 2×4", - "25: 1", - "50: 1", - "75: 2" - ) - } - - test("0×5 1×5") { - check( - List(0 → 5, 1 → 5), - "num: 10, mean: 0.5, stddev: 0.5, mad: 0.5", - "elems: 0×5, 1×5", - "25: 0", - "50: 0.5", - "75: 1" - ) - } - - test("0×4 1×6") { - check( - List(0 → 4, 1 → 6), - "num: 10, mean: 0.6, stddev: 0.5, mad: 0", - "elems: 0×4, 1×6", - "25: 0", - "50: 1", - "75: 1" - ) - } - - test("x(x) 1 to 10") { - check( - (1 to 10).map(i ⇒ i → i), - "num: 55, mean: 7, stddev: 2.4, mad: 2", - "elems: 1, 2×2, 3×3, 4×4, 5×5, 6×6, 7×7, 8×8, 9×9, 10×10", - "5: 2.7", - "10: 3.4", - "25: 5", - "50: 7", - "75: 9", - "90: 10", - "95: 10" - ) - } - - test("singletons") { - check( - (0 to 10).map(i ⇒ i → 1), - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("re-encode") { - check( - List(0 → 1, 0 → 1, 10 → 3, 10 → 4, 3 → 5, 0 → 2, 3 → 1), - "num: 17, mean: 5.2, stddev: 4.2, mad: 3", - "elems: 0×2, 10×7, 3×5, 0×2, 3", - "sorted: 0×4, 3×6, 10×7", - "10: 0", - "25: 3", - "50: 3", - "75: 10", - "90: 10" - ) - } - - test("large hist") { - check( - List[(Int, Long)]( - 1 → 10000000000L, - 2 → 1000000000, - 1 → 100, - 2 → 1000000000 - ), - "num: 12000000100, mean: 1.2, stddev: 0.4, mad: 0", - "elems: 1×10000000000, 2×1000000000, 1×100, 2×1000000000", - "sorted: 1×10000000100, 2×2000000000", - "0.01: 1", - "0.1: 1", - "1: 1", - "5: 1", - "10: 1", - "25: 1", - "50: 1", - "75: 1", - "90: 2", - "95: 2", - "99: 2", - "99.9: 2", - "99.99: 2" - ) - } -} diff --git a/src/test/scala/org/hammerlab/stats/StatsTest.scala b/src/test/scala/org/hammerlab/stats/StatsTest.scala deleted file mode 100644 index 82a47be..0000000 --- a/src/test/scala/org/hammerlab/stats/StatsTest.scala +++ /dev/null @@ -1,362 +0,0 @@ -package org.hammerlab.stats - -import org.hammerlab.test.Suite -import spire.implicits._ -import spire.math.Integral - -import scala.util.Random -import scala.util.Random.shuffle - -/** - * Test constructing [[Stats]] instances. - */ -class StatsTest extends Suite { - - Random.setSeed(123L) - - def check(input: Seq[Int], lines: String*): Unit = { - Stats(input).toString should be(lines.mkString("\n")) - } - - def check(input: Seq[Int], numToSample: Int, lines: String*): Unit = { - Stats(input, numToSample).toString should be(lines.mkString("\n")) - } - - def check(input: Seq[Int], numToSample: Int, onlySampleSorted: Boolean, lines: String*): Unit = { - Stats(input, numToSample, onlySampleSorted).toString should be(lines.mkString("\n")) - } - - test("empty") { - check( - Nil, - "(empty)" - ) - } - - test("0 to 0") { - check( - 0 to 0, - "num: 1, mean: 0, stddev: 0, mad: 0", - "elems: 0" - ) - } - - test("0 to 1") { - check( - 0 to 1, - "num: 2, mean: 0.5, stddev: 0.5, mad: 0.5", - "elems: 0, 1" - ) - } - - test("1 to 0") { - check( - 1 to 0 by -1, - "num: 2, mean: 0.5, stddev: 0.5, mad: 0.5", - "elems: 1, 0", - "sorted: 0, 1" - ) - } - - test("0 to 2") { - check( - 0 to 2, - "num: 3, mean: 1, stddev: 0.8, mad: 1", - "elems: 0, 1, 2", - "50: 1" - ) - } - - test("2 to 0") { - check( - 2 to 0 by -1, - "num: 3, mean: 1, stddev: 0.8, mad: 1", - "elems: 2, 1, 0", - "sorted: 0, 1, 2", - "50: 1" - ) - } - - test("0 to 3") { - check( - 0 to 3, - "num: 4, mean: 1.5, stddev: 1.1, mad: 1", - "elems: 0, 1, 2, 3", - "50: 1.5" - ) - } - - test("3 to 0") { - check( - 3 to 0 by -1, - "num: 4, mean: 1.5, stddev: 1.1, mad: 1", - "elems: 3, 2, 1, 0", - "sorted: 0, 1, 2, 3", - "50: 1.5" - ) - } - - test("0 to 4") { - check( - 0 to 4, - "num: 5, mean: 2, stddev: 1.4, mad: 1", - "elems: 0, 1, 2, 3, 4", - "25: 1", - "50: 2", - "75: 3" - ) - } - - test("4 to 0") { - check( - 4 to 0 by -1, - "num: 5, mean: 2, stddev: 1.4, mad: 1", - "elems: 4, 3, 2, 1, 0", - "sorted: 0, 1, 2, 3, 4", - "25: 1", - "50: 2", - "75: 3" - ) - } - - test("0 to 10") { - check( - 0 to 10, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("10 to 0") { - check( - 10 to 0 by -1, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0", - "sorted: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - val shuffled0to10 = shuffle(0 to 10).toArray - - test("0 to 10 sample 5") { - check( - shuffled0to10, - numToSample = 5, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 9, 3, 7, 1, 6, …, 4, 8, 2, 0, 10", - "sorted: 0, 1, 2, 3, 4, …, 6, 7, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 10 sample 4") { - check( - shuffled0to10, - numToSample = 4, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 9, 3, 7, 1, …, 8, 2, 0, 10", - "sorted: 0, 1, 2, 3, …, 7, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 10 sample 3") { - check( - shuffled0to10, - numToSample = 3, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 9, 3, 7, …, 2, 0, 10", - "sorted: 0, 1, 2, …, 8, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 10 sample 2") { - check( - shuffled0to10, - numToSample = 2, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 9, 3, …, 0, 10", - "sorted: 0, 1, …, 9, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 10 sample 1") { - check( - shuffled0to10, - numToSample = 1, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "elems: 9, …, 10", - "sorted: 0, …, 10", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 10 sample 0") { - check( - shuffled0to10, - numToSample = 0, - "num: 11, mean: 5, stddev: 3.2, mad: 3", - "10: 1", - "25: 2.5", - "50: 5", - "75: 7.5", - "90: 9" - ) - } - - test("0 to 100") { - check( - 0 to 100, - "num: 101, mean: 50, stddev: 29.2, mad: 25", - "elems: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, …, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100", - "1: 1", - "5: 5", - "10: 10", - "25: 25", - "50: 50", - "75: 75", - "90: 90", - "95: 95", - "99: 99" - ) - } - - test("100 to 0") { - check( - 100 to 0 by -1, - "num: 101, mean: 50, stddev: 29.2, mad: 25", - "elems: 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, …, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0", - "sorted: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, …, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100", - "1: 1", - "5: 5", - "10: 10", - "25: 25", - "50: 50", - "75: 75", - "90: 90", - "95: 95", - "99: 99" - ) - } - - val shuffledDigits = (0 until 100).map(_ ⇒ Random.nextInt(10)) - - test("100 digits") { - check( - shuffledDigits, - "num: 100, mean: 4.3, stddev: 2.9, mad: 2", - "elems: 9, 6, 2, 5, 7, 9, 0, 5, 4, 6, …, 1, 9, 0×2, 8, 0, 7×2, 0, 6, 2, 4", - "sorted: 0×15, 1×7, 2×9, 3×10, 4×10, 5×11, 6×11, 7×9, 8×9, 9×9", - "5: 0", - "10: 0", - "25: 2", - "50: 4", - "75: 7", - "90: 8", - "95: 9" - - ) - } - - test("100 digits sample 4") { - check( - shuffledDigits, - numToSample = 4, - "num: 100, mean: 4.3, stddev: 2.9, mad: 2", - "elems: 9, 6, 2, 5, …, 0, 6, 2, 4", - "sorted: 0×15, 1×7, 2×9, 3×10, …, 6×11, 7×9, 8×9, 9×9", - "5: 0", - "10: 0", - "25: 2", - "50: 4", - "75: 7", - "90: 8", - "95: 9" - ) - } - - test("100 digits sample 4 only sample sorted") { - check( - shuffledDigits, - numToSample = 4, - onlySampleSorted = true, - "num: 100, mean: 4.3, stddev: 2.9, mad: 2", - "sorted: 0×15, 1×7, 2×9, 3×10, …, 6×11, 7×9, 8×9, 9×9", - "5: 0", - "10: 0", - "25: 2", - "50: 4", - "75: 7", - "90: 8", - "95: 9" - ) - } - - val sortedShuffledDigits = shuffledDigits.sorted - - test("100 sorted digits") { - check( - sortedShuffledDigits, - "num: 100, mean: 4.3, stddev: 2.9, mad: 2", - "elems: 0×15, 1×7, 2×9, 3×10, 4×10, 5×11, 6×11, 7×9, 8×9, 9×9", - "5: 0", - "10: 0", - "25: 2", - "50: 4", - "75: 7", - "90: 8", - "95: 9" - ) - } - - test("100 sorted digits only sample sorted overridden") { - check( - sortedShuffledDigits, - numToSample = 4, - onlySampleSorted = true, - "num: 100, mean: 4.3, stddev: 2.9, mad: 2", - "elems: 0×15, 1×7, 2×9, 3×10, …, 6×11, 7×9, 8×9, 9×9", - "5: 0", - "10: 0", - "25: 2", - "50: 4", - "75: 7", - "90: 8", - "95: 9" - ) - } -} - -