Skip to content

Commit

Permalink
many new iterators
Browse files Browse the repository at this point in the history
  • Loading branch information
ryan-williams committed Jun 19, 2017
1 parent d7158ea commit ae1a760
Show file tree
Hide file tree
Showing 52 changed files with 1,496 additions and 136 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ scala> import org.hammerlab.stats.Stats
scala> :paste
Stats.fromHist(
List[(Int, Long)](
1 -> 10000000000L,
2 -> 1000000000,
1 -> 100,
2 -> 1000000000
1 10000000000L,
2 1000000000,
1 100,
2 1000000000
)
)

Expand Down
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ addScala212

deps ++= Seq(
libs.value('commons_math),
libs.value('kryo),
kryo.value,
"com.chuusai" %% "shapeless" % "2.3.2",
libs.value('spire)
)
2 changes: 1 addition & 1 deletion project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.7.7-SNAPSHOT")
addSbtPlugin("org.hammerlab" % "sbt-parent" % "2.0.0-SNAPSHOT")
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.hammerlab.iterator

case class BufferedTakeWhileIterator[T](it: BufferedIterator[T]) {
def takewhile(fn: T Boolean): BufferedIterator[T] =
new SimpleBufferedIterator[T] {
override protected def _advance: Option[T] =
if (it.hasNext && fn(it.head))
Some(it.next)
else
None
}
}

object BufferedTakeWhileIterator {
implicit def makeBufferedTakeWhileIterator[T](it: BufferedIterator[T]): BufferedTakeWhileIterator[T] =
BufferedTakeWhileIterator(it)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package org.hammerlab.iterator

import org.hammerlab.iterator.CappedCostGroupsIterator.{ ElementTooCostlyException, ElementTooCostlyStrategy }

case class CappedCostGroupsIterator[T](it: BufferedIterator[T]) {

def cappedCostGroup[N: Numeric](withCosts: BufferedIterator[(T, N)], costFn: T N, limit: N)(
implicit elementTooCostlyStrategy: ElementTooCostlyStrategy
): Iterator[T] = {

import org.hammerlab.iterator.CappedCostGroupsIterator.ElementTooCostlyStrategy._

new SimpleBufferedIterator[T] {
val numeric = implicitly[Numeric[N]]
import numeric._
var cost = numeric.zero
var idx = 0
override protected def _advance: Option[T] =
if (gt(cost, limit))
None
else
withCosts
.headOption
.flatMap {
case (elem, curCost)
def take: Some[T] = {
cost += curCost
idx += 1
withCosts.next
Some(elem)
}
if (lteq(cost + curCost, limit)) {
take
} else if (idx == 0)
elementTooCostlyStrategy match {
case Discard
withCosts.next
_advance
case EmitAlone
take
case Throw
throw ElementTooCostlyException(
elem,
limit,
curCost
)
}
else
None
}
}
}

def cappedCostGroups[N: Numeric](costFn: T N, limit: N)(
implicit elementTooCostlyStrategy: ElementTooCostlyStrategy
): Iterator[Iterator[T]] = {
val withCosts = it.map(elem elem costFn(elem)).buffered
new SimpleBufferedIterator[Iterator[T]] {
override protected def _advance: Option[Iterator[T]] = {
val group = cappedCostGroup(withCosts, costFn, limit)
if (group.isEmpty)
None
else
Some(group)
}
}
}
}

object CappedCostGroupsIterator extends Serializable {

sealed trait ElementTooCostlyStrategy

case class ElementTooCostlyException[T, N: Numeric](elem: T, limit: N, cost: N)
extends Exception(
s"Element $elem's cost $cost exceeds limit $limit"
)

object ElementTooCostlyStrategy {
// Throw away elements that are too costly and start groups over on the other side of such elements
implicit case object Discard extends ElementTooCostlyStrategy

// Emit elements that exceed the cost limit in groups by themselves
implicit case object EmitAlone extends ElementTooCostlyStrategy

// Throw an exception when encountering elements that exceed the per-group cost limit
implicit case object Throw extends ElementTooCostlyStrategy
}

implicit def makeRunningCostSumIterator[T](it: Iterator[T]): CappedCostGroupsIterator[T] =
CappedCostGroupsIterator(it.buffered)
}
5 changes: 5 additions & 0 deletions src/main/scala/org/hammerlab/iterator/Closeable.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.hammerlab.iterator

trait Closeable {
def close(): Unit
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ case class CountIteratorByKey[K, V](it: Iterator[(K, V)]) {
for {
(k, _) it
} {
counts.update(k, counts.getOrElse(k, 0))
counts.update(k, counts.getOrElse(k, 0) + 1)
}
counts.toMap
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ case class CountIteratorElems[T](it: Iterator[T]) {
for {
elem it
} {
counts.update(elem, counts.getOrElse(elem, 0))
counts.update(elem, counts.getOrElse(elem, 0) + 1)
}
counts.toMap
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/org/hammerlab/iterator/DropRightIterator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package org.hammerlab.iterator

import scala.collection.mutable

class DropRightIterator[T](it: Iterator[T]) {
case class DropRightIterator[T](it: Iterator[T]) {
def dropRight(n: Int): Iterator[T] =
if (n == 0)
it
Expand Down Expand Up @@ -30,5 +30,5 @@ class DropRightIterator[T](it: Iterator[T]) {
}

object DropRightIterator {
implicit def make[T](it: Iterator[T]): DropRightIterator[T] = new DropRightIterator(it)
implicit def makeDropRightIterator[T](it: Iterator[T]): DropRightIterator[T] = DropRightIterator(it)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.hammerlab.iterator

case class ExpandLastElementIterator[T](it: Iterator[T]) {
def expandLastElement(fn: T Iterator[T]): Iterator[T] = {
val main =
new SimpleBufferedIterator[T] {
var lastOpt: Option[T] = None

override protected def _advance: Option[T] =
if (it.hasNext) {
lastOpt = Some(it.next)
lastOpt
} else
None
}

main ++
new Iterator[T] {
lazy val rest =
main
.lastOpt
.map(fn)
.getOrElse(Iterator())

override def hasNext: Boolean = rest.hasNext
override def next(): T = rest.next
}
}
}

object ExpandLastElementIterator {
implicit def makeExpandLastElementIterator[T](it: Iterator[T]): ExpandLastElementIterator[T] = ExpandLastElementIterator(it)
}
21 changes: 21 additions & 0 deletions src/main/scala/org/hammerlab/iterator/FinishingIterator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.hammerlab.iterator

case class FinishingIterator[T](it: Iterator[T]) {
def finish(fn: Unit): Iterator[T] =
new SimpleBufferedIterator[T] {
override protected def _advance: Option[T] =
if (it.hasNext)
Some(it.next)
else
None

override protected def done(): Unit = {
fn
}
}
}

object FinishingIterator {
implicit def makeFinallyIterator[T](it: Iterator[T]): FinishingIterator[T] =
FinishingIterator(it)
}
69 changes: 69 additions & 0 deletions src/main/scala/org/hammerlab/iterator/FlatteningIterator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.hammerlab.iterator

/**
* "Flatmap" over an [[Iterator]] of [[Iterator]]s, exposing the inner [[Iterator]] along the way (as its true type)
*
* @param it [[Iterator]] of [[Iterator]]s
* @tparam V "value" type of resulting [[Iterator]]
* @tparam T type of inner [[Iterator]]
*/
case class FlatteningIterator[V, T](it: BufferedIterator[T])(implicit ev: <:<[T, Iterator[V]])
extends SimpleBufferedIterator[V] {
private var _cur: Option[T] = null

def _advanceIterator(): Option[T] = {
it
.nextOption
.flatMap(
nextIt
if (nextIt.hasNext)
Some(nextIt)
else
_advanceIterator()
)
}

def hasNextIterator: Boolean = {
if (_cur == null) {
_cur = _advanceIterator()
}
_cur.nonEmpty
}


override protected def postNext(): Unit = {
super.postNext()
if (_cur.exists(!_.hasNext)) {
_cur = null
}
}

def cur: Option[T] = {
if (hasNextIterator)
_cur
else
None
}

def reset(): Unit = {
_cur = null
clear()
}

override protected def _advance: Option[V] =
cur
.map {
_.next()
}
}

case class FlattenableIterator[A](it: A) {
// Synonym for "flatten", which the standard library is already squatting on
def smush[V, T](implicit ev: <:<[A, Iterator[T]], ev2: <:<[T, Iterator[V]]): FlatteningIterator[V, T] =
FlatteningIterator[V, T](it.buffered)
}

object FlatteningIterator {
implicit def makeFlatmapIterator[A](it: A): FlattenableIterator[A] =
FlattenableIterator(it)
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ object GroupRunsIterator {
implicit class GroupRunsIterator[T](val it: BufferedIterator[T])
extends AnyVal {

def groupBy(pred: T => Boolean): Iterator[Iterator[T]] =
def groupBy(pred: T Boolean): Iterator[Iterator[T]] =
new Iterator[Iterator[T]] {

override def hasNext: Boolean = it.hasNext
Expand Down
35 changes: 35 additions & 0 deletions src/main/scala/org/hammerlab/iterator/GroupWithIterator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.hammerlab.iterator

import BufferedTakeWhileIterator._

/**
* Group one sorted iterator with another, emitting an iterator of the latter's elements for each of the former's
*/
case class GroupWithIterator[T](it: BufferedIterator[T]) {
def groupWith[U, V: Ordering](other: Iterator[U])(
implicit
tv: T V,
uv: U V
): Iterator[(T, Iterator[U])] = {
val o = other.buffered
val = implicitly[Ordering[V]].gteq _
for {
t it
nextV = it.headOption.map(tv)
} yield
t
o
.takewhile(
u
// If the next bound exists, only take [[U]]s that are less than it
!nextV.exists(
≥(uv(u), _)
)
)
}
}

object GroupWithIterator {
implicit def makeGroupByIterator[T](it: Iterator[T]): GroupWithIterator[T] =
GroupWithIterator(it.buffered)
}
19 changes: 0 additions & 19 deletions src/main/scala/org/hammerlab/iterator/HeadOptionIterator.scala

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class RunLengthIterator[K](val it: BufferedIterator[K]) {
def runLengthEncode(implicit ord: Ordering[K]): Iterator[(K, Int)] =
runLengthEncode(ord.equiv(_, _))

def runLengthEncode(cmpFn: (K, K) => Boolean = (_ == _)): Iterator[(K, Int)] =
def runLengthEncode(cmpFn: (K, K) Boolean = (_ == _)): Iterator[(K, Int)] =
new Iterator[(K, Int)] {
override def hasNext: Boolean = it.hasNext

Expand All @@ -33,6 +33,7 @@ object RunLengthIterator {

implicit def make[K](it: Iterator[K]): RunLengthIterator[K] = new RunLengthIterator(it.buffered)

def reencode[K, V: Integral](it: Iterator[(K, V)]): Iterator[(K, V)] = reencode(it.buffered)
def reencode[K, V: Integral](it: BufferedIterator[(K, V)]): Iterator[(K, V)] =
new Iterator[(K, V)] {
override def hasNext: Boolean = it.hasNext
Expand Down

0 comments on commit ae1a760

Please sign in to comment.