Skip to content

Commit

Permalink
Actually, itemCount Was The Issue
Browse files Browse the repository at this point in the history
  • Loading branch information
James McClain authored and echeipesh committed Mar 29, 2018
1 parent bbe607b commit 133b045
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 42 deletions.
Expand Up @@ -74,7 +74,11 @@ abstract trait Histogram[@specialized (Int, Double) T <: AnyVal] extends Seriali
/**
* Return a sequence of tuples pairing each bin label value with its associated count.
*/
def binCounts(): Seq[(T, Long)]
// Default implementation: walk the labels returned by values() and pair
// each one with whatever itemCount returns for it, keeping values() order.
def binCounts(): Seq[(T, Long)] =
  values().map { label => (label, itemCount(label)) }

/**
* Return an array containing the values seen by this histogram.
Expand Down
Expand Up @@ -24,15 +24,6 @@ import math.{abs, round, sqrt}
*/
abstract class MutableIntHistogram extends MutableHistogram[Int] with IntHistogram {

/**
 * Pair every bin label of this histogram with its count.
 *
 * @return one (label, count) tuple per label returned by values(), in the
 *         same order, where count is the result of itemCount for that label
 */
def binCounts(): Seq[(Int, Long)] =
  for (label <- values()) yield (label, itemCount(label))

/**
* Note the occurrence of 'item'.
*
Expand Down
Expand Up @@ -285,31 +285,20 @@ class StreamingHistogram(
val Bucket(item2, count) = buckets().head
count / exp(abs(7 * (item2 - item))).toInt
}
else if (_buckets.containsKey(item)) {
_buckets.get(item)
}
else {
val lo = _buckets.lowerEntry(item)
val hi = _buckets.higherEntry(item)

val raw = {
if (lo == null && hi == null) {
// If we somehow landed right on a bucket value,
// the interpolated value is the count of that bucket.
if(_buckets.containsKey(item)) _buckets.get(item)
else 0
} else if (lo == null) {
val x = item / hi.getKey
x * hi.getValue
}
else if (hi == null) {
val x = (lo.getKey - item) / lo.getKey
(1 - x) * lo.getValue
}
else {
val x = (item - lo.getKey) / (hi.getKey - lo.getKey)
x * (hi.getValue - lo.getValue) + lo.getValue
}
if (lo == null) hi.getValue
else if (hi == null) lo.getValue
else {
val x = ((item-lo.getKey) / (hi.getKey-lo.getKey))
val result = ((1.0-x)*lo.getValue) + (x*hi.getValue)
result.toLong
}
if (areaUnderCurve != 0) ((raw / areaUnderCurve) * totalCount).toInt
else 0
}
}

Expand All @@ -331,16 +320,6 @@ class StreamingHistogram(
def values(): Array[Double] = buckets.map(_.label).toArray
def rawValues(): Array[Double] = values()

// Report each entry of _buckets as a (key, value) tuple, in the order the
// entry set's toArray yields them.
def binCounts(): Seq[(Double, Long)] =
  _buckets.entrySet.toArray.map {
    // toArray hands back untyped objects, so each element has to be
    // narrowed back to a map entry before its key and value can be read.
    case entry: java.util.Map.Entry[Double, Long] =>
      (entry.getKey, entry.getValue)
    // Anything that is not a map entry is unexpected here.
    case _ =>
      throw new Exception
  }

/**
* Execute the given function on each bucket. The value contained
* by the bucket is a Double, and the count is an integer (ergo the
Expand Down
Expand Up @@ -282,11 +282,17 @@ class StreamingHistogramSpec extends FunSpec with Matchers {
default.statistics should not be (custom.statistics)
}

describe("Bin Count") {
it("should report non-zero bin counts") {
describe("Counting") {
it("binCounts should report non-zero bin counts") {
val tile = DoubleArrayTile(Array[Double](52, 54, 61, 32, 52, 50, 11, 21, 18), 3, 3)
val result = tile.histogramDouble(3)
result.binCounts.map({ pair => pair._2 > 0.0 }) should be (Array(true, true, true))
}

it("itemCount should report non-zero values when appropriate") {
val tile = DoubleArrayTile(Array[Double](52, 54, 61, 32, 52, 50, 11, 21, 18), 3, 3)
val result = tile.histogramDouble(3)
result.itemCount(16.7) should be > 0L
}
}
}

0 comments on commit 133b045

Please sign in to comment.