Skip to content

Commit

Permalink
Fixed Histogram: halved memory footprint, test boundary conditions, d…
Browse files Browse the repository at this point in the history
…ropped unnecessary binary search.

These changes preserve/correct behavior of the current Histogram.

src/java/voldemort/store/stats/Histogram.java
- Halved memory footprint by dropping unnecessary "bounds" array
- Dropped unnecessary binary search, making insert O(1) rather than O(log(nBuckets))
- Improved documentation
- Made interface consistent for type of values inserted/got from histogram (i.e., all are 'long')

test/unit/voldemort/store/stats/HistogramTest.java
- Added tests for boundary conditions: -ive values are dropped on
  insert, "too large" values are bucketed in the final bucket on
  insert.

Minor fixes to RequestCounter calls to histogram to conform to 'long' interfaces.
  • Loading branch information
jayjwylie committed Jan 15, 2013
1 parent 1ac1b32 commit 90aabc3
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 59 deletions.
86 changes: 31 additions & 55 deletions src/java/voldemort/store/stats/Histogram.java
Expand Up @@ -7,10 +7,15 @@
import voldemort.annotations.concurrency.Threadsafe;

/**
* A class for computing percentiles based on a histogram. Values are bucketed
* by a configurable bound (e.g., 0-1, 1-2, 2-3). When a value is inserted,
* perform a binary search to find the correct bucket.
* A class for computing percentiles based on a simple histogram.
*
* The histogram starts at 0 and then has uniformly sized buckets. The number of
* buckets and width of each bucket is specified upon construction. Each bucket
* in the histogram "counts" the number of values inserted into the histogram
* that fall into the bucket's range.
*
* All interfaces for adding data to the histogram or querying the histogram for
* quantiles are synchronized to make this object threadsafe.
*
*/
@Threadsafe
Expand All @@ -19,7 +24,8 @@ public class Histogram {
private final int nBuckets;
private final int step;
private final int[] buckets;
private final int[] bounds;
private final long upperBound;

private int size;
private long sum;
private static final Logger logger = Logger.getLogger(Histogram.class);
Expand All @@ -43,21 +49,13 @@ public Histogram(int nBuckets, int step, long resetIntervalMs) {
* Initialize an empty histogram
*
* @param nBuckets The number of buckets to use
* @param step The size of each bucket
* @param step The size (width) of each bucket
*/
public Histogram(int nBuckets, int step) {
this.nBuckets = nBuckets;
this.step = step;
this.upperBound = step * nBuckets;
this.buckets = new int[nBuckets];
this.bounds = new int[nBuckets];
init();
}

protected void init() {
int bound = 0;
for(int i = 0; i < nBuckets; i++, bound += step) {
bounds[i] = bound;
}
reset();
}

Expand All @@ -73,37 +71,48 @@ public synchronized void reset() {

/**
* Insert a value into the right bucket of the histogram. If the value is
* larger than any bound, insert into the last bucket
* larger than any bound, insert into the last bucket. If the value is less
* than zero, then ignore it.
*
* @param data The value to insert into the histogram
*/
public synchronized void insert(long data) {
resetIfNeeded();
int index = findBucket(data);
if(index == -1) {
logger.error(data + " can't be bucketed, is invalid!");
long index = 0;
if(data >= this.upperBound) {
index = nBuckets - 1;
} else if(data < 0) {
logger.error(data + " can't be bucketed because it is negative!");
return;
} else {
index = data / step;
}
if(index >= nBuckets) {
// This is dead code. Defend against code changes in future.
logger.error(data + " can't be bucketed because index overflowed number of buckets!");
return;
}
buckets[index]++;
buckets[(int) index]++;
sum += data;
size++;
}

/**
* Find the a value <em>n</em> such that the percentile falls within [
* <em>n</em>, <em>n + step</em>)
* <em>n</em>, <em>n + step</em>). This method does a <em>LINEAR</em> prove
* of the histogram. I.e., this method is O(nBuckets).
*
* @param quantile The percentile to find
* @return Lower bound associated with the percentile
*/
public synchronized int getQuantile(double quantile) {
public synchronized long getQuantile(double quantile) {
resetIfNeeded();
int total = 0;
for(int i = 0; i < nBuckets; i++) {
total += buckets[i];
double currQuantile = ((double) total) / ((double) size);
if(currQuantile >= quantile) {
return bounds[i];
return i * step;
}
}
return 0;
Expand All @@ -123,39 +132,6 @@ public synchronized double getAverage() {
return (sum * 1.0) / size;
}

private int findBucket(long needle) {
long max = step * nBuckets;
if(needle > max) {
return nBuckets - 1;
}
int low = 0;
int high = nBuckets - 1;
while(low <= high) {
int mid = (low + high) / 2;
int cmp = compareToBucket(mid, needle);
if(cmp == 0) {
return mid;
} else if(cmp > 0) {
high = mid - 1;
} else if(cmp < 0) {
low = mid + 1;
}
}
return -1;
}

private int compareToBucket(int bucket, long needle) {
int low = bounds[bucket];
int high = low + step;
if(low <= needle && high > needle) {
return 0;
} else if(low > needle) {
return 1;
} else {
return -1;
}
}

private void resetIfNeeded() {
if(resetIntervalMs > -1) {
if((System.currentTimeMillis() - lastResetTimeMs) >= this.resetIntervalMs) {
Expand Down
8 changes: 4 additions & 4 deletions src/java/voldemort/store/stats/RequestCounter.java
Expand Up @@ -17,8 +17,8 @@ public class RequestCounter {
private final int durationMS;
private final Time time;
private final Histogram histogram;
private volatile int q95LatencyMs;
private volatile int q99LatencyMs;
private volatile long q95LatencyMs;
private volatile long q99LatencyMs;
private boolean useHistogram;

/**
Expand Down Expand Up @@ -233,11 +233,11 @@ public long getGetAllMaxCount() {
return getValidAccumulator().getAllMaxCount;
}

public int getQ95LatencyMs() {
public long getQ95LatencyMs() {
return q95LatencyMs;
}

public int getQ99LatencyMs() {
public long getQ99LatencyMs() {
return q99LatencyMs;
}

Expand Down
23 changes: 23 additions & 0 deletions test/unit/voldemort/store/stats/HistogramTest.java
Expand Up @@ -68,4 +68,27 @@ public void testResetHistogram() {
assertEquals(0, resetingHistogram.getQuantile(0.99));
assertEquals(0.0, resetingHistogram.getAverage(), 0.0);
}

@Test
public void testUpperBoundaryCondition() {
Histogram h = new Histogram(100, 1);
h.insert(98);
h.insert(99);
h.insert(100); // Should bucket with 99
h.insert(101); // Should bucket with 99

assertEquals(h.getQuantile(0.24), 98);
assertEquals(h.getQuantile(0.26), 99);
}

@Test
public void testLowerBoundaryCondition() {
Histogram h = new Histogram(100, 1);
h.insert(-1); // Should not be bucketed
h.insert(0);
h.insert(1);

assertEquals(h.getQuantile(0.49), 0);
assertEquals(h.getQuantile(0.51), 1);
}
}

0 comments on commit 90aabc3

Please sign in to comment.