Skip to content
Permalink
Browse files
Create common KllHelper to contain common methods between float and
double helpers.
  • Loading branch information
leerho committed Feb 9, 2022
1 parent 72bd075 commit 1dc9a74a6099aeb6cf60b401f24c32104849997e
Showing 11 changed files with 230 additions and 302 deletions.
@@ -20,7 +20,7 @@
package org.apache.datasketches;

/**
* Common static methods for quantiles sketches
* Common static methods for classic quantiles and KLL sketches
*/
public class QuantilesHelper {

@@ -29,7 +29,7 @@ public class QuantilesHelper {
* An array of {1,1,1,0} becomes {0,1,2,3}
* @param array of weights where first element is zero
* @return total weight
*/
*/ //also used by KLL
public static long convertToPrecedingCummulative(final long[] array) {
long subtotal = 0;
for (int i = 0; i < array.length; i++) {
@@ -46,7 +46,7 @@ public static long convertToPrecedingCummulative(final long[] array) {
* @param phi the fractional position where: 0 &le; &#966; &le; 1.0.
* @param n the size of the stream
* @return the index, a value between 0 and n-1.
*/
*/ //also used by KLL
public static long posOfPhi(final double phi, final long n) {
final long pos = (long) Math.floor(phi * n);
return pos == n ? n - 1 : pos; //avoids ArrayIndexOutOfBoundException
@@ -57,7 +57,7 @@ public static long posOfPhi(final double phi, final long n) {
* @param wtArr the cumulative weights array consisting of chunks
* @param pos the position
* @return the index of the chunk containing the position
*/
*/ //also used by KLL
public static int chunkContainingPos(final long[] wtArr, final long pos) {
final int nominalLength = wtArr.length - 1; /* remember, wtArr contains an "extra" position */
assert nominalLength > 0;
@@ -771,7 +771,7 @@ public static double pwrLawNextDouble(final int ppo, final double curPoint,
return next;
}

//Checks
//Checks that throw

/**
* Check the requested offset and length against the allocated size.
@@ -821,6 +821,8 @@ public static void checkProbability(final double p, final String argName) {
+ "\" must be between 0.0 inclusive and 1.0 inclusive: " + p);
}

//Boolean Checks

/**
* Unsigned compare with longs.
* @param n1 A long to be treated as if unsigned.
@@ -831,6 +833,23 @@ public static boolean isLessThanUnsigned(final long n1, final long n2) {
return n1 < n2 ^ n1 < 0 != n2 < 0;
}

/**
 * Tests whether the lowest-order bit of the given value is clear.
 * @param n the value to test; negative values are handled by the same bit test
 * @return true if n is even, false if n is odd.
 */
public static boolean isEven(final long n) {
  final long lowBit = n & 1L;
  return lowBit == 0L;
}

/**
 * Tests whether the lowest-order bit of the given value is set.
 * @param n the value to test; negative values are handled by the same bit test
 * @return true if n is odd, false if n is even.
 */
public static boolean isOdd(final long n) {
  // (n & 1L) can only be 0 or 1, so "not zero" is the same as "equals one".
  final long lowBit = n & 1L;
  return lowBit != 0L;
}
//Resources

/**
@@ -19,7 +19,8 @@

package org.apache.datasketches.kll;

import static org.apache.datasketches.Util.floorPowerOf2;
import static org.apache.datasketches.Util.isEven;
import static org.apache.datasketches.Util.isOdd;

import java.util.Arrays;
import java.util.Random;
@@ -33,14 +34,6 @@
*/
public class KllDoublesHelper {

/**
 * Tests whether the lowest-order bit of the given int is clear.
 * @param value the int to test
 * @return true if value is even, false if it is odd.
 */
static boolean isEven(final int value) {
  final int lowBit = value & 1;
  return lowBit == 0;
}

/**
 * Tests whether the lowest-order bit of the given int is set.
 * @param value the int to test
 * @return true if value is odd, false if it is even.
 */
static boolean isOdd(final int value) {
  // (value & 1) can only be 0 or 1, so "not zero" is the same as "equals one".
  final int lowBit = value & 1;
  return lowBit != 0;
}

/**
* Checks the sequential validity of the given array of double values.
* They must be unique, monotonically increasing and not NaN.
@@ -58,116 +51,7 @@ public static void validateDoubleValues(final double[] values) {
}
}

/**
 * Allocates an int array of the requested length and copies the entire old array
 * into its low-order positions. The remaining upper positions keep the default value of zero.
 * @param oldArr the source array holding the existing data
 * @param newLen the length of the array to allocate; asserted to be greater than oldArr.length.
 * @return the newly allocated array
 */
static int[] growIntArray(final int[] oldArr, final int newLen) {
  assert newLen > oldArr.length;
  final int[] grown = new int[newLen];
  System.arraycopy(oldArr, 0, grown, 0, oldArr.length);
  return grown;
}

/**
 * Returns the upper bound of the number of levels based on <i>n</i>.
 *
 * <p>The result is one plus the number of trailing zero bits of {@code floorPowerOf2(n)}.
 * Assuming {@code floorPowerOf2(n)} yields the largest power of 2 that is &le; <i>n</i>
 * (it is defined outside this class; confirm there), this equals 1 + floor( log_2(n) ),
 * not floor( log_2(n) ) alone.</p>
 * @param n the length of the stream
 * @return 1 + the base-2 exponent of floorPowerOf2(n)
 */
static int ubOnNumLevels(final long n) {
  final long pwr2 = floorPowerOf2(n);
  // For a power of two, the trailing-zero count is its base-2 exponent.
  return 1 + Long.numberOfTrailingZeros(pwr2);
}

/**
 * Returns the total item capacity of the sketch, computed as the sum of the capacities
 * of every level from height 0 up to height numLevels - 1.
 * @param k The sizing / accuracy parameter of the sketch in items.
 * Note: this method actually works for k values up to k = 2^29 and 61 levels,
 * however only k values up to (2^16 - 1) are currently used by the sketch.
 * @param m the size of the smallest level in items.
 * @param numLevels the upper bound number of levels based on <i>n</i> items.
 * @return the total item capacity of the sketch. The sum is accumulated as a long and
 * narrowed to an int on return.
 */
static int computeTotalCapacity(final int k, final int m, final int numLevels) {
  long capacitySum = 0L;
  int height = 0;
  while (height < numLevels) {
    capacitySum += levelCapacity(k, numLevels, height, m);
    height++;
  }
  return (int) capacitySum;
}

/**
 * Returns the capacity of a specific level, which is never less than minWidth.
 * @param k the accuracy parameter of the sketch. Maximum is 2^29.
 * @param numLevels the number of current levels in the sketch. Maximum is 61.
 * @param height the zero-based index of a level with respect to the smallest level.
 * This varies from 0 to 60.
 * @param minWidth the minimum level width. Default is 8.
 * @return the capacity of a specific level
 */
static int levelCapacity(final int k, final int numLevels, final int height, final int minWidth) {
  assert k <= (1 << 29);
  assert numLevels >= 1;
  assert numLevels <= 61;
  assert height >= 0;
  assert height < numLevels;
  // Depth counts downward from the top level: the top level has depth 0.
  final long computed = intCapAux(k, numLevels - height - 1);
  return (computed < minWidth) ? minWidth : (int) computed;
}

/**
 * Computes the actual capacity of a given level given its depth index.
 * Depths of 30 or less are computed in a single step. For larger depths the computation is
 * folded into two steps: the result for the first half of the depth is fed in as the "k" of
 * a second step covering the remaining depth. This allows accurate results up to a depth of 60;
 * without folding, the internal calculations would exceed the capacity of a long.
 * @param k the configured k of the sketch
 * @param depth the zero-based index of the level being computed.
 * @return the actual capacity of a given level given its depth index.
 */
private static long intCapAux(final int k, final int depth) {
  if (depth > 30) {
    final int firstFold = depth / 2;
    final long folded = intCapAuxAux(k, firstFold);
    return intCapAuxAux(folded, depth - firstFold);
  }
  return intCapAuxAux(k, depth);
}

/**
 * Performs the integer based calculation of an individual level (or folded level):
 * k * 2^depth / 3^depth, rounded to nearest using integer arithmetic only.
 * @param k the configured k of the sketch, or the intermediate result of a prior fold
 * @param depth the zero-based index of the level being computed.
 * @return the actual capacity of a given level given its depth index.
 */
private static long intCapAuxAux(final long k, final int depth) {
  // Carry one extra factor of 2 through the division so the quotient can be rounded afterwards.
  final long doubledNumerator = (k << 1) << depth;
  final long doubledQuotient = doubledNumerator / powersOfThree[depth];
  // Adding 1 and halving rounds the true quotient to the nearest integer.
  final long capacity = (doubledQuotient + 1L) >>> 1;
  assert capacity <= k;
  return capacity;
}

/**
 * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index.
 * The table holds 31 entries. Entries from 3^20 onward exceed the range of an int,
 * which is why they carry the L suffix and the array is typed as long[].
 * It is indexed by depth in intCapAuxAux, so a depth above 30 would be out of bounds.
 */
private static final long[] powersOfThree =
new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441,
1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467,
3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L,
847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L,
205891132094649L};

/**
 * Sums the weights of all retained items, where each item at level i has weight 2^i.
 * The number of items at level i is levels[i + 1] - levels[i].
 * @param num_levels the number of levels to include in the sum
 * @param levels the level boundary offsets; entries 0 through num_levels are read.
 * @return the total weight of all items in the first num_levels levels
 */
static long sumTheSampleWeights(final int num_levels, final int[] levels) {
  long weightSum = 0L;
  long levelWeight = 1L;
  int lvl = 0;
  while (lvl < num_levels) {
    final int itemsAtLevel = levels[lvl + 1] - levels[lvl];
    weightSum += levelWeight * itemsAtLevel;
    levelWeight <<= 1; // each level up doubles the per-item weight
    lvl++;
  }
  return weightSum;
}

static void mergeSortedArrays(
static void mergeSortedDoubleArrays(
final double[] bufA, final int startA, final int lenA,
final double[] bufB, final int startB, final int lenB,
final double[] bufC, final int startC) {
@@ -230,7 +114,7 @@ static void mergeSortedArrays(
* @param isLevelZeroSorted true if this.level 0 is sorted
* @return int array of: {numLevels, targetItemCount, currentItemCount}
*/
static int[] generalCompress(
static int[] generalDoublesCompress(
final int k,
final int m,
final int numLevelsIn,
@@ -243,7 +127,7 @@ static int[] generalCompress(
assert numLevelsIn > 0; // things are too weird if zero levels are allowed
int numLevels = numLevelsIn;
int currentItemCount = inLevels[numLevels] - inLevels[0]; // decreases with each compaction
int targetItemCount = computeTotalCapacity(k, m, numLevels); // increases if we add levels
int targetItemCount = KllHelper.computeTotalCapacity(k, m, numLevels); // increases if we add levels
boolean doneYet = false;
outLevels[0] = 0;
int curLevel = -1;
@@ -260,7 +144,7 @@ static int[] generalCompress(
final int rawLim = inLevels[curLevel + 1];
final int rawPop = rawLim - rawBeg;

if ((currentItemCount < targetItemCount) || (rawPop < levelCapacity(k, numLevels, curLevel, m))) {
if ((currentItemCount < targetItemCount) || (rawPop < KllHelper.levelCapacity(k, numLevels, curLevel, m))) {
// copy level over as is
// because inBuf and outBuf could be the same, make sure we are not moving data upwards!
assert (rawBeg >= outLevels[curLevel]);
@@ -290,10 +174,10 @@ static int[] generalCompress(
}

if (popAbove == 0) { // Level above is empty, so halve up
randomlyHalveUp(inBuf, adjBeg, adjPop, random);
randomlyHalveUpDoubles(inBuf, adjBeg, adjPop, random);
} else { // Level above is nonempty, so halve down, then merge up
randomlyHalveDown(inBuf, adjBeg, adjPop, random);
mergeSortedArrays(inBuf, adjBeg, halfAdjPop, inBuf, rawLim, popAbove, inBuf, adjBeg + halfAdjPop);
randomlyHalveDownDoubles(inBuf, adjBeg, adjPop, random);
mergeSortedDoubleArrays(inBuf, adjBeg, halfAdjPop, inBuf, rawLim, popAbove, inBuf, adjBeg + halfAdjPop);
}

// track the fact that we just eliminated some data
@@ -306,7 +190,7 @@ static int[] generalCompress(
// This creates some more capacity (the size of the new bottom level)
if (curLevel == (numLevels - 1)) {
numLevels++;
targetItemCount += levelCapacity(k, numLevels, 0, m);
targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m);
}

} // end of code for compacting a level
@@ -323,7 +207,7 @@ static int[] generalCompress(
}

//This must be modified for validation
static void randomlyHalveDown(final double[] buf, final int start, final int length, final Random random) {
static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) {
assert isEven(length);
final int half_length = length / 2;
final int offset = random.nextInt(2); // disable for validation
@@ -336,7 +220,7 @@ static void randomlyHalveDown(final double[] buf, final int start, final int len
}

//This must be modified for validation
static void randomlyHalveUp(final double[] buf, final int start, final int length, final Random random) {
static void randomlyHalveUpDoubles(final double[] buf, final int start, final int length, final Random random) {
assert isEven(length);
final int half_length = length / 2;
final int offset = random.nextInt(2); // disable for validation

0 comments on commit 1dc9a74

Please sign in to comment.