Skip to content
Permalink
Browse files
This first commit prepares the current float-based classes.
There is no change in logic, just some renaming of classes to be type-specific,
plus a few cosmetic code comment changes.

The KllFloatsValidationTest does work when it is properly enabled, both in that test
and in the KllFloatsHelper.
  • Loading branch information
leerho committed Feb 4, 2022
1 parent 3ef6678 commit 3883ca796e16d8d785db808662835c5c2b39b4a1
Showing 5 changed files with 52 additions and 43 deletions.
@@ -29,7 +29,7 @@
* @author Kevin Lang
* @author Alexander Saydakov
*/
class KllHelper {
class KllFloatsHelper {

static boolean isEven(final int value) {
return (value & 1) == 0;
@@ -303,34 +303,40 @@ static int[] generalCompress(
return new int[] {numLevels, targetItemCount, currentItemCount};
}

//This must be modified for validation
/**
 * Halves the even-length range {@code buf[start .. start + length)} in place by keeping every
 * other item and packing the survivors into the lower half of the range.
 * An offset of 0 or 1, drawn from {@code random}, selects which of each pair of neighbors
 * survives. The upper half of the range is not written by this method.
 *
 * @param buf the array holding the items
 * @param start index of the first item of the range
 * @param length number of items in the range; asserted to be even
 * @param random source of the offset
 */
static void randomlyHalveDown(final float[] buf, final int start, final int length, final Random random) {
assert isEven(length);
final int half_length = length / 2;
final int offset = random.nextInt(2);
//final int offset = deterministicOffset(); // for validation
final int offset = random.nextInt(2); // disable for validation
//final int offset = deterministicOffset(); // enable for validation
// j walks the surviving source items (stride 2); i walks the destination slots (stride 1).
int j = start + offset;
for (int i = start; i < (start + half_length); i++) {
buf[i] = buf[j];
j += 2;
}
}

//This must be modified for validation
/**
 * Halves the even-length range {@code buf[start .. start + length)} in place by keeping every
 * other item and packing the survivors into the upper half of the range.
 * An offset of 0 or 1, drawn from {@code random}, selects which of each pair of neighbors
 * survives. The lower half of the range is not written by this method.
 *
 * @param buf the array holding the items
 * @param start index of the first item of the range
 * @param length number of items in the range; asserted to be even
 * @param random source of the offset
 */
static void randomlyHalveUp(final float[] buf, final int start, final int length, final Random random) {
assert isEven(length);
final int half_length = length / 2;
final int offset = random.nextInt(2);
//final int offset = deterministicOffset(); // for validation
final int offset = random.nextInt(2); // disable for validation
//final int offset = deterministicOffset(); // enable for validation
// Both indices move downward from the end of the range: j over the surviving source items
// (stride 2), i over the destination slots (stride 1).
int j = (start + length) - 1 - offset;
for (int i = (start + length) - 1; i >= (start + half_length); i--) {
buf[i] = buf[j];
j -= 2;
}
}

// Enable the following to use KllValidationTest
/*
* The following must be enabled for use with the KllFloatsValidationTest,
* which is only enabled for manual testing. In addition, the two methods
* above need to be modified as commented.
*/

// static int nextOffset = 0;

//
// private static int deterministicOffset() {
// final int result = nextOffset;
// nextOffset = 1 - nextOffset;
@@ -329,7 +329,7 @@ private KllFloatsSketch(final Memory mem) {
}
levels_ = new int[numLevels_ + 1];
int offset = isSingleItem ? DATA_START_SINGLE_ITEM : DATA_START;
final int capacity = KllHelper.computeTotalCapacity(k_, m_, numLevels_);
final int capacity = KllFloatsHelper.computeTotalCapacity(k_, m_, numLevels_);
if (isSingleItem) {
levels_[0] = capacity - 1;
} else {
@@ -531,8 +531,8 @@ public int getNumRetained() {
* @return upper bound on the serialized size
*/
public static int getMaxSerializedSizeBytes(final int k, final long n) {
// The level count comes from ubOnNumLevels(n); the item count is the total capacity for
// that many levels using k and DEFAULT_M. Both are handed to getSerializedSizeBytes.
final int numLevels = KllHelper.ubOnNumLevels(n);
final int maxNumItems = KllHelper.computeTotalCapacity(k, DEFAULT_M, numLevels);
final int numLevels = KllFloatsHelper.ubOnNumLevels(n);
final int maxNumItems = KllFloatsHelper.computeTotalCapacity(k, DEFAULT_M, numLevels);
return getSerializedSizeBytes(numLevels, maxNumItems);
}

@@ -845,7 +845,7 @@ public String toString(final boolean withLevels, final boolean withData) {
.append(" level, offset: nominal capacity, actual size").append(Util.LS);
for (int i = 0; i < numLevels_; i++) {
sb.append(" ").append(i).append(", ").append(levels_[i]).append(": ")
.append(KllHelper.levelCapacity(k_, numLevels_, i, m_))
.append(KllFloatsHelper.levelCapacity(k_, numLevels_, i, m_))
.append(", ").append(safeLevelSize(i)).append(Util.LS);
}
sb.append("### End sketch levels").append(Util.LS);
@@ -1006,7 +1006,7 @@ private void compressWhileUpdating() {
// +2 is OK because we already added a new top level if necessary
final int popAbove = levels_[level + 2] - rawLim;
final int rawPop = rawLim - rawBeg;
final boolean oddPop = KllHelper.isOdd(rawPop);
final boolean oddPop = KllFloatsHelper.isOdd(rawPop);
final int adjBeg = oddPop ? rawBeg + 1 : rawBeg;
final int adjPop = oddPop ? rawPop - 1 : rawPop;
final int halfAdjPop = adjPop / 2;
@@ -1016,10 +1016,10 @@ private void compressWhileUpdating() {
Arrays.sort(items_, adjBeg, adjBeg + adjPop);
}
if (popAbove == 0) {
KllHelper.randomlyHalveUp(items_, adjBeg, adjPop, random);
KllFloatsHelper.randomlyHalveUp(items_, adjBeg, adjPop, random);
} else {
KllHelper.randomlyHalveDown(items_, adjBeg, adjPop, random);
KllHelper.mergeSortedArrays(
KllFloatsHelper.randomlyHalveDown(items_, adjBeg, adjPop, random);
KllFloatsHelper.mergeSortedArrays(
items_, adjBeg, halfAdjPop,
items_, rawLim, popAbove,
items_, adjBeg + halfAdjPop);
@@ -1055,7 +1055,7 @@ private int findLevelToCompact() { //
while (true) {
assert level < numLevels_;
final int pop = levels_[level + 1] - levels_[level];
final int cap = KllHelper.levelCapacity(k_, numLevels_, level, m_);
final int cap = KllFloatsHelper.levelCapacity(k_, numLevels_, level, m_);
if (pop >= cap) {
return level;
}
@@ -1072,10 +1072,10 @@ private void addEmptyTopLevelToCompletelyFullSketch() {

// note that merging MIGHT over-grow levels_, in which case we might not have to grow it here
if (levels_.length < numLevels_ + 2) {
levels_ = KllHelper.growIntArray(levels_, numLevels_ + 2);
levels_ = KllFloatsHelper.growIntArray(levels_, numLevels_ + 2);
}

final int deltaCap = KllHelper.levelCapacity(k_, numLevels_ + 1, 0, m_);
final int deltaCap = KllFloatsHelper.levelCapacity(k_, numLevels_ + 1, 0, m_);
final int newTotalCap = curTotalCap + deltaCap;

final float[] newBuf = new float[newTotalCap];
@@ -1105,7 +1105,7 @@ private void sortLevelZero() {
private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) {
final int tmpSpaceNeeded = getNumRetained() + other.getNumRetainedAboveLevelZero();
final float[] workbuf = new float[tmpSpaceNeeded];
final int ub = KllHelper.ubOnNumLevels(finalN);
final int ub = KllFloatsHelper.ubOnNumLevels(finalN);
final int[] worklevels = new int[ub + 2]; // ub+1 does not work
final int[] outlevels = new int[ub + 2];

@@ -1114,7 +1114,7 @@ private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) {
populateWorkArrays(other, workbuf, worklevels, provisionalNumLevels);

// notice that workbuf is being used as both the input and output here
final int[] result = KllHelper.generalCompress(k_, m_, provisionalNumLevels, workbuf,
final int[] result = KllFloatsHelper.generalCompress(k_, m_, provisionalNumLevels, workbuf,
worklevels, workbuf, outlevels, isLevelZeroSorted_, random);
final int finalNumLevels = result[0];
final int finalCapacity = result[1];
@@ -1159,7 +1159,7 @@ private void populateWorkArrays(final KllFloatsSketch other, final float[] workb
} else if (selfPop == 0 && otherPop > 0) {
System.arraycopy(other.items_, other.levels_[lvl], workbuf, worklevels[lvl], otherPop);
} else if (selfPop > 0 && otherPop > 0) {
KllHelper.mergeSortedArrays(items_, levels_[lvl], selfPop, other.items_,
KllFloatsHelper.mergeSortedArrays(items_, levels_[lvl], selfPop, other.items_,
other.levels_[lvl], otherPop, workbuf, worklevels[lvl]);
}
}
@@ -1176,7 +1176,7 @@ private int getNumRetainedAboveLevelZero() {
}

/**
 * Asserts that the value returned by sumTheSampleWeights(numLevels_, levels_) equals n_.
 * The check uses a Java {@code assert}, so it only runs when assertions are enabled.
 */
private void assertCorrectTotalWeight() {
final long total = KllHelper.sumTheSampleWeights(numLevels_, levels_);
final long total = KllFloatsHelper.sumTheSampleWeights(numLevels_, levels_);
assert total == n_;
}

@@ -389,21 +389,21 @@ public void getMaxSerializedSizeBytes() {

/** Checks the boundary case n = 0: ubOnNumLevels(0) is expected to be 1. */
@Test
public void checkUbOnNumLevels() {
assertEquals(KllHelper.ubOnNumLevels(0), 1);
assertEquals(KllFloatsHelper.ubOnNumLevels(0), 1);
}

/**
 * Checks levelCapacity for k = 10, 61 levels and m = 8 at the two extreme levels:
 * level 0 is expected to have capacity 8 and level 60 capacity 10.
 */
@Test
public void checkIntCapAux() {
int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8);
int lvlCap = KllFloatsHelper.levelCapacity(10, 61, 0, 8);
assertEquals(lvlCap, 8);
lvlCap = KllHelper.levelCapacity(10, 61, 60, 8);
lvlCap = KllFloatsHelper.levelCapacity(10, 61, 60, 8);
assertEquals(lvlCap, 10);
}

/**
 * Checks computeTotalCapacity for k = 2^29, m = 8 and 61 levels against the
 * expected value 1,610,612,846.
 */
@Test
public void checkSuperLargeKandLevels() {
//This is beyond what the sketch can be configured for.
final int size = KllHelper.computeTotalCapacity(1 << 29, 8, 61);
final int size = KllFloatsHelper.computeTotalCapacity(1 << 29, 8, 61);
assertEquals(size, 1_610_612_846);
}

@@ -20,6 +20,7 @@
package org.apache.datasketches.kll;

import org.testng.Assert;
import org.testng.annotations.Test;

/* A test record contains:
0. testIndex
@@ -31,12 +32,13 @@
6. hash of the retained samples
*/

// These results are for the version that delays the rollup until the next value comes in.
// The @Test annotations have to be enabled to use this class and a section in KllHelper also
// These results are for the version that delays the roll up until the next value comes in.
// The @Test annotations have to be enabled to use this class and a section in KllFloatsHelper also
// needs to be enabled.
@SuppressWarnings("javadoc")
public class KllValidationTest {
@SuppressWarnings({ "javadoc", "unused" })
public class KllFloatsValidationTest {

//Used only with manual running of checkTestResults(..)
private static final long[] correctResultsWithReset = {
0, 200, 180, 3246533, 1, 180, 1098352976109474698L,
1, 200, 198, 8349603, 1, 198, 686681527497651888L,
@@ -155,7 +157,7 @@ public class KllValidationTest {
};

private static int[] makeInputArray(int n, int stride) {
assert KllHelper.isOdd(stride);
assert KllFloatsHelper.isOdd(stride);
int mask = (1 << 23) - 1; // because library items are single-precision floats
int cur = 0;
int[] arr = new int[n];
@@ -167,6 +169,12 @@ private static int[] makeInputArray(int n, int stride) {
return arr;
}

//@Test //only enabled to test the above makeInputArray(..)
// Compares makeInputArray(6, 3654721) (n = 6, stride = 3654721) with six hard-coded values.
public void testMakeInputArray() {
final int[] array = { 3654721, 7309442, 2575555, 6230276, 1496389, 5151110 };
Assert.assertEquals(makeInputArray(6, 3654721), array);
}

private static long simpleHashOfSubArray(final float[] arr, final int start, final int subLength) {
final long multiplier = 738219921; // an arbitrary odd 30-bit number
final long mask60 = (1L << 60) - 1;
@@ -180,28 +188,23 @@ private static long simpleHashOfSubArray(final float[] arr, final int start, fin
return accum;
}

//@Test //need to enable
//@Test //only enabled to test the above simpleHashOfSubArray(..)
// Hashes the 5-item sub-array starting at index 1 and compares it with a hard-coded value.
public void testHash() {
float[] array = { 907500, 944104, 807020, 219921, 678370, 955217, 426885 };
Assert.assertEquals(simpleHashOfSubArray(array, 1, 5), 1141543353991880193L);
}

//@Test //need to enable
// Compares makeInputArray(6, 3654721) (n = 6, stride = 3654721) with six hard-coded values.
public void testMakeInputArray() {
final int[] array = { 3654721, 7309442, 2575555, 6230276, 1496389, 5151110 };
Assert.assertEquals(makeInputArray(6, 3654721), array);
}

/*
* Please note that this test should be run with a modified version of KllHelper
* Please note that this test should be run with a modified version of KllFloatsHelper
* that chooses directions alternately instead of randomly.
* See the instructions at the bottom of that class.
*/

//@Test
//@Test //NEED TO ENABLE
public void checkTestResults() {
int numTests = correctResultsWithReset.length / 7;
for (int testI = 0; testI < numTests; testI++) {
//KllHelper.nextOffset = 0; //need to enable
//KllFloatsHelper.nextOffset = 0; //NEED TO ENABLE
assert (int) correctResultsWithReset[7 * testI] == testI;
int k = (int) correctResultsWithReset[(7 * testI) + 1];
int n = (int) correctResultsWithReset[(7 * testI) + 2];
@@ -218,7 +221,7 @@ public void checkTestResults() {
System.out.print(testI);
assert correctResultsWithReset[(7 * testI) + 4] == numLevels;
assert correctResultsWithReset[(7 * testI) + 5] == numSamples;
//assert correctResults[7 * testI + 6] == hashedSamples;
assert correctResultsWithReset[7 * testI + 6] == hashedSamples;
if (correctResultsWithReset[(7 * testI) + 6] == hashedSamples) {
System.out.println(" pass");
} else {
@@ -27,7 +27,7 @@
* @author Lee Rhodes
*/
@SuppressWarnings("javadoc")
public class MiscTest {
public class MiscFloatsTest {

@Test
public void checkGetKFromEps() {

0 comments on commit 3883ca7

Please sign in to comment.