From 45b36816ea3fa779e3c67ba53c698b1c98c50ec4 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 28 Feb 2022 17:00:41 -0800 Subject: [PATCH 01/31] Interim 1 --- .../datasketches/kll/BaseKllSketch.java | 94 +--- .../datasketches/kll/KllDoublesHelper.java | 1 + .../datasketches/kll/KllDoublesSketch.java | 138 +++--- .../datasketches/kll/KllFloatsHelper.java | 1 + .../datasketches/kll/KllFloatsSketch.java | 132 +++--- .../apache/datasketches/kll/KllHelper.java | 94 +++- .../apache/datasketches/kll/PreambleUtil.java | 409 ++++++++++++++++++ .../datasketches/quantiles/PreambleUtil.java | 18 +- .../kll/KllDoublesSketchTest.java | 42 +- .../datasketches/kll/KllFloatsSketchTest.java | 31 +- .../datasketches/kll/MiscDoublesTest.java | 32 +- .../datasketches/kll/MiscFloatsTest.java | 38 +- 12 files changed, 783 insertions(+), 247 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/PreambleUtil.java diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java index be2335f79..4a019b5d3 100644 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java @@ -27,6 +27,8 @@ import static java.lang.Math.min; import static java.lang.Math.pow; import static java.lang.Math.round; +import static org.apache.datasketches.kll.PreambleUtil.MAX_K; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K; import java.util.Random; @@ -34,89 +36,12 @@ abstract class BaseKllSketch { - /* Serialized float sketch layout, more than one item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 ||---------------------------------N_LONG---------------------------------------| - * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<--------------data----------------| unused |numLevels|-------min 
K-----------| - * - * - * - * Serialized float sketch layout, Empty and Single Item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 || |-------------------data-------------------| - */ - - /* Serialized double sketch layout, more than one item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 ||---------------------------------N_LONG---------------------------------------| - * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<-------------unused------------------------|numLevels|-------min K-----------| - * || | 24 | - * 3 ||<---------------------------------data----------------------------------------| - * - * Serialized double sketch layout, Empty and Single Item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || | 8 | - * 1 ||----------------------------------data----------------------------------------| - */ - - /** - * The default value of K. 
- */ - public static final int DEFAULT_K = 200; - static final int DEFAULT_M = 8; - static final int MIN_K = DEFAULT_M; - static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short - - // Preamble byte addresses - static final int PREAMBLE_INTS_BYTE = 0; - static final int SER_VER_BYTE = 1; - static final int FAMILY_BYTE = 2; - static final int FLAGS_BYTE = 3; - static final int K_SHORT = 4; // to 5 - static final int M_BYTE = 6; - // 7 is reserved for future use - // SINGLE ITEM ONLY - static final int DATA_START_SINGLE_ITEM = 8; - - // MULTI-ITEM - static final int N_LONG = 8; // to 15 - static final int MIN_K_SHORT = 16; // to 17 - static final int NUM_LEVELS_BYTE = 18; - - // FLOAT SKETCH 19 is reserved for future use in float sketch - static final int DATA_START_FLOAT = 20; // float sketch, not single item - - // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch - static final int DATA_START_DOUBLE = 24; // double sketch, not single item - - // Other static values - static final byte SERIAL_VERSION = 1; - static final byte SERIAL_VERSION_SINGLE = 2; // only used to specify the single-item format - static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty and single item - static final int PREAMBLE_INTS_FLOAT = 5; // not empty or single item - static final int PREAMBLE_INTS_DOUBLE = 6; // not empty or single item - - enum Flags { IS_EMPTY, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM } - /* * Data is stored in items_. * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence levels_ must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed, except for level 0. - * Level 0 is filled from the top down. + * Hence, levels_ must contain (numLevels_ + 1) indices. + * The valid portion of items_ is completely packed, except for level 0, + * which is filled from the top down. 
* * Invariants: * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. @@ -124,8 +49,8 @@ enum Flags { IS_EMPTY, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM } * so there is room for least 1 more item in level zero. * 3) There are no gaps except at the bottom, so if levels_[0] = 0, * the sketch is exactly filled to capacity and must be compacted. - * 4) Sum of weights of retained items == N. - * 5) curTotalCap == items_.length == levels_[numLevels_]. + * 4) Sum of weights of all retained items == N. + * 5) curTotalCap = items_.length = levels_[numLevels_]. */ final int k_; // configured value of K @@ -137,13 +62,14 @@ enum Flags { IS_EMPTY, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM } int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. boolean isLevelZeroSorted_; - final boolean compatible; //compatible with quantiles sketch + final boolean compatible; //compatible with quantiles sketch treatment of rank 0.0 and 1.0. static final Random random = new Random(); /** * Heap constructor. * @param k configured size of sketch. Range [m, 2^16] * @param m minimum level size. Default is 8. + * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. */ BaseKllSketch(final int k, final int m, final boolean compatible) { checkK(k); @@ -278,7 +204,7 @@ public String toString() { * Checks the validity of the given value k * @param k must be greater than 7 and less than 65536. 
*/ - private static void checkK(final int k) { + static void checkK(final int k) { if (k < MIN_K || k > MAX_K) { throw new SketchesArgumentException( "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 9b738553e..125419e77 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -112,6 +112,7 @@ static void mergeSortedDoubleArrays( * @param outBuf the same array as inBuf * @param outLevels the same size as inLevels * @param isLevelZeroSorted true if this.level 0 is sorted + * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ static int[] generalDoublesCompress( diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 1503ddb60..a0d4a6c3e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -22,12 +22,34 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; 
+import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; +import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_DOUBLE; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; import java.util.Arrays; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; +import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -62,7 +84,7 @@ public KllDoublesSketch(final int k) { /** * Used for testing only. * @param k configured size of sketch. Range [m, 2^16] - * @param compatible if true, compatible with quantiles sketch. + * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. */ KllDoublesSketch(final int k, final boolean compatible) { this(k, DEFAULT_M, compatible); @@ -72,6 +94,7 @@ public KllDoublesSketch(final int k) { * Heap constructor. * @param k configured size of sketch. Range [m, 2^16] * @param m minimum level size. Default is 8. + * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
*/ private KllDoublesSketch(final int k, final int m, final boolean compatible) { super(k, m, compatible); @@ -85,10 +108,10 @@ private KllDoublesSketch(final int k, final int m, final boolean compatible) { * @param mem Memory object that contains data serialized by this sketch. */ private KllDoublesSketch(final Memory mem) { - super(mem.getShort(K_SHORT) & 0xffff, DEFAULT_M, true); - final int flags = mem.getByte(FLAGS_BYTE) & 0xff; - final boolean empty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - final boolean singleItem = (flags & 1 << Flags.IS_SINGLE_ITEM.ordinal()) > 0; + super(mem.getShort(K_SHORT_ADR) & 0xffff, DEFAULT_M, true); + final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; + final boolean empty = (flags & EMPTY_BIT_MASK) > 0; + final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; if (empty) { numLevels_ = 1; levels_ = new int[] {k_, k_}; @@ -103,12 +126,12 @@ private KllDoublesSketch(final Memory mem) { minK_ = k_; numLevels_ = 1; } else { - n_ = mem.getLong(N_LONG); - minK_ = mem.getShort(MIN_K_SHORT) & 0xffff; - numLevels_ = mem.getByte(NUM_LEVELS_BYTE) & 0xff; + n_ = mem.getLong(N_LONG_ADR); + minK_ = mem.getShort(MIN_K_SHORT_ADR) & 0xffff; + numLevels_ = mem.getByte(NUM_LEVELS_BYTE_ADR) & 0xff; } levels_ = new int[numLevels_ + 1]; - int offset = singleItem ? DATA_START_SINGLE_ITEM : DATA_START_DOUBLE; + int offset = singleItem ? 
DATA_START_ADR_SINGLE_ITEM : DATA_START_ADR_DOUBLE; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); if (singleItem) { levels_[0] = itemCapacity - 1; @@ -130,7 +153,7 @@ private KllDoublesSketch(final Memory mem) { minValue_ = items_[levels_[0]]; maxValue_ = items_[levels_[0]]; } - isLevelZeroSorted_ = (flags & 1 << Flags.IS_LEVEL_ZERO_SORTED.ordinal()) > 0; + isLevelZeroSorted_ = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; } } @@ -144,17 +167,17 @@ private KllDoublesSketch(final Memory mem) { //To simplify the code, this method does all the validity checking // then passes the verified Memory to the actual heapify constructor public static KllDoublesSketch heapify(final Memory mem) { - final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE) & 0xff; - final int serialVersion = mem.getByte(SER_VER_BYTE) & 0xff; - final int family = mem.getByte(FAMILY_BYTE) & 0xff; - final int flags = mem.getByte(FLAGS_BYTE) & 0xff; - final int m = mem.getByte(M_BYTE) & 0xff; + final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0xff; + final int serialVersion = mem.getByte(SER_VER_BYTE_ADR) & 0xff; + final int family = mem.getByte(FAMILY_BYTE_ADR) & 0xff; + final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; + final int m = mem.getByte(M_BYTE_ADR) & 0xff; if (m != DEFAULT_M) { throw new SketchesArgumentException( "Possible corruption: M must be " + DEFAULT_M + ": " + m); } - final boolean empty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - final boolean singleItem = (flags & 1 << Flags.IS_SINGLE_ITEM.ordinal()) > 0; + final boolean empty = (flags & EMPTY_BIT_MASK) > 0; + final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; if (empty || singleItem) { if (preambleInts != PREAMBLE_INTS_EMPTY_SINGLE) { throw new SketchesArgumentException("Possible corruption: preambleInts must be " @@ -166,9 +189,9 @@ public static KllDoublesSketch heapify(final Memory mem) { + PREAMBLE_INTS_DOUBLE + " for a sketch with more than one item: " + 
preambleInts); } } - if (serialVersion != SERIAL_VERSION && serialVersion != SERIAL_VERSION_SINGLE) { + if (serialVersion != SERIAL_VERSION_EMPTY_FULL && serialVersion != SERIAL_VERSION_SINGLE) { throw new SketchesArgumentException( - "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION + " or " + "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION_EMPTY_FULL + " or " + SERIAL_VERSION_SINGLE + ", got " + serialVersion); } if (family != Family.KLL.getID()) { @@ -227,17 +250,23 @@ public double getMinValue() { /** * Returns upper bound on the serialized size of a sketch given a parameter k and stream - * length. The resulting size is an overestimate to make sure actual sketches don't exceed it. - * This method can be used if allocation of storage is necessary beforehand, but it is not - * optimal. + * length. This method can be used if allocation of storage is necessary beforehand. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @return upper bound on the serialized size */ public static int getMaxSerializedSizeBytes(final int k, final long n) { - final int numLevels = KllHelper.ubOnNumLevels(n); - final int maxNumItems = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, numLevels); - return getSerializedSizeBytes(numLevels, maxNumItems); + final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, true); + return lvlStats.getBytes(); + } + + /** + * Returns the current number of bytes this sketch would require to store in compact form. + * @return the current number of bytes this sketch would require to store in compact form. 
+ */ + public int getCurrentCompactSerializedSizeBytes() { + if (isEmpty()) { return N_LONG_ADR; } + return KllHelper.getCompactSerializedSizeBytes(numLevels_, getNumRetained(), true); } /** @@ -285,13 +314,13 @@ public double[] getPMF(final double[] splitPoints) { */ public double getQuantile(final double fraction) { if (isEmpty()) { return Double.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } if (compatible) { if (fraction == 0.0) { return minValue_; } if (fraction == 1.0) { return maxValue_; } } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0"); - } final KllDoublesQuantileCalculator quant = getQuantileCalculator(); return quant.getQuantile(fraction); } @@ -343,7 +372,7 @@ public double[] getQuantiles(final double[] fractions) { for (int i = 0; i < fractions.length; i++) { final double fraction = fractions[i]; if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0"); + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } if (fraction == 0.0 && compatible) { quantiles[i] = minValue_; } else if (fraction == 1.0 && compatible) { quantiles[i] = maxValue_; } @@ -412,10 +441,11 @@ public double getRank(final double value) { /** * Returns the number of bytes this sketch would require to store. * @return the number of bytes this sketch would require to store. 
+ * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } */ + @Deprecated public int getSerializedSizeBytes() { - if (isEmpty()) { return N_LONG; } - return getSerializedSizeBytes(numLevels_, getNumRetained()); + return getCurrentCompactSerializedSizeBytes(); } /** @@ -455,29 +485,30 @@ public void merge(final KllDoublesSketch other) { @Override public byte[] toByteArray() { - final byte[] bytes = new byte[getSerializedSizeBytes()]; + final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); final boolean singleItem = n_ == 1; final boolean empty = isEmpty(); //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE, (byte) (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_DOUBLE)); - wmem.putByte(SER_VER_BYTE, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION); - wmem.putByte(FAMILY_BYTE, (byte) Family.KLL.getID()); + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) + (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_DOUBLE)); + wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); final byte flags = (byte) ( - (empty ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (isLevelZeroSorted_ ? 1 << Flags.IS_LEVEL_ZERO_SORTED.ordinal() : 0) - | (singleItem ? 1 << Flags.IS_SINGLE_ITEM.ordinal() : 0)); - wmem.putByte(FLAGS_BYTE, flags); - wmem.putShort(K_SHORT, (short) k_); - wmem.putByte(M_BYTE, (byte) m_); + (empty ? EMPTY_BIT_MASK : 0) + | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | (singleItem ? 
SINGLE_ITEM_BIT_MASK : 0)); + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) k_); + wmem.putByte(M_BYTE_ADR, (byte) m_); if (empty) { return bytes; } //load data - int offset = DATA_START_SINGLE_ITEM; + int offset = DATA_START_ADR_SINGLE_ITEM; if (!singleItem) { - wmem.putLong(N_LONG, n_); - wmem.putShort(MIN_K_SHORT, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE, (byte) numLevels_); - offset = DATA_START_DOUBLE; + wmem.putLong(N_LONG_ADR, n_); + wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + offset = DATA_START_ADR_DOUBLE; // the last integer in levels_ is not serialized because it can be derived final int len = levels_.length - 1; wmem.putIntArray(offset, levels_, 0, len); @@ -509,7 +540,7 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted_).append(Util.LS); sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Storage Bytes : ").append(getSerializedSizeBytes()).append(Util.LS); + sb.append(" Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); sb.append(" Min Value : ").append(minValue_).append(Util.LS); sb.append(" Max Value : ").append(maxValue_).append(Util.LS); sb.append("### End sketch summary").append(Util.LS); @@ -766,7 +797,7 @@ private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) final int finalCapacity = result[1]; final int finalPop = result[2]; - assert finalNumLevels <= ub; // can sometimes be much bigger + assert finalNumLevels <= ub; // ub may be much bigger // now we need to transfer the results back into the "self" sketch final double[] newbuf = finalCapacity == items_.length ? 
items_ : new double[finalCapacity]; @@ -811,16 +842,7 @@ private void populateWorkArrays(final KllDoublesSketch other, final double[] wor } } - private static int getSerializedSizeBytes(final int numLevels, final int numRetained) { - if (numLevels == 1 && numRetained == 1) { - return DATA_START_SINGLE_ITEM + Double.BYTES; - } - // the last integer in levels_ is not serialized because it can be derived - // + 2 for min and max - return DATA_START_DOUBLE + numLevels * Integer.BYTES + (numRetained + 2) * Double.BYTES; - } - - // for testing + // only for testing double[] getItems() { return items_; diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 065e38eba..79e730ef1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -112,6 +112,7 @@ static void mergeSortedFloatArrays( * @param outBuf the same array as inBuf * @param outLevels the same size as inLevels * @param isLevelZeroSorted true if this.level 0 is sorted + * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ static int[] generalFloatsCompress( diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 7d2639fc8..c8e928af3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -22,12 +22,34 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static 
org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; +import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; +import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; +import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; import java.util.Arrays; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; +import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -62,7 +84,7 @@ public KllFloatsSketch(final int k) { /** * Used for testing only. * @param k configured size of sketch. Range [m, 2^16] - * @param compatible if true, compatible with quantiles sketch. + * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
*/ KllFloatsSketch(final int k, final boolean compatible) { this(k, DEFAULT_M, compatible); @@ -72,6 +94,7 @@ public KllFloatsSketch(final int k) { * Heap constructor. * @param k configured size of sketch. Range [m, 2^16] * @param m minimum level size. Default is 8. + * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. */ private KllFloatsSketch(final int k, final int m, final boolean compatible) { super(k, m, compatible); @@ -85,10 +108,10 @@ private KllFloatsSketch(final int k, final int m, final boolean compatible) { * @param mem Memory object that contains data serialized by this sketch. */ private KllFloatsSketch(final Memory mem) { - super(mem.getShort(K_SHORT) & 0xffff, DEFAULT_M, true); - final int flags = mem.getByte(FLAGS_BYTE) & 0xff; - final boolean empty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - final boolean singleItem = (flags & 1 << Flags.IS_SINGLE_ITEM.ordinal()) > 0; + super(mem.getShort(K_SHORT_ADR) & 0xffff, DEFAULT_M, true); + final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; + final boolean empty = (flags & EMPTY_BIT_MASK) > 0; + final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; if (empty) { numLevels_ = 1; levels_ = new int[] {k_, k_}; @@ -103,12 +126,12 @@ private KllFloatsSketch(final Memory mem) { minK_ = k_; numLevels_ = 1; } else { - n_ = mem.getLong(N_LONG); - minK_ = mem.getShort(MIN_K_SHORT) & 0xffff; - numLevels_ = mem.getByte(NUM_LEVELS_BYTE) & 0xff; + n_ = mem.getLong(N_LONG_ADR); + minK_ = mem.getShort(MIN_K_SHORT_ADR) & 0xffff; + numLevels_ = mem.getByte(NUM_LEVELS_BYTE_ADR) & 0xff; } levels_ = new int[numLevels_ + 1]; - int offset = singleItem ? DATA_START_SINGLE_ITEM : DATA_START_FLOAT; + int offset = singleItem ? 
DATA_START_ADR_SINGLE_ITEM : DATA_START_ADR_FLOAT; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); if (singleItem) { levels_[0] = itemCapacity - 1; @@ -130,7 +153,7 @@ private KllFloatsSketch(final Memory mem) { minValue_ = items_[levels_[0]]; maxValue_ = items_[levels_[0]]; } - isLevelZeroSorted_ = (flags & 1 << Flags.IS_LEVEL_ZERO_SORTED.ordinal()) > 0; + isLevelZeroSorted_ = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; } } @@ -144,17 +167,17 @@ private KllFloatsSketch(final Memory mem) { //To simplify the code, this method does all the validity checking // then passes the verified Memory to the actual heapify constructor public static KllFloatsSketch heapify(final Memory mem) { - final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE) & 0xff; - final int serialVersion = mem.getByte(SER_VER_BYTE) & 0xff; - final int family = mem.getByte(FAMILY_BYTE) & 0xff; - final int flags = mem.getByte(FLAGS_BYTE) & 0xff; - final int m = mem.getByte(M_BYTE) & 0xff; + final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0xff; + final int serialVersion = mem.getByte(SER_VER_BYTE_ADR) & 0xff; + final int family = mem.getByte(FAMILY_BYTE_ADR) & 0xff; + final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; + final int m = mem.getByte(M_BYTE_ADR) & 0xff; if (m != DEFAULT_M) { throw new SketchesArgumentException( "Possible corruption: M must be " + DEFAULT_M + ": " + m); } - final boolean empty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - final boolean singleItem = (flags & 1 << Flags.IS_SINGLE_ITEM.ordinal()) > 0; + final boolean empty = (flags & EMPTY_BIT_MASK) > 0; + final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; if (empty || singleItem) { if (preambleInts != PREAMBLE_INTS_EMPTY_SINGLE) { throw new SketchesArgumentException("Possible corruption: preambleInts must be " @@ -166,9 +189,9 @@ public static KllFloatsSketch heapify(final Memory mem) { + PREAMBLE_INTS_FLOAT + " for a sketch with more than one item: " + 
preambleInts); } } - if (serialVersion != SERIAL_VERSION && serialVersion != SERIAL_VERSION_SINGLE) { + if (serialVersion != SERIAL_VERSION_EMPTY_FULL && serialVersion != SERIAL_VERSION_SINGLE) { throw new SketchesArgumentException( - "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION + " or " + "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION_EMPTY_FULL + " or " + SERIAL_VERSION_SINGLE + ", got " + serialVersion); } if (family != Family.KLL.getID()) { @@ -227,17 +250,23 @@ public float getMinValue() { /** * Returns upper bound on the serialized size of a sketch given a parameter k and stream - * length. The resulting size is an overestimate to make sure actual sketches don't exceed it. - * This method can be used if allocation of storage is necessary beforehand, but it is not - * optimal. + * length. This method can be used if allocation of storage is necessary beforehand. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @return upper bound on the serialized size */ public static int getMaxSerializedSizeBytes(final int k, final long n) { - final int numLevels = KllHelper.ubOnNumLevels(n); - final int maxNumItems = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, numLevels); - return getSerializedSizeBytes(numLevels, maxNumItems); + final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, false); + return lvlStats.getBytes(); + } + + /** + * Returns the current number of bytes this sketch would require to store in compact form. + * @return the current number of bytes this sketch would require to store in compact form. 
+ */ + public int getCurrentCompactSerializedSizeBytes() { + if (isEmpty()) { return N_LONG_ADR; } + return KllHelper.getCompactSerializedSizeBytes(numLevels_, getNumRetained(), false); } /** @@ -285,13 +314,13 @@ public double[] getPMF(final float[] splitPoints) { */ public float getQuantile(final double fraction) { if (isEmpty()) { return Float.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } if (compatible) { if (fraction == 0.0) { return minValue_; } if (fraction == 1.0) { return maxValue_; } } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0"); - } final KllFloatsQuantileCalculator quant = getQuantileCalculator(); return quant.getQuantile(fraction); } @@ -343,7 +372,7 @@ public float[] getQuantiles(final double[] fractions) { for (int i = 0; i < fractions.length; i++) { final double fraction = fractions[i]; if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0"); + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } if (fraction == 0.0 && compatible) { quantiles[i] = minValue_; } else if (fraction == 1.0 && compatible) { quantiles[i] = maxValue_; } @@ -412,10 +441,11 @@ public double getRank(final float value) { /** * Returns the number of bytes this sketch would require to store. * @return the number of bytes this sketch would require to store. 
+ * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } */ + @Deprecated public int getSerializedSizeBytes() { - if (isEmpty()) { return N_LONG; } - return getSerializedSizeBytes(numLevels_, getNumRetained()); + return getCurrentCompactSerializedSizeBytes(); } /** @@ -460,24 +490,25 @@ public byte[] toByteArray() { final boolean singleItem = n_ == 1; final boolean empty = isEmpty(); //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE, (byte) (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FLOAT)); - wmem.putByte(SER_VER_BYTE, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION); - wmem.putByte(FAMILY_BYTE, (byte) Family.KLL.getID()); + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) + (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FLOAT)); + wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); final byte flags = (byte) ( - (empty ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (isLevelZeroSorted_ ? 1 << Flags.IS_LEVEL_ZERO_SORTED.ordinal() : 0) - | (singleItem ? 1 << Flags.IS_SINGLE_ITEM.ordinal() : 0)); - wmem.putByte(FLAGS_BYTE, flags); - wmem.putShort(K_SHORT, (short) k_); - wmem.putByte(M_BYTE, (byte) m_); + (empty ? EMPTY_BIT_MASK : 0) + | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | (singleItem ? 
SINGLE_ITEM_BIT_MASK : 0)); + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) k_); + wmem.putByte(M_BYTE_ADR, (byte) m_); if (empty) { return bytes; } //load data - int offset = DATA_START_SINGLE_ITEM; + int offset = DATA_START_ADR_SINGLE_ITEM; if (!singleItem) { - wmem.putLong(N_LONG, n_); - wmem.putShort(MIN_K_SHORT, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE, (byte) numLevels_); - offset = DATA_START_FLOAT; + wmem.putLong(N_LONG_ADR, n_); + wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + offset = DATA_START_ADR_FLOAT; // the last integer in levels_ is not serialized because it can be derived final int len = levels_.length - 1; wmem.putIntArray(offset, levels_, 0, len); @@ -766,7 +797,7 @@ private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) { final int finalCapacity = result[1]; final int finalPop = result[2]; - assert finalNumLevels <= ub; // ub can sometimes be much bigger + assert finalNumLevels <= ub; // ub may be much bigger // now we need to transfer the results back into the "self" sketch final float[] newbuf = finalCapacity == items_.length ? 
items_ : new float[finalCapacity]; @@ -811,15 +842,6 @@ private void populateWorkArrays(final KllFloatsSketch other, final float[] workb } } - private static int getSerializedSizeBytes(final int numLevels, final int numRetained) { - if (numLevels == 1 && numRetained == 1) { - return DATA_START_SINGLE_ITEM + Float.BYTES; - } - // the last integer in levels_ is not serialized because it can be derived - // + 2 for min and max - return DATA_START_FLOAT + numLevels * Integer.BYTES + (numRetained + 2) * Float.BYTES; - } - // for testing float[] getItems() { diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index d59dfc606..9cb1123e2 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -20,8 +20,12 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.Util.floorPowerOf2; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; class KllHelper { + static final String LS = System.getProperty("line.separator"); /** * Copy the old array into a new larger array. @@ -39,7 +43,7 @@ static int[] growIntArray(final int[] oldArr, final int newLen) { } /** - * Returns the upper bound of the number of levels based on n. + * Returns very conservative upper bound of the number of levels based on n. 
* @param n the length of the stream * @return floor( log_2(n) ) */ @@ -47,6 +51,94 @@ static int ubOnNumLevels(final long n) { return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); } + public static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, + final boolean printDetail, final boolean printSummaries, final boolean isDouble) { + long cumN; + int numLevels = 0; + LevelStats lvlStats; + do { + numLevels++; + lvlStats = getLevelStats(k, m, numLevels, printDetail, printSummaries, isDouble); + cumN = lvlStats.getMaxN(); + } while (cumN < n); + return lvlStats; + } + + static LevelStats getLevelStats(final int k, final int m, final int numLevels, + final boolean printDetail, final boolean printSummary, final boolean isDouble) { + int cumN = 0; + int cumCap = 0; + if (printDetail) { + System.out.println("Total Levels: " + numLevels); + System.out.printf("%6s%12s%8s%16s\n", "Level","Wt","Cap","N"); + } + for (int level = 0; level < numLevels; level++) { + final long levelCap = levelCapacity(k, numLevels, level, m); + final long maxNAtLevel = levelCap << level; + cumN += maxNAtLevel; + cumCap += (int)levelCap; + if (printDetail) { + System.out.printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); + } + } + final int bytes = getCompactSerializedSizeBytes(numLevels, cumCap, isDouble); + if (printDetail) { + System.out.printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); + System.out.println(" TOTAL BYTES: " + bytes); + System.out.println(""); + } + final LevelStats lvlStats = new LevelStats(cumN, bytes, numLevels, cumCap); + if (printSummary) { System.out.println(lvlStats.toString()); } + return lvlStats; + } + + public static class LevelStats { + private long maxN; + private int bytes; + private int numLevels; + private int maxCap; + + LevelStats(final long maxN, final int bytes, final int numLevels, final int maxCap) { + this.maxN = maxN; + this.bytes = bytes; + this.numLevels = numLevels; + this.maxCap = maxCap; + } + + 
@Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("Level Stats Summary:" + LS); + sb.append(" NumLevels: " + numLevels + LS); + sb.append(" MaxCap : " + maxCap + LS); + sb.append(" MaxN : " + maxN + LS); + sb.append(" TotBytes : " + bytes + LS + LS); + return sb.toString(); + } + + public long getMaxN() { return maxN; } + + public int getBytes() { return bytes; } + + public int getNumLevels() { return numLevels; } + + public int getMaxCap() { return maxCap; } + } + + static int getCompactSerializedSizeBytes(final int numLevels, final int numRetained, + final boolean isDouble) { + if (numLevels == 1 && numRetained == 1) { + return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); + } + // The last integer in levels_ is not serialized because it can be derived. + // The + 2 is for min and max + if (isDouble) { + return DATA_START_ADR_DOUBLE + numLevels * Integer.BYTES + (numRetained + 2) * Double.BYTES; + } else { + return DATA_START_ADR_FLOAT + numLevels * Integer.BYTES + (numRetained + 2) * Float.BYTES; + } + } + /** * Returns the maximum number of items that this sketch can handle * @param k The sizing / accuracy parameter of the sketch in items. diff --git a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java new file mode 100644 index 000000000..c2211db95 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java @@ -0,0 +1,409 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.Family.idToFamily; + +import org.apache.datasketches.Family; +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; + +//@formatter:off + +/** + * This class defines the serialized data structure and provides access methods for the key fields. + * + *

The intent of the design of this class was to isolate the detailed knowledge of the bit and + * byte layout of the serialized form of the sketches derived from the base sketch classes into one place. + * This allows the possibility of the introduction of different serialization + * schemes with minimal impact on the rest of the library.

+ * + *

+ * LAYOUT: The low-significance bytes of this long-based data structure are on the right. + * The multi-byte primitives are stored in native byte order. + * The single-byte fields are treated as unsigned.

+ * + *

An empty sketch requires only 8 bytes, which is only preamble. + * A serialized, non-empty KllDoublesSketch requires at least 16 bytes: 8 bytes of preamble plus one 8-byte item. + * A serialized, non-empty KllFloatsSketch requires at least 12 bytes: 8 bytes of preamble plus one 4-byte item.

+ * + *
{@code
+ * Serialized float sketch layout, more than one item:
+ *  Adr:
+ *      ||    7    |   6   |    5   |    4   |    3   |    2    |    1   |      0       |
+ *  0   || unused  |   M   |--------K--------|  Flags |  FamID  | SerVer | PreambleInts |
+ *      ||   15    |   14  |   13   |   12   |   11   |   10    |    9   |      8       |
+ *  1   ||---------------------------------N_LONG---------------------------------------|
+ *      ||   23    |   22  |   21   |   20   |   19   |    18   |   17   |      16      |
+ *  2   ||<--------------data----------------| unused |numLevels|-------min K-----------|
+ *
+ * Serialized float sketch layout, Empty (8 bytes) and Single Item (12 bytes):
+ *  Adr:
+ *      ||    7    |   6   |    5   |    4   |    3   |    2    |    1   |      0       |
+ *  0   || unused  |   M   |--------K--------|  Flags |  FamID  | SerVer | PreambleInts |
+ *      ||   15    |   14  |   13   |   12   |   11   |   10    |    9   |      8       |
+ *  1   ||                                   |-------------------data-------------------|
+ *
+ *
+ *
+ * Serialized double sketch layout, more than one item:
+ *  Adr:
+ *      ||    7    |   6   |    5   |    4   |    3   |    2    |    1   |      0       |
+ *  0   || unused  |   M   |--------K--------|  Flags |  FamID  | SerVer | PreambleInts |
+ *      ||   15    |   14  |   13   |   12   |   11   |   10    |    9   |      8       |
+ *  1   ||---------------------------------N_LONG---------------------------------------|
+ *      ||   23    |   22  |   21   |   20   |   19   |    18   |   17   |      16      |
+ *  2   ||--------------unused------------------------|numLevels|-------min K-----------|
+ *      ||                                                               |      24      |
+ *  3   ||<---------------------------------data----------------------------------------|
+ *
+ * Serialized double sketch layout, Empty (8 bytes) and Single Item (16 bytes):
+ *  Adr:
+ *      ||    7    |   6   |    5   |    4   |    3   |    2    |    1   |      0       |
+ *  0   || unused  |   M   |--------K--------|  Flags |  FamID  | SerVer | PreambleInts |
+ *      ||                                                               |      8       |
+ *  1   ||----------------------------------data----------------------------------------|
+ * }
+ * + * @author Lee Rhodes + */ +final class PreambleUtil { + + private PreambleUtil() {} + + static final String LS = System.getProperty("line.separator"); + + /** + * The default value of K + */ + public static final int DEFAULT_K = 200; + static final int DEFAULT_M = 8; + static final int MIN_K = DEFAULT_M; + static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short + + // Preamble byte addresses + static final int PREAMBLE_INTS_BYTE_ADR = 0; + static final int SER_VER_BYTE_ADR = 1; + static final int FAMILY_BYTE_ADR = 2; + static final int FLAGS_BYTE_ADR = 3; + static final int K_SHORT_ADR = 4; // to 5 + static final int M_BYTE_ADR = 6; + // 7 is reserved for future use + // SINGLE ITEM ONLY + static final int DATA_START_ADR_SINGLE_ITEM = 8; + + // MULTI-ITEM + static final int N_LONG_ADR = 8; // to 15 + static final int MIN_K_SHORT_ADR = 16; // to 17 + static final int NUM_LEVELS_BYTE_ADR = 18; + + // FLOAT SKETCH 19 is reserved for future use in float sketch + static final int DATA_START_ADR_FLOAT = 20; // float sketch, not single item + + // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch + static final int DATA_START_ADR_DOUBLE = 24; // double sketch, not single item + + // Other static values + static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format + static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format + static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item + static final int PREAMBLE_INTS_FLOAT = 5; // not empty nor single item, full preamble float + static final int PREAMBLE_INTS_DOUBLE = 6; // not empty nor single item, full preamble double + + // Flag bit masks + static final int EMPTY_BIT_MASK = 1; + static final int LEVEL_ZERO_SORTED_BIT_MASK = 2; + static final int SINGLE_ITEM_BIT_MASK = 4; + static final int DOUBLES_SKETCH_BIT_MASK = 8; + static final int NOT_COMPACT_BIT_MASK = 16; + + enum Layout { + FLOAT_FULL_COMPACT, 
FLOAT_EMPTY_COMPACT, FLOAT_SINGLE_COMPACT, + DOUBLE_FULL_COMPACT, DOUBLE_EMPTY_COMPACT, DOUBLE_SINGLE_COMPACT, + FLOAT_FULL_NOT_COMPACT, FLOAT_EMPTY_NOT_COMPACT, FLOAT_SINGLE_NOT_COMPACT, + DOUBLE_FULL_NOT_COMPACT, DOUBLE_EMPTY_NOT_COMPACT, DOUBLE_SINGLE_NOT_COMPACT } + + static Layout layout; + + /** + * Returns a human readable string summary of the internal state of the given byte array. + * Used primarily in testing. + * + * @param byteArr the given byte array. + * @return the summary string. + */ + static String toString(final byte[] byteArr) { + final Memory mem = Memory.wrap(byteArr); + return toString(mem); + } + + /** + * Returns a human readable string summary of the internal state of the given Memory. + * Used primarily in testing. + * + * @param mem the given Memory + * @return the summary string. + */ + static String toString(final Memory mem) { + return null; //memoryToString(mem); + } + + + @SuppressWarnings("unused") + private static String memoryToString(final Memory srcMem) { + final int preInts = extractPreInts(srcMem); + final int serVer = extractSerVer(srcMem); + final int familyID = extractFamilyID(srcMem); + final String famName = idToFamily(familyID).toString(); + final int flags = extractFlags(srcMem); + final boolean empty = (flags & EMPTY_BIT_MASK) > 0; + final boolean level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; + final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; + final boolean doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; + final boolean notCompact = (flags & NOT_COMPACT_BIT_MASK) > 0; + final int k = extractK(srcMem); + final int m = extractM(srcMem); + long n = 0; + final int minK; + final int numLevels; + + //preamble checks + if (familyID != Family.KLL.getID()) { throwCustom(0, familyID); } + final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); + if ((checkFlags & 5) == 5) { throwCustom(20, flags); } + switch (checkFlags) { + case 0: { //not empty, not single item, float full + if (preInts != PREAMBLE_INTS_FLOAT) { throwCustom(6, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + layout = notCompact ? Layout.FLOAT_FULL_NOT_COMPACT : Layout.FLOAT_FULL_COMPACT; + n = extractN(srcMem); + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + break; + } + case 1: { //empty, not single item, float empty + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (notCompact) { + layout = Layout.FLOAT_EMPTY_NOT_COMPACT; + n = extractN(srcMem); + if (n != 0) { throwCustom(21, (int) n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + } else { + layout = Layout.FLOAT_EMPTY_COMPACT; + n = 0; + minK = k; + numLevels = 1; + } + break; + } + case 4: { //not empty, single item, float single item + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (notCompact) { + layout = Layout.FLOAT_SINGLE_NOT_COMPACT; + n = extractN(srcMem); + if (n != 1) { throwCustom(22, (int)n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + } else { + layout = Layout.FLOAT_SINGLE_COMPACT; + n = 1; + minK = k; + numLevels = 1; + } + break; + } + case 8: { //not empty, not single item, double full + if (preInts != PREAMBLE_INTS_DOUBLE) { throwCustom(5, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + layout = notCompact ? 
Layout.DOUBLE_FULL_NOT_COMPACT : Layout.DOUBLE_FULL_COMPACT; + n = extractN(srcMem); + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + break; + } + case 9: { //empty, not single item, double empty + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (notCompact) { + layout = Layout.DOUBLE_EMPTY_NOT_COMPACT; + n = extractN(srcMem); + if (n != 0) { throwCustom(21, (int) n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + } else { + layout = Layout.DOUBLE_EMPTY_COMPACT; + n = 0; + minK = k; + numLevels = 1; + } + break; + } + case 12: { //not empty, single item, double single item + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (notCompact) { + layout = Layout.DOUBLE_SINGLE_NOT_COMPACT; + n = extractN(srcMem); + if (n != 1) { throwCustom(22, (int)n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + } else { + layout = Layout.DOUBLE_SINGLE_COMPACT; + n = 1; + minK = k; + numLevels = 1; + } + break; + } + } + + BaseKllSketch.checkK(k); + + if (m != 8) { + System.err.println("WARNING: Minimum Level width set to non-default value: " + m); + } + + + return null; + } + +private static void throwCustom(final int errNo, final int value) { + String msg = ""; + switch (errNo) { + case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; + case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; + case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; + case 5: msg = "Double Sketch Bit: 1 -> 
PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; + case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; + case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; + case 21: msg = "N != 0 and empty bit is set. N: " + value; break; + case 22: msg = "N != 1 and single item bit is set. N: " + value; break; + + } + throw new SketchesArgumentException(msg); +} + + static int extractPreInts(final Memory mem) { + return mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0XFF; + } + + static int extractSerVer(final Memory mem) { + return mem.getByte(SER_VER_BYTE_ADR) & 0XFF; + } + + static int extractFamilyID(final Memory mem) { + return mem.getByte(FAMILY_BYTE_ADR) & 0XFF; + } + + static int extractFlags(final Memory mem) { + return mem.getByte(FLAGS_BYTE_ADR) & 0XFF; + } + + static boolean extractEmptyFlag(final Memory mem) { + return (extractFlags(mem) & EMPTY_BIT_MASK) != 0; + } + + static boolean extractLevelZeroSortedFlag(final Memory mem) { + return (extractFlags(mem) & LEVEL_ZERO_SORTED_BIT_MASK) != 0; + } + + static boolean extractSingleItemFlag(final Memory mem) { + return (extractFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; + } + + static int extractK(final Memory mem) { + return mem.getShort(K_SHORT_ADR) & 0XFFFF; + } + + static int extractM(final Memory mem) { + return mem.getByte(M_BYTE_ADR) & 0XFF; + } + + static long extractN(final Memory mem) { + return mem.getLong(N_LONG_ADR); + } + + static int extractMinK(final Memory mem) { + return mem.getShort(MIN_K_SHORT_ADR) & 0XFFFF; + } + + static int extractNumLevels(final Memory mem) { + return mem.getByte(NUM_LEVELS_BYTE_ADR) & 0XFF; + } + + static void insertPreInts(final WritableMemory wmem, final int value) { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) value); + } + + static void insertSerVer(final WritableMemory wmem, final int value) { + wmem.putByte(SER_VER_BYTE_ADR, (byte) value); + } + + static void 
insertFamilyID(final WritableMemory wmem, final int value) { + wmem.putByte(FAMILY_BYTE_ADR, (byte) value); + } + + static void insertFlags(final WritableMemory wmem, final int value) { + wmem.putByte(FLAGS_BYTE_ADR, (byte) value); + } + + static void insertEmptyFlag(final WritableMemory wmem, final boolean empty) { + final int flags = extractFlags(wmem); + insertFlags(wmem, empty ? flags | EMPTY_BIT_MASK : flags & ~EMPTY_BIT_MASK); + } + + static void insertLevelZeroSortedFlag(final WritableMemory wmem, final boolean levelZeroSorted) { + final int flags = extractFlags(wmem); + insertFlags(wmem, levelZeroSorted ? flags | LEVEL_ZERO_SORTED_BIT_MASK : flags & ~LEVEL_ZERO_SORTED_BIT_MASK); + } + + static void insertSingleItemFlag(final WritableMemory wmem, final boolean singleItem) { + final int flags = extractFlags(wmem); + insertFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); + } + + static void insertK(final WritableMemory wmem, final int value) { + wmem.putShort(K_SHORT_ADR, (short) value); + } + + static void insertM(final WritableMemory wmem, final int value) { + wmem.putByte(M_BYTE_ADR, (byte) value); + } + + static void insertN(final WritableMemory wmem, final long value) { + wmem.putLong(N_LONG_ADR, value); + } + + static void insertMinK(final WritableMemory wmem, final int value) { + wmem.putShort(MIN_K_SHORT_ADR, (short) value); + } + + static void insertNumLevels(final WritableMemory wmem, final int value) { + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) value); + } + +} + diff --git a/src/main/java/org/apache/datasketches/quantiles/PreambleUtil.java b/src/main/java/org/apache/datasketches/quantiles/PreambleUtil.java index 45990bca9..2b3f53952 100644 --- a/src/main/java/org/apache/datasketches/quantiles/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/quantiles/PreambleUtil.java @@ -31,23 +31,23 @@ //@formatter:off /** - * This class defines the preamble data structure and provides basic utilities for some 
of the key - * fields. + * This class defines the serialized data structure and provides access methods for the key fields. + * *

The intent of the design of this class was to isolate the detailed knowledge of the bit and - * byte layout of the serialized form of the sketches derived from the Sketch class into one place. + * byte layout of the serialized form of the sketches derived from the base sketch classes into one place. * This allows the possibility of the introduction of different serialization * schemes with minimal impact on the rest of the library.

* *

- * MAP: Low significance bytes of this long data structure are on the right. However, the - * multi-byte integers (int and long) are stored in native byte order. The - * byte values are treated as unsigned.

+ * LAYOUT: The low-significance bytes of this long-based data structure are on the right. + * The multi-byte primitives are stored in native byte order. + * The single-byte fields are treated as unsigned.

* *

An empty ItemsSketch, on-heap DoublesSketch or compact off-heap DoublesSketch only require 8 - * bytes. An off-heap UpdateDoublesSketch and all non-empty skethces require at least 16 bytes of + * bytes. An off-heap UpdateDoublesSketch and all non-empty sketches require at least 16 bytes of * preamble.

* - *
+ * 
{@code
  * Long || Start Byte Adr: Common for both DoublesSketch and ItemsSketch
  * Adr:
  *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0          |
@@ -66,7 +66,7 @@
  *
  *      ||   39   |   38   |   37   |   36   |   35   |   34   |   33   |    32          |
  *  4   ||---------------------------START OF COMBINED BUfFER----------------------------|
- *  
+ * }
* * @author Lee Rhodes */ diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 7eeea733d..860e175b9 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -19,6 +19,10 @@ package org.apache.datasketches.kll; +//import static org.apache.datasketches.Util.getResourceBytes; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.PreambleUtil.MAX_K; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -276,32 +280,32 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllDoublesSketch(BaseKllSketch.MIN_K - 1); + new KllDoublesSketch(MIN_K - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllDoublesSketch(BaseKllSketch.MAX_K + 1); + new KllDoublesSketch(MAX_K + 1); } @Test public void minK() { - final KllDoublesSketch sketch = new KllDoublesSketch(BaseKllSketch.MIN_K); + final KllDoublesSketch sketch = new KllDoublesSketch(MIN_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MIN_K); + assertEquals(sketch.getK(), MIN_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllDoublesSketch sketch = new KllDoublesSketch(BaseKllSketch.MAX_K); + final KllDoublesSketch sketch = new KllDoublesSketch(MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MAX_K); + assertEquals(sketch.getK(), MAX_K); assertEquals(sketch.getQuantile(0.5), 
500, 500 * PMF_EPS_FOR_K_256); } @@ -310,14 +314,14 @@ public void serializeDeserializeEmpty() { final KllDoublesSketch sketch1 = new KllDoublesSketch(); final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertTrue(Double.isNaN(sketch2.getMinValue())); assertTrue(Double.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test @@ -326,16 +330,26 @@ public void serializeDeserializeOneItem() { sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); assertEquals(sketch2.getN(), 1); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertFalse(Double.isNaN(sketch2.getMinValue())); assertFalse(Double.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), 8 + Double.BYTES); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES); } +// @Test //not implemented from C++ yet +// public void deserializeOneItemV1() throws Exception { +// final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk"); +// final KllFloatsSketch sketch = 
KllFloatsSketch.heapify(Memory.wrap(bytes)); +// assertFalse(sketch.isEmpty()); +// assertFalse(sketch.isEstimationMode()); +// assertEquals(sketch.getN(), 1); +// assertEquals(sketch.getNumRetained(), 1); +// } + @Test public void serializeDeserialize() { final KllDoublesSketch sketch1 = new KllDoublesSketch(); @@ -345,14 +359,14 @@ public void serializeDeserialize() { } final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -372,8 +386,8 @@ public void nanSplitPoint() { @Test public void getMaxSerializedSizeBytes() { final int sizeBytes = - KllDoublesSketch.getMaxSerializedSizeBytes(BaseKllSketch.DEFAULT_K, 1_000_000_000); - assertEquals(sizeBytes, 6184); + KllDoublesSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); + assertEquals(sizeBytes, 5708); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index fd2313b03..fa1fd7b47 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -19,6 +19,9 @@ package org.apache.datasketches.kll; +import static 
org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.PreambleUtil.MAX_K; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K; import static org.apache.datasketches.Util.getResourceBytes; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -277,32 +280,32 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllFloatsSketch(BaseKllSketch.MIN_K - 1); + new KllFloatsSketch(MIN_K - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllFloatsSketch(BaseKllSketch.MAX_K + 1); + new KllFloatsSketch(MAX_K + 1); } @Test public void minK() { - final KllFloatsSketch sketch = new KllFloatsSketch(BaseKllSketch.MIN_K); + final KllFloatsSketch sketch = new KllFloatsSketch(MIN_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MIN_K); + assertEquals(sketch.getK(), MIN_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllFloatsSketch sketch = new KllFloatsSketch(BaseKllSketch.MAX_K); + final KllFloatsSketch sketch = new KllFloatsSketch(MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MAX_K); + assertEquals(sketch.getK(), MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -311,14 +314,14 @@ public void serializeDeserializeEmpty() { final KllFloatsSketch sketch1 = new KllFloatsSketch(); final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 
sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertTrue(Float.isNaN(sketch2.getMinValue())); assertTrue(Float.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test @@ -327,14 +330,14 @@ public void serializeDeserializeOneItem() { sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); assertEquals(sketch2.getN(), 1); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertFalse(Float.isNaN(sketch2.getMinValue())); assertFalse(Float.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), 8 + Float.BYTES); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); } @Test @@ -356,14 +359,14 @@ public void serializeDeserialize() { } final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); - assertEquals(sketch2.getSerializedSizeBytes(), 
sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -383,8 +386,8 @@ public void nanSplitPoint() { @Test public void getMaxSerializedSizeBytes() { final int sizeBytes = - KllFloatsSketch.getMaxSerializedSizeBytes(BaseKllSketch.DEFAULT_K, 1_000_000_000); - assertEquals(sizeBytes, 3160); + KllFloatsSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); + assertEquals(sizeBytes, 2908); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 791684eb9..a02d7282a 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -19,12 +19,16 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.KllHelper.getLevelStats; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import java.util.Objects; import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -33,10 +37,11 @@ */ @SuppressWarnings("javadoc") public class MiscDoublesTest { + static final String LS = System.getProperty("line.separator"); @Test public void checkGetKFromEps() { - final int k = BaseKllSketch.DEFAULT_K; + final int k = DEFAULT_K; final double eps = BaseKllSketch.getNormalizedRankError(k, false); final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); @@ -119,8 +124,8 @@ public void checkMisc() { assertEquals(sk.getNumLevels(), 2); } - 
//@Test //requires visual check - public void visualCheck() { + @Test //enable static println(..) for visual checking + public void visualCheckToString() { final KllDoublesSketch sketch = new KllDoublesSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } println(sketch.toString(true, true)); @@ -130,7 +135,26 @@ public void visualCheck() { println("\n" + sketch2.toString(true, true)); sketch2.merge(sketch); - println("\n" + sketch2.toString(true, true)); + final String s2 = sketch2.toString(true, true); + println(LS + s2); + } + + @Test //convert false to true below for visual checking + public void testGetAllLevelStats() { + long n = 1L << 30; + int k = 200; + int m = 8; + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, true); + assertEquals(lvlStats.getBytes(), 5708); + } + + @Test //convert false to true below for visual checking + public void getStatsAtNumLevels() { + int k = 200; + int m = 8; + int numLevels = 23; + LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, true); + assertEquals(lvlStats.getBytes(), 5708); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index f7a0aeebc..92b54a326 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -19,14 +19,17 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.kll.KllHelper.getLevelStats; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import java.util.Objects; import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.WritableMemory; - import org.testng.annotations.Test; /** 
@@ -34,10 +37,11 @@ */ @SuppressWarnings("javadoc") public class MiscFloatsTest { + static final String LS = System.getProperty("line.separator"); @Test public void checkGetKFromEps() { - final int k = BaseKllSketch.DEFAULT_K; + final int k = DEFAULT_K; final double eps = BaseKllSketch.getNormalizedRankError(k, false); final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); @@ -120,19 +124,37 @@ public void checkMisc() { assertEquals(sk.getNumLevels(), 2); } - //@Test //requires visual check - public void checkNumRetainedAboveLevelZero() { + @Test //enable static println(..) for visual checking + public void visualCheckToString() { final KllFloatsSketch sketch = new KllFloatsSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } final String s1 = sketch.toString(true, true); println(s1); + final KllFloatsSketch sketch2 = new KllFloatsSketch(20); - for (int i = 0; i < 400; i++) { - sketch2.update(i + 1); - } + for (int i = 0; i < 400; i++) { sketch2.update(i + 1); } + sketch2.merge(sketch); final String s2 = sketch2.toString(true, true); - println(s2); + println(LS + s2); + } + + @Test //convert false to true below for visual checking + public void testGetAllLevelStats() { + long n = 1L << 30; + int k = 200; + int m = 8; + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, false); + assertEquals(lvlStats.getBytes(), 2908); + } + + @Test //convert false to true below for visual checking + public void getStatsAtNumLevels() { + int k = 200; + int m = 8; + int numLevels = 23; + LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, false); + assertEquals(lvlStats.getBytes(), 2908); } @Test From f8cf41dcb11f577df4f68ec039c4bd73ecde5d9f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 3 Mar 2022 11:26:38 -0800 Subject: [PATCH 02/31] Interim 2 --- .../datasketches/kll/BaseKllSketch.java | 67 +---- .../datasketches/kll/KllDoublesSketch.java | 136 ++++----- 
.../datasketches/kll/KllFloatsSketch.java | 130 ++++----- .../apache/datasketches/kll/KllHelper.java | 114 +++++++- .../apache/datasketches/kll/PreambleUtil.java | 276 ++++++++++-------- .../kll/KllDoublesSketchTest.java | 6 +- .../kll/KllDoublesValidationTest.java | 4 +- .../datasketches/kll/KllFloatsSketchTest.java | 8 +- .../kll/KllFloatsValidationTest.java | 4 +- .../datasketches/kll/KllHelperTest.java | 48 +++ .../datasketches/kll/MiscDoublesTest.java | 33 +-- .../datasketches/kll/MiscFloatsTest.java | 33 +-- 12 files changed, 429 insertions(+), 430 deletions(-) create mode 100644 src/test/java/org/apache/datasketches/kll/KllHelperTest.java diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java index 4a019b5d3..6dab9289f 100644 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java @@ -25,15 +25,12 @@ import static java.lang.Math.log; import static java.lang.Math.max; import static java.lang.Math.min; -import static java.lang.Math.pow; import static java.lang.Math.round; import static org.apache.datasketches.kll.PreambleUtil.MAX_K; import static org.apache.datasketches.kll.PreambleUtil.MIN_K; import java.util.Random; -import org.apache.datasketches.SketchesArgumentException; - abstract class BaseKllSketch { /* @@ -72,7 +69,7 @@ abstract class BaseKllSketch { * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
*/ BaseKllSketch(final int k, final int m, final boolean compatible) { - checkK(k); + KllHelper.checkK(k); k_ = k; minK_ = k; m_ = m; @@ -133,7 +130,7 @@ public long getN() { * @see KllDoublesSketch */ public double getNormalizedRankError(final boolean pmf) { - return getNormalizedRankError(minK_, pmf); + return KllHelper.getNormalizedRankError(minK_, pmf); } /** @@ -144,14 +141,9 @@ public double getNormalizedRankError(final boolean pmf) { * Otherwise, it is the "single-sided" normalized rank error for all the other queries. * @return if pmf is true, the normalized rank error for the getPMF() function. * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @see KllDoublesSketch */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials public static double getNormalizedRankError(final int k, final boolean pmf) { - return pmf - ? 2.446 / pow(k, 0.9433) - : 2.296 / pow(k, 0.9723); + return KllHelper.getNormalizedRankError(k, pmf); } /** @@ -159,7 +151,7 @@ public static double getNormalizedRankError(final int k, final boolean pmf) { * @return the number of retained items (samples) in the sketch */ public int getNumRetained() { - return levels_[numLevels_] - levels_[0]; + return KllHelper.getNumRetained(numLevels_, levels_); } /** @@ -198,55 +190,4 @@ public String toString() { */ public abstract String toString(final boolean withLevels, final boolean withData); - // Restricted Methods - - /** - * Checks the validity of the given value k - * @param k must be greater than 7 and less than 65536. 
- */ - static void checkK(final int k) { - if (k < MIN_K || k > MAX_K) { - throw new SketchesArgumentException( - "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); - } - } - - /** - * Finds the first level starting with level 0 that exceeds its nominal capacity - * @return level to compact - */ - int findLevelToCompact() { // - int level = 0; - while (true) { - assert level < numLevels_; - final int pop = levels_[level + 1] - levels_[level]; - final int cap = KllHelper.levelCapacity(k_, numLevels_, level, m_); - if (pop >= cap) { - return level; - } - level++; - } - } - - int currentLevelSize(final int level) { - if (level >= numLevels_) { return 0; } - return levels_[level + 1] - levels_[level]; - } - - int getNumRetainedAboveLevelZero() { - if (numLevels_ == 1) { return 0; } - return levels_[numLevels_] - levels_[1]; - } - - // for testing - - int[] getLevels() { - return levels_; - } - - int getNumLevels() { - return numLevels_; - } - } - diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index a0d4a6c3e..ec1e0a2ed 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -27,6 +27,7 @@ import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; @@ -50,6 +51,7 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllHelper.LevelStats; +import 
org.apache.datasketches.kll.PreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -106,13 +108,12 @@ private KllDoublesSketch(final int k, final int m, final boolean compatible) { /** * Private heapify constructor. * @param mem Memory object that contains data serialized by this sketch. + * @param memChk the MemoryCheck object */ - private KllDoublesSketch(final Memory mem) { - super(mem.getShort(K_SHORT_ADR) & 0xffff, DEFAULT_M, true); - final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; - final boolean empty = (flags & EMPTY_BIT_MASK) > 0; - final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - if (empty) { + private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { + super(memChk.k, memChk.m, true); + isLevelZeroSorted_ = memChk.level0Sorted; + if (memChk.empty) { numLevels_ = 1; levels_ = new int[] {k_, k_}; isLevelZeroSorted_ = false; @@ -120,40 +121,36 @@ private KllDoublesSketch(final Memory mem) { items_ = new double[k_]; minValue_ = Double.NaN; maxValue_ = Double.NaN; + } else if (memChk.singleItem) { + n_ = 1; + minK_ = k_; + numLevels_ = 1; + levels_ = new int[numLevels_ + 1]; //ALL + final int offset = DATA_START_ADR_SINGLE_ITEM; + final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + levels_[0] = itemCapacity - 1; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + items_ = new double[itemCapacity]; + mem.getDoubleArray(offset, items_, levels_[0], getNumRetained()); + minValue_ = items_[levels_[0]]; + maxValue_ = items_[levels_[0]]; } else { - if (singleItem) { - n_ = 1; - minK_ = k_; - numLevels_ = 1; - } else { - n_ = mem.getLong(N_LONG_ADR); - minK_ = mem.getShort(MIN_K_SHORT_ADR) & 0xffff; - numLevels_ = mem.getByte(NUM_LEVELS_BYTE_ADR) & 0xff; - } - levels_ = new int[numLevels_ + 1]; - int offset = singleItem ? 
DATA_START_ADR_SINGLE_ITEM : DATA_START_ADR_DOUBLE; + n_ = memChk.n; + minK_ = memChk.minK; + numLevels_ = memChk.numLevels; + levels_ = new int[numLevels_ + 1]; //ALL + int offset = DATA_START_ADR_DOUBLE; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - if (singleItem) { - levels_[0] = itemCapacity - 1; - } else { - // the last integer in levels_ is not serialized because it can be derived - mem.getIntArray(offset, levels_, 0, numLevels_); - offset += numLevels_ * Integer.BYTES; - } - levels_[numLevels_] = itemCapacity; - if (!singleItem) { - minValue_ = mem.getDouble(offset); - offset += Double.BYTES; - maxValue_ = mem.getDouble(offset); - offset += Double.BYTES; - } + // the last integer in levels_ is not serialized because it can be derived + mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ + offset += numLevels_ * Integer.BYTES; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + minValue_ = mem.getDouble(offset); + offset += Double.BYTES; + maxValue_ = mem.getDouble(offset); + offset += Double.BYTES; items_ = new double[itemCapacity]; mem.getDoubleArray(offset, items_, levels_[0], getNumRetained()); - if (singleItem) { - minValue_ = items_[levels_[0]]; - maxValue_ = items_[levels_[0]]; - } - isLevelZeroSorted_ = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; } } @@ -164,41 +161,14 @@ private KllDoublesSketch(final Memory mem) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, this method does all the validity checking - // then passes the verified Memory to the actual heapify constructor + //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. + //The verified Memory is then passed to the actual private heapify constructor. 
public static KllDoublesSketch heapify(final Memory mem) { - final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0xff; - final int serialVersion = mem.getByte(SER_VER_BYTE_ADR) & 0xff; - final int family = mem.getByte(FAMILY_BYTE_ADR) & 0xff; - final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; - final int m = mem.getByte(M_BYTE_ADR) & 0xff; - if (m != DEFAULT_M) { - throw new SketchesArgumentException( - "Possible corruption: M must be " + DEFAULT_M + ": " + m); - } - final boolean empty = (flags & EMPTY_BIT_MASK) > 0; - final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - if (empty || singleItem) { - if (preambleInts != PREAMBLE_INTS_EMPTY_SINGLE) { - throw new SketchesArgumentException("Possible corruption: preambleInts must be " - + PREAMBLE_INTS_EMPTY_SINGLE + " for an empty or single item sketch: " + preambleInts); - } - } else { - if (preambleInts != PREAMBLE_INTS_DOUBLE) { - throw new SketchesArgumentException("Possible corruption: preambleInts must be " - + PREAMBLE_INTS_DOUBLE + " for a sketch with more than one item: " + preambleInts); - } - } - if (serialVersion != SERIAL_VERSION_EMPTY_FULL && serialVersion != SERIAL_VERSION_SINGLE) { - throw new SketchesArgumentException( - "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION_EMPTY_FULL + " or " - + SERIAL_VERSION_SINGLE + ", got " + serialVersion); - } - if (family != Family.KLL.getID()) { - throw new SketchesArgumentException( - "Possible corruption: family mismatch: expected " + Family.KLL.getID() + ", got " + family); + final MemoryCheck memChk = new MemoryCheck(mem); + if (!memChk.doublesSketch) { + throw new SketchesArgumentException("Memory object is not a KllDoublesSketch."); } - return new KllDoublesSketch(mem); + return new KllDoublesSketch(mem, memChk); } // public functions @@ -249,15 +219,15 @@ public double getMinValue() { } /** - * Returns upper bound on the serialized size of a sketch given a parameter k and stream + * Returns upper bound 
on the compact serialized size of a sketch given a parameter k and stream * length. This method can be used if allocation of storage is necessary beforehand. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length - * @return upper bound on the serialized size + * @return upper bound on the compact serialized size */ public static int getMaxSerializedSizeBytes(final int k, final long n) { final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, true); - return lvlStats.getBytes(); + return lvlStats.getCompactBytes(); } /** @@ -333,7 +303,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + getNormalizedRankError(minK_, false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(minK_, false))); } /** @@ -344,7 +314,7 @@ public double getQuantileUpperBound(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - getNormalizedRankError(minK_, false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(minK_, false))); } /** @@ -497,7 +467,8 @@ public byte[] toByteArray() { final byte flags = (byte) ( (empty ? EMPTY_BIT_MASK : 0) | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | (singleItem ? SINGLE_ITEM_BIT_MASK : 0)); + | (singleItem ? 
SINGLE_ITEM_BIT_MASK : 0) + | DOUBLES_SKETCH_BIT_MASK); wmem.putByte(FLAGS_BYTE_ADR, flags); wmem.putShort(K_SHORT_ADR, (short) k_); wmem.putByte(M_BYTE_ADR, (byte) m_); @@ -527,7 +498,7 @@ public String toString(final boolean withLevels, final boolean withData) { final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); - sb.append(Util.LS).append("### KLL sketch summary:").append(Util.LS); + sb.append(Util.LS).append("### KLL Doubles Sketch summary:").append(Util.LS); sb.append(" K : ").append(k_).append(Util.LS); sb.append(" min K : ").append(minK_).append(Util.LS); sb.append(" M : ").append(m_).append(Util.LS); @@ -551,7 +522,7 @@ public String toString(final boolean withLevels, final boolean withData) { for (int i = 0; i < numLevels_; i++) { sb.append(" ").append(i).append(", ").append(levels_[i]).append(": ") .append(KllHelper.levelCapacity(k_, numLevels_, i, m_)) - .append(", ").append(currentLevelSize(i)).append(Util.LS); + .append(", ").append(KllHelper.currentLevelSize(i, numLevels_, levels_)).append(Util.LS); } sb.append("### End sketch levels").append(Util.LS); } @@ -686,7 +657,7 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, // The following code is only valid in the special case of exactly reaching capacity while updating. // It cannot be used while merging, while reducing k, or anything else. private void compressWhileUpdating() { - final int level = findLevelToCompact(); + final int level = KllHelper.findLevelToCompact(k_, m_, numLevels_, levels_); // It is important to do add the new top level right here. 
Be aware that this operation // grows the buffer and shifts the data and also the boundaries of the data and grows the @@ -780,7 +751,8 @@ private void sortLevelZero() { } private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) { - final int tmpSpaceNeeded = getNumRetained() + other.getNumRetainedAboveLevelZero(); + final int tmpSpaceNeeded = getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(other.numLevels_, other.levels_); final double[] workbuf = new double[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] worklevels = new int[ub + 2]; // ub+1 does not work @@ -822,13 +794,13 @@ private void populateWorkArrays(final KllDoublesSketch other, final double[] wor worklevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = currentLevelSize(0); + final int selfPopZero = KllHelper.currentLevelSize(0, numLevels_, levels_); System.arraycopy(items_, levels_[0], workbuf, worklevels[0], selfPopZero); worklevels[1] = worklevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = currentLevelSize(lvl); - final int otherPop = other.currentLevelSize(lvl); + final int selfPop = KllHelper.currentLevelSize(lvl, numLevels_, levels_); + final int otherPop = KllHelper.currentLevelSize(lvl, other.numLevels_, other.levels_); worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; if (selfPop > 0 && otherPop == 0) { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index c8e928af3..f0aa48e5b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -50,9 +50,11 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllHelper.LevelStats; +import 
org.apache.datasketches.kll.PreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; + /** * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} @@ -106,13 +108,12 @@ private KllFloatsSketch(final int k, final int m, final boolean compatible) { /** * Private heapify constructor. * @param mem Memory object that contains data serialized by this sketch. + * @param memChk the MemoryCheck object */ - private KllFloatsSketch(final Memory mem) { - super(mem.getShort(K_SHORT_ADR) & 0xffff, DEFAULT_M, true); - final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; - final boolean empty = (flags & EMPTY_BIT_MASK) > 0; - final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - if (empty) { + private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { + super(memChk.k, memChk.m, true); + isLevelZeroSorted_ = memChk.level0Sorted; + if (memChk.empty) { numLevels_ = 1; levels_ = new int[] {k_, k_}; isLevelZeroSorted_ = false; @@ -120,40 +121,36 @@ private KllFloatsSketch(final Memory mem) { items_ = new float[k_]; minValue_ = Float.NaN; maxValue_ = Float.NaN; + } else if (memChk.singleItem) { + n_ = 1; + minK_ = k_; + numLevels_ = 1; + levels_ = new int[numLevels_ + 1]; //ALL + final int offset = DATA_START_ADR_SINGLE_ITEM; + final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + levels_[0] = itemCapacity - 1; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + items_ = new float[itemCapacity]; + mem.getFloatArray(offset, items_, levels_[0], getNumRetained()); + minValue_ = items_[levels_[0]]; + maxValue_ = items_[levels_[0]]; } else { - if (singleItem) { - n_ = 1; - minK_ = k_; - numLevels_ = 1; - } else { - n_ = mem.getLong(N_LONG_ADR); - minK_ = mem.getShort(MIN_K_SHORT_ADR) & 0xffff; - numLevels_ = mem.getByte(NUM_LEVELS_BYTE_ADR) & 0xff; - } - levels_ = new int[numLevels_ + 1]; - int offset = singleItem ? 
DATA_START_ADR_SINGLE_ITEM : DATA_START_ADR_FLOAT; + n_ = memChk.n; + minK_ = memChk.minK; + numLevels_ = memChk.numLevels; + levels_ = new int[numLevels_ + 1]; //ALL + int offset = DATA_START_ADR_FLOAT; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - if (singleItem) { - levels_[0] = itemCapacity - 1; - } else { - // the last integer in levels_ is not serialized because it can be derived - mem.getIntArray(offset, levels_, 0, numLevels_); - offset += numLevels_ * Integer.BYTES; - } - levels_[numLevels_] = itemCapacity; - if (!singleItem) { - minValue_ = mem.getFloat(offset); - offset += Float.BYTES; - maxValue_ = mem.getFloat(offset); - offset += Float.BYTES; - } + // the last integer in levels_ is not serialized because it can be derived + mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ + offset += numLevels_ * Integer.BYTES; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + minValue_ = mem.getFloat(offset); + offset += Float.BYTES; + maxValue_ = mem.getFloat(offset); + offset += Float.BYTES; items_ = new float[itemCapacity]; mem.getFloatArray(offset, items_, levels_[0], getNumRetained()); - if (singleItem) { - minValue_ = items_[levels_[0]]; - maxValue_ = items_[levels_[0]]; - } - isLevelZeroSorted_ = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; } } @@ -164,41 +161,14 @@ private KllFloatsSketch(final Memory mem) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, this method does all the validity checking - // then passes the verified Memory to the actual heapify constructor + //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. + //The verified Memory is then passed to the actual private heapify constructor. 
public static KllFloatsSketch heapify(final Memory mem) { - final int preambleInts = mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0xff; - final int serialVersion = mem.getByte(SER_VER_BYTE_ADR) & 0xff; - final int family = mem.getByte(FAMILY_BYTE_ADR) & 0xff; - final int flags = mem.getByte(FLAGS_BYTE_ADR) & 0xff; - final int m = mem.getByte(M_BYTE_ADR) & 0xff; - if (m != DEFAULT_M) { - throw new SketchesArgumentException( - "Possible corruption: M must be " + DEFAULT_M + ": " + m); + final MemoryCheck memChk = new MemoryCheck(mem); + if (memChk.doublesSketch) { + throw new SketchesArgumentException("Memory object is not a KllFloatsSketch."); } - final boolean empty = (flags & EMPTY_BIT_MASK) > 0; - final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - if (empty || singleItem) { - if (preambleInts != PREAMBLE_INTS_EMPTY_SINGLE) { - throw new SketchesArgumentException("Possible corruption: preambleInts must be " - + PREAMBLE_INTS_EMPTY_SINGLE + " for an empty or single item sketch: " + preambleInts); - } - } else { - if (preambleInts != PREAMBLE_INTS_FLOAT) { - throw new SketchesArgumentException("Possible corruption: preambleInts must be " - + PREAMBLE_INTS_FLOAT + " for a sketch with more than one item: " + preambleInts); - } - } - if (serialVersion != SERIAL_VERSION_EMPTY_FULL && serialVersion != SERIAL_VERSION_SINGLE) { - throw new SketchesArgumentException( - "Possible corruption: serial version mismatch: expected " + SERIAL_VERSION_EMPTY_FULL + " or " - + SERIAL_VERSION_SINGLE + ", got " + serialVersion); - } - if (family != Family.KLL.getID()) { - throw new SketchesArgumentException( - "Possible corruption: family mismatch: expected " + Family.KLL.getID() + ", got " + family); - } - return new KllFloatsSketch(mem); + return new KllFloatsSketch(mem, memChk); } // public functions @@ -257,7 +227,7 @@ public float getMinValue() { */ public static int getMaxSerializedSizeBytes(final int k, final long n) { final LevelStats lvlStats = getAllLevelStatsGivenN(k, 
DEFAULT_M, n, false, false, false); - return lvlStats.getBytes(); + return lvlStats.getCompactBytes(); } /** @@ -333,7 +303,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + getNormalizedRankError(minK_, false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(minK_, false))); } /** @@ -344,7 +314,7 @@ public float getQuantileUpperBound(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - getNormalizedRankError(minK_, false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(minK_, false))); } /** @@ -498,6 +468,7 @@ public byte[] toByteArray() { (empty ? EMPTY_BIT_MASK : 0) | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | (singleItem ? 
SINGLE_ITEM_BIT_MASK : 0)); + wmem.putByte(FLAGS_BYTE_ADR, flags); wmem.putShort(K_SHORT_ADR, (short) k_); wmem.putByte(M_BYTE_ADR, (byte) m_); @@ -527,7 +498,7 @@ public String toString(final boolean withLevels, final boolean withData) { final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); - sb.append(Util.LS).append("### KLL sketch summary:").append(Util.LS); + sb.append(Util.LS).append("### KLL Floats Sketch summary:").append(Util.LS); sb.append(" K : ").append(k_).append(Util.LS); sb.append(" min K : ").append(minK_).append(Util.LS); sb.append(" M : ").append(m_).append(Util.LS); @@ -551,7 +522,7 @@ public String toString(final boolean withLevels, final boolean withData) { for (int i = 0; i < numLevels_; i++) { sb.append(" ").append(i).append(", ").append(levels_[i]).append(": ") .append(KllHelper.levelCapacity(k_, numLevels_, i, m_)) - .append(", ").append(currentLevelSize(i)).append(Util.LS); + .append(", ").append(KllHelper.currentLevelSize(i, numLevels_, levels_)).append(Util.LS); } sb.append("### End sketch levels").append(Util.LS); } @@ -686,7 +657,7 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, // The following code is only valid in the special case of exactly reaching capacity while updating. // It cannot be used while merging, while reducing k, or anything else. private void compressWhileUpdating() { - final int level = findLevelToCompact(); + final int level = KllHelper.findLevelToCompact(k_, m_, numLevels_, levels_); // It is important to do add the new top level right here. 
Be aware that this operation // grows the buffer and shifts the data and also the boundaries of the data and grows the @@ -780,7 +751,8 @@ private void sortLevelZero() { } private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) { - final int tmpSpaceNeeded = getNumRetained() + other.getNumRetainedAboveLevelZero(); + final int tmpSpaceNeeded = getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(other.numLevels_, other.levels_); final float[] workbuf = new float[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] worklevels = new int[ub + 2]; // ub+1 does not work @@ -822,13 +794,13 @@ private void populateWorkArrays(final KllFloatsSketch other, final float[] workb worklevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = currentLevelSize(0); + final int selfPopZero = KllHelper.currentLevelSize(0, numLevels_, levels_); System.arraycopy(items_, levels_[0], workbuf, worklevels[0], selfPopZero); worklevels[1] = worklevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = currentLevelSize(lvl); - final int otherPop = other.currentLevelSize(lvl); + final int selfPop = KllHelper.currentLevelSize(lvl, numLevels_, levels_); + final int otherPop = KllHelper.currentLevelSize(lvl, other.numLevels_, other.levels_); worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; if (selfPop > 0 && otherPop == 0) { diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 9cb1123e2..891ed3c97 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -19,10 +19,15 @@ package org.apache.datasketches.kll; +import static java.lang.Math.pow; import static org.apache.datasketches.Util.floorPowerOf2; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; 
import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.PreambleUtil.MAX_K; +import static org.apache.datasketches.kll.PreambleUtil.MIN_K; + +import org.apache.datasketches.SketchesArgumentException; class KllHelper { static final String LS = System.getProperty("line.separator"); @@ -51,7 +56,7 @@ static int ubOnNumLevels(final long n) { return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); } - public static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, + static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, final boolean printDetail, final boolean printSummaries, final boolean isDouble) { long cumN; int numLevels = 0; @@ -81,52 +86,56 @@ static LevelStats getLevelStats(final int k, final int m, final int numLevels, System.out.printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); } } - final int bytes = getCompactSerializedSizeBytes(numLevels, cumCap, isDouble); + final int compactBytes = getCompactSerializedSizeBytes(numLevels, cumCap, isDouble); + final int updatableBytes = getUpdatableSerializedSizeBytes(k, m, numLevels, isDouble); if (printDetail) { System.out.printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); - System.out.println(" TOTAL BYTES: " + bytes); + System.out.println(" COMPACT BYTES: " + compactBytes); + System.out.println(" UPDATABLE BYTES: " + updatableBytes); System.out.println(""); } - final LevelStats lvlStats = new LevelStats(cumN, bytes, numLevels, cumCap); + final LevelStats lvlStats = new LevelStats(cumN, numLevels, cumCap, compactBytes, updatableBytes); if (printSummary) { System.out.println(lvlStats.toString()); } return lvlStats; } - public static class LevelStats { + static class LevelStats { private long maxN; - private int bytes; + private int compactBytes; + private int updatableBytes; private int numLevels; 
private int maxCap; - LevelStats(final long maxN, final int bytes, final int numLevels, final int maxCap) { + LevelStats(final long maxN, final int numLevels, final int maxCap, final int compactBytes, + final int updatableBytes) { this.maxN = maxN; - this.bytes = bytes; this.numLevels = numLevels; this.maxCap = maxCap; + this.compactBytes = compactBytes; + this.updatableBytes = updatableBytes; + } @Override public String toString() { + final String[] hdr = {"NumLevels", "MaxCap", "MaxN", "TotCompactBytes", "TotUpdatableBytes"}; final StringBuilder sb = new StringBuilder(); sb.append("Level Stats Summary:" + LS); - sb.append(" NumLevels: " + numLevels + LS); - sb.append(" MaxCap : " + maxCap + LS); - sb.append(" MaxN : " + maxN + LS); - sb.append(" TotBytes : " + bytes + LS + LS); + sb.append(String.format("%10s %10s %14s %17s %17s" + LS, (Object[]) hdr)); + sb.append(String.format("%10d %10d %14d %17d %17d" + LS, numLevels, maxCap, maxN, compactBytes, updatableBytes)); return sb.toString(); } public long getMaxN() { return maxN; } - public int getBytes() { return bytes; } + public int getCompactBytes() { return compactBytes; } public int getNumLevels() { return numLevels; } public int getMaxCap() { return maxCap; } } - static int getCompactSerializedSizeBytes(final int numLevels, final int numRetained, - final boolean isDouble) { + static int getCompactSerializedSizeBytes(final int numLevels, final int numRetained, final boolean isDouble) { if (numLevels == 1 && numRetained == 1) { return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); } @@ -139,6 +148,18 @@ static int getCompactSerializedSizeBytes(final int numLevels, final int numRetai } } + static int getUpdatableSerializedSizeBytes(final int k, final int m, final int numLevels, final boolean isDouble) { + //There are no special accommodations for empty or single item. + //The last integer in levels IS serialized. 
+ // The + 2 is for min and max + final int totCap = computeTotalItemCapacity(k, m, numLevels) + 2; + if (isDouble) { + return DATA_START_ADR_DOUBLE + (numLevels + 1) * Integer.BYTES + totCap * Double.BYTES; + } else { + return DATA_START_ADR_FLOAT + (numLevels + 1) * Integer.BYTES + totCap * Float.BYTES; + } + } + /** * Returns the maximum number of items that this sketch can handle * @param k The sizing / accuracy parameter of the sketch in items. @@ -225,5 +246,68 @@ static long sumTheSampleWeights(final int num_levels, final int[] levels) { return total; } + /** + * Gets the normalized rank error given k and pmf. + * Static method version of the getNormalizedRankError(boolean). + * @param k the configuration parameter + * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * @return if pmf is true, the normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * @see KllDoublesSketch + */ + // constants were derived as the best fit to 99 percentile empirically measured max error in + // thousands of trials + static double getNormalizedRankError(final int k, final boolean pmf) { + return pmf + ? 2.446 / pow(k, 0.9433) + : 2.296 / pow(k, 0.9723); + } + + /** + * Checks the validity of the given value k + * @param k must be greater than 7 and less than 65536. + */ + static void checkK(final int k) { + if (k < MIN_K || k > MAX_K) { + throw new SketchesArgumentException( + "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); + } + } + + /** + * Finds the first level starting with level 0 that exceeds its nominal capacity + * @param k configured size of sketch. Range [m, 2^16] + * @param m minimum level size. Default is 8. 
+ * @param numLevels one-based number of current levels + * @return level to compact + */ + static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { + int level = 0; + while (true) { + assert level < numLevels; + final int pop = levels[level + 1] - levels[level]; + final int cap = KllHelper.levelCapacity(k, numLevels, level, m); + if (pop >= cap) { + return level; + } + level++; + } + } + + static int currentLevelSize(final int level, final int numLevels, final int[] levels) { + if (level >= numLevels) { return 0; } + return levels[level + 1] - levels[level]; + } + + static int getNumRetained(final int numLevels, final int[] levels) { + return levels[numLevels] - levels[0]; + } + + static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) { + return levels[numLevels] - levels[1]; + } + + } diff --git a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java index c2211db95..3d9314b9c 100644 --- a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java @@ -133,7 +133,7 @@ private PreambleUtil() {} static final int LEVEL_ZERO_SORTED_BIT_MASK = 2; static final int SINGLE_ITEM_BIT_MASK = 4; static final int DOUBLES_SKETCH_BIT_MASK = 8; - static final int NOT_COMPACT_BIT_MASK = 16; + static final int UPDATABLE_BIT_MASK = 16; enum Layout { FLOAT_FULL_COMPACT, FLOAT_EMPTY_COMPACT, FLOAT_SINGLE_COMPACT, @@ -141,8 +141,6 @@ enum Layout { FLOAT_FULL_NOT_COMPACT, FLOAT_EMPTY_NOT_COMPACT, FLOAT_SINGLE_NOT_COMPACT, DOUBLE_FULL_NOT_COMPACT, DOUBLE_EMPTY_NOT_COMPACT, DOUBLE_SINGLE_NOT_COMPACT } - static Layout layout; - /** * Returns a human readable string summary of the internal state of the given byte array. * Used primarily in testing. 
@@ -167,144 +165,170 @@ static String toString(final Memory mem) { } - @SuppressWarnings("unused") - private static String memoryToString(final Memory srcMem) { - final int preInts = extractPreInts(srcMem); - final int serVer = extractSerVer(srcMem); - final int familyID = extractFamilyID(srcMem); - final String famName = idToFamily(familyID).toString(); - final int flags = extractFlags(srcMem); - final boolean empty = (flags & EMPTY_BIT_MASK) > 0; - final boolean level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; - final boolean singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - final boolean doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; - final boolean notCompact = (flags & NOT_COMPACT_BIT_MASK) > 0; - final int k = extractK(srcMem); - final int m = extractM(srcMem); - long n = 0; - final int minK; - final int numLevels; - - //preamble checks - if (familyID != Family.KLL.getID()) { throwCustom(0, familyID); } - final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0); - if ((checkFlags & 5) == 5) { throwCustom(20, flags); } - switch (checkFlags) { - case 0: { //not empty, not single item, float full - if (preInts != PREAMBLE_INTS_FLOAT) { throwCustom(6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - layout = notCompact ? 
Layout.FLOAT_FULL_NOT_COMPACT : Layout.FLOAT_FULL_COMPACT; - n = extractN(srcMem); - minK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); - break; - } - case 1: { //empty, not single item, float empty - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - if (notCompact) { - layout = Layout.FLOAT_EMPTY_NOT_COMPACT; + + + + + static class MemoryCheck { + final int preInts; // = extractPreInts(srcMem); + final int serVer; + final int familyID; + final String famName; + final int flags; + final boolean empty; + final boolean level0Sorted; + final boolean singleItem; + final boolean doublesSketch; + final boolean updatable; + final int k; + final int m; + long n; + int minK; + int dataStart; + int numLevels; + int[] levels; + Layout layout; + + MemoryCheck(final Memory srcMem) { + preInts = extractPreInts(srcMem); + serVer = extractSerVer(srcMem); + familyID = extractFamilyID(srcMem); + flags = extractFlags(srcMem); + empty = (flags & EMPTY_BIT_MASK) > 0; + level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; + singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; + doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; + updatable = (flags & UPDATABLE_BIT_MASK) > 0; + k = extractK(srcMem); + m = extractM(srcMem); + + KllHelper.checkK(k); + if (m != 8) { System.err.println("WARNING: Minimum Level width set to non-default value: " + m); } + if (familyID != Family.KLL.getID()) { throwCustom(0, familyID); } + famName = idToFamily(familyID).toString(); + if (famName != "KLL") { throwCustom(23, 0); } + + final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); + if ((checkFlags & 5) == 5) { throwCustom(20, flags); } + + switch (checkFlags) { + case 0: { //not empty, not single item, float full + if (preInts != PREAMBLE_INTS_FLOAT) { throwCustom(6, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + layout = updatable ? Layout.FLOAT_FULL_NOT_COMPACT : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); - if (n != 0) { throwCustom(21, (int) n); } minK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); - } else { - layout = Layout.FLOAT_EMPTY_COMPACT; - n = 0; - minK = k; - numLevels = 1; + dataStart = DATA_START_ADR_FLOAT; + break; } - break; - } - case 4: { //not empty, single item, float single item - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } - if (notCompact) { - layout = Layout.FLOAT_SINGLE_NOT_COMPACT; - n = extractN(srcMem); - if (n != 1) { throwCustom(22, (int)n); } - minK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); - } else { - layout = Layout.FLOAT_SINGLE_COMPACT; - n = 1; - minK = k; - numLevels = 1; + case 1: { //empty, not single item, float empty + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (updatable) { + layout = Layout.FLOAT_EMPTY_NOT_COMPACT; + n = extractN(srcMem); + if (n != 0) { throwCustom(21, (int) n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + dataStart = DATA_START_ADR_FLOAT; + } else { + layout = Layout.FLOAT_EMPTY_COMPACT; + n = 0; + minK = k; + numLevels = 1; + dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty + } + break; } - break; - } - case 8: { //not empty, not single item, double full - if (preInts != PREAMBLE_INTS_DOUBLE) { throwCustom(5, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - layout = notCompact ? 
Layout.DOUBLE_FULL_NOT_COMPACT : Layout.DOUBLE_FULL_COMPACT; - n = extractN(srcMem); - minK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); - break; - } - case 9: { //empty, not single item, double empty - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - if (notCompact) { - layout = Layout.DOUBLE_EMPTY_NOT_COMPACT; - n = extractN(srcMem); - if (n != 0) { throwCustom(21, (int) n); } - minK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); - } else { - layout = Layout.DOUBLE_EMPTY_COMPACT; - n = 0; - minK = k; - numLevels = 1; + case 4: { //not empty, single item, float single item + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (updatable) { + layout = Layout.FLOAT_SINGLE_NOT_COMPACT; + n = extractN(srcMem); + if (n != 1) { throwCustom(22, (int)n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + dataStart = DATA_START_ADR_FLOAT; + } else { + layout = Layout.FLOAT_SINGLE_COMPACT; + n = 1; + minK = k; + numLevels = 1; + dataStart = DATA_START_ADR_SINGLE_ITEM; + } + break; } - break; - } - case 12: { //not empty, single item, double single item - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } - if (notCompact) { - layout = Layout.DOUBLE_SINGLE_NOT_COMPACT; + case 8: { //not empty, not single item, double full + if (preInts != PREAMBLE_INTS_DOUBLE) { throwCustom(5, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + layout = updatable ? 
Layout.DOUBLE_FULL_NOT_COMPACT : Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); - if (n != 1) { throwCustom(22, (int)n); } minK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); - } else { - layout = Layout.DOUBLE_SINGLE_COMPACT; - n = 1; - minK = k; - numLevels = 1; + dataStart = DATA_START_ADR_DOUBLE; + break; + } + case 9: { //empty, not single item, double empty + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (updatable) { + layout = Layout.DOUBLE_EMPTY_NOT_COMPACT; + n = extractN(srcMem); + if (n != 0) { throwCustom(21, (int) n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + dataStart = DATA_START_ADR_DOUBLE; + } else { + layout = Layout.DOUBLE_EMPTY_COMPACT; + n = 0; + minK = k; + numLevels = 1; + dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty + } + break; + } + case 12: { //not empty, single item, double single item + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (updatable) { + layout = Layout.DOUBLE_SINGLE_NOT_COMPACT; + n = extractN(srcMem); + if (n != 1) { throwCustom(22, (int)n); } + minK = extractMinK(srcMem); + numLevels = extractNumLevels(srcMem); + dataStart = DATA_START_ADR_DOUBLE; + } else { + layout = Layout.DOUBLE_SINGLE_COMPACT; + n = 1; + minK = k; + numLevels = 1; + dataStart = DATA_START_ADR_SINGLE_ITEM; + } + break; } - break; } } - BaseKllSketch.checkK(k); - - if (m != 8) { - System.err.println("WARNING: Minimum Level width set to non-default value: " + m); + private static void throwCustom(final int errNo, final int value) { + String msg = ""; + switch (errNo) { + case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; + case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case 2: msg = "Empty Bit: 1 -> 
SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; + case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; + case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; + case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; + case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; + case 21: msg = "N != 0 and empty bit is set. N: " + value; break; + case 22: msg = "N != 1 and single item bit is set. N: " + value; break; + case 23: msg = "Family name is not KLL"; break; + } + throw new SketchesArgumentException(msg); } - - - return null; - } - -private static void throwCustom(final int errNo, final int value) { - String msg = ""; - switch (errNo) { - case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; - case 21: msg = "N != 0 and empty bit is set. N: " + value; break; - case 22: msg = "N != 1 and single item bit is set. 
N: " + value; break; - } - throw new SketchesArgumentException(msg); -} static int extractPreInts(final Memory mem) { return mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0XFF; diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 860e175b9..f21024240 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -268,13 +268,13 @@ public void mergeMinMinValueFromOther() { @Test public void mergeMinAndMaxFromOther() { final KllDoublesSketch sketch1 = new KllDoublesSketch(); - for (int i = 0; i < 1000000; i++) { + for (int i = 1; i <= 1_000_000; i++) { sketch1.update(i); } final KllDoublesSketch sketch2 = new KllDoublesSketch(); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 0F); - assertEquals(sketch2.getMaxValue(), 999999F); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 1_000_000F); } @SuppressWarnings("unused") diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java index ec1087d70..32c5ebb4c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java @@ -215,9 +215,9 @@ public void checkTestResults() { for (int i = 0; i < n; i++) { sketch.update(inputArray[i]); } - int numLevels = sketch.getNumLevels(); + int numLevels = sketch.numLevels_; int numSamples = sketch.getNumRetained(); - int[] levels = sketch.getLevels(); + int[] levels = sketch.levels_; long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java 
b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index fa1fd7b47..6c9b48519 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -268,13 +268,13 @@ public void mergeMinMinValueFromOther() { @Test public void mergeMinAndMaxFromOther() { final KllFloatsSketch sketch1 = new KllFloatsSketch(); - for (int i = 0; i < 1000000; i++) { + for (int i = 1; i <= 1_000_000; i++) { sketch1.update(i); } - final KllFloatsSketch sketch2 = new KllFloatsSketch(); + final KllFloatsSketch sketch2 = new KllFloatsSketch(10); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 0F); - assertEquals(sketch2.getMaxValue(), 999999F); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 1_000_000F); } @SuppressWarnings("unused") diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java index 71de641ed..2b57766c4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java @@ -216,9 +216,9 @@ public void checkTestResults() { for (int i = 0; i < n; i++) { sketch.update(inputArray[i]); } - int numLevels = sketch.getNumLevels(); + int numLevels = sketch.numLevels_; int numSamples = sketch.getNumRetained(); - int[] levels = sketch.getLevels(); + int[] levels = sketch.levels_; long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java new file mode 100644 index 000000000..4e56f86d1 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.KllHelper.getLevelStats; +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.kll.KllHelper.LevelStats; +import org.testng.annotations.Test; + +public class KllHelperTest { + + @Test //convert two false below to true for visual checking + public void testGetAllLevelStats() { + long n = 1L << 30; + int k = 200; + int m = 8; + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, true); + assertEquals(lvlStats.getCompactBytes(), 5708); + } + + @Test //convert two false below to true for visual checking + public void getStatsAtNumLevels() { + int k = 200; + int m = 8; + int numLevels = 23; + LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, true); + assertEquals(lvlStats.getCompactBytes(), 5708); + } +} diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index a02d7282a..17e4e3db0 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ 
b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; -import static org.apache.datasketches.kll.KllHelper.getLevelStats; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -28,7 +26,6 @@ import java.util.Objects; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -42,8 +39,8 @@ public class MiscDoublesTest { @Test public void checkGetKFromEps() { final int k = DEFAULT_K; - final double eps = BaseKllSketch.getNormalizedRankError(k, false); - final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); + final double eps = KllHelper.getNormalizedRankError(k, false); + final double epsPmf = KllHelper.getNormalizedRankError(k, true); final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); assertEquals(kEps, k); @@ -67,11 +64,11 @@ public void checkBounds() { println("LB : " + lb); } - @Test(expectedExceptions = SketchesArgumentException.class) + @Test public void checkHeapifyExceptions1() { KllDoublesSketch sk = new KllDoublesSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt M + wmem.putByte(6, (byte)4); //use different M: produces a warning KllDoublesSketch.heapify(wmem); } @@ -119,9 +116,9 @@ public void checkMisc() { sk.toByteArray(); final double[] items = sk.getItems(); assertEquals(items.length, 16); - final int[] levels = sk.getLevels(); + final int[] levels = sk.levels_; assertEquals(levels.length, 3); - assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.numLevels_, 2); } @Test //enable static println(..) 
for visual checking @@ -139,24 +136,6 @@ public void visualCheckToString() { println(LS + s2); } - @Test //convert false to true below for visual checking - public void testGetAllLevelStats() { - long n = 1L << 30; - int k = 200; - int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, true); - assertEquals(lvlStats.getBytes(), 5708); - } - - @Test //convert false to true below for visual checking - public void getStatsAtNumLevels() { - int k = 200; - int m = 8; - int numLevels = 23; - LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, true); - assertEquals(lvlStats.getBytes(), 5708); - } - @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 92b54a326..77d5a5a79 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllHelper.getLevelStats; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -28,7 +26,6 @@ import java.util.Objects; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -42,8 +39,8 @@ public class MiscFloatsTest { @Test public void checkGetKFromEps() { final int k = DEFAULT_K; - final double eps = BaseKllSketch.getNormalizedRankError(k, false); - final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); + final double eps = KllHelper.getNormalizedRankError(k, false); + final double epsPmf = 
KllHelper.getNormalizedRankError(k, true); final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); assertEquals(kEps, k); @@ -67,11 +64,11 @@ public void checkBounds() { println("LB : " + lb); } - @Test(expectedExceptions = SketchesArgumentException.class) + @Test public void checkHeapifyExceptions1() { KllFloatsSketch sk = new KllFloatsSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt M + wmem.putByte(6, (byte)4); //use different M: produces a warning KllFloatsSketch.heapify(wmem); } @@ -119,9 +116,9 @@ public void checkMisc() { sk.toByteArray(); final float[] items = sk.getItems(); assertEquals(items.length, 16); - final int[] levels = sk.getLevels(); + final int[] levels = sk.levels_; assertEquals(levels.length, 3); - assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.numLevels_, 2); } @Test //enable static println(..) for visual checking @@ -139,24 +136,6 @@ public void visualCheckToString() { println(LS + s2); } - @Test //convert false to true below for visual checking - public void testGetAllLevelStats() { - long n = 1L << 30; - int k = 200; - int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, false); - assertEquals(lvlStats.getBytes(), 2908); - } - - @Test //convert false to true below for visual checking - public void getStatsAtNumLevels() { - int k = 200; - int m = 8; - int numLevels = 23; - LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, false); - assertEquals(lvlStats.getBytes(), 2908); - } - @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); From 032a9a6c53397c88ea0c6e3c2866df12f5f3f08b Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 3 Mar 2022 21:28:56 -0800 Subject: [PATCH 03/31] Interim 3 --- .../datasketches/kll/BaseKllSketch.java | 5 + .../datasketches/kll/KllDoublesSketch.java | 91 ++++++++++++---- 
.../datasketches/kll/KllFloatsSketch.java | 101 +++++++++++++----- .../apache/datasketches/kll/KllHelper.java | 33 +++--- .../apache/datasketches/kll/PreambleUtil.java | 20 ++-- .../kll/KllDoublesSketchIteratorTest.java | 2 - .../kll/KllFloatsSketchIteratorTest.java | 1 - .../datasketches/kll/KllHelperTest.java | 65 +++++++++++ .../datasketches/kll/MiscDoublesTest.java | 4 +- .../datasketches/kll/MiscFloatsTest.java | 4 +- 10 files changed, 244 insertions(+), 82 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java index 6dab9289f..e61398279 100644 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java @@ -176,6 +176,11 @@ public boolean isEstimationMode() { */ public abstract byte[] toByteArray(); + /** + * Returns serialized sketch in an updatable byte array form. + * @return serialized sketch in an updatable byte array form. 
+ */ + public abstract byte[] toUpdatableByteArray(); @Override public String toString() { diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index ec1e0a2ed..a95cc7215 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -44,6 +44,7 @@ import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; import java.util.Arrays; @@ -65,6 +66,7 @@ public class KllDoublesSketch extends BaseKllSketch { private double[] items_; // the continuous array of double items private double minValue_; private double maxValue_; + private static final boolean IS_DOUBLE = true; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. 
@@ -125,26 +127,31 @@ private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { n_ = 1; minK_ = k_; numLevels_ = 1; - levels_ = new int[numLevels_ + 1]; //ALL - final int offset = DATA_START_ADR_SINGLE_ITEM; + levels_ = new int[numLevels_ + 1]; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); levels_[0] = itemCapacity - 1; levels_[numLevels_] = itemCapacity; //load the last integer in levels_ items_ = new double[itemCapacity]; - mem.getDoubleArray(offset, items_, levels_[0], getNumRetained()); + items_[levels_[0]] = mem.getDouble(DATA_START_ADR_SINGLE_ITEM); minValue_ = items_[levels_[0]]; maxValue_ = items_[levels_[0]]; } else { n_ = memChk.n; minK_ = memChk.minK; numLevels_ = memChk.numLevels; - levels_ = new int[numLevels_ + 1]; //ALL + levels_ = new int[numLevels_ + 1]; int offset = DATA_START_ADR_DOUBLE; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - // the last integer in levels_ is not serialized because it can be derived - mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ - offset += numLevels_ * Integer.BYTES; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + if (memChk.updatable) { + // If updatable the last integer in levels_ IS serialized. + mem.getIntArray(offset, levels_, 0, numLevels_ + 1); //load levels_ + offset += (numLevels_ + 1) * Integer.BYTES; + } else { + // If compact the last integer in levels_ is not serialized. + mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ + offset += numLevels_ * Integer.BYTES; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + } minValue_ = mem.getDouble(offset); offset += Double.BYTES; maxValue_ = mem.getDouble(offset); @@ -218,6 +225,8 @@ public double getMinValue() { return minValue_; } + //Size related + /** * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream * length. 
This method can be used if allocation of storage is necessary beforehand. @@ -226,7 +235,7 @@ public double getMinValue() { * @return upper bound on the compact serialized size */ public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, true); + final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, IS_DOUBLE); return lvlStats.getCompactBytes(); } @@ -236,7 +245,25 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { */ public int getCurrentCompactSerializedSizeBytes() { if (isEmpty()) { return N_LONG_ADR; } - return KllHelper.getCompactSerializedSizeBytes(numLevels_, getNumRetained(), true); + return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); + } + + /** + * Returns the current number of bytes this sketch would require to store in updatable form. + * @return the current number of bytes this sketch would require to store in updatable form. + */ + public int getCurrentUpdatableSerializedSizeBytes() { + return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); + } + + /** + * Returns the number of bytes this sketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); } /** @@ -408,16 +435,6 @@ public double getRank(final double value) { return (double) total / n_; } - /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. 
- * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * @return the iterator for this class */ @@ -493,6 +510,40 @@ public byte[] toByteArray() { return bytes; } + @Override + public byte[] toUpdatableByteArray() { + final int itemCap = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + final int numBytes = KllHelper.getSerializedSizeBytes(numLevels_, itemCap, IS_DOUBLE, true); + final byte[] bytes = new byte[numBytes]; + final WritableMemory wmem = WritableMemory.writableWrap(bytes); + //load the preamble + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_DOUBLE); + wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); + final byte flags = (byte) + ((isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | DOUBLES_SKETCH_BIT_MASK + | UPDATABLE_BIT_MASK); + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) k_); + wmem.putByte(M_BYTE_ADR, (byte) m_); + //load data + wmem.putLong(N_LONG_ADR, n_); + wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + int offset = DATA_START_ADR_DOUBLE; + // the last integer in levels_ IS serialized + final int len = levels_.length; + wmem.putIntArray(offset, levels_, 0, len); + offset += len * Integer.BYTES; + wmem.putDouble(offset, minValue_); + offset += Double.BYTES; + wmem.putDouble(offset, maxValue_); + offset += Double.BYTES; + wmem.putDoubleArray(offset, items_, levels_[0], getNumRetained()); + return bytes; + } + @Override public String toString(final boolean withLevels, final boolean withData) { final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 
f0aa48e5b..fc1059110 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -43,6 +43,7 @@ import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; import java.util.Arrays; @@ -54,7 +55,7 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; - +// (leave blank) /** * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} @@ -65,6 +66,7 @@ public class KllFloatsSketch extends BaseKllSketch { private float[] items_; // the continuous array of float items private float minValue_; private float maxValue_; + private static final boolean IS_DOUBLE = false; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. @@ -125,26 +127,31 @@ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { n_ = 1; minK_ = k_; numLevels_ = 1; - levels_ = new int[numLevels_ + 1]; //ALL - final int offset = DATA_START_ADR_SINGLE_ITEM; + levels_ = new int[numLevels_ + 1]; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); levels_[0] = itemCapacity - 1; levels_[numLevels_] = itemCapacity; //load the last integer in levels_ items_ = new float[itemCapacity]; - mem.getFloatArray(offset, items_, levels_[0], getNumRetained()); + items_[levels_[0]] = mem.getFloat(DATA_START_ADR_SINGLE_ITEM); minValue_ = items_[levels_[0]]; maxValue_ = items_[levels_[0]]; } else { n_ = memChk.n; minK_ = memChk.minK; numLevels_ = memChk.numLevels; - levels_ = new int[numLevels_ + 1]; //ALL + levels_ = new int[numLevels_ + 1]; int offset = DATA_START_ADR_FLOAT; final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - // the last integer in levels_ is not serialized because it can be derived - mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ - offset += numLevels_ * Integer.BYTES; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + if (memChk.updatable) { + // If updatable the last integer in levels_ IS serialized. + mem.getIntArray(offset, levels_, 0, numLevels_ + 1); //load levels_ + offset += (numLevels_ + 1) * Integer.BYTES; + } else { + // If compact the last integer in levels_ is not serialized. 
+ mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ + offset += numLevels_ * Integer.BYTES; + levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + } minValue_ = mem.getFloat(offset); offset += Float.BYTES; maxValue_ = mem.getFloat(offset); @@ -218,15 +225,17 @@ public float getMinValue() { return minValue_; } + //Size related + /** - * Returns upper bound on the serialized size of a sketch given a parameter k and stream + * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream * length. This method can be used if allocation of storage is necessary beforehand. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length - * @return upper bound on the serialized size + * @return upper bound on the compact serialized size */ public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, false); + final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, IS_DOUBLE); return lvlStats.getCompactBytes(); } @@ -236,7 +245,25 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { */ public int getCurrentCompactSerializedSizeBytes() { if (isEmpty()) { return N_LONG_ADR; } - return KllHelper.getCompactSerializedSizeBytes(numLevels_, getNumRetained(), false); + return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); + } + + /** + * Returns the current number of bytes this sketch would require to store in updatable form. + * @return the current number of bytes this sketch would require to store in updatable form. + */ + public int getCurrentUpdatableSerializedSizeBytes() { + return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); + } + + /** + * Returns the number of bytes this sketch would require to store. 
+ * @return the number of bytes this sketch would require to store. + * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); } /** @@ -408,16 +435,6 @@ public double getRank(final float value) { return (double) total / n_; } - /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * @return the iterator for this class */ @@ -455,7 +472,7 @@ public void merge(final KllFloatsSketch other) { @Override public byte[] toByteArray() { - final byte[] bytes = new byte[getSerializedSizeBytes()]; + final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); final boolean singleItem = n_ == 1; final boolean empty = isEmpty(); @@ -468,7 +485,7 @@ public byte[] toByteArray() { (empty ? EMPTY_BIT_MASK : 0) | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | (singleItem ? 
SINGLE_ITEM_BIT_MASK : 0)); - + // (leave blank) wmem.putByte(FLAGS_BYTE_ADR, flags); wmem.putShort(K_SHORT_ADR, (short) k_); wmem.putByte(M_BYTE_ADR, (byte) m_); @@ -493,6 +510,40 @@ public byte[] toByteArray() { return bytes; } + @Override + public byte[] toUpdatableByteArray() { + final int itemCap = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + final int numBytes = KllHelper.getSerializedSizeBytes(numLevels_, itemCap, IS_DOUBLE, true); + final byte[] bytes = new byte[numBytes]; + final WritableMemory wmem = WritableMemory.writableWrap(bytes); + //load the preamble + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_FLOAT); + wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); + final byte flags = (byte) + ((isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | UPDATABLE_BIT_MASK); + // (leave blank) + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) k_); + wmem.putByte(M_BYTE_ADR, (byte) m_); + //load data + wmem.putLong(N_LONG_ADR, n_); + wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + int offset = DATA_START_ADR_FLOAT; + // the last integer in levels_ IS serialized + final int len = levels_.length; + wmem.putIntArray(offset, levels_, 0, len); + offset += len * Integer.BYTES; + wmem.putDouble(offset, minValue_); + offset += Float.BYTES; + wmem.putDouble(offset, maxValue_); + offset += Float.BYTES; + wmem.putFloatArray(offset, items_, levels_[0], getNumRetained()); + return bytes; + } + @Override public String toString(final boolean withLevels, final boolean withData) { final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 891ed3c97..2965d9ff2 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ 
b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -86,8 +86,8 @@ static LevelStats getLevelStats(final int k, final int m, final int numLevels, System.out.printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); } } - final int compactBytes = getCompactSerializedSizeBytes(numLevels, cumCap, isDouble); - final int updatableBytes = getUpdatableSerializedSizeBytes(k, m, numLevels, isDouble); + final int compactBytes = getSerializedSizeBytes(numLevels, cumCap, isDouble, false); + final int updatableBytes = getSerializedSizeBytes(numLevels, cumCap, isDouble, true); if (printDetail) { System.out.printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); System.out.println(" COMPACT BYTES: " + compactBytes); @@ -135,28 +135,21 @@ public String toString() { public int getMaxCap() { return maxCap; } } - static int getCompactSerializedSizeBytes(final int numLevels, final int numRetained, final boolean isDouble) { - if (numLevels == 1 && numRetained == 1) { - return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); - } - // The last integer in levels_ is not serialized because it can be derived. - // The + 2 is for min and max - if (isDouble) { - return DATA_START_ADR_DOUBLE + numLevels * Integer.BYTES + (numRetained + 2) * Double.BYTES; + static int getSerializedSizeBytes(final int numLevels, final int numRetained, final boolean isDouble, + final boolean updatable) { + int levelsBytes = 0; + if (!updatable) { + if (numLevels == 1 && numRetained == 1) { + return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); + } + levelsBytes = numLevels * Integer.BYTES; } else { - return DATA_START_ADR_FLOAT + numLevels * Integer.BYTES + (numRetained + 2) * Float.BYTES; + levelsBytes = (numLevels + 1) * Integer.BYTES; } - } - - static int getUpdatableSerializedSizeBytes(final int k, final int m, final int numLevels, final boolean isDouble) { - //There are no special accommodations for empty or single item. 
- //The last integer in levels IS serialized. - // The + 2 is for min and max - final int totCap = computeTotalItemCapacity(k, m, numLevels) + 2; if (isDouble) { - return DATA_START_ADR_DOUBLE + (numLevels + 1) * Integer.BYTES + totCap * Double.BYTES; + return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; } else { - return DATA_START_ADR_FLOAT + (numLevels + 1) * Integer.BYTES + totCap * Float.BYTES; + return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; } } diff --git a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java index 3d9314b9c..1203d9753 100644 --- a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java @@ -133,13 +133,12 @@ private PreambleUtil() {} static final int LEVEL_ZERO_SORTED_BIT_MASK = 2; static final int SINGLE_ITEM_BIT_MASK = 4; static final int DOUBLES_SKETCH_BIT_MASK = 8; - static final int UPDATABLE_BIT_MASK = 16; + static final int UPDATABLE_BIT_MASK = 16; enum Layout { FLOAT_FULL_COMPACT, FLOAT_EMPTY_COMPACT, FLOAT_SINGLE_COMPACT, DOUBLE_FULL_COMPACT, DOUBLE_EMPTY_COMPACT, DOUBLE_SINGLE_COMPACT, - FLOAT_FULL_NOT_COMPACT, FLOAT_EMPTY_NOT_COMPACT, FLOAT_SINGLE_NOT_COMPACT, - DOUBLE_FULL_NOT_COMPACT, DOUBLE_EMPTY_NOT_COMPACT, DOUBLE_SINGLE_NOT_COMPACT } + FLOAT_UPDATABLE, DOUBLE_UPDATABLE } /** * Returns a human readable string summary of the internal state of the given byte array. 
@@ -203,7 +202,7 @@ static class MemoryCheck { m = extractM(srcMem); KllHelper.checkK(k); - if (m != 8) { System.err.println("WARNING: Minimum Level width set to non-default value: " + m); } + if (m != 8) { throwCustom(7, m); } if (familyID != Family.KLL.getID()) { throwCustom(0, familyID); } famName = idToFamily(familyID).toString(); if (famName != "KLL") { throwCustom(23, 0); } @@ -215,7 +214,7 @@ static class MemoryCheck { case 0: { //not empty, not single item, float full if (preInts != PREAMBLE_INTS_FLOAT) { throwCustom(6, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - layout = updatable ? Layout.FLOAT_FULL_NOT_COMPACT : Layout.FLOAT_FULL_COMPACT; + layout = updatable ? Layout.FLOAT_UPDATABLE : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); minK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); @@ -226,7 +225,7 @@ static class MemoryCheck { if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } if (updatable) { - layout = Layout.FLOAT_EMPTY_NOT_COMPACT; + layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); if (n != 0) { throwCustom(21, (int) n); } minK = extractMinK(srcMem); @@ -245,7 +244,7 @@ static class MemoryCheck { if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } if (updatable) { - layout = Layout.FLOAT_SINGLE_NOT_COMPACT; + layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); if (n != 1) { throwCustom(22, (int)n); } minK = extractMinK(srcMem); @@ -263,7 +262,7 @@ static class MemoryCheck { case 8: { //not empty, not single item, double full if (preInts != PREAMBLE_INTS_DOUBLE) { throwCustom(5, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } - layout = updatable ? Layout.DOUBLE_FULL_NOT_COMPACT : Layout.DOUBLE_FULL_COMPACT; + layout = updatable ? 
Layout.DOUBLE_UPDATABLE : Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); minK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); @@ -274,7 +273,7 @@ static class MemoryCheck { if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } if (updatable) { - layout = Layout.DOUBLE_EMPTY_NOT_COMPACT; + layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); if (n != 0) { throwCustom(21, (int) n); } minK = extractMinK(srcMem); @@ -293,7 +292,7 @@ static class MemoryCheck { if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } if (updatable) { - layout = Layout.DOUBLE_SINGLE_NOT_COMPACT; + layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); if (n != 1) { throwCustom(22, (int)n); } minK = extractMinK(srcMem); @@ -321,6 +320,7 @@ private static void throwCustom(final int errNo, final int value) { case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; + case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; case 21: msg = "N != 0 and empty bit is set. N: " + value; break; case 22: msg = "N != 1 and single item bit is set. 
N: " + value; break; diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java index 391052a64..64a995038 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java @@ -61,6 +61,4 @@ public void bigSketches() { } } - } - diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java index 7732efce8..33d829fcc 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java @@ -22,7 +22,6 @@ import org.testng.Assert; import org.testng.annotations.Test; -@SuppressWarnings("javadoc") public class KllFloatsSketchIteratorTest { @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 4e56f86d1..b3e396956 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -24,8 +24,10 @@ import static org.testng.Assert.assertEquals; import org.apache.datasketches.kll.KllHelper.LevelStats; +import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; +@SuppressWarnings("unused") public class KllHelperTest { @Test //convert two false below to true for visual checking @@ -45,4 +47,67 @@ public void getStatsAtNumLevels() { LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, true); assertEquals(lvlStats.getCompactBytes(), 5708); } + + @Test + public void checkUpdatableSerDe() { + KllDoublesSketch sk = new KllDoublesSketch(200); + for (int i = 1; i <= 533; i++) { sk.update(i); } + int retained = sk.getNumRetained(); + int numLevels = 
((BaseKllSketch)sk).numLevels_; + println("NumLevels: " + numLevels); + println("NumRetained: " + retained); + + byte[] compByteArr1 = sk.toByteArray(); + int compBytes1 = compByteArr1.length; + println("compBytes1: " + compBytes1); + + byte[] upByteArr1 = sk.toUpdatableByteArray(); + int upBytes1 = upByteArr1.length; + println("upBytes1: " + upBytes1); + + Memory mem; + KllDoublesSketch sk2; + + mem = Memory.wrap(compByteArr1); + sk2 = KllDoublesSketch.heapify(mem); + byte[] compByteArr2 = sk2.toByteArray(); + int compBytes2 = compByteArr2.length; + println("compBytes2: " + compBytes2); + assertEquals(compBytes1, compBytes2); + assertEquals(sk2.getNumRetained(), retained); + + mem = Memory.wrap(compByteArr2); + sk2 = KllDoublesSketch.heapify(mem); + byte[] upByteArr2 = sk2.toUpdatableByteArray(); + int upBytes2 = upByteArr2.length; + println("upBytes2: " + upBytes2); + assertEquals(upBytes1, upBytes2); + assertEquals(sk2.getNumRetained(), retained); + } + + //Experimental + + //@Test //convert two false below to true for visual checking + public void testGetAllLevelStats2() { + long n = 533; + int k = 200; + int m = 8; + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, true); + } + + //@Test + public void getStatsAtNumLevels2() { + int k = 20; + int m = 8; + int numLevels = 2; + LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, true); + } + + /** + * Println Object o + * @param o object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); + } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 17e4e3db0..10594895c 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -64,11 +64,11 @@ public void checkBounds() { println("LB : " + lb); } - @Test + @Test(expectedExceptions = SketchesArgumentException.class) public 
void checkHeapifyExceptions1() { KllDoublesSketch sk = new KllDoublesSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //use different M: produces a warning + wmem.putByte(6, (byte)4); //corrupt with different M KllDoublesSketch.heapify(wmem); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 77d5a5a79..c684888e0 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -64,11 +64,11 @@ public void checkBounds() { println("LB : " + lb); } - @Test + @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifyExceptions1() { KllFloatsSketch sk = new KllFloatsSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //use different M: produces a warning + wmem.putByte(6, (byte)4); //corrupt with different M KllFloatsSketch.heapify(wmem); } From ae156ae9985f76dd57e547536b96f0e2247cbe5d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 4 Mar 2022 12:23:24 -0800 Subject: [PATCH 04/31] interim 4 --- .../org/apache/datasketches/kll/KllDoublesSketch.java | 9 ++++----- .../org/apache/datasketches/kll/KllFloatsSketch.java | 9 ++++----- src/main/java/org/apache/datasketches/kll/KllHelper.java | 8 ++++---- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index a95cc7215..54136a446 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -240,17 +240,16 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { } /** - * Returns the current number of bytes this sketch would require to store in compact form. 
- * @return the current number of bytes this sketch would require to store in compact form. + * Returns the current compact number of bytes this sketch would require to store. + * @return the current compact number of bytes this sketch would require to store. */ public int getCurrentCompactSerializedSizeBytes() { - if (isEmpty()) { return N_LONG_ADR; } return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); } /** - * Returns the current number of bytes this sketch would require to store in updatable form. - * @return the current number of bytes this sketch would require to store in updatable form. + * Returns the current updatable number of bytes this sketch would require to store. + * @return the current updatable number of bytes this sketch would require to store. */ public int getCurrentUpdatableSerializedSizeBytes() { return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index fc1059110..c6b48d228 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -240,17 +240,16 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { } /** - * Returns the current number of bytes this sketch would require to store in compact form. - * @return the current number of bytes this sketch would require to store in compact form. + * Returns the current compact number of bytes this sketch would require to store. + * @return the current compact number of bytes this sketch would require to store. 
*/ public int getCurrentCompactSerializedSizeBytes() { - if (isEmpty()) { return N_LONG_ADR; } return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); } /** - * Returns the current number of bytes this sketch would require to store in updatable form. - * @return the current number of bytes this sketch would require to store in updatable form. + * Returns the current updatable number of bytes this sketch would require to store. + * @return the current updatable number of bytes this sketch would require to store. */ public int getCurrentUpdatableSerializedSizeBytes() { return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 2965d9ff2..6e3cac0c7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -26,6 +26,7 @@ import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.MAX_K; import static org.apache.datasketches.kll.PreambleUtil.MIN_K; +import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; import org.apache.datasketches.SketchesArgumentException; @@ -139,15 +140,14 @@ static int getSerializedSizeBytes(final int numLevels, final int numRetained, fi final boolean updatable) { int levelsBytes = 0; if (!updatable) { - if (numLevels == 1 && numRetained == 1) { - return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); - } + if (numRetained == 0) { return N_LONG_ADR; } + if (numRetained == 1) { return DATA_START_ADR_SINGLE_ITEM + (isDouble ? 
Double.BYTES : Float.BYTES); } levelsBytes = numLevels * Integer.BYTES; } else { levelsBytes = (numLevels + 1) * Integer.BYTES; } if (isDouble) { - return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; + return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; //+2 is for min & max } else { return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; } From b662bb04ae9aa3d801b40a95cc56ac029a752743 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 4 Mar 2022 17:26:21 -0800 Subject: [PATCH 05/31] Interim 5 --- .../datasketches/kll/BaseKllSketch.java | 103 +++++-- .../kll/KllDirectDoublesSketch.java | 68 ++++ .../datasketches/kll/KllDoublesSketch.java | 291 +++++++++--------- .../datasketches/kll/KllFloatsSketch.java | 288 ++++++++--------- .../apache/datasketches/kll/PreambleUtil.java | 30 +- .../kll/KllDoublesValidationTest.java | 4 +- .../kll/KllFloatsValidationTest.java | 4 +- .../datasketches/kll/KllHelperTest.java | 2 +- .../datasketches/kll/MiscDoublesTest.java | 4 +- .../datasketches/kll/MiscFloatsTest.java | 4 +- 10 files changed, 466 insertions(+), 332 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java index e61398279..6c7e90593 100644 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java @@ -26,6 +26,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.round; +import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.PreambleUtil.MAX_K; import static org.apache.datasketches.kll.PreambleUtil.MIN_K; @@ -50,16 +51,15 @@ abstract class BaseKllSketch { * 5) curTotalCap = items_.length = levels_[numLevels_]. 
*/ - final int k_; // configured value of K - final int m_; // configured minimum buffer "width", Must always be DEFAULT_M for now. + static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. + private final int k_; // configured value of K + private int dyMinK_; // dynamic minK for error estimation after merging with different k + private long n_; // number of items input into this sketch + private int numLevels_; // one-based number of current levels, + private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. + private boolean isLevelZeroSorted_; - int minK_; // for error estimation after merging with different k - long n_; // number of items input into this sketch - int numLevels_; // one-based number of current levels, - int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. - boolean isLevelZeroSorted_; - - final boolean compatible; //compatible with quantiles sketch treatment of rank 0.0 and 1.0. + private final boolean compatible; //compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
static final Random random = new Random(); /** @@ -71,14 +71,77 @@ abstract class BaseKllSketch { BaseKllSketch(final int k, final int m, final boolean compatible) { KllHelper.checkK(k); k_ = k; - minK_ = k; - m_ = m; + dyMinK_ = k; numLevels_ = 1; levels_ = new int[] {k, k}; isLevelZeroSorted_ = false; this.compatible = compatible; } + int getDyMinK() { + return dyMinK_; + } + + void setDyMinK(final int dyMinK) { + dyMinK_ = dyMinK; + } + + int getNumLevels() { + return numLevels_; + } + + void setNumLevels(final int numLevels) { + numLevels_ = numLevels; + } + + void incNumLevels() { + numLevels_++; + } + + int[] getLevelsArray() { + return levels_; + } + + int getLevelsArrayAt(final int index) { + return levels_[index]; + } + + void setLevelsArray(final int[] levels) { + this.levels_ = levels; + } + + void setLevelsArrayAt(final int index, final int value) { + this.levels_[index] = value; + } + + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + this.levels_[index] += plusEq; + } + + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + this.levels_[index] -= minusEq; + } + + boolean isLevelZeroSorted() { + return isLevelZeroSorted_; + } + + void setLevelZeroSorted(final boolean sorted) { + this.isLevelZeroSorted_ = sorted; + } + + boolean isCompatible() { + return this.compatible; + } + + void setN(final long n) { + n_ = n; + } + + void incN() { + n_++; + } + // public functions /** @@ -89,6 +152,14 @@ public int getK() { return k_; } + /** + * Returns the length of the input stream. + * @return stream length + */ + public long getN() { + return n_; + } + /** * Gets the approximate value of k to use given epsilon, the normalized rank error. * @param epsilon the normalized rank error between zero and one. @@ -113,14 +184,6 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { return max(MIN_K, min(MAX_K, k)); } - /** - * Returns the length of the input stream. 
- * @return stream length - */ - public long getN() { - return n_; - } - /** * Gets the approximate rank error of this sketch normalized as a fraction between zero and one. * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. @@ -130,7 +193,7 @@ public long getN() { * @see KllDoublesSketch */ public double getNormalizedRankError(final boolean pmf) { - return KllHelper.getNormalizedRankError(minK_, pmf); + return KllHelper.getNormalizedRankError(dyMinK_, pmf); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java new file mode 100644 index 000000000..4d4d0b0d9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +//import static java.lang.Math.max; +//import static java.lang.Math.min; +//import static org.apache.datasketches.Util.isOdd; +//import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +//import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; +//import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; +//import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +//import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; +//import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; +//import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; +//import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +//import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_DOUBLE; +//import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +//import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; +//import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; +//import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; +//import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; +//import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; +// +//import java.util.Arrays; +// 
+//import org.apache.datasketches.Family; +//import org.apache.datasketches.SketchesArgumentException; +//import org.apache.datasketches.Util; +//import org.apache.datasketches.kll.KllHelper.LevelStats; +//import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; +//import org.apache.datasketches.memory.Memory; +//import org.apache.datasketches.memory.WritableMemory; + +/** + * Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll} + */ +public class KllDirectDoublesSketch { + + + +} + diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 54136a446..554a49372 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -28,12 +28,12 @@ import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; @@ -114,50 +114,52 @@ private KllDoublesSketch(final int k, final int m, final boolean compatible) { */ private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { super(memChk.k, memChk.m, true); - isLevelZeroSorted_ = memChk.level0Sorted; + setLevelZeroSorted(memChk.level0Sorted); + + final int k = getK(); if (memChk.empty) { - numLevels_ = 1; - levels_ = new int[] {k_, k_}; - isLevelZeroSorted_ = false; - minK_ = k_; - items_ = new double[k_]; + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + setDyMinK(k); + items_ = new double[k]; minValue_ = Double.NaN; maxValue_ = 
Double.NaN; } else if (memChk.singleItem) { - n_ = 1; - minK_ = k_; - numLevels_ = 1; - levels_ = new int[numLevels_ + 1]; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - levels_[0] = itemCapacity - 1; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + setN(1); + setDyMinK(k); + setNumLevels(1); + setLevelsArray(new int[getNumLevels() + 1]); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); + setLevelsArrayAt(0, itemCapacity - 1); + setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ items_ = new double[itemCapacity]; - items_[levels_[0]] = mem.getDouble(DATA_START_ADR_SINGLE_ITEM); - minValue_ = items_[levels_[0]]; - maxValue_ = items_[levels_[0]]; + items_[getLevelsArrayAt(0)] = mem.getDouble(DATA_START_ADR_SINGLE_ITEM); + minValue_ = items_[getLevelsArrayAt(0)]; + maxValue_ = items_[getLevelsArrayAt(0)]; } else { - n_ = memChk.n; - minK_ = memChk.minK; - numLevels_ = memChk.numLevels; - levels_ = new int[numLevels_ + 1]; + setN(memChk.n); + setDyMinK(memChk.dyMinK); + setNumLevels(memChk.numLevels); + setLevelsArray(new int[getNumLevels() + 1]); int offset = DATA_START_ADR_DOUBLE; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); if (memChk.updatable) { // If updatable the last integer in levels_ IS serialized. - mem.getIntArray(offset, levels_, 0, numLevels_ + 1); //load levels_ - offset += (numLevels_ + 1) * Integer.BYTES; + mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels() + 1); //load levels_ + offset += (getNumLevels() + 1) * Integer.BYTES; } else { // If compact the last integer in levels_ is not serialized. 
- mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ - offset += numLevels_ * Integer.BYTES; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels()); //load levels_ + offset += getNumLevels() * Integer.BYTES; + setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ } minValue_ = mem.getDouble(offset); offset += Double.BYTES; maxValue_ = mem.getDouble(offset); offset += Double.BYTES; items_ = new double[itemCapacity]; - mem.getDoubleArray(offset, items_, levels_[0], getNumRetained()); + mem.getDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); } } @@ -244,7 +246,7 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { * @return the current compact number of bytes this sketch would require to store. */ public int getCurrentCompactSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); + return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, false); } /** @@ -252,7 +254,7 @@ public int getCurrentCompactSerializedSizeBytes() { * @return the current updatable number of bytes this sketch would require to store. */ public int getCurrentUpdatableSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); + return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, true); } /** @@ -313,7 +315,7 @@ public double getQuantile(final double fraction) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (compatible) { + if (isCompatible()) { if (fraction == 0.0) { return minValue_; } if (fraction == 1.0) { return maxValue_; } } @@ -329,7 +331,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. 
Returns NaN if the sketch is empty. */ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(minK_, false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); } /** @@ -340,7 +342,7 @@ public double getQuantileUpperBound(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(minK_, false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); } /** @@ -370,8 +372,8 @@ public double[] getQuantiles(final double[] fractions) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (fraction == 0.0 && compatible) { quantiles[i] = minValue_; } - else if (fraction == 1.0 && compatible) { quantiles[i] = maxValue_; } + if (fraction == 0.0 && isCompatible()) { quantiles[i] = minValue_; } + else if (fraction == 1.0 && isCompatible()) { quantiles[i] = maxValue_; } else { if (quant == null) { quant = getQuantileCalculator(); @@ -418,27 +420,27 @@ public double getRank(final double value) { int level = 0; int weight = 1; long total = 0; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive for (int i = fromIndex; i < toIndex; i++) { if (items_[i] < value) { total += weight; - } else if (level > 0 || isLevelZeroSorted_) { + } else if (level > 0 || isLevelZeroSorted()) { break; // levels above 0 are sorted, no point comparing further } } level++; weight *= 2; } - return (double) total / n_; + return (double) total / getN(); } 
/** * @return the iterator for this class */ public KllDoublesSketchIterator iterator() { - return new KllDoublesSketchIterator(items_, levels_, numLevels_); + return new KllDoublesSketchIterator(items_, getLevelsArray(), getNumLevels()); } /** @@ -447,25 +449,22 @@ public KllDoublesSketchIterator iterator() { */ public void merge(final KllDoublesSketch other) { if (other == null || other.isEmpty()) { return; } - if (m_ != other.m_) { - throw new SketchesArgumentException("incompatible M: " + m_ + " and " + other.m_); - } - final long finalN = n_ + other.n_; + final long finalN = getN() + other.getN(); //update this sketch with level0 items from the other sketch - for (int i = other.levels_[0]; i < other.levels_[1]; i++) { + for (int i = other.getLevelsArrayAt(0); i < other.getLevelsArrayAt(1); i++) { update(other.items_[i]); } - if (other.numLevels_ >= 2) { //now merge other levels if they exist + if (other.getNumLevels() >= 2) { //now merge other levels if they exist mergeHigherLevels(other, finalN); } //update min, max values, n if (Double.isNaN(minValue_) || other.minValue_ < minValue_) { minValue_ = other.minValue_; } if (Double.isNaN(maxValue_) || other.maxValue_ > maxValue_) { maxValue_ = other.maxValue_; } - n_ = finalN; + setN(finalN); - assert KllHelper.sumTheSampleWeights(numLevels_, levels_) == n_; + assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); if (other.isEstimationMode()) { - minK_ = min(minK_, other.minK_); + setDyMinK(min(getDyMinK(), other.getDyMinK())); } } @@ -473,7 +472,7 @@ public void merge(final KllDoublesSketch other) { public byte[] toByteArray() { final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); - final boolean singleItem = n_ == 1; + final boolean singleItem = getN() == 1; final boolean empty = isEmpty(); //load the preamble wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) @@ -482,37 +481,38 @@ public byte[] toByteArray() 
{ wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); final byte flags = (byte) ( (empty ? EMPTY_BIT_MASK : 0) - | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) | DOUBLES_SKETCH_BIT_MASK); wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k_); - wmem.putByte(M_BYTE_ADR, (byte) m_); + wmem.putShort(K_SHORT_ADR, (short) getK()); + wmem.putByte(M_BYTE_ADR, (byte) M); if (empty) { return bytes; } //load data int offset = DATA_START_ADR_SINGLE_ITEM; if (!singleItem) { - wmem.putLong(N_LONG_ADR, n_); - wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); offset = DATA_START_ADR_DOUBLE; // the last integer in levels_ is not serialized because it can be derived - final int len = levels_.length - 1; - wmem.putIntArray(offset, levels_, 0, len); + final int len = getLevelsArray().length - 1; + wmem.putIntArray(offset, getLevelsArray(), 0, len); offset += len * Integer.BYTES; wmem.putDouble(offset, minValue_); offset += Double.BYTES; wmem.putDouble(offset, maxValue_); offset += Double.BYTES; } - wmem.putDoubleArray(offset, items_, levels_[0], getNumRetained()); + wmem.putDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); return bytes; } @Override public byte[] toUpdatableByteArray() { - final int itemCap = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - final int numBytes = KllHelper.getSerializedSizeBytes(numLevels_, itemCap, IS_DOUBLE, true); + final int k = getK(); + final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); + final int numBytes = KllHelper.getSerializedSizeBytes(getNumLevels(), itemCap, IS_DOUBLE, true); final byte[] bytes = new byte[numBytes]; final WritableMemory wmem = 
WritableMemory.writableWrap(bytes); //load the preamble @@ -520,45 +520,46 @@ public byte[] toUpdatableByteArray() { wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); final byte flags = (byte) - ((isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + ((isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k_); - wmem.putByte(M_BYTE_ADR, (byte) m_); + wmem.putShort(K_SHORT_ADR, (short) k); + wmem.putByte(M_BYTE_ADR, (byte) M); //load data - wmem.putLong(N_LONG_ADR, n_); - wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); int offset = DATA_START_ADR_DOUBLE; // the last integer in levels_ IS serialized - final int len = levels_.length; - wmem.putIntArray(offset, levels_, 0, len); + final int len = getLevelsArray().length; + wmem.putIntArray(offset, getLevelsArray(), 0, len); offset += len * Integer.BYTES; wmem.putDouble(offset, minValue_); offset += Double.BYTES; wmem.putDouble(offset, maxValue_); offset += Double.BYTES; - wmem.putDoubleArray(offset, items_, levels_[0], getNumRetained()); + wmem.putDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); return bytes; } @Override public String toString(final boolean withLevels, final boolean withData) { + final int k = getK(); final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); sb.append(Util.LS).append("### KLL Doubles Sketch summary:").append(Util.LS); - sb.append(" K : ").append(k_).append(Util.LS); - sb.append(" min K : 
").append(minK_).append(Util.LS); - sb.append(" M : ").append(m_).append(Util.LS); - sb.append(" N : ").append(n_).append(Util.LS); + sb.append(" K : ").append(k).append(Util.LS); + sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); + sb.append(" M : ").append(M).append(Util.LS); + sb.append(" N : ").append(getN()).append(Util.LS); sb.append(" Epsilon : ").append(epsPct).append(Util.LS); sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); sb.append(" Empty : ").append(isEmpty()).append(Util.LS); sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(numLevels_).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted_).append(Util.LS); + sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); + sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); sb.append(" Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); @@ -569,29 +570,29 @@ public String toString(final boolean withLevels, final boolean withData) { if (withLevels) { sb.append("### KLL sketch levels:").append(Util.LS) .append(" level, offset: nominal capacity, actual size").append(Util.LS); - for (int i = 0; i < numLevels_; i++) { - sb.append(" ").append(i).append(", ").append(levels_[i]).append(": ") - .append(KllHelper.levelCapacity(k_, numLevels_, i, m_)) - .append(", ").append(KllHelper.currentLevelSize(i, numLevels_, levels_)).append(Util.LS); + for (int i = 0; i < getNumLevels(); i++) { + sb.append(" ").append(i).append(", ").append(getLevelsArrayAt(i)).append(": ") + .append(KllHelper.levelCapacity(k, getNumLevels(), i, M)) + .append(", ").append(KllHelper.currentLevelSize(i, getNumLevels(), getLevelsArray())).append(Util.LS); } sb.append("### End sketch levels").append(Util.LS); } if (withData) { 
sb.append("### KLL sketch data {index, item}:").append(Util.LS); - if (levels_[0] > 0) { + if (getLevelsArrayAt(0) > 0) { sb.append(" Garbage:" + Util.LS); - for (int i = 0; i < levels_[0]; i++) { + for (int i = 0; i < getLevelsArrayAt(0); i++) { if (items_[i] == 0.0f) { continue; } sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); } } int level = 0; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levels_[level] + " wt: " + (1 << level)); + sb.append(" level[").append(level).append("]: offset: " + getLevelsArrayAt(level) + " wt: " + (1 << level)); sb.append(Util.LS); } for (int i = fromIndex; i < toIndex; i++) { @@ -599,7 +600,7 @@ public String toString(final boolean withLevels, final boolean withData) { } level++; } - sb.append(" level[" + level + "]: offset: " + levels_[level] + " (Exclusive)"); + sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) + " (Exclusive)"); sb.append(Util.LS); sb.append("### End sketch data").append(Util.LS); } @@ -621,14 +622,14 @@ public void update(final double value) { if (value < minValue_) { minValue_ = value; } if (value > maxValue_) { maxValue_ = value; } } - if (levels_[0] == 0) { + if (getLevelsArrayAt(0) == 0) { compressWhileUpdating(); } - n_++; - isLevelZeroSorted_ = false; - final int nextPos = levels_[0] - 1; - assert levels_[0] >= 0; - levels_[0] = nextPos; + incN(); + setLevelZeroSorted(false); + final int nextPos = getLevelsArrayAt(0) - 1; + assert getLevelsArrayAt(0) >= 0; + setLevelsArrayAt(0, nextPos); items_[nextPos] = value; } @@ -636,7 +637,7 @@ public void update(final double value) { private KllDoublesQuantileCalculator getQuantileCalculator() { sortLevelZero(); // 
sort in the sketch to reuse if possible - return new KllDoublesQuantileCalculator(items_, levels_, numLevels_, n_); + return new KllDoublesQuantileCalculator(items_, getLevelsArray(), getNumLevels(), getN()); } private double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf) { @@ -645,10 +646,10 @@ private double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf) { final double[] buckets = new double[splitPoints.length + 1]; int level = 0; int weight = 1; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive - if (level == 0 && !isLevelZeroSorted_) { + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive + if (level == 0 && !isLevelZeroSorted()) { incrementBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); } else { incrementBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); @@ -661,11 +662,11 @@ private double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf) { double subtotal = 0; for (int i = 0; i < buckets.length; i++) { subtotal += buckets[i]; - buckets[i] = subtotal / n_; + buckets[i] = subtotal / getN(); } } else { for (int i = 0; i < buckets.length; i++) { - buckets[i] /= n_; + buckets[i] /= getN(); } } return buckets; @@ -707,19 +708,19 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, // The following code is only valid in the special case of exactly reaching capacity while updating. // It cannot be used while merging, while reducing k, or anything else. private void compressWhileUpdating() { - final int level = KllHelper.findLevelToCompact(k_, m_, numLevels_, levels_); + final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); // It is important to do add the new top level right here. 
Be aware that this operation // grows the buffer and shifts the data and also the boundaries of the data and grows the // levels array and increments numLevels_ - if (level == numLevels_ - 1) { + if (level == getNumLevels() - 1) { addEmptyTopLevelToCompletelyFullSketch(); } - final int rawBeg = levels_[level]; - final int rawLim = levels_[level + 1]; + final int rawBeg = getLevelsArrayAt(level); + final int rawLim = getLevelsArrayAt(level + 1); // +2 is OK because we already added a new top level if necessary - final int popAbove = levels_[level + 2] - rawLim; + final int popAbove = getLevelsArrayAt(level + 2) - rawLim; final int rawPop = rawLim - rawBeg; final boolean oddPop = isOdd(rawPop); final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; @@ -739,82 +740,84 @@ private void compressWhileUpdating() { items_, rawLim, popAbove, items_, adjBeg + halfAdjPop); } - levels_[level + 1] -= halfAdjPop; // adjust boundaries of the level above + setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above + if (oddPop) { - levels_[level] = levels_[level + 1] - 1; // the current level now contains one item - items_[levels_[level]] = items_[rawBeg]; // namely this leftover guy + setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item + + items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy } else { - levels_[level] = levels_[level + 1]; // the current level is now empty + setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty } // verify that we freed up halfAdjPop array slots just below the current level - assert levels_[level] == rawBeg + halfAdjPop; + assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; // finally, we need to shift up the data in the levels below // so that the freed-up space can be used by level zero if (level > 0) { - final int amount = rawBeg - levels_[0]; - System.arraycopy(items_, levels_[0], items_, levels_[0] + halfAdjPop, 
amount); + final int amount = rawBeg - getLevelsArrayAt(0); + System.arraycopy(items_, getLevelsArrayAt(0), items_, getLevelsArrayAt(0) + halfAdjPop, amount); for (int lvl = 0; lvl < level; lvl++) { - levels_[lvl] += halfAdjPop; + setLevelsArrayAtPlusEq(lvl, halfAdjPop); } } } private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalCap = levels_[numLevels_]; + final int curTotalCap = getLevelsArrayAt(getNumLevels()); // make sure that we are following a certain growth scheme - assert levels_[0] == 0; //definition of full + assert getLevelsArrayAt(0) == 0; //definition of full assert items_.length == curTotalCap; // note that merging MIGHT over-grow levels_, in which case we might not have to grow it here - if (levels_.length < numLevels_ + 2) { - levels_ = KllHelper.growIntArray(levels_, numLevels_ + 2); + if (getLevelsArray().length < getNumLevels() + 2) { + setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); } - final int deltaCap = KllHelper.levelCapacity(k_, numLevels_ + 1, 0, m_); + final int deltaCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); final int newTotalCap = curTotalCap + deltaCap; final double[] newBuf = new double[newTotalCap]; // copy (and shift) the current data into the new buffer - System.arraycopy(items_, levels_[0], newBuf, levels_[0] + deltaCap, curTotalCap); + System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaCap, curTotalCap); items_ = newBuf; // this loop includes the old "extra" index at the top - for (int i = 0; i <= numLevels_; i++) { - levels_[i] += deltaCap; + for (int i = 0; i <= getNumLevels(); i++) { + setLevelsArrayAtPlusEq(i,deltaCap); } - assert levels_[numLevels_] == newTotalCap; + assert getLevelsArrayAt(getNumLevels()) == newTotalCap; - numLevels_++; - levels_[numLevels_] = newTotalCap; // initialize the new "extra" index at the top + incNumLevels(); + setLevelsArrayAt(getNumLevels(), newTotalCap); // initialize the new "extra" 
index at the top } private void sortLevelZero() { - if (!isLevelZeroSorted_) { - Arrays.sort(items_, levels_[0], levels_[1]); - isLevelZeroSorted_ = true; + if (!isLevelZeroSorted()) { + Arrays.sort(items_, getLevelsArrayAt(0), getLevelsArrayAt(1)); + setLevelZeroSorted(true); } } private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) { final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.numLevels_, other.levels_); + + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); final double[] workbuf = new double[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] worklevels = new int[ub + 2]; // ub+1 does not work final int[] outlevels = new int[ub + 2]; - final int provisionalNumLevels = max(numLevels_, other.numLevels_); + final int provisionalNumLevels = max(getNumLevels(), other.getNumLevels()); populateWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output here - final int[] result = KllDoublesHelper.generalDoublesCompress(k_, m_, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted_, random); + final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); final int finalNumLevels = result[0]; final int finalCapacity = result[1]; final int finalPop = result[2]; @@ -827,16 +830,16 @@ private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); final int theShift = freeSpaceAtBottom - outlevels[0]; - if (levels_.length < finalNumLevels + 1) { - levels_ = new int[finalNumLevels + 1]; + if (getLevelsArray().length < finalNumLevels + 1) { + setLevelsArray(new int[finalNumLevels + 1]); } for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes 
the "extra" index - levels_[lvl] = outlevels[lvl] + theShift; + setLevelsArrayAt(lvl, outlevels[lvl] + theShift); } items_ = newbuf; - numLevels_ = finalNumLevels; + setNumLevels(finalNumLevels); } private void populateWorkArrays(final KllDoublesSketch other, final double[] workbuf, @@ -844,22 +847,22 @@ private void populateWorkArrays(final KllDoublesSketch other, final double[] wor worklevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, numLevels_, levels_); - System.arraycopy(items_, levels_[0], workbuf, worklevels[0], selfPopZero); + final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), getLevelsArray()); + System.arraycopy(items_, getLevelsArrayAt(0), workbuf, worklevels[0], selfPopZero); worklevels[1] = worklevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, numLevels_, levels_); - final int otherPop = KllHelper.currentLevelSize(lvl, other.numLevels_, other.levels_); + final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), getLevelsArray()); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), other.getLevelsArray()); worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; if (selfPop > 0 && otherPop == 0) { - System.arraycopy(items_, levels_[lvl], workbuf, worklevels[lvl], selfPop); + System.arraycopy(items_, getLevelsArrayAt(lvl), workbuf, worklevels[lvl], selfPop); } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(other.items_, other.levels_[lvl], workbuf, worklevels[lvl], otherPop); + System.arraycopy(other.items_, other.getLevelsArrayAt(lvl), workbuf, worklevels[lvl], otherPop); } else if (selfPop > 0 && otherPop > 0) { - KllDoublesHelper.mergeSortedDoubleArrays(items_, levels_[lvl], selfPop, other.items_, - other.levels_[lvl], otherPop, workbuf, worklevels[lvl]); + KllDoublesHelper.mergeSortedDoubleArrays(items_, 
getLevelsArrayAt(lvl), selfPop, other.items_, + other.getLevelsArrayAt(lvl), otherPop, workbuf, worklevels[lvl]); } } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index c6b48d228..78176a2b8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -27,12 +27,12 @@ import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; @@ -114,50 +114,51 @@ private KllFloatsSketch(final int k, final int m, final boolean compatible) { */ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { super(memChk.k, memChk.m, true); - isLevelZeroSorted_ = memChk.level0Sorted; + setLevelZeroSorted(memChk.level0Sorted); + final int k = getK(); if (memChk.empty) { - numLevels_ = 1; - levels_ = new int[] {k_, k_}; - isLevelZeroSorted_ = false; - minK_ = k_; - items_ = new float[k_]; + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + setDyMinK(k); + items_ = new float[k]; minValue_ = Float.NaN; 
maxValue_ = Float.NaN; } else if (memChk.singleItem) { - n_ = 1; - minK_ = k_; - numLevels_ = 1; - levels_ = new int[numLevels_ + 1]; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - levels_[0] = itemCapacity - 1; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + setN(1); + setDyMinK(k); + setNumLevels(1); + setLevelsArray(new int[getNumLevels() + 1]); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, getNumLevels()); + setLevelsArrayAt(0,itemCapacity - 1); + setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ items_ = new float[itemCapacity]; - items_[levels_[0]] = mem.getFloat(DATA_START_ADR_SINGLE_ITEM); - minValue_ = items_[levels_[0]]; - maxValue_ = items_[levels_[0]]; + items_[getLevelsArrayAt(0)] = mem.getFloat(DATA_START_ADR_SINGLE_ITEM); + minValue_ = items_[getLevelsArrayAt(0)]; + maxValue_ = items_[getLevelsArrayAt(0)]; } else { - n_ = memChk.n; - minK_ = memChk.minK; - numLevels_ = memChk.numLevels; - levels_ = new int[numLevels_ + 1]; + setN(memChk.n); + setDyMinK(memChk.dyMinK); + setNumLevels(memChk.numLevels); + setLevelsArray(new int[getNumLevels() + 1]); int offset = DATA_START_ADR_FLOAT; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, getNumLevels()); if (memChk.updatable) { // If updatable the last integer in levels_ IS serialized. - mem.getIntArray(offset, levels_, 0, numLevels_ + 1); //load levels_ - offset += (numLevels_ + 1) * Integer.BYTES; + mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels() + 1); //load levels_ + offset += (getNumLevels() + 1) * Integer.BYTES; } else { // If compact the last integer in levels_ is not serialized. 
- mem.getIntArray(offset, levels_, 0, numLevels_); //load levels_ - offset += numLevels_ * Integer.BYTES; - levels_[numLevels_] = itemCapacity; //load the last integer in levels_ + mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels()); //load levels_ + offset += getNumLevels() * Integer.BYTES; + setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ } minValue_ = mem.getFloat(offset); offset += Float.BYTES; maxValue_ = mem.getFloat(offset); offset += Float.BYTES; items_ = new float[itemCapacity]; - mem.getFloatArray(offset, items_, levels_[0], getNumRetained()); + mem.getFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); } } @@ -244,7 +245,7 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { * @return the current compact number of bytes this sketch would require to store. */ public int getCurrentCompactSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, false); + return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, false); } /** @@ -252,7 +253,7 @@ public int getCurrentCompactSerializedSizeBytes() { * @return the current updatable number of bytes this sketch would require to store. */ public int getCurrentUpdatableSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(numLevels_, getNumRetained(), IS_DOUBLE, true); + return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, true); } /** @@ -313,7 +314,7 @@ public float getQuantile(final double fraction) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (compatible) { + if (isCompatible()) { if (fraction == 0.0) { return minValue_; } if (fraction == 1.0) { return maxValue_; } } @@ -329,7 +330,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(minK_, false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); } /** @@ -340,7 +341,7 @@ public float getQuantileUpperBound(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(minK_, false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); } /** @@ -370,8 +371,8 @@ public float[] getQuantiles(final double[] fractions) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (fraction == 0.0 && compatible) { quantiles[i] = minValue_; } - else if (fraction == 1.0 && compatible) { quantiles[i] = maxValue_; } + if (fraction == 0.0 && isCompatible()) { quantiles[i] = minValue_; } + else if (fraction == 1.0 && isCompatible()) { quantiles[i] = maxValue_; } else { if (quant == null) { quant = getQuantileCalculator(); @@ -418,27 +419,27 @@ public double getRank(final float value) { int level = 0; int weight = 1; long total = 0; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive for (int i = fromIndex; i < toIndex; i++) { if (items_[i] < value) { total += weight; - } else if (level > 0 || isLevelZeroSorted_) { + } else if (level > 0 || isLevelZeroSorted()) { break; // levels above 0 are sorted, no point comparing further } } level++; weight *= 2; } - return (double) total / n_; + return (double) total / getN(); } /** * @return the iterator for this class 
*/ public KllFloatsSketchIterator iterator() { - return new KllFloatsSketchIterator(items_, levels_, numLevels_); + return new KllFloatsSketchIterator(items_, getLevelsArray(), getNumLevels()); } /** @@ -447,25 +448,22 @@ public KllFloatsSketchIterator iterator() { */ public void merge(final KllFloatsSketch other) { if (other == null || other.isEmpty()) { return; } - if (m_ != other.m_) { - throw new SketchesArgumentException("incompatible M: " + m_ + " and " + other.m_); - } - final long finalN = n_ + other.n_; + final long finalN = getN() + other.getN(); //update this sketch with level0 items from the other sketch - for (int i = other.levels_[0]; i < other.levels_[1]; i++) { + for (int i = other.getLevelsArrayAt(0); i < other.getLevelsArrayAt(1); i++) { update(other.items_[i]); } - if (other.numLevels_ >= 2) { //now merge other levels if they exist + if (other.getNumLevels() >= 2) { //now merge other levels if they exist mergeHigherLevels(other, finalN); } //update min, max values, n if (Float.isNaN(minValue_) || other.minValue_ < minValue_) { minValue_ = other.minValue_; } if (Float.isNaN(maxValue_) || other.maxValue_ > maxValue_) { maxValue_ = other.maxValue_; } - n_ = finalN; + setN(finalN); - assert KllHelper.sumTheSampleWeights(numLevels_, levels_) == n_; + assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); if (other.isEstimationMode()) { - minK_ = min(minK_, other.minK_); + setDyMinK(min(getDyMinK(), other.getDyMinK())); } } @@ -473,7 +471,7 @@ public void merge(final KllFloatsSketch other) { public byte[] toByteArray() { final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); - final boolean singleItem = n_ == 1; + final boolean singleItem = getN() == 1; final boolean empty = isEmpty(); //load the preamble wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) @@ -482,37 +480,38 @@ public byte[] toByteArray() { wmem.putByte(FAMILY_BYTE_ADR, (byte) 
Family.KLL.getID()); final byte flags = (byte) ( (empty ? EMPTY_BIT_MASK : 0) - | (isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | (singleItem ? SINGLE_ITEM_BIT_MASK : 0)); // (leave blank) wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k_); - wmem.putByte(M_BYTE_ADR, (byte) m_); + wmem.putShort(K_SHORT_ADR, (short) getK()); + wmem.putByte(M_BYTE_ADR, (byte) M); if (empty) { return bytes; } //load data int offset = DATA_START_ADR_SINGLE_ITEM; if (!singleItem) { - wmem.putLong(N_LONG_ADR, n_); - wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); offset = DATA_START_ADR_FLOAT; // the last integer in levels_ is not serialized because it can be derived - final int len = levels_.length - 1; - wmem.putIntArray(offset, levels_, 0, len); + final int len = getLevelsArray().length - 1; + wmem.putIntArray(offset, getLevelsArray(), 0, len); offset += len * Integer.BYTES; wmem.putFloat(offset, minValue_); offset += Float.BYTES; wmem.putFloat(offset, maxValue_); offset += Float.BYTES; } - wmem.putFloatArray(offset, items_, levels_[0], getNumRetained()); + wmem.putFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); return bytes; } @Override public byte[] toUpdatableByteArray() { - final int itemCap = KllHelper.computeTotalItemCapacity(k_, m_, numLevels_); - final int numBytes = KllHelper.getSerializedSizeBytes(numLevels_, itemCap, IS_DOUBLE, true); + final int k = getK(); + final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); + final int numBytes = KllHelper.getSerializedSizeBytes(getNumLevels(), itemCap, IS_DOUBLE, true); final byte[] bytes = new byte[numBytes]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); //load the preamble 
@@ -520,45 +519,46 @@ public byte[] toUpdatableByteArray() { wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); final byte flags = (byte) - ((isLevelZeroSorted_ ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + ((isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | UPDATABLE_BIT_MASK); // (leave blank) wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k_); - wmem.putByte(M_BYTE_ADR, (byte) m_); + wmem.putShort(K_SHORT_ADR, (short) k); + wmem.putByte(M_BYTE_ADR, (byte) M); //load data - wmem.putLong(N_LONG_ADR, n_); - wmem.putShort(MIN_K_SHORT_ADR, (short) minK_); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) numLevels_); + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); int offset = DATA_START_ADR_FLOAT; // the last integer in levels_ IS serialized - final int len = levels_.length; - wmem.putIntArray(offset, levels_, 0, len); + final int len = getLevelsArray().length; + wmem.putIntArray(offset, getLevelsArray(), 0, len); offset += len * Integer.BYTES; wmem.putDouble(offset, minValue_); offset += Float.BYTES; wmem.putDouble(offset, maxValue_); offset += Float.BYTES; - wmem.putFloatArray(offset, items_, levels_[0], getNumRetained()); + wmem.putFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); return bytes; } @Override public String toString(final boolean withLevels, final boolean withData) { + final int k = getK(); final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); sb.append(Util.LS).append("### KLL Floats Sketch summary:").append(Util.LS); - sb.append(" K : ").append(k_).append(Util.LS); - sb.append(" min K : ").append(minK_).append(Util.LS); - sb.append(" M : ").append(m_).append(Util.LS); - sb.append(" N : 
").append(n_).append(Util.LS); + sb.append(" K : ").append(k).append(Util.LS); + sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); + sb.append(" M : ").append(M).append(Util.LS); + sb.append(" N : ").append(getN()).append(Util.LS); sb.append(" Epsilon : ").append(epsPct).append(Util.LS); sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); sb.append(" Empty : ").append(isEmpty()).append(Util.LS); sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(numLevels_).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted_).append(Util.LS); + sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); + sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); sb.append(" Storage Bytes : ").append(getSerializedSizeBytes()).append(Util.LS); @@ -569,29 +569,29 @@ public String toString(final boolean withLevels, final boolean withData) { if (withLevels) { sb.append("### KLL sketch levels:").append(Util.LS) .append(" level, offset: nominal capacity, actual size").append(Util.LS); - for (int i = 0; i < numLevels_; i++) { - sb.append(" ").append(i).append(", ").append(levels_[i]).append(": ") - .append(KllHelper.levelCapacity(k_, numLevels_, i, m_)) - .append(", ").append(KllHelper.currentLevelSize(i, numLevels_, levels_)).append(Util.LS); + for (int i = 0; i < getNumLevels(); i++) { + sb.append(" ").append(i).append(", ").append(getLevelsArrayAt(i)).append(": ") + .append(KllHelper.levelCapacity(k, getNumLevels(), i, M)) + .append(", ").append(KllHelper.currentLevelSize(i, getNumLevels(), getLevelsArray())).append(Util.LS); } sb.append("### End sketch levels").append(Util.LS); } if (withData) { sb.append("### KLL sketch data {index, item}:").append(Util.LS); - if (levels_[0] > 0) { + if (getLevelsArrayAt(0) 
> 0) { sb.append(" Garbage:" + Util.LS); - for (int i = 0; i < levels_[0]; i++) { + for (int i = 0; i < getLevelsArrayAt(0); i++) { if (items_[i] == 0.0f) { continue; } sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); } } int level = 0; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levels_[level] + " wt: " + (1 << level)); + sb.append(" level[").append(level).append("]: offset: " + getLevelsArrayAt(level) + " wt: " + (1 << level)); sb.append(Util.LS); } for (int i = fromIndex; i < toIndex; i++) { @@ -599,7 +599,7 @@ public String toString(final boolean withLevels, final boolean withData) { } level++; } - sb.append(" level[" + level + "]: offset: " + levels_[level] + " (Exclusive)"); + sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) + " (Exclusive)"); sb.append(Util.LS); sb.append("### End sketch data").append(Util.LS); } @@ -621,14 +621,14 @@ public void update(final float value) { if (value < minValue_) { minValue_ = value; } if (value > maxValue_) { maxValue_ = value; } } - if (levels_[0] == 0) { + if (getLevelsArrayAt(0) == 0) { compressWhileUpdating(); } - n_++; - isLevelZeroSorted_ = false; - final int nextPos = levels_[0] - 1; - assert levels_[0] >= 0; - levels_[0] = nextPos; + incN(); + setLevelZeroSorted(false); + final int nextPos = getLevelsArrayAt(0) - 1; + assert getLevelsArrayAt(0) >= 0; + setLevelsArrayAt(0, nextPos); items_[nextPos] = value; } @@ -636,7 +636,7 @@ public void update(final float value) { private KllFloatsQuantileCalculator getQuantileCalculator() { sortLevelZero(); // sort in the sketch to reuse if possible - return new KllFloatsQuantileCalculator(items_, levels_, numLevels_, n_); + 
return new KllFloatsQuantileCalculator(items_, getLevelsArray(), getNumLevels(), getN()); } private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) { @@ -645,10 +645,10 @@ private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) { final double[] buckets = new double[splitPoints.length + 1]; int level = 0; int weight = 1; - while (level < numLevels_) { - final int fromIndex = levels_[level]; - final int toIndex = levels_[level + 1]; // exclusive - if (level == 0 && !isLevelZeroSorted_) { + while (level < getNumLevels()) { + final int fromIndex = getLevelsArrayAt(level); + final int toIndex = getLevelsArrayAt(level + 1); // exclusive + if (level == 0 && !isLevelZeroSorted()) { incrementBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); } else { incrementBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); @@ -661,11 +661,11 @@ private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) { double subtotal = 0; for (int i = 0; i < buckets.length; i++) { subtotal += buckets[i]; - buckets[i] = subtotal / n_; + buckets[i] = subtotal / getN(); } } else { for (int i = 0; i < buckets.length; i++) { - buckets[i] /= n_; + buckets[i] /= getN(); } } return buckets; @@ -707,19 +707,19 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, // The following code is only valid in the special case of exactly reaching capacity while updating. // It cannot be used while merging, while reducing k, or anything else. private void compressWhileUpdating() { - final int level = KllHelper.findLevelToCompact(k_, m_, numLevels_, levels_); + final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); // It is important to do add the new top level right here. 
Be aware that this operation // grows the buffer and shifts the data and also the boundaries of the data and grows the // levels array and increments numLevels_ - if (level == numLevels_ - 1) { + if (level == getNumLevels() - 1) { addEmptyTopLevelToCompletelyFullSketch(); } - final int rawBeg = levels_[level]; - final int rawLim = levels_[level + 1]; + final int rawBeg = getLevelsArrayAt(level); + final int rawLim = getLevelsArrayAt(level + 1); // +2 is OK because we already added a new top level if necessary - final int popAbove = levels_[level + 2] - rawLim; + final int popAbove = getLevelsArrayAt(level + 2) - rawLim; final int rawPop = rawLim - rawBeg; final boolean oddPop = isOdd(rawPop); final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; @@ -739,82 +739,82 @@ private void compressWhileUpdating() { items_, rawLim, popAbove, items_, adjBeg + halfAdjPop); } - levels_[level + 1] -= halfAdjPop; // adjust boundaries of the level above + setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above if (oddPop) { - levels_[level] = levels_[level + 1] - 1; // the current level now contains one item - items_[levels_[level]] = items_[rawBeg]; // namely this leftover guy + setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item + items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy } else { - levels_[level] = levels_[level + 1]; // the current level is now empty + setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty } // verify that we freed up halfAdjPop array slots just below the current level - assert levels_[level] == rawBeg + halfAdjPop; + assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; // finally, we need to shift up the data in the levels below // so that the freed-up space can be used by level zero if (level > 0) { - final int amount = rawBeg - levels_[0]; - System.arraycopy(items_, levels_[0], items_, levels_[0] + halfAdjPop, 
amount); + final int amount = rawBeg - getLevelsArrayAt(0); + System.arraycopy(items_, getLevelsArrayAt(0), items_, getLevelsArrayAt(0) + halfAdjPop, amount); for (int lvl = 0; lvl < level; lvl++) { - levels_[lvl] += halfAdjPop; + setLevelsArrayAtPlusEq(lvl, halfAdjPop); } } } private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalCap = levels_[numLevels_]; + final int curTotalCap = getLevelsArrayAt(getNumLevels()); // make sure that we are following a certain growth scheme - assert levels_[0] == 0; //definition of full + assert getLevelsArrayAt(0) == 0; //definition of full assert items_.length == curTotalCap; // note that merging MIGHT over-grow levels_, in which case we might not have to grow it here - if (levels_.length < numLevels_ + 2) { - levels_ = KllHelper.growIntArray(levels_, numLevels_ + 2); + if (getLevelsArray().length < getNumLevels() + 2) { + setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); } - final int deltaCap = KllHelper.levelCapacity(k_, numLevels_ + 1, 0, m_); + final int deltaCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); final int newTotalCap = curTotalCap + deltaCap; final float[] newBuf = new float[newTotalCap]; // copy (and shift) the current data into the new buffer - System.arraycopy(items_, levels_[0], newBuf, levels_[0] + deltaCap, curTotalCap); + System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaCap, curTotalCap); items_ = newBuf; // this loop includes the old "extra" index at the top - for (int i = 0; i <= numLevels_; i++) { - levels_[i] += deltaCap; + for (int i = 0; i <= getNumLevels(); i++) { + setLevelsArrayAtPlusEq(i, deltaCap); } - assert levels_[numLevels_] == newTotalCap; + assert getLevelsArrayAt(getNumLevels()) == newTotalCap; - numLevels_++; - levels_[numLevels_] = newTotalCap; // initialize the new "extra" index at the top + incNumLevels(); + setLevelsArrayAt(getNumLevels(), newTotalCap); // initialize the new "extra" index 
at the top } private void sortLevelZero() { - if (!isLevelZeroSorted_) { - Arrays.sort(items_, levels_[0], levels_[1]); - isLevelZeroSorted_ = true; + if (!isLevelZeroSorted()) { + Arrays.sort(items_, getLevelsArrayAt(0), getLevelsArrayAt(1)); + setLevelZeroSorted(true); } } private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) { final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.numLevels_, other.levels_); + + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); final float[] workbuf = new float[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] worklevels = new int[ub + 2]; // ub+1 does not work final int[] outlevels = new int[ub + 2]; - final int provisionalNumLevels = max(numLevels_, other.numLevels_); + final int provisionalNumLevels = max(getNumLevels(), other.getNumLevels()); populateWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output here - final int[] result = KllFloatsHelper.generalFloatsCompress(k_, m_, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted_, random); + final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); final int finalNumLevels = result[0]; final int finalCapacity = result[1]; final int finalPop = result[2]; @@ -827,16 +827,16 @@ private void mergeHigherLevels(final KllFloatsSketch other, final long finalN) { System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); final int theShift = freeSpaceAtBottom - outlevels[0]; - if (levels_.length < finalNumLevels + 1) { - levels_ = new int[finalNumLevels + 1]; + if (getLevelsArray().length < finalNumLevels + 1) { + setLevelsArray(new int[finalNumLevels + 1]); } for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" 
index - levels_[lvl] = outlevels[lvl] + theShift; + setLevelsArrayAt(lvl, outlevels[lvl] + theShift); } items_ = newbuf; - numLevels_ = finalNumLevels; + setNumLevels(finalNumLevels); } private void populateWorkArrays(final KllFloatsSketch other, final float[] workbuf, @@ -844,22 +844,22 @@ private void populateWorkArrays(final KllFloatsSketch other, final float[] workb worklevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, numLevels_, levels_); - System.arraycopy(items_, levels_[0], workbuf, worklevels[0], selfPopZero); + final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), getLevelsArray()); + System.arraycopy(items_, getLevelsArrayAt(0), workbuf, worklevels[0], selfPopZero); worklevels[1] = worklevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, numLevels_, levels_); - final int otherPop = KllHelper.currentLevelSize(lvl, other.numLevels_, other.levels_); + final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), getLevelsArray()); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), other.getLevelsArray()); worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; if (selfPop > 0 && otherPop == 0) { - System.arraycopy(items_, levels_[lvl], workbuf, worklevels[lvl], selfPop); + System.arraycopy(items_, getLevelsArrayAt(lvl), workbuf, worklevels[lvl], selfPop); } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(other.items_, other.levels_[lvl], workbuf, worklevels[lvl], otherPop); + System.arraycopy(other.items_, other.getLevelsArrayAt(lvl), workbuf, worklevels[lvl], otherPop); } else if (selfPop > 0 && otherPop > 0) { - KllFloatsHelper.mergeSortedFloatArrays(items_, levels_[lvl], selfPop, other.items_, - other.levels_[lvl], otherPop, workbuf, worklevels[lvl]); + KllFloatsHelper.mergeSortedFloatArrays(items_, 
getLevelsArrayAt(lvl), selfPop, other.items_, + other.getLevelsArrayAt(lvl), otherPop, workbuf, worklevels[lvl]); } } } diff --git a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java index 1203d9753..b19d64ada 100644 --- a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/PreambleUtil.java @@ -112,7 +112,7 @@ private PreambleUtil() {} // MULTI-ITEM static final int N_LONG_ADR = 8; // to 15 - static final int MIN_K_SHORT_ADR = 16; // to 17 + static final int DY_MIN_K_SHORT_ADR = 16; // to 17 static final int NUM_LEVELS_BYTE_ADR = 18; // FLOAT SKETCH 19 is reserved for future use in float sketch @@ -182,7 +182,7 @@ static class MemoryCheck { final int k; final int m; long n; - int minK; + int dyMinK; int dataStart; int numLevels; int[] levels; @@ -216,7 +216,7 @@ static class MemoryCheck { if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } layout = updatable ? 
Layout.FLOAT_UPDATABLE : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; break; @@ -228,13 +228,13 @@ static class MemoryCheck { layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); if (n != 0) { throwCustom(21, (int) n); } - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; } else { layout = Layout.FLOAT_EMPTY_COMPACT; n = 0; - minK = k; + dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty } @@ -247,13 +247,13 @@ static class MemoryCheck { layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); if (n != 1) { throwCustom(22, (int)n); } - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; } else { layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; - minK = k; + dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; } @@ -264,7 +264,7 @@ static class MemoryCheck { if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } layout = updatable ? 
Layout.DOUBLE_UPDATABLE : Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; break; @@ -276,13 +276,13 @@ static class MemoryCheck { layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); if (n != 0) { throwCustom(21, (int) n); } - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; } else { layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; - minK = k; + dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty } @@ -295,13 +295,13 @@ static class MemoryCheck { layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); if (n != 1) { throwCustom(22, (int)n); } - minK = extractMinK(srcMem); + dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; } else { layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; - minK = k; + dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; } @@ -370,8 +370,8 @@ static long extractN(final Memory mem) { return mem.getLong(N_LONG_ADR); } - static int extractMinK(final Memory mem) { - return mem.getShort(MIN_K_SHORT_ADR) & 0XFFFF; + static int extractDyMinK(final Memory mem) { + return mem.getShort(DY_MIN_K_SHORT_ADR) & 0XFFFF; } static int extractNumLevels(final Memory mem) { @@ -422,7 +422,7 @@ static void insertN(final WritableMemory wmem, final long value) { } static void insertMinK(final WritableMemory wmem, final int value) { - wmem.putShort(MIN_K_SHORT_ADR, (short) value); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) value); } static void insertNumLevels(final WritableMemory wmem, final int value) { diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java index 32c5ebb4c..64bf8e353 100644 --- 
a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java @@ -215,9 +215,9 @@ public void checkTestResults() { for (int i = 0; i < n; i++) { sketch.update(inputArray[i]); } - int numLevels = sketch.numLevels_; + int numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); - int[] levels = sketch.levels_; + int[] levels = sketch.getLevelsArray(); long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java index 2b57766c4..39f5e6267 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java @@ -216,9 +216,9 @@ public void checkTestResults() { for (int i = 0; i < n; i++) { sketch.update(inputArray[i]); } - int numLevels = sketch.numLevels_; + int numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); - int[] levels = sketch.levels_; + int[] levels = sketch.getLevelsArray(); long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index b3e396956..a1699caea 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -53,7 +53,7 @@ public void checkUpdatableSerDe() { KllDoublesSketch sk = new KllDoublesSketch(200); for (int i = 1; i <= 533; i++) { sk.update(i); } int retained = sk.getNumRetained(); - int numLevels = 
((BaseKllSketch)sk).numLevels_; + int numLevels = ((BaseKllSketch)sk).getNumLevels(); println("NumLevels: " + numLevels); println("NumRetained: " + retained); diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 10594895c..630f9fe12 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -116,9 +116,9 @@ public void checkMisc() { sk.toByteArray(); final double[] items = sk.getItems(); assertEquals(items.length, 16); - final int[] levels = sk.levels_; + final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); - assertEquals(sk.numLevels_, 2); + assertEquals(sk.getNumLevels(), 2); } @Test //enable static println(..) for visual checking diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index c684888e0..6e8f56e19 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -116,9 +116,9 @@ public void checkMisc() { sk.toByteArray(); final float[] items = sk.getItems(); assertEquals(items.length, 16); - final int[] levels = sk.levels_; + final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); - assertEquals(sk.numLevels_, 2); + assertEquals(sk.getNumLevels(), 2); } @Test //enable static println(..) 
for visual checking From 185bb384931d11b03548dde9e268873fd284df91 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 5 Mar 2022 12:24:55 -0800 Subject: [PATCH 06/31] Interim 6 --- .../datasketches/kll/HeapKllSketch.java | 142 +++++++++++++ .../datasketches/kll/KllDoublesSketch.java | 82 +------- .../datasketches/kll/KllFloatsSketch.java | 80 +------- .../apache/datasketches/kll/KllHelper.java | 37 +--- .../{BaseKllSketch.java => KllSketch.java} | 188 +++++++++--------- .../kll/KllDoublesSketchTest.java | 2 +- .../datasketches/kll/KllFloatsSketchTest.java | 2 +- .../datasketches/kll/KllHelperTest.java | 12 +- .../datasketches/kll/MiscDoublesTest.java | 6 +- .../datasketches/kll/MiscFloatsTest.java | 6 +- 10 files changed, 275 insertions(+), 282 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/HeapKllSketch.java rename src/main/java/org/apache/datasketches/kll/{BaseKllSketch.java => KllSketch.java} (56%) diff --git a/src/main/java/org/apache/datasketches/kll/HeapKllSketch.java b/src/main/java/org/apache/datasketches/kll/HeapKllSketch.java new file mode 100644 index 000000000..28f5ce2ec --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/HeapKllSketch.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +abstract class HeapKllSketch extends KllSketch { + + /* + * Data is stored in items_. + * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. + * Hence, levels_ must contain (numLevels_ + 1) indices. + * The valid portion of items_ is completely packed, except for level 0, + * which is filled from the top down. + * + * Invariants: + * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. + * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, + * so there is room for least 1 more item in level zero. + * 3) There are no gaps except at the bottom, so if levels_[0] = 0, + * the sketch is exactly filled to capacity and must be compacted. + * 4) Sum of weights of all retained items == N. + * 5) curTotalCap = items_.length = levels_[numLevels_]. + */ + + private int dyMinK_; // dynamic minK for error estimation after merging with different k + private long n_; // number of items input into this sketch + private int numLevels_; // one-based number of current levels, + private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. + private boolean isLevelZeroSorted_; + + /** + * Heap constructor. + * @param k configured size of sketch. 
Range [m, 2^16] + */ + HeapKllSketch(final int k, final SketchType sketchType) { + super(k, sketchType); + KllHelper.checkK(k); + dyMinK_ = k; + numLevels_ = 1; + levels_ = new int[] {k, k}; + isLevelZeroSorted_ = false; + } + + @Override + int getDyMinK() { + return dyMinK_; + } + + @Override + void setDyMinK(final int dyMinK) { + dyMinK_ = dyMinK; + } + + @Override + int getNumLevels() { + return numLevels_; + } + + @Override + void setNumLevels(final int numLevels) { + numLevels_ = numLevels; + } + + @Override + void incNumLevels() { + numLevels_++; + } + + @Override + int[] getLevelsArray() { + return levels_; + } + + @Override + int getLevelsArrayAt(final int index) { + return levels_[index]; + } + + @Override + void setLevelsArray(final int[] levels) { + this.levels_ = levels; + } + + @Override + void setLevelsArrayAt(final int index, final int value) { + this.levels_[index] = value; + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + this.levels_[index] += plusEq; + } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + this.levels_[index] -= minusEq; + } + + @Override + boolean isLevelZeroSorted() { + return isLevelZeroSorted_; + } + + @Override + void setLevelZeroSorted(final boolean sorted) { + this.isLevelZeroSorted_ = sorted; + } + + @Override + void setN(final long n) { + n_ = n; + } + + @Override + void incN() { + n_++; + } + + // public functions + + @Override + public long getN() { + return n_; + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 554a49372..76ae9387d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -22,11 +22,9 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; -import static 
org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; @@ -51,7 +49,6 @@ import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -60,13 +57,12 @@ * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllDoublesSketch extends BaseKllSketch { +public class KllDoublesSketch extends HeapKllSketch { // Specific to the doubles sketch private double[] items_; // the continuous array of double items private double minValue_; private double maxValue_; - private static final boolean IS_DOUBLE = true; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. @@ -82,26 +78,7 @@ public KllDoublesSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllDoublesSketch(final int k) { - this(k, DEFAULT_M, true); - } - - /** - * Used for testing only. - * @param k configured size of sketch. Range [m, 2^16] - * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. - */ - KllDoublesSketch(final int k, final boolean compatible) { - this(k, DEFAULT_M, compatible); - } - - /** - * Heap constructor. - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. - */ - private KllDoublesSketch(final int k, final int m, final boolean compatible) { - super(k, m, compatible); + super(k, SketchType.DOUBLE_SKETCH); items_ = new double[k]; minValue_ = Double.NaN; maxValue_ = Double.NaN; @@ -113,9 +90,8 @@ private KllDoublesSketch(final int k, final int m, final boolean compatible) { * @param memChk the MemoryCheck object */ private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { - super(memChk.k, memChk.m, true); + super(memChk.k, SketchType.DOUBLE_SKETCH); setLevelZeroSorted(memChk.level0Sorted); - final int k = getK(); if (memChk.empty) { setNumLevels(1); @@ -227,46 +203,6 @@ public double getMinValue() { return minValue_; } - //Size related - - /** - * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream - * length. 
This method can be used if allocation of storage is necessary beforehand. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - */ - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, IS_DOUBLE); - return lvlStats.getCompactBytes(); - } - - /** - * Returns the current compact number of bytes this sketch would require to store. - * @return the current compact number of bytes this sketch would require to store. - */ - public int getCurrentCompactSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, false); - } - - /** - * Returns the current updatable number of bytes this sketch would require to store. - * @return the current updatable number of bytes this sketch would require to store. - */ - public int getCurrentUpdatableSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, true); - } - - /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream * given a set of splitPoints (values). 
@@ -511,8 +447,7 @@ public byte[] toByteArray() { @Override public byte[] toUpdatableByteArray() { final int k = getK(); - final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - final int numBytes = KllHelper.getSerializedSizeBytes(getNumLevels(), itemCap, IS_DOUBLE, true); + final int numBytes = getCurrentUpdatableSerializedSizeBytes(); final byte[] bytes = new byte[numBytes]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); //load the preamble @@ -562,7 +497,7 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); + sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); sb.append(" Min Value : ").append(minValue_).append(Util.LS); sb.append(" Max Value : ").append(maxValue_).append(Util.LS); sb.append("### End sketch summary").append(Util.LS); @@ -604,7 +539,6 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append(Util.LS); sb.append("### End sketch data").append(Util.LS); } - return sb.toString(); } @@ -744,7 +678,6 @@ private void compressWhileUpdating() { if (oddPop) { setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item - items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy } else { setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty @@ -831,7 +764,8 @@ private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) final int theShift = freeSpaceAtBottom - outlevels[0]; if (getLevelsArray().length < finalNumLevels + 1) { - setLevelsArray(new int[finalNumLevels + 1]); + +; } for (int 
lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index @@ -867,7 +801,7 @@ private void populateWorkArrays(final KllDoublesSketch other, final double[] wor } } - // only for testing + // for testing double[] getItems() { return items_; diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 78176a2b8..e550c2980 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -22,11 +22,9 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; @@ -50,7 +48,6 @@ import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -60,13 +57,12 @@ * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllFloatsSketch extends BaseKllSketch { +public class KllFloatsSketch extends HeapKllSketch { // Specific to the floats sketch private float[] items_; // the continuous array of float items private float minValue_; private float maxValue_; - private static final boolean IS_DOUBLE = false; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. @@ -82,26 +78,7 @@ public KllFloatsSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllFloatsSketch(final int k) { - this(k, DEFAULT_M, true); - } - - /** - * Used for testing only. - * @param k configured size of sketch. Range [m, 2^16] - * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. - */ - KllFloatsSketch(final int k, final boolean compatible) { - this(k, DEFAULT_M, compatible); - } - - /** - * Heap constructor. - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
- */ - private KllFloatsSketch(final int k, final int m, final boolean compatible) { - super(k, m, compatible); + super(k, SketchType.FLOAT_SKETCH); items_ = new float[k]; minValue_ = Float.NaN; maxValue_ = Float.NaN; @@ -113,7 +90,7 @@ private KllFloatsSketch(final int k, final int m, final boolean compatible) { * @param memChk the MemoryCheck object */ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { - super(memChk.k, memChk.m, true); + super(memChk.k, SketchType.FLOAT_SKETCH); setLevelZeroSorted(memChk.level0Sorted); final int k = getK(); if (memChk.empty) { @@ -129,7 +106,7 @@ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { setDyMinK(k); setNumLevels(1); setLevelsArray(new int[getNumLevels() + 1]); - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, getNumLevels()); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); setLevelsArrayAt(0,itemCapacity - 1); setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ items_ = new float[itemCapacity]; @@ -142,7 +119,7 @@ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { setNumLevels(memChk.numLevels); setLevelsArray(new int[getNumLevels() + 1]); int offset = DATA_START_ADR_FLOAT; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, DEFAULT_M, getNumLevels()); + final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); if (memChk.updatable) { // If updatable the last integer in levels_ IS serialized. mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels() + 1); //load levels_ @@ -226,46 +203,6 @@ public float getMinValue() { return minValue_; } - //Size related - - /** - * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream - * length. This method can be used if allocation of storage is necessary beforehand. 
- * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - */ - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, IS_DOUBLE); - return lvlStats.getCompactBytes(); - } - - /** - * Returns the current compact number of bytes this sketch would require to store. - * @return the current compact number of bytes this sketch would require to store. - */ - public int getCurrentCompactSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, false); - } - - /** - * Returns the current updatable number of bytes this sketch would require to store. - * @return the current updatable number of bytes this sketch would require to store. - */ - public int getCurrentUpdatableSerializedSizeBytes() { - return KllHelper.getSerializedSizeBytes(getNumLevels(), getNumRetained(), IS_DOUBLE, true); - } - - /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() } - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream * given a set of splitPoints (values). 
@@ -510,8 +447,7 @@ public byte[] toByteArray() { @Override public byte[] toUpdatableByteArray() { final int k = getK(); - final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - final int numBytes = KllHelper.getSerializedSizeBytes(getNumLevels(), itemCap, IS_DOUBLE, true); + final int numBytes = getCurrentUpdatableSerializedSizeBytes(); final byte[] bytes = new byte[numBytes]; final WritableMemory wmem = WritableMemory.writableWrap(bytes); //load the preamble @@ -561,7 +497,7 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Storage Bytes : ").append(getSerializedSizeBytes()).append(Util.LS); + sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); sb.append(" Min Value : ").append(minValue_).append(Util.LS); sb.append(" Max Value : ").append(maxValue_).append(Util.LS); sb.append("### End sketch summary").append(Util.LS); @@ -603,7 +539,6 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append(Util.LS); sb.append("### End sketch data").append(Util.LS); } - return sb.toString(); } @@ -740,6 +675,7 @@ private void compressWhileUpdating() { items_, adjBeg + halfAdjPop); } setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above + if (oddPop) { setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 6e3cac0c7..8f286c7f8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ 
b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -21,14 +21,11 @@ import static java.lang.Math.pow; import static org.apache.datasketches.Util.floorPowerOf2; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.MAX_K; import static org.apache.datasketches.kll.PreambleUtil.MIN_K; -import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllSketch.SketchType; class KllHelper { static final String LS = System.getProperty("line.separator"); @@ -58,20 +55,20 @@ static int ubOnNumLevels(final long n) { } static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, - final boolean printDetail, final boolean printSummaries, final boolean isDouble) { + final boolean printDetail, final boolean printSummaries, final SketchType sketchType) { long cumN; int numLevels = 0; LevelStats lvlStats; do { numLevels++; - lvlStats = getLevelStats(k, m, numLevels, printDetail, printSummaries, isDouble); + lvlStats = getLevelStats(k, m, numLevels, printDetail, printSummaries, sketchType); cumN = lvlStats.getMaxN(); } while (cumN < n); return lvlStats; } static LevelStats getLevelStats(final int k, final int m, final int numLevels, - final boolean printDetail, final boolean printSummary, final boolean isDouble) { + final boolean printDetail, final boolean printSummary, final SketchType sketchType) { int cumN = 0; int cumCap = 0; if (printDetail) { @@ -87,8 +84,8 @@ static LevelStats getLevelStats(final int k, final int m, final int numLevels, System.out.printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); } } - final int compactBytes = getSerializedSizeBytes(numLevels, cumCap, isDouble, false); - 
final int updatableBytes = getSerializedSizeBytes(numLevels, cumCap, isDouble, true); + final int compactBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, false); + final int updatableBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, true); if (printDetail) { System.out.printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); System.out.println(" COMPACT BYTES: " + compactBytes); @@ -114,7 +111,6 @@ static class LevelStats { this.maxCap = maxCap; this.compactBytes = compactBytes; this.updatableBytes = updatableBytes; - } @Override @@ -136,23 +132,6 @@ public String toString() { public int getMaxCap() { return maxCap; } } - static int getSerializedSizeBytes(final int numLevels, final int numRetained, final boolean isDouble, - final boolean updatable) { - int levelsBytes = 0; - if (!updatable) { - if (numRetained == 0) { return N_LONG_ADR; } - if (numRetained == 1) { return DATA_START_ADR_SINGLE_ITEM + (isDouble ? Double.BYTES : Float.BYTES); } - levelsBytes = numLevels * Integer.BYTES; - } else { - levelsBytes = (numLevels + 1) * Integer.BYTES; - } - if (isDouble) { - return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; //+2 is for min & max - } else { - return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; - } - } - /** * Returns the maximum number of items that this sketch can handle * @param k The sizing / accuracy parameter of the sketch in items. 
@@ -293,10 +272,6 @@ static int currentLevelSize(final int level, final int numLevels, final int[] le return levels[level + 1] - levels[level]; } - static int getNumRetained(final int numLevels, final int[] levels) { - return levels[numLevels] - levels[0]; - } - static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) { return levels[numLevels] - levels[1]; } diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java similarity index 56% rename from src/main/java/org/apache/datasketches/kll/BaseKllSketch.java rename to src/main/java/org/apache/datasketches/kll/KllSketch.java index 6c7e90593..0334c300c 100644 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -26,139 +26,143 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.round; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLE_SKETCH; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.PreambleUtil.MAX_K; import static org.apache.datasketches.kll.PreambleUtil.MIN_K; +import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; import java.util.Random; -abstract class BaseKllSketch { - - /* - * Data is stored in items_. - * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence, levels_ must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed, except for level 0, - * which is filled from the top down. 
- * - * Invariants: - * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. - * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, - * so there is room for least 1 more item in level zero. - * 3) There are no gaps except at the bottom, so if levels_[0] = 0, - * the sketch is exactly filled to capacity and must be compacted. - * 4) Sum of weights of all retained items == N. - * 5) curTotalCap = items_.length = levels_[numLevels_]. - */ +import org.apache.datasketches.kll.KllHelper.LevelStats; - static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. - private final int k_; // configured value of K - private int dyMinK_; // dynamic minK for error estimation after merging with different k - private long n_; // number of items input into this sketch - private int numLevels_; // one-based number of current levels, - private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. - private boolean isLevelZeroSorted_; - - private final boolean compatible; //compatible with quantiles sketch treatment of rank 0.0 and 1.0. +abstract class KllSketch { static final Random random = new Random(); + static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. + static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch + private final int k; //configured value of K - /** - * Heap constructor. - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - * @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0. 
- */ - BaseKllSketch(final int k, final int m, final boolean compatible) { - KllHelper.checkK(k); - k_ = k; - dyMinK_ = k; - numLevels_ = 1; - levels_ = new int[] {k, k}; - isLevelZeroSorted_ = false; - this.compatible = compatible; - } + enum SketchType { FLOAT_SKETCH, DOUBLE_SKETCH } - int getDyMinK() { - return dyMinK_; - } + static SketchType sketchType; - void setDyMinK(final int dyMinK) { - dyMinK_ = dyMinK; + KllSketch(final int k, final SketchType sketchType) { + this.k = k; + KllSketch.sketchType = sketchType; } - int getNumLevels() { - return numLevels_; - } + abstract int getDyMinK(); - void setNumLevels(final int numLevels) { - numLevels_ = numLevels; - } + abstract void setDyMinK(int dyMinK); - void incNumLevels() { - numLevels_++; - } + abstract int getNumLevels(); - int[] getLevelsArray() { - return levels_; - } + abstract void setNumLevels(int numLevels); - int getLevelsArrayAt(final int index) { - return levels_[index]; - } + abstract void incNumLevels(); - void setLevelsArray(final int[] levels) { - this.levels_ = levels; - } + abstract int[] getLevelsArray(); - void setLevelsArrayAt(final int index, final int value) { - this.levels_[index] = value; - } + abstract int getLevelsArrayAt(int index); - void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - this.levels_[index] += plusEq; - } + abstract void setLevelsArray(int[] levels); + + abstract void setLevelsArrayAt(int index, int value); - void setLevelsArrayAtMinusEq(final int index, final int minusEq) { - this.levels_[index] -= minusEq; + abstract void setLevelsArrayAtPlusEq(int index, int plusEq); + + abstract void setLevelsArrayAtMinusEq(int index, int minusEq); + + abstract boolean isLevelZeroSorted(); + + abstract void setLevelZeroSorted(boolean sorted); + + boolean isCompatible() { + return compatible; } - boolean isLevelZeroSorted() { - return isLevelZeroSorted_; + abstract void setN(long n); + + abstract void incN(); + + static int getSerializedSizeBytes(final int numLevels, 
final int numRetained, final SketchType sketchType, + final boolean updatable) { + int levelsBytes = 0; + if (!updatable) { + if (numRetained == 0) { return N_LONG_ADR; } + if (numRetained == 1) { + return DATA_START_ADR_SINGLE_ITEM + (sketchType == DOUBLE_SKETCH ? Double.BYTES : Float.BYTES); + } + levelsBytes = numLevels * Integer.BYTES; + } else { + levelsBytes = (numLevels + 1) * Integer.BYTES; + } + if (sketchType == DOUBLE_SKETCH) { + return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; //+2 is for min & max + } else { + return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; + } } - void setLevelZeroSorted(final boolean sorted) { - this.isLevelZeroSorted_ = sorted; + //Public Methods + + /** + * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream + * length. This method can be used if allocation of storage is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + */ + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, sketchType); + return lvlStats.getCompactBytes(); } - boolean isCompatible() { - return this.compatible; + /** + * Returns the current compact number of bytes this sketch would require to store. + * @return the current compact number of bytes this sketch would require to store. + */ + public int getCurrentCompactSerializedSizeBytes() { + return KllSketch.getSerializedSizeBytes(getNumLevels(), getNumRetained(), sketchType, false); } - void setN(final long n) { - n_ = n; + /** + * Returns the current updatable number of bytes this sketch would require to store. + * @return the current updatable number of bytes this sketch would require to store. 
+ */ + public int getCurrentUpdatableSerializedSizeBytes() { + final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); + return KllSketch.getSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); } - void incN() { - n_++; + /** + * Returns the number of bytes this sketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use getCurrentCompactSerializedSizeBytes() + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); } - // public functions /** * Returns the parameter k * @return parameter k */ public int getK() { - return k_; + return k; } /** * Returns the length of the input stream. * @return stream length */ - public long getN() { - return n_; - } + public abstract long getN(); /** * Gets the approximate value of k to use given epsilon, the normalized rank error. @@ -193,7 +197,7 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { * @see KllDoublesSketch */ public double getNormalizedRankError(final boolean pmf) { - return KllHelper.getNormalizedRankError(dyMinK_, pmf); + return KllHelper.getNormalizedRankError(getDyMinK(), pmf); } /** @@ -214,7 +218,7 @@ public static double getNormalizedRankError(final int k, final boolean pmf) { * @return the number of retained items (samples) in the sketch */ public int getNumRetained() { - return KllHelper.getNumRetained(numLevels_, levels_); + return getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0); } /** @@ -222,7 +226,7 @@ public int getNumRetained() { * @return empty flag */ public boolean isEmpty() { - return n_ == 0; + return getN() == 0; } /** @@ -230,7 +234,7 @@ public boolean isEmpty() { * @return estimation mode flag */ public boolean isEstimationMode() { - return numLevels_ > 1; + return getNumLevels() > 1; } /** diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java 
b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index f21024240..20d7c378a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -386,7 +386,7 @@ public void nanSplitPoint() { @Test public void getMaxSerializedSizeBytes() { final int sizeBytes = - KllDoublesSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); + KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); assertEquals(sizeBytes, 5708); } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 6c9b48519..3360fc96e 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -386,7 +386,7 @@ public void nanSplitPoint() { @Test public void getMaxSerializedSizeBytes() { final int sizeBytes = - KllFloatsSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); + KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); assertEquals(sizeBytes, 2908); } diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index a1699caea..489d722a0 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -21,9 +21,11 @@ import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllHelper.getLevelStats; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLE_SKETCH; import static org.testng.Assert.assertEquals; import org.apache.datasketches.kll.KllHelper.LevelStats; +import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; @@ -35,7 +37,7 @@ public void testGetAllLevelStats() { long n = 1L << 
30; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, true); + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, DOUBLE_SKETCH); assertEquals(lvlStats.getCompactBytes(), 5708); } @@ -44,7 +46,7 @@ public void getStatsAtNumLevels() { int k = 200; int m = 8; int numLevels = 23; - LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, true); + LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, DOUBLE_SKETCH); assertEquals(lvlStats.getCompactBytes(), 5708); } @@ -53,7 +55,7 @@ public void checkUpdatableSerDe() { KllDoublesSketch sk = new KllDoublesSketch(200); for (int i = 1; i <= 533; i++) { sk.update(i); } int retained = sk.getNumRetained(); - int numLevels = ((BaseKllSketch)sk).getNumLevels(); + int numLevels = ((HeapKllSketch)sk).getNumLevels(); println("NumLevels: " + numLevels); println("NumRetained: " + retained); @@ -92,7 +94,7 @@ public void testGetAllLevelStats2() { long n = 533; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, true); + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, DOUBLE_SKETCH); } //@Test @@ -100,7 +102,7 @@ public void getStatsAtNumLevels2() { int k = 20; int m = 8; int numLevels = 2; - LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, true); + LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, DOUBLE_SKETCH); } /** diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 630f9fe12..ec96559f6 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -41,8 +41,8 @@ public void checkGetKFromEps() { final int k = DEFAULT_K; final double eps = KllHelper.getNormalizedRankError(k, false); final double epsPmf = KllHelper.getNormalizedRankError(k, true); - final int kEps = 
BaseKllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); + final int kEps = KllSketch.getKFromEpsilon(eps, false); + final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); assertEquals(kEps, k); assertEquals(kEpsPmf, k); } @@ -108,7 +108,7 @@ public void checkHeapifyExceptions5() { @Test public void checkMisc() { - KllDoublesSketch sk = new KllDoublesSketch(8, true); + KllDoublesSketch sk = new KllDoublesSketch(8); assertTrue(Objects.isNull(sk.getQuantiles(10))); sk.toString(true, true); for (int i = 0; i < 20; i++) { sk.update(i); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 6e8f56e19..7751b87b4 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -41,8 +41,8 @@ public void checkGetKFromEps() { final int k = DEFAULT_K; final double eps = KllHelper.getNormalizedRankError(k, false); final double epsPmf = KllHelper.getNormalizedRankError(k, true); - final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); + final int kEps = KllSketch.getKFromEpsilon(eps, false); + final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); assertEquals(kEps, k); assertEquals(kEpsPmf, k); } @@ -108,7 +108,7 @@ public void checkHeapifyExceptions5() { @Test public void checkMisc() { - KllFloatsSketch sk = new KllFloatsSketch(8, true); + KllFloatsSketch sk = new KllFloatsSketch(8); assertTrue(Objects.isNull(sk.getQuantiles(10))); sk.toString(true, true); for (int i = 0; i < 20; i++) { sk.update(i); } From d51ee56700d82b69f7ddda80c97e68b14f34b0ce Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 5 Mar 2022 13:23:04 -0800 Subject: [PATCH 07/31] Interim 7 Fixes a bug introduced in Interim 6 --- .../datasketches/kll/KllDoublesSketch.java | 49 +++++++++---------- 
.../datasketches/kll/KllFloatsSketch.java | 44 ++++++++--------- ...{HeapKllSketch.java => KllHeapSketch.java} | 4 +- .../apache/datasketches/kll/KllHelper.java | 4 +- ...PreambleUtil.java => KllPreambleUtil.java} | 4 +- .../apache/datasketches/kll/KllSketch.java | 14 +++--- .../kll/KllDoublesSketchTest.java | 24 ++++----- .../datasketches/kll/KllFloatsSketchTest.java | 22 ++++----- .../datasketches/kll/KllHelperTest.java | 2 +- .../datasketches/kll/MiscDoublesTest.java | 2 +- .../datasketches/kll/MiscFloatsTest.java | 2 +- 11 files changed, 85 insertions(+), 86 deletions(-) rename src/main/java/org/apache/datasketches/kll/{HeapKllSketch.java => KllHeapSketch.java} (97%) rename src/main/java/org/apache/datasketches/kll/{PreambleUtil.java => KllPreambleUtil.java} (99%) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 76ae9387d..ef87d9678 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -22,34 +22,34 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; -import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; -import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import 
static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_DOUBLE; -import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; -import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; -import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; +import static 
org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import java.util.Arrays; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; -import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; +import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -57,7 +57,7 @@ * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllDoublesSketch extends HeapKllSketch { +public class KllDoublesSketch extends KllHeapSketch { // Specific to the doubles sketch private double[] items_; // the continuous array of double items @@ -764,8 +764,7 @@ private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) final int theShift = freeSpaceAtBottom - outlevels[0]; if (getLevelsArray().length < finalNumLevels + 1) { - -; + setLevelsArray(new int[finalNumLevels + 1]); } for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index e550c2980..bb71c8371 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -22,33 +22,33 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR; -import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; -import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; -import static 
org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_FLOAT; -import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; -import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; -import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; -import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; +import static 
org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import java.util.Arrays; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; -import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; +import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -57,7 +57,7 @@ * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllFloatsSketch extends HeapKllSketch { +public class KllFloatsSketch extends KllHeapSketch { // Specific to the floats sketch private float[] items_; // the continuous array of float items diff --git a/src/main/java/org/apache/datasketches/kll/HeapKllSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java similarity index 97% rename from src/main/java/org/apache/datasketches/kll/HeapKllSketch.java rename to src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 28f5ce2ec..5d0c494d5 100644 --- a/src/main/java/org/apache/datasketches/kll/HeapKllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -19,7 +19,7 @@ package org.apache.datasketches.kll; -abstract class HeapKllSketch extends KllSketch { +abstract class KllHeapSketch extends KllSketch { /* * Data is stored in items_. @@ -48,7 +48,7 @@ abstract class HeapKllSketch extends KllSketch { * Heap constructor. * @param k configured size of sketch. 
Range [m, 2^16] */ - HeapKllSketch(final int k, final SketchType sketchType) { + KllHeapSketch(final int k, final SketchType sketchType) { super(k, sketchType); KllHelper.checkK(k); dyMinK_ = k; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 8f286c7f8..40208837c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -21,8 +21,8 @@ import static java.lang.Math.pow; import static org.apache.datasketches.Util.floorPowerOf2; -import static org.apache.datasketches.kll.PreambleUtil.MAX_K; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.kll.KllSketch.SketchType; diff --git a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java similarity index 99% rename from src/main/java/org/apache/datasketches/kll/PreambleUtil.java rename to src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index b19d64ada..c643927e7 100644 --- a/src/main/java/org/apache/datasketches/kll/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -85,9 +85,9 @@ * * @author Lee Rhodes */ -final class PreambleUtil { +final class KllPreambleUtil { - private PreambleUtil() {} + private KllPreambleUtil() {} static final String LS = System.getProperty("line.separator"); diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 0334c300c..27b84b635 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -27,14 +27,14 @@ import static 
java.lang.Math.min; import static java.lang.Math.round; import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLE_SKETCH; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.PreambleUtil.MAX_K; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K; -import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; import java.util.Random; diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 20d7c378a..ecbafc6ea 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -20,9 +20,9 @@ package org.apache.datasketches.kll; //import static org.apache.datasketches.Util.getResourceBytes; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.MAX_K; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static 
org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -160,23 +160,23 @@ public void merge() { final KllDoublesSketch sketch2 = new KllDoublesSketch(); final int n = 10000; for (int i = 0; i < n; i++) { - sketch1.update(i); - sketch2.update(2 * n - i - 1); + sketch1.update(i * 1.0); + sketch2.update((2 * n - i - 1) * 1.0); } - assertEquals(sketch1.getMinValue(), 0.0f); - assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (n - 1)*1.0); - assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + assertEquals(sketch2.getMinValue(), n * 1.0); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); sketch1.merge(sketch2); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2L * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1); - assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0); + assertEquals(sketch1.getQuantile(0.5), n * 1.0, n * PMF_EPS_FOR_K_256); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 3360fc96e..87d992c51 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -19,9 +19,9 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.PreambleUtil.MAX_K; -import static org.apache.datasketches.kll.PreambleUtil.MIN_K; +import static 
org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.apache.datasketches.Util.getResourceBytes; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -160,23 +160,23 @@ public void merge() { final KllFloatsSketch sketch2 = new KllFloatsSketch(); final int n = 10000; for (int i = 0; i < n; i++) { - sketch1.update(i); - sketch2.update(2 * n - i - 1); + sketch1.update(i * 1.0f); + sketch2.update((2 * n - i - 1) * 1.0f); } assertEquals(sketch1.getMinValue(), 0.0f); - assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0f); - assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + assertEquals(sketch2.getMinValue(), n * 1.0f); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0f); sketch1.merge(sketch2); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2L * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1); - assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0.0f); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0f); + assertEquals(sketch1.getQuantile(0.5), n * 1.0f, n * PMF_EPS_FOR_K_256); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 489d722a0..40f02d7ea 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -55,7 +55,7 @@ public void checkUpdatableSerDe() { KllDoublesSketch sk = new KllDoublesSketch(200); for (int i = 1; i <= 533; i++) { sk.update(i); } int retained = sk.getNumRetained(); - int numLevels = ((HeapKllSketch)sk).getNumLevels(); + int numLevels = 
((KllHeapSketch)sk).getNumLevels(); println("NumLevels: " + numLevels); println("NumRetained: " + retained); diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index ec96559f6..28998e3fb 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -19,7 +19,7 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 7751b87b4..5f9ebbe39 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -19,7 +19,7 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; From 5610b3abf619db7ec0bd62f754b8eb7b3782a8f5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 7 Mar 2022 10:08:28 -0800 Subject: [PATCH 08/31] Interim 8 --- .../kll/KllDirectDoublesSketch.java | 88 +++++--- .../datasketches/kll/KllDirectSketch.java | 209 +++++++++++++++++ .../datasketches/kll/KllDoublesSketch.java | 78 ++++--- .../datasketches/kll/KllFloatsSketch.java | 78 ++++--- .../datasketches/kll/KllHeapSketch.java | 69 +++--- .../apache/datasketches/kll/KllHelper.java | 2 +- .../datasketches/kll/KllPreambleUtil.java | 90 ++++++-- .../apache/datasketches/kll/KllSketch.java | 211 +++++++++--------- .../datasketches/kll/KllHelperTest.java | 4 +- .../datasketches/kll/MiscFloatsTest.java | 
13 ++ 10 files changed, 579 insertions(+), 263 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/KllDirectSketch.java diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 4d4d0b0d9..3f239c123 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -19,50 +19,64 @@ package org.apache.datasketches.kll; -//import static java.lang.Math.max; -//import static java.lang.Math.min; -//import static org.apache.datasketches.Util.isOdd; -//import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; -//import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE; -//import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM; -//import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K; -//import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M; -//import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK; -//import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK; -//import static org.apache.datasketches.kll.PreambleUtil.FAMILY_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.FLAGS_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.K_SHORT_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -//import static org.apache.datasketches.kll.PreambleUtil.MIN_K_SHORT_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.M_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.NUM_LEVELS_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.N_LONG_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_DOUBLE; -//import static 
org.apache.datasketches.kll.PreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -//import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_EMPTY_FULL; -//import static org.apache.datasketches.kll.PreambleUtil.SERIAL_VERSION_SINGLE; -//import static org.apache.datasketches.kll.PreambleUtil.SER_VER_BYTE_ADR; -//import static org.apache.datasketches.kll.PreambleUtil.SINGLE_ITEM_BIT_MASK; -//import static org.apache.datasketches.kll.PreambleUtil.UPDATABLE_BIT_MASK; -// -//import java.util.Arrays; -// -//import org.apache.datasketches.Family; -//import org.apache.datasketches.SketchesArgumentException; -//import org.apache.datasketches.Util; -//import org.apache.datasketches.kll.KllHelper.LevelStats; -//import org.apache.datasketches.kll.PreambleUtil.MemoryCheck; -//import org.apache.datasketches.memory.Memory; -//import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.WritableMemory; /** * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllDirectDoublesSketch { +public class KllDirectDoublesSketch extends KllDirectSketch { + KllDirectDoublesSketch(final WritableMemory wmem) { + super(wmem, SketchType.DOUBLE_SKETCH); + } + @Override + public byte[] toByteArray() { + return null; + } + + @Override + public String toString(final boolean withLevels, final boolean withData) { + return null; + } + + @Override + public byte[] toUpdatableByteArray() { + return null; + } + + @Override + int[] getLevelsArray() { + return null; + } + + @Override + int getLevelsArrayAt(final int index) { + return 0; + } + + + @Override + void setLevelsArray(final int[] levels) { + + } + + @Override + void setLevelsArrayAt(final int index, final int value) { + + } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + + } } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java new file mode 100644 index 000000000..8b5cb4519 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.extractN; +import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.insertN; +import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllPreambleUtil.Layout; +import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.WritableMemory; + +abstract class KllDirectSketch extends KllSketch { + final WritableMemory wmem; + final Layout layout; + final boolean compact; + final int dataStartBytes; + + KllDirectSketch(final WritableMemory wmem, final SketchType sketchType) { + super(sketchType); + final MemoryCheck memChk = new MemoryCheck(wmem); + this.wmem = wmem; + this.layout = memChk.layout; + this.compact = !memChk.updatable; + this.dataStartBytes = memChk.dataStart; + } + + @Override + public int getK() { + return extractK(wmem); + } + + @Override + public long getN() { + return extractN(wmem); + } + + @Override + public int getNumRetained() { + if (compact) { + final int itemCapacity = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); + return itemCapacity - getLevelsArrayAt(0); + } + return getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0); // top boundary offset minus level-0 offset +
} + + @Override + public abstract byte[] toByteArray(); + + @Override + public abstract String toString(final boolean withLevels, final boolean withData); + + @Override + public abstract byte[] toUpdatableByteArray(); + + @Override + int getDyMinK() { + return extractDyMinK(wmem); + } + + @Override + int[] getLevelsArray() { + final int lengthInts = getLevelsArrLengthInts(); + final int[] levelsArr = new int[lengthInts]; + wmem.getIntArray(dataStartBytes, levelsArr, 0, lengthInts); + return levelsArr; + } + + @Override + int getLevelsArrayAt(final int index) { + return wmem.getInt(dataStartBytes + index * Integer.BYTES); + } + + @Override + int getNumLevels() { + return extractNumLevels(wmem); + } + + @Override + void incN() { + if (compact) { kllDirectSketchThrow(30); } + long n = extractN(wmem); + insertN(wmem, ++n); + } + + @Override + void incNumLevels() { + if (compact) { kllDirectSketchThrow(30); } + int numLevels = extractNumLevels(wmem); + insertNumLevels(wmem, ++numLevels); + } + + @Override + boolean isLevelZeroSorted() { + return extractLevelZeroSortedFlag(wmem); + } + + @Override + void setDyMinK(final int dyMinK) { + if (compact) { kllDirectSketchThrow(30); } + insertDyMinK(wmem, dyMinK); + } + + @Override + void setLevelsArray(final int[] levels) { + if (compact) { kllDirectSketchThrow(30); } + final int lengthInts = getLevelsArrLengthInts(); + wmem.putIntArray(dataStartBytes, levels, 0, lengthInts); + } + + @Override + void setLevelsArrayAt(final int index, final int value) { + if (compact) { kllDirectSketchThrow(30); } + wmem.putInt(dataStartBytes + index * Integer.BYTES, value); + } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + if (compact) { kllDirectSketchThrow(30); } + final int old = wmem.getInt(dataStartBytes + index * Integer.BYTES); + wmem.putInt(dataStartBytes + index * Integer.BYTES, old - minusEq); + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + if (compact) { 
kllDirectSketchThrow(30); } + final int old = wmem.getInt(dataStartBytes + index * Integer.BYTES); + wmem.putInt(dataStartBytes + index * Integer.BYTES, old + plusEq); + } + + @Override + void setLevelZeroSorted(final boolean sorted) { + if (compact) { kllDirectSketchThrow(30); } + insertLevelZeroSortedFlag(wmem, sorted); + } + + @Override + void setN(final long n) { + if (compact) { kllDirectSketchThrow(30); } + insertN(wmem, n); + } + + @Override + void setNumLevels(final int numLevels) { + if (compact) { kllDirectSketchThrow(30); } + insertNumLevels(wmem, numLevels); + } + + int getItemsDataStartBytes() { + return dataStartBytes + getLevelsArrLengthInts() * Integer.BYTES; + } + + int getItemsArrLengthItems() { + if (compact) { return getNumRetained(); } + return getLevelsArrayAt(getNumLevels()); + } + + int getLevelsArrLengthInts() { + final int lengthInts; + + switch (layout) { + case FLOAT_EMPTY_COMPACT: + case FLOAT_SINGLE_COMPACT: + case DOUBLE_EMPTY_COMPACT: + case DOUBLE_SINGLE_COMPACT: { return 0; } + + case FLOAT_FULL_COMPACT: { lengthInts = getNumLevels(); break; } + case DOUBLE_FULL_COMPACT: { lengthInts = getNumLevels(); break; } + case FLOAT_UPDATABLE: { lengthInts = getNumLevels() + 1; break; } + case DOUBLE_UPDATABLE: { lengthInts = getNumLevels() + 1; break; } + default: return 0; + } + return lengthInts; + } + + + private static void kllDirectSketchThrow(final int errNo) { + String msg = ""; + switch (errNo) { + case 30: msg = "Sketch Memory is immutable, cannot write."; break; + } + throw new SketchesArgumentException(msg); + } +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index ef87d9678..320eb6ad4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -50,6 +50,7 @@ import org.apache.datasketches.SketchesArgumentException; import 
org.apache.datasketches.Util; import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -503,22 +504,24 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append("### End sketch summary").append(Util.LS); if (withLevels) { - sb.append("### KLL sketch levels:").append(Util.LS) + sb.append("### KLL levels array:").append(Util.LS) .append(" level, offset: nominal capacity, actual size").append(Util.LS); - for (int i = 0; i < getNumLevels(); i++) { - sb.append(" ").append(i).append(", ").append(getLevelsArrayAt(i)).append(": ") - .append(KllHelper.levelCapacity(k, getNumLevels(), i, M)) - .append(", ").append(KllHelper.currentLevelSize(i, getNumLevels(), getLevelsArray())).append(Util.LS); + int level = 0; + for ( ; level < getNumLevels(); level++) { + sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": ") + .append(KllHelper.levelCapacity(k, getNumLevels(), level, M)) + .append(", ").append(KllHelper.currentLevelSize(level, getNumLevels(), getLevelsArray())).append(Util.LS); } - sb.append("### End sketch levels").append(Util.LS); + sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": (Exclusive)") + .append(Util.LS); + sb.append("### End levels array").append(Util.LS); } if (withData) { - sb.append("### KLL sketch data {index, item}:").append(Util.LS); + sb.append("### KLL items data {index, item}:").append(Util.LS); if (getLevelsArrayAt(0) > 0) { sb.append(" Garbage:" + Util.LS); for (int i = 0; i < getLevelsArrayAt(0); i++) { - if (items_[i] == 0.0f) { continue; } sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); } } @@ -537,7 +540,7 @@ public String toString(final boolean withLevels, final boolean withData) { } sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) + " 
(Exclusive)"); sb.append(Util.LS); - sb.append("### End sketch data").append(Util.LS); + sb.append("### End items data").append(Util.LS); } return sb.toString(); } @@ -644,18 +647,18 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, private void compressWhileUpdating() { final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - // It is important to do add the new top level right here. Be aware that this operation - // grows the buffer and shifts the data and also the boundaries of the data and grows the - // levels array and increments numLevels_ + // It is important to add the new top level right here. Be aware that this next operation + // grows the items array, shifts the items data and the level boundaries of the data. + // It also grows the levels array and increments numLevels_. if (level == getNumLevels() - 1) { addEmptyTopLevelToCompletelyFullSketch(); } final int rawBeg = getLevelsArrayAt(level); - final int rawLim = getLevelsArrayAt(level + 1); + final int rawEnd = getLevelsArrayAt(level + 1); // +2 is OK because we already added a new top level if necessary - final int popAbove = getLevelsArrayAt(level + 2) - rawLim; - final int rawPop = rawLim - rawBeg; + final int popAbove = getLevelsArrayAt(level + 2) - rawEnd; + final int rawPop = rawEnd - rawBeg; final boolean oddPop = isOdd(rawPop); final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; final int adjPop = oddPop ? 
rawPop - 1 : rawPop; @@ -671,7 +674,7 @@ private void compressWhileUpdating() { KllDoublesHelper.randomlyHalveDownDoubles(items_, adjBeg, adjPop, random); KllDoublesHelper.mergeSortedDoubleArrays( items_, adjBeg, halfAdjPop, - items_, rawLim, popAbove, + items_, rawEnd, popAbove, items_, adjBeg + halfAdjPop); } setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above @@ -697,36 +700,47 @@ private void compressWhileUpdating() { } } + /** + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom + */ private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalCap = getLevelsArrayAt(getNumLevels()); + final int curTotalItemsCap = getLevelsArrayAt(getNumLevels()); // make sure that we are following a certain growth scheme assert getLevelsArrayAt(0) == 0; //definition of full - assert items_.length == curTotalCap; + assert items_.length == curTotalItemsCap; - // note that merging MIGHT over-grow levels_, in which case we might not have to grow it here - if (getLevelsArray().length < getNumLevels() + 2) { - setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); - } + //this is a little out of sequence so that we can pre-compute the total required increase in space + final int deltaItemsCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); + final int newTotalItemsCap = curTotalItemsCap + deltaItemsCap; - final int deltaCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); - final int newTotalCap = curTotalCap + deltaCap; + // Check if growing the levels arr is required.
+ // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it + final boolean growLevelsArr = getLevelsArray().length < getNumLevels() + 2; + + //int totalDeltaSpaceRequired = deltaItemsCap * Double.BYTES; + //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } + //insert memory space management here + + if (growLevelsArr) { + setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); //grow levels arr by one + } - final double[] newBuf = new double[newTotalCap]; + final double[] newBuf = new double[newTotalItemsCap]; // copy (and shift) the current data into the new buffer - System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaCap, curTotalCap); - items_ = newBuf; + System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaItemsCap, curTotalItemsCap); + items_ = newBuf; //grow the items arr - // this loop includes the old "extra" index at the top - for (int i = 0; i <= getNumLevels(); i++) { - setLevelsArrayAtPlusEq(i,deltaCap); + // This loop updates all level indices, including the old "extra" index at the top + for (int level = 0; level <= getNumLevels(); level++) { + setLevelsArrayAtPlusEq(level,deltaItemsCap); } - assert getLevelsArrayAt(getNumLevels()) == newTotalCap; + assert getLevelsArrayAt(getNumLevels()) == newTotalItemsCap; incNumLevels(); - setLevelsArrayAt(getNumLevels(), newTotalCap); // initialize the new "extra" index at the top + setLevelsArrayAt(getNumLevels(), newTotalItemsCap); // initialize the new "extra" index at the top } private void sortLevelZero() { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index bb71c8371..1c70adbe0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -49,6 +49,7 @@ import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -503,22 +504,24 @@ public String toString(final boolean withLevels, final boolean withData) { sb.append("### End sketch summary").append(Util.LS); if (withLevels) { - sb.append("### KLL sketch levels:").append(Util.LS) + sb.append("### KLL levels array:").append(Util.LS) .append(" level, offset: nominal capacity, actual size").append(Util.LS); - for (int i = 0; i < getNumLevels(); i++) { - sb.append(" ").append(i).append(", ").append(getLevelsArrayAt(i)).append(": ") - .append(KllHelper.levelCapacity(k, getNumLevels(), i, M)) - .append(", ").append(KllHelper.currentLevelSize(i, getNumLevels(), getLevelsArray())).append(Util.LS); + int level = 0; + for ( ; level < getNumLevels(); level++) { + sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": ") + .append(KllHelper.levelCapacity(k, getNumLevels(), level, M)) + .append(", ").append(KllHelper.currentLevelSize(level, getNumLevels(), getLevelsArray())).append(Util.LS); } - sb.append("### End sketch levels").append(Util.LS); + sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": (Exclusive)") + .append(Util.LS); + sb.append("### End levels array").append(Util.LS); } if (withData) { - sb.append("### KLL sketch data {index, item}:").append(Util.LS); + sb.append("### KLL items data {index, item}:").append(Util.LS); if (getLevelsArrayAt(0) > 0) { sb.append(" Garbage:" + Util.LS); for (int i = 0; i < getLevelsArrayAt(0); i++) { - if (items_[i] == 0.0f) { continue; } sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); } } @@ -537,7 +540,7 @@ public String toString(final boolean withLevels, final boolean withData) { } sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) 
+ " (Exclusive)"); sb.append(Util.LS); - sb.append("### End sketch data").append(Util.LS); + sb.append("### End items data").append(Util.LS); } return sb.toString(); } @@ -644,18 +647,18 @@ private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, private void compressWhileUpdating() { final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - // It is important to do add the new top level right here. Be aware that this operation - // grows the buffer and shifts the data and also the boundaries of the data and grows the - // levels array and increments numLevels_ + // It is important to add the new top level right here. Be aware that this next operation + // grows the items array, shifts the items data and the level boundaries of the data. + // It also grows the levels array and increments numLevels_. if (level == getNumLevels() - 1) { addEmptyTopLevelToCompletelyFullSketch(); } final int rawBeg = getLevelsArrayAt(level); - final int rawLim = getLevelsArrayAt(level + 1); + final int rawEnd = getLevelsArrayAt(level + 1); // +2 is OK because we already added a new top level if necessary - final int popAbove = getLevelsArrayAt(level + 2) - rawLim; - final int rawPop = rawLim - rawBeg; + final int popAbove = getLevelsArrayAt(level + 2) - rawEnd; + final int rawPop = rawEnd - rawBeg; final boolean oddPop = isOdd(rawPop); final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; final int adjPop = oddPop ? 
rawPop - 1 : rawPop; @@ -671,7 +674,7 @@ private void compressWhileUpdating() { KllFloatsHelper.randomlyHalveDownFloats(items_, adjBeg, adjPop, random); KllFloatsHelper.mergeSortedFloatArrays( items_, adjBeg, halfAdjPop, - items_, rawLim, popAbove, + items_, rawEnd, popAbove, items_, adjBeg + halfAdjPop); } setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above @@ -697,36 +700,47 @@ private void compressWhileUpdating() { } } + /** + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom + */ private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalCap = getLevelsArrayAt(getNumLevels()); + final int curTotalItemsCap = getLevelsArrayAt(getNumLevels()); // make sure that we are following a certain growth scheme assert getLevelsArrayAt(0) == 0; //definition of full - assert items_.length == curTotalCap; + assert items_.length == curTotalItemsCap; - // note that merging MIGHT over-grow levels_, in which case we might not have to grow it here - if (getLevelsArray().length < getNumLevels() + 2) { - setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); - } + //this is a little out of sequence so that we can pre-compute the total required increase in space + final int deltaItemsCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); + final int newTotalItemsCap = curTotalItemsCap + deltaItemsCap; - final int deltaCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); - final int newTotalCap = curTotalCap + deltaCap; + // Check if growing the levels arr is required.
+ // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it + final boolean growLevelsArr = getLevelsArray().length < getNumLevels() + 2; + + //int totalDeltaSpaceRequired = deltaItemsCap * Float.BYTES; + //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } + //insert memory space management here + + if (growLevelsArr) { + setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); //grow levels arr by one + } - final float[] newBuf = new float[newTotalCap]; + final float[] itemsBuf = new float[newTotalItemsCap]; // copy (and shift) the current data into the new buffer - System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaCap, curTotalCap); - items_ = newBuf; + System.arraycopy(items_, getLevelsArrayAt(0), itemsBuf, getLevelsArrayAt(0) + deltaItemsCap, curTotalItemsCap); + items_ = itemsBuf; //grow the items arr - // this loop includes the old "extra" index at the top - for (int i = 0; i <= getNumLevels(); i++) { - setLevelsArrayAtPlusEq(i, deltaCap); + // This loop updates all level indices, including the old "extra" index at the top + for (int level = 0; level <= getNumLevels(); level++) { + setLevelsArrayAtPlusEq(level, deltaItemsCap); } - assert getLevelsArrayAt(getNumLevels()) == newTotalCap; + assert getLevelsArrayAt(getNumLevels()) == newTotalItemsCap; incNumLevels(); - setLevelsArrayAt(getNumLevels(), newTotalCap); // initialize the new "extra" index at the top + setLevelsArrayAt(getNumLevels(), newTotalItemsCap); // initialize the new "extra" index at the top } private void sortLevelZero() { diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 5d0c494d5..27aacaef9 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -19,6 +19,8 @@ package org.apache.datasketches.kll; +import
org.apache.datasketches.kll.KllPreambleUtil.SketchType; + abstract class KllHeapSketch extends KllSketch { /* @@ -38,9 +40,11 @@ abstract class KllHeapSketch extends KllSketch { * 5) curTotalCap = items_.length = levels_[numLevels_]. */ - private int dyMinK_; // dynamic minK for error estimation after merging with different k - private long n_; // number of items input into this sketch - private int numLevels_; // one-based number of current levels, + private long n_; // number of items input into this sketch. + private final int k; // configured value of K. + private int dyMinK_; // dynamic minK for error estimation after merging with different k. + + private int numLevels_; // one-based number of current levels. private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. private boolean isLevelZeroSorted_; @@ -49,22 +53,43 @@ abstract class KllHeapSketch extends KllSketch { * @param k configured size of sketch. Range [m, 2^16] */ KllHeapSketch(final int k, final SketchType sketchType) { - super(k, sketchType); + super(sketchType); KllHelper.checkK(k); + this.k = k; dyMinK_ = k; numLevels_ = 1; levels_ = new int[] {k, k}; isLevelZeroSorted_ = false; } + @Override + public int getK() { + return k; + } + + @Override + public long getN() { + return n_; + } + + @Override + public int getNumRetained() { + return levels_[numLevels_] - levels_[0]; + } + @Override int getDyMinK() { return dyMinK_; } @Override - void setDyMinK(final int dyMinK) { - dyMinK_ = dyMinK; + int[] getLevelsArray() { + return levels_; + } + + @Override + int getLevelsArrayAt(final int index) { + return levels_[index]; } @Override @@ -73,8 +98,8 @@ int getNumLevels() { } @Override - void setNumLevels(final int numLevels) { - numLevels_ = numLevels; + void incN() { + n_++; } @Override @@ -83,13 +108,13 @@ void incNumLevels() { } @Override - int[] getLevelsArray() { - return levels_; + boolean isLevelZeroSorted() { + return isLevelZeroSorted_; } @Override - int 
getLevelsArrayAt(final int index) { - return levels_[index]; + void setDyMinK(final int dyMinK) { + dyMinK_ = dyMinK; } @Override @@ -102,19 +127,14 @@ void setLevelsArrayAt(final int index, final int value) { this.levels_[index] = value; } - @Override - void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - this.levels_[index] += plusEq; - } - @Override void setLevelsArrayAtMinusEq(final int index, final int minusEq) { this.levels_[index] -= minusEq; } @Override - boolean isLevelZeroSorted() { - return isLevelZeroSorted_; + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + this.levels_[index] += plusEq; } @Override @@ -128,15 +148,8 @@ void setN(final long n) { } @Override - void incN() { - n_++; - } - - // public functions - - @Override - public long getN() { - return n_; + void setNumLevels(final int numLevels) { + numLevels_ = numLevels; } } diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 40208837c..1ca58638c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -25,7 +25,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllSketch.SketchType; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; class KllHelper { static final String LS = System.getProperty("line.separator"); diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index c643927e7..a58df7986 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -81,6 +81,32 @@ * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | * || | 8 | * 1 
||----------------------------------data----------------------------------------| + * + * The structure of the data block depends on Layout: + * + * For FLOAT_SINGLE_COMPACT or DOUBLE_SINGLE_COMPACT: + * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8 + * + * For FLOAT_FULL_COMPACT: + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of numLevels integers; + * Followed by Float Min_Value, then Float Max_Value + * Followed by an array of Floats of length retainedItems() + * + * For DOUBLE_FULL_COMPACT + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 24 with a length of numLevels integers; + * Followed by Double Min_Value, then Double Max_Value + * Followed by an array of Doubles of length retainedItems() + * + * For FLOAT_UPDATABLE + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of (numLevels + 1) integers; + * Followed by Float Min_Value, then Float Max_Value + * Followed by an array of Floats of length KllHelper.computeTotalItemCapacity(...). + * + * For DOUBLE_UPDATABLE + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 24 with a length of (numLevels + 1) integers; + * Followed by Double Min_Value, then Double Max_Value + * Followed by an array of Doubles of length KllHelper.computeTotalItemCapacity(...). + * * } * * @author Lee Rhodes @@ -140,6 +166,8 @@ enum Layout { DOUBLE_FULL_COMPACT, DOUBLE_EMPTY_COMPACT, DOUBLE_SINGLE_COMPACT, FLOAT_UPDATABLE, DOUBLE_UPDATABLE } + enum SketchType { FLOAT_SKETCH, DOUBLE_SKETCH } + /** * Returns a human readable string summary of the internal state of the given byte array. * Used primarily in testing. 
@@ -202,18 +230,18 @@ static class MemoryCheck { m = extractM(srcMem); KllHelper.checkK(k); - if (m != 8) { throwCustom(7, m); } - if (familyID != Family.KLL.getID()) { throwCustom(0, familyID); } + if (m != 8) { memoryCheckThrow(7, m); } + if (familyID != Family.KLL.getID()) { memoryCheckThrow(0, familyID); } famName = idToFamily(familyID).toString(); - if (famName != "KLL") { throwCustom(23, 0); } + if (famName != "KLL") { memoryCheckThrow(23, 0); } final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0); - if ((checkFlags & 5) == 5) { throwCustom(20, flags); } + if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); } switch (checkFlags) { case 0: { //not empty, not single item, float full - if (preInts != PREAMBLE_INTS_FLOAT) { throwCustom(6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } layout = updatable ? 
Layout.FLOAT_UPDATABLE : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -222,12 +250,12 @@ static class MemoryCheck { break; } case 1: { //empty, not single item, float empty - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } if (updatable) { layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); - if (n != 0) { throwCustom(21, (int) n); } + if (n != 0) { memoryCheckThrow(21, (int) n); } dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; @@ -241,12 +269,12 @@ static class MemoryCheck { break; } case 4: { //not empty, single item, float single item - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } if (updatable) { layout = Layout.FLOAT_UPDATABLE; n = extractN(srcMem); - if (n != 1) { throwCustom(22, (int)n); } + if (n != 1) { memoryCheckThrow(22, (int)n); } dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; @@ -260,8 +288,8 @@ static class MemoryCheck { break; } case 8: { //not empty, not single item, double full - if (preInts != PREAMBLE_INTS_DOUBLE) { throwCustom(5, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } layout = updatable ? 
Layout.DOUBLE_UPDATABLE : Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -270,12 +298,12 @@ static class MemoryCheck { break; } case 9: { //empty, not single item, double empty - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { throwCustom(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } if (updatable) { layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); - if (n != 0) { throwCustom(21, (int) n); } + if (n != 0) { memoryCheckThrow(21, (int) n); } dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; @@ -289,12 +317,12 @@ static class MemoryCheck { break; } case 12: { //not empty, single item, double single item - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { throwCustom(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { throwCustom(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } if (updatable) { layout = Layout.DOUBLE_UPDATABLE; n = extractN(srcMem); - if (n != 1) { throwCustom(22, (int)n); } + if (n != 1) { memoryCheckThrow(22, (int)n); } dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; @@ -310,7 +338,7 @@ static class MemoryCheck { } } - private static void throwCustom(final int errNo, final int value) { + private static void memoryCheckThrow(final int errNo, final int value) { String msg = ""; switch (errNo) { case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; @@ -358,6 +386,14 @@ static boolean extractSingleItemFlag(final Memory mem) { return (extractFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; } + static boolean extractDoubleSketchFlag(final Memory mem) { + return 
(extractFlags(mem) & DOUBLES_SKETCH_BIT_MASK) != 0; + } + + static boolean extractUpdatableFlag(final Memory mem) { + return (extractFlags(mem) & UPDATABLE_BIT_MASK) != 0; + } + static int extractK(final Memory mem) { return mem.getShort(K_SHORT_ADR) & 0XFFFF; } @@ -409,6 +445,16 @@ static void insertSingleItemFlag(final WritableMemory wmem, final boolean singl insertFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); } + static void insertDoubleSketchFlag(final WritableMemory wmem, final boolean doubleSketch) { + final int flags = extractFlags(wmem); + insertFlags(wmem, doubleSketch ? flags | DOUBLES_SKETCH_BIT_MASK : flags & ~DOUBLES_SKETCH_BIT_MASK); + } + + static void insertUpdatableFlag(final WritableMemory wmem, final boolean updatable) { + final int flags = extractFlags(wmem); + insertFlags(wmem, updatable ? flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK); + } + static void insertK(final WritableMemory wmem, final int value) { wmem.putShort(K_SHORT_ADR, (short) value); } @@ -421,7 +467,7 @@ static void insertN(final WritableMemory wmem, final long value) { wmem.putLong(N_LONG_ADR, value); } - static void insertMinK(final WritableMemory wmem, final int value) { + static void insertDyMinK(final WritableMemory wmem, final int value) { wmem.putShort(DY_MIN_K_SHORT_ADR, (short) value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 27b84b635..ea399de19 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -34,60 +34,71 @@ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLE_SKETCH; +import static 
org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; import java.util.Random; import org.apache.datasketches.kll.KllHelper.LevelStats; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; abstract class KllSketch { static final Random random = new Random(); static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch - private final int k; //configured value of K - - enum SketchType { FLOAT_SKETCH, DOUBLE_SKETCH } - static SketchType sketchType; - KllSketch(final int k, final SketchType sketchType) { - this.k = k; + KllSketch(final SketchType sketchType) { KllSketch.sketchType = sketchType; } - abstract int getDyMinK(); - - abstract void setDyMinK(int dyMinK); - - abstract int getNumLevels(); - - abstract void setNumLevels(int numLevels); - - abstract void incNumLevels(); - - abstract int[] getLevelsArray(); - - abstract int getLevelsArrayAt(int index); - - abstract void setLevelsArray(int[] levels); - - abstract void setLevelsArrayAt(int index, int value); - - abstract void setLevelsArrayAtPlusEq(int index, int plusEq); - - abstract void setLevelsArrayAtMinusEq(int index, int minusEq); - - abstract boolean isLevelZeroSorted(); - - abstract void setLevelZeroSorted(boolean sorted); - - boolean isCompatible() { - return compatible; + /** + * Gets the approximate value of k to use given epsilon, the normalized rank error. + * @param epsilon the normalized rank error between zero and one. + * @param pmf if true, this function returns the value of k assuming the input epsilon + * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function + * returns the value of k assuming the input epsilon is the desired "single-sided" + * epsilon for all the other queries. + * @return the value of k given a value of epsilon. 
+ * @see KllDoublesSketch + */ + // constants were derived as the best fit to 99 percentile empirically measured max error in + // thousands of trials + public static int getKFromEpsilon(final double epsilon, final boolean pmf) { + //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. + final double eps = max(epsilon, 4.7634E-5); + final double kdbl = pmf + ? exp(log(2.446 / eps) / 0.9433) + : exp(log(2.296 / eps) / 0.9723); + final double krnd = round(kdbl); + final double del = abs(krnd - kdbl); + final int k = (int) (del < 1E-6 ? krnd : ceil(kdbl)); + return max(MIN_K, min(MAX_K, k)); } - abstract void setN(long n); + /** + * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream + * length. This method can be used if allocation of storage is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + */ + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, sketchType); + return lvlStats.getCompactBytes(); + } - abstract void incN(); + /** + * Gets the normalized rank error given k and pmf. + * Static method version of the getNormalizedRankError(boolean). + * @param k the configuration parameter + * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * @return if pmf is true, the normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. 
+ */ + public static double getNormalizedRankError(final int k, final boolean pmf) { + return KllHelper.getNormalizedRankError(k, pmf); + } static int getSerializedSizeBytes(final int numLevels, final int numRetained, final SketchType sketchType, final boolean updatable) { @@ -108,20 +119,6 @@ static int getSerializedSizeBytes(final int numLevels, final int numRetained, fi } } - //Public Methods - - /** - * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream - * length. This method can be used if allocation of storage is necessary beforehand. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - */ - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, sketchType); - return lvlStats.getCompactBytes(); - } - /** * Returns the current compact number of bytes this sketch would require to store. * @return the current compact number of bytes this sketch would require to store. @@ -135,28 +132,15 @@ public int getCurrentCompactSerializedSizeBytes() { * @return the current updatable number of bytes this sketch would require to store. */ public int getCurrentUpdatableSerializedSizeBytes() { - final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); + final int itemCap = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); return KllSketch.getSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); } - /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. 
- * @deprecated use getCurrentCompactSerializedSizeBytes() - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - - /** * Returns the parameter k * @return parameter k */ - public int getK() { - return k; - } + public abstract int getK(); /** * Returns the length of the input stream. @@ -164,30 +148,6 @@ public int getK() { */ public abstract long getN(); - /** - * Gets the approximate value of k to use given epsilon, the normalized rank error. - * @param epsilon the normalized rank error between zero and one. - * @param pmf if true, this function returns the value of k assuming the input epsilon - * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function - * returns the value of k assuming the input epsilon is the desired "single-sided" - * epsilon for all the other queries. - * @return the value of k given a value of epsilon. - * @see KllDoublesSketch - */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials - public static int getKFromEpsilon(final double epsilon, final boolean pmf) { - //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. - final double eps = max(epsilon, 4.7634E-5); - final double kdbl = pmf - ? exp(log(2.446 / eps) / 0.9433) - : exp(log(2.296 / eps) / 0.9723); - final double krnd = round(kdbl); - final double del = abs(krnd - kdbl); - final int k = (int) (del < 1E-6 ? krnd : ceil(kdbl)); - return max(MIN_K, min(MAX_K, k)); - } - /** * Gets the approximate rank error of this sketch normalized as a fraction between zero and one. * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. @@ -201,24 +161,21 @@ public double getNormalizedRankError(final boolean pmf) { } /** - * Gets the normalized rank error given k and pmf. - * Static method version of the getNormalizedRankError(boolean). 
- * @param k the configuration parameter - * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @return if pmf is true, the normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * Returns the number of retained items (samples) in the sketch. + * @return the number of retained items (samples) in the sketch */ - public static double getNormalizedRankError(final int k, final boolean pmf) { - return KllHelper.getNormalizedRankError(k, pmf); + public int getNumRetained() { + return getLevelsArrayAt(getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0)); } /** - * Returns the number of retained items (samples) in the sketch. - * @return the number of retained items (samples) in the sketch + * Returns the number of bytes this sketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use getCurrentCompactSerializedSizeBytes() */ - public int getNumRetained() { - return getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0); + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); } /** @@ -243,12 +200,6 @@ public boolean isEstimationMode() { */ public abstract byte[] toByteArray(); - /** - * Returns serialized sketch in an updatable byte array form. - * @return serialized sketch in an updatable byte array form. - */ - public abstract byte[] toUpdatableByteArray(); - @Override public String toString() { return toString(false, false); @@ -262,4 +213,46 @@ public String toString() { */ public abstract String toString(final boolean withLevels, final boolean withData); + /** + * Returns serialized sketch in an updatable byte array form. + * @return serialized sketch in an updatable byte array form. 
+ */ + public abstract byte[] toUpdatableByteArray(); + + //Restricted Methods + + abstract int getDyMinK(); + + abstract int[] getLevelsArray(); + + abstract int getLevelsArrayAt(int index); + + abstract int getNumLevels(); + + abstract void incN(); + + abstract void incNumLevels(); + + boolean isCompatible() { + return compatible; + } + + abstract boolean isLevelZeroSorted(); + + abstract void setDyMinK(int dyMinK); + + abstract void setLevelsArray(int[] levels); + + abstract void setLevelsArrayAt(int index, int value); + + abstract void setLevelsArrayAtMinusEq(int index, int minusEq); + + abstract void setLevelsArrayAtPlusEq(int index, int plusEq); + + abstract void setLevelZeroSorted(boolean sorted); + + abstract void setN(long n); + + abstract void setNumLevels(int numLevels); + } diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 40f02d7ea..97d09cbd5 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -21,11 +21,11 @@ import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllHelper.getLevelStats; -import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLE_SKETCH; +import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; import static org.testng.Assert.assertEquals; import org.apache.datasketches.kll.KllHelper.LevelStats; -import org.apache.datasketches.kll.KllSketch.SketchType; +import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 5f9ebbe39..051c2b12f 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ 
b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -136,6 +136,19 @@ public void visualCheckToString() { println(LS + s2); } + @Test + public void viewAddLevels() { + KllFloatsSketch sk = new KllFloatsSketch(20); + float f = 1.0f; + int i = 1; + for ( ; i <= 20; i++) { sk.update(f++); } + println(sk.toString(true, true)); + for ( ; i <= 54; i++) { sk.update(f++); } + println(sk.toString(true, true)); + for ( ; i <= 108; i++) { sk.update(f++); } + println(sk.toString(true, true)); + } + @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); From b33196eae6d59b007dba428bdfc20730d10de12a Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Mar 2022 15:40:15 -0800 Subject: [PATCH 09/31] Interim 9 --- .../kll/KllDirectDoublesSketch.java | 32 +--- .../datasketches/kll/KllDirectSketch.java | 65 +++++-- .../datasketches/kll/KllDoublesSketch.java | 4 +- .../datasketches/kll/KllFloatsSketch.java | 8 +- .../datasketches/kll/KllPreambleUtil.java | 159 ++++++++++++++++-- .../datasketches/kll/MiscDoublesTest.java | 131 ++++++++++++++- .../datasketches/kll/MiscFloatsTest.java | 128 +++++++++++++- 7 files changed, 452 insertions(+), 75 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 3f239c123..030f2d5ba 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -34,6 +34,7 @@ public class KllDirectDoublesSketch extends KllDirectSketch { @Override public byte[] toByteArray() { + return null; } @@ -47,36 +48,5 @@ public byte[] toUpdatableByteArray() { return null; } - @Override - int[] getLevelsArray() { - return null; - } - - @Override - int getLevelsArrayAt(final int index) { - return 0; - } - - - @Override - void setLevelsArray(final int[] levels) { - - } - - @Override - void setLevelsArrayAt(final int 
index, final int value) { - - } - - @Override - void setLevelsArrayAtMinusEq(final int index, final int minusEq) { - - } - - @Override - void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 8b5cb4519..4e1d15098 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -85,9 +85,45 @@ int getDyMinK() { @Override int[] getLevelsArray() { - final int lengthInts = getLevelsArrLengthInts(); - final int[] levelsArr = new int[lengthInts]; - wmem.getIntArray(dataStartBytes, levelsArr, 0, lengthInts); + final int memLengthInts; + final int outLengthInts; + final int[] levelsArr; + final int k = getK(); + switch (layout) { + case FLOAT_EMPTY_COMPACT: + case DOUBLE_EMPTY_COMPACT: { + memLengthInts = 0; + outLengthInts = 2; + levelsArr = new int[] {k, k}; + break; + } + case FLOAT_SINGLE_COMPACT: + case DOUBLE_SINGLE_COMPACT: { + memLengthInts = 0; + outLengthInts = 2; + levelsArr = new int[] {k - 1, k}; + break; + } + case FLOAT_FULL_COMPACT: + case DOUBLE_FULL_COMPACT: { + memLengthInts = getNumLevels(); + outLengthInts = getNumLevels() + 1; + levelsArr = new int[outLengthInts]; + wmem.getIntArray(dataStartBytes, levelsArr, 0, memLengthInts); + final int itemCapacity = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); + levelsArr[getNumLevels()] = itemCapacity; + break; + } + case FLOAT_UPDATABLE: + case DOUBLE_UPDATABLE: { + memLengthInts = getNumLevels() + 1; + outLengthInts = memLengthInts; + levelsArr = new int[outLengthInts]; + wmem.getIntArray(dataStartBytes, levelsArr, 0, outLengthInts); + break; + } + default: return null; + } return levelsArr; } @@ -180,22 +216,25 @@ int getItemsArrLengthItems() { return getLevelsArrayAt(getNumLevels()); } + /** + * For determining the actual length of the array as 
stored in Memory + * @return the actual length of the array as stored in Memory + */ int getLevelsArrLengthInts() { - final int lengthInts; + final int memLengthInts; switch (layout) { case FLOAT_EMPTY_COMPACT: - case FLOAT_SINGLE_COMPACT: case DOUBLE_EMPTY_COMPACT: - case DOUBLE_SINGLE_COMPACT: { return 0; } - - case FLOAT_FULL_COMPACT: { lengthInts = getNumLevels(); break; } - case DOUBLE_FULL_COMPACT: { lengthInts = getNumLevels(); break; } - case FLOAT_UPDATABLE: { lengthInts = getNumLevels() + 1; break; } - case DOUBLE_UPDATABLE: { lengthInts = getNumLevels() + 1; break; } - default: return 0; + case FLOAT_SINGLE_COMPACT: + case DOUBLE_SINGLE_COMPACT: { memLengthInts = 0; break; } + case FLOAT_FULL_COMPACT: + case DOUBLE_FULL_COMPACT: { memLengthInts = getNumLevels(); break; } + case FLOAT_UPDATABLE: + case DOUBLE_UPDATABLE: { memLengthInts = getNumLevels() + 1; break; } + default: return 0; //can't get here } - return lengthInts; + return memLengthInts; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 320eb6ad4..b22afd90b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -455,8 +455,8 @@ public byte[] toUpdatableByteArray() { wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_DOUBLE); wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) - ((isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + final byte flags = (byte) ( + (isLevelZeroSorted() ? 
LEVEL_ZERO_SORTED_BIT_MASK : 0) | DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); wmem.putByte(FLAGS_BYTE_ADR, flags); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 1c70adbe0..22894d312 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -455,8 +455,8 @@ public byte[] toUpdatableByteArray() { wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_FLOAT); wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) - ((isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + final byte flags = (byte) ( + (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | UPDATABLE_BIT_MASK); // (leave blank) wmem.putByte(FLAGS_BYTE_ADR, flags); @@ -471,9 +471,9 @@ public byte[] toUpdatableByteArray() { final int len = getLevelsArray().length; wmem.putIntArray(offset, getLevelsArray(), 0, len); offset += len * Integer.BYTES; - wmem.putDouble(offset, minValue_); + wmem.putFloat(offset, minValue_); offset += Float.BYTES; - wmem.putDouble(offset, maxValue_); + wmem.putFloat(offset, maxValue_); offset += Float.BYTES; wmem.putFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); return bytes; diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index a58df7986..3e22ce3ee 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -20,9 +20,11 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.Family.idToFamily; +import static org.apache.datasketches.Util.zeroPad; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; +import 
org.apache.datasketches.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -53,7 +55,7 @@ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<--------------data----------------| unused |numLevels|-------min K-----------| + * 2 ||<--------------data----------------| unused |numLevels|--dynamic-min K--------| * * Serialized float sketch layout, Empty (8 bytes) and Single Item (12 bytes): * Adr: @@ -71,7 +73,7 @@ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||--------------unused------------------------|numLevels|-------min K-----------| + * 2 ||--------------unused------------------------|numLevels|--dynamic-min K--------| * || | 24 | * 3 ||<---------------------------------data----------------------------------------| * @@ -191,12 +193,61 @@ static String toString(final Memory mem) { return null; //memoryToString(mem); } - - - - + static String memoryToString(final Memory mem) { + final MemoryCheck memChk = new MemoryCheck(mem); + final int flags = memChk.flags & 0XFF; + final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + + zeroPad(Integer.toBinaryString(flags), 8); + final int preInts = memChk.preInts; + final StringBuilder sb = new StringBuilder(); + sb.append(Util.LS).append("### KLL SKETCH MEMORY SUMMARY:").append(LS); + sb.append("Byte 0 : Preamble Ints : ").append(preInts).append(LS); + sb.append("Byte 1 : SerVer : ").append(memChk.serVer).append(LS); + sb.append("Byte 2 : FamilyID : ").append(memChk.familyID).append(LS); + sb.append(" FamilyName : ").append(memChk.famName).append(LS); + sb.append("Byte 3 : Flags Field : ").append(flagsStr).append(LS); + sb.append(" Bit Flag Name").append(LS); + sb.append(" 0 
EMPTY COMPACT : ").append(memChk.empty).append(LS); + sb.append(" 1 LEVEL_ZERO_SORTED : ").append(memChk.level0Sorted).append(LS); + sb.append(" 2 SINGLE_ITEM COMPACT: ").append(memChk.singleItem).append(LS); + sb.append(" 3 DOUBLES_SKETCH : ").append(memChk.doublesSketch).append(LS); + sb.append(" 4 UPDATABLE : ").append(memChk.updatable).append(LS); + sb.append("Bytes 4-5 : K : ").append(memChk.k).append(LS); + sb.append("Byte 6 : Min Level Cap, M : ").append(memChk.m).append(LS); + sb.append("Byte 7 : (Reserved) : ").append(LS); + + switch (memChk.layout) { + case DOUBLE_FULL_COMPACT: + case FLOAT_FULL_COMPACT: + case FLOAT_UPDATABLE: + case DOUBLE_UPDATABLE: + { + sb.append("Bytes 8-15: N : ").append(memChk.n).append(LS); + sb.append("Bytes 16-17: DyMinK : ").append(memChk.dyMinK).append(LS); + sb.append("Byte 18 : NumLevels : ").append(memChk.numLevels).append(LS); + break; + } + case FLOAT_EMPTY_COMPACT: + case FLOAT_SINGLE_COMPACT: + case DOUBLE_EMPTY_COMPACT: + case DOUBLE_SINGLE_COMPACT: + { + sb.append("Assumed : N : ").append(memChk.n).append(LS); + sb.append("Assumed : DyMinK : ").append(memChk.dyMinK).append(LS); + sb.append("Assumed : NumLevels : ").append(memChk.numLevels).append(LS); + break; + } + default: break; //can never happen + } + sb.append("PreambleBytes : ").append(preInts * 4).append(LS); + sb.append("Sketch Bytes : ").append(memChk.sketchBytes).append(LS); + sb.append("Memory Capacity Bytes : ").append(mem.getCapacity()).append(LS); + sb.append("### END KLL Sketch Memory Summary").append(LS); + return sb.toString(); + } static class MemoryCheck { + // first 8 bytes final int preInts; // = extractPreInts(srcMem); final int serVer; final int familyID; @@ -209,12 +260,19 @@ static class MemoryCheck { final boolean updatable; final int k; final int m; + + Layout layout; + // next 8 bytes, depending on the Layout, the next fields may be filled with assumed values. 
long n; + // next 4 bytes int dyMinK; - int dataStart; int numLevels; + // derived + int dataStart; int[] levels; - Layout layout; + int itemsStart; + int memItemsCap; + int sketchBytes; MemoryCheck(final Memory srcMem) { preInts = extractPreInts(srcMem); @@ -239,36 +297,59 @@ static class MemoryCheck { if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); } switch (checkFlags) { - case 0: { //not empty, not single item, float full + case 0: { //FloatFullCompact or FloatUpdatable (full) if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - layout = updatable ? Layout.FLOAT_UPDATABLE : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; + levels = new int[numLevels + 1]; + if (updatable) { + layout = Layout.FLOAT_UPDATABLE; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; + } else { + layout = Layout.FLOAT_FULL_COMPACT; + srcMem.getIntArray(dataStart, levels, 0, numLevels); + levels[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); + itemsStart = dataStart + (levels.length - 1) * Integer.BYTES; + memItemsCap = levels[numLevels] - levels[0]; + sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; + } break; } - case 1: { //empty, not single item, float empty + case 1: { //FloatEmptyCompact or FloatUpdatable (empty) if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } if (updatable) { - layout = Layout.FLOAT_UPDATABLE; + layout = Layout.FLOAT_UPDATABLE; //empty n = extractN(srcMem); if (n != 0) { memoryCheckThrow(21, (int) n); } dyMinK = extractDyMinK(srcMem); 
numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; + levels = new int[numLevels + 1]; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + memItemsCap * Float.BYTES; } else { layout = Layout.FLOAT_EMPTY_COMPACT; n = 0; dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty + levels = new int[] {k, k}; + itemsStart = dataStart; + memItemsCap = 0; + sketchBytes = itemsStart; } break; } - case 4: { //not empty, single item, float single item + case 4: { //FloatSingleCompact or FloatUpdatable (single) if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } if (updatable) { @@ -278,45 +359,77 @@ static class MemoryCheck { dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_FLOAT; + levels = new int[numLevels + 1]; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; } else { layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; dyMinK = k; numLevels = 1; + levels = new int[] {k - 1, k}; dataStart = DATA_START_ADR_SINGLE_ITEM; + itemsStart = dataStart; + memItemsCap = 1; + sketchBytes = itemsStart + memItemsCap * Float.BYTES; } break; } - case 8: { //not empty, not single item, double full + case 8: { //DoubleFullCompact or DoubleUpdatable (full) if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - layout = updatable ? 
Layout.DOUBLE_UPDATABLE : Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; + levels = new int[numLevels + 1]; + if (updatable) { + layout = Layout.DOUBLE_UPDATABLE; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + (memItemsCap + 2) * Double.BYTES; + } else { + layout = Layout.DOUBLE_FULL_COMPACT; + srcMem.getIntArray(dataStart, levels, 0, numLevels); + levels[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); + itemsStart = dataStart + (levels.length - 1) * Integer.BYTES; + memItemsCap = levels[numLevels] - levels[0]; + sketchBytes = itemsStart + (memItemsCap + 2) * Double.BYTES; + } break; } - case 9: { //empty, not single item, double empty + case 9: { //DoubleEmptyCompact or DoubleUpdatable (empty) if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } if (updatable) { - layout = Layout.DOUBLE_UPDATABLE; + layout = Layout.DOUBLE_UPDATABLE; //empty n = extractN(srcMem); if (n != 0) { memoryCheckThrow(21, (int) n); } dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; + levels = new int[numLevels + 1]; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + memItemsCap * Double.BYTES; } else { layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; dyMinK = k; numLevels = 1; dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty + levels = new int[] {k, k}; + itemsStart = dataStart; + memItemsCap = 0; + sketchBytes = itemsStart; } break; } - case 12: { //not empty, single item, double 
single item + case 12: { //DoubleSingleCompact or DoubleUpdatable (single) if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } if (updatable) { @@ -326,15 +439,25 @@ static class MemoryCheck { dyMinK = extractDyMinK(srcMem); numLevels = extractNumLevels(srcMem); dataStart = DATA_START_ADR_DOUBLE; + levels = new int[numLevels + 1]; + srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); + itemsStart = dataStart + levels.length * Integer.BYTES; + memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); + sketchBytes = itemsStart + memItemsCap * Double.BYTES; } else { layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; dyMinK = k; numLevels = 1; + levels = new int[] {k - 1, k}; dataStart = DATA_START_ADR_SINGLE_ITEM; + itemsStart = dataStart; + memItemsCap = 1; + sketchBytes = itemsStart + memItemsCap * Double.BYTES; } break; } + default: break; //can't happen } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 28998e3fb..5e806a546 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -125,7 +125,8 @@ public void checkMisc() { public void visualCheckToString() { final KllDoublesSketch sketch = new KllDoublesSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } - println(sketch.toString(true, true)); + final String s1 = sketch.toString(true, true); + println(s1); final KllDoublesSketch sketch2 = new KllDoublesSketch(20); for (int i = 0; i < 400; i++) { sketch2.update(i + 1); } @@ -136,6 +137,134 @@ public void visualCheckToString() { println(LS + s2); } + @Test + public void viewCompactions() { + KllDoublesSketch sk = new KllDoublesSketch(20); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); 
//compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDoublesSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkMemoryToStringDoubleCompact() { + KllDoublesSketch sk = new KllDoublesSketch(20); + KllDoublesSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + for (int i = 1; i <= 21; i++) { sk.update(i); } + println(sk.toString(true, true)); + + println("CASE 0: DOUBLE_FULL_COMPACT"); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + + println("CASE 1: DOUBLE_EMPTY_COMPACT"); + sk = new KllDoublesSketch(20); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + + println("CASE 4: DOUBLE_SINGLE_COMPACT"); + sk = new KllDoublesSketch(20); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test + public void 
checkMemoryToStringDoubleUpdatable() { + KllDoublesSketch sk = new KllDoublesSketch(20); + KllDoublesSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + for (int i = 1; i <= 21; i++) { sk.update(i); } + println(sk.toString(true, true)); + + println("CASE 0: DOUBLE_UPDATABLE"); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); + + println("CASE 1: DOUBLE_UPDATABLE (empty)"); + sk = new KllDoublesSketch(20); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); + + println("CASE 4: DOUBLE_UPDATABLE (single)"); + sk = new KllDoublesSketch(20); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); + } + @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 051c2b12f..0643a5f8d 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ 
-130,6 +130,7 @@ public void visualCheckToString() { final KllFloatsSketch sketch2 = new KllFloatsSketch(20); for (int i = 0; i < 400; i++) { sketch2.update(i + 1); } + println("\n" + sketch2.toString(true, true)); sketch2.merge(sketch); final String s2 = sketch2.toString(true, true); @@ -137,16 +138,131 @@ public void visualCheckToString() { } @Test - public void viewAddLevels() { + public void viewCompactions() { KllFloatsSketch sk = new KllFloatsSketch(20); - float f = 1.0f; - int i = 1; - for ( ; i <= 20; i++) { sk.update(f++); } + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllFloatsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } println(sk.toString(true, true)); - for ( ; i <= 54; i++) { sk.update(f++); } + } + + @Test + public void checkMemoryToStringFloatCompact() { + KllFloatsSketch sk = new KllFloatsSketch(20); + KllFloatsSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + for (int i = 1; i <= 21; i++) { sk.update(i); } println(sk.toString(true, true)); - for ( ; i <= 108; i++) { sk.update(f++); } + + println("CASE 0: FLOAT_FULL_COMPACT"); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + + println("CASE 1: FLOAT_EMPTY_COMPACT"); + sk = new KllFloatsSketch(20); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = 
KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + + println("CASE 4: FLOAT_SINGLE_COMPACT"); + sk = new KllFloatsSketch(20); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test + public void checkMemoryToStringFloatUpdatable() { + KllFloatsSketch sk = new KllFloatsSketch(20); + KllFloatsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + for (int i = 1; i <= 21; i++) { sk.update(i); } println(sk.toString(true, true)); + + println("CASE 0: FLOAT_UPDATABLE"); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); + + println("CASE 1: FLOAT_UPDATABLE (empty)"); + sk = new KllFloatsSketch(20); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); + + println("CASE 4: FLOAT_UPDATABLE (single)"); + sk = new KllFloatsSketch(20); + sk.update(1); + upBytes = 
sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println(s); + assertEquals(upBytes, upBytes2); } @Test From 75cff2317df62a23fca06de6d4ff9ddddcc8e9fb Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 16 Mar 2022 17:41:09 -0700 Subject: [PATCH 10/31] Interim 10 --- .../kll/KllDirectDoublesSketch.java | 106 +- .../datasketches/kll/KllDirectSketch.java | 196 ++- .../datasketches/kll/KllDoublesSketch.java | 621 +-------- .../datasketches/kll/KllFloatsSketch.java | 665 ++-------- .../datasketches/kll/KllHeapSketch.java | 40 +- .../datasketches/kll/KllPreambleUtil.java | 243 +--- .../apache/datasketches/kll/KllSketch.java | 1147 ++++++++++++++++- .../datasketches/kll/MemoryValidate.java | 353 +++++ .../kll/KllDoublesSketchTest.java | 7 +- .../datasketches/kll/KllFloatsSketchTest.java | 3 +- .../datasketches/kll/MiscDoublesTest.java | 383 +++++- .../datasketches/kll/MiscFloatsTest.java | 301 ++++- tools/SketchesCheckstyle.xml | 2 +- 13 files changed, 2496 insertions(+), 1571 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/MemoryValidate.java diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 030f2d5ba..368d2697e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -28,13 +28,19 @@ */ public class KllDirectDoublesSketch extends KllDirectSketch { - KllDirectDoublesSketch(final WritableMemory wmem) { + + public KllDirectDoublesSketch(final WritableMemory wmem) { super(wmem, SketchType.DOUBLE_SKETCH); } + //public int getNumRetained() + @SuppressWarnings("unused") + public 
double[] getCDF(final double[] splitPoints) { + return null; + } + @Override public byte[] toByteArray() { - return null; } @@ -48,5 +54,101 @@ public byte[] toUpdatableByteArray() { return null; } + @Override + double[] getDoubleItemsArray() { + return null; + } + + @Override + float[] getFloatItemsArray() { + return null; + } + + @Override + double getMaxDoubleValue() { + return 0; + } + + @Override + float getMaxFloatValue() { + return 0; + } + + @Override + double getMinDoubleValue() { + return 0; + } + + @Override + float getMinFloatValue() { + return 0; + } + + @Override + void setDoubleItemsArray(final double[] floatItems) { + } + + @Override + void setFloatItemsArray(final float[] floatItems) { + } + + @Override + void setMaxDoubleValue(final double value) { + } + + @Override + void setMaxFloatValue(final float value) { + } + + @Override + void setMinDoubleValue(final double value) { + } + + @Override + void setMinFloatValue(final float value) { + } + + @Override + void setLevelsArray(final int[] levelsArr) { + + } + + //int getDyMinK + + //int[] getLevelsArray + + //int getLevelsArrayAt() + + //int getNumLevels + + //void incN() + + //void incNumLevels() + + //boolean isLevelZeroSorted() + + //void setDyMinK() + + //void updateLevelsArray() + + //void setLevelsArrayAt() + + //void setLevelsArrayAtMinusEq() + + //void setLevelsArrayAtPlusEq() + + //void setLevelZeroSorted() + + //void setN() + + //void setNumLevels() + + //int getItemsDataStartBytes() + + //int getItemsArrLengthItems() + + //int getLevelsArrLengthints() + + } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 4e1d15098..ed37ec853 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -28,26 +28,56 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; import static 
org.apache.datasketches.kll.KllPreambleUtil.insertN; import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; +//import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.kll.KllPreambleUtil.Layout; -import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; abstract class KllDirectSketch extends KllSketch { + //All these members are constant for the life of this object. If the WritableMemory changes, it will require + //rebuilding this class final WritableMemory wmem; final Layout layout; - final boolean compact; - final int dataStartBytes; + final boolean updatable; + final int numLevels_; + final int memItemsCap; + final int sketchBytes; + final WritableMemory levelsWmem; + final WritableMemory minMaxWmem; + final WritableMemory itemsWmem; + DefaultMemoryRequestServer defaultMemReqSvr = null; + + /** + * For the direct sketches it is important that the methods implemented here are designed to work dynamically + * as the sketch grows off-heap. 
+ * @param wmem the current WritableMemory + * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH + */ KllDirectSketch(final WritableMemory wmem, final SketchType sketchType) { super(sketchType); - final MemoryCheck memChk = new MemoryCheck(wmem); + final MemoryValidate memVal = new MemoryValidate(wmem); this.wmem = wmem; - this.layout = memChk.layout; - this.compact = !memChk.updatable; - this.dataStartBytes = memChk.dataStart; + layout = memVal.layout; + updatable = memVal.updatable; + numLevels_ = memVal.numLevels; + memItemsCap = memVal.memItemsCap; + sketchBytes = memVal.sketchBytes; + levelsWmem = memVal.levelsWmem; + minMaxWmem = memVal.minMaxWmem; + itemsWmem = memVal.itemsWmem; + defaultMemReqSvr = updatable ? new DefaultMemoryRequestServer() : null; + } + + private static void kllDirectSketchThrow(final int errNo) { + String msg = ""; + switch (errNo) { + case 30: msg = "Sketch Memory is immutable, cannot write."; break; + } + throw new SketchesArgumentException(msg); } @Override @@ -61,12 +91,8 @@ public long getN() { } @Override - public int getNumRetained() { - if (compact) { - final int itemCapacity = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); - return itemCapacity - getLevelsArrayAt(0); - } - return getLevelsArrayAt(getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0) ); + public boolean isUpdatable() { + return updatable; } @Override @@ -83,53 +109,42 @@ int getDyMinK() { return extractDyMinK(wmem); } + int getItemsArrLengthItems() { + if (updatable) { return getLevelsArray()[getNumLevels()]; } + return getNumRetained(); + } + + + @Override + String getLayout() { return layout.toString(); } + @Override int[] getLevelsArray() { + final int cap = getNumLevels() + 1; + final int[] myLevelsArr = new int[cap]; + levelsWmem.getIntArray(0, myLevelsArr, 0, cap); + return myLevelsArr; + } + + /** + * For determining the actual length of the array as stored in Memory + * @return the actual length of the array as stored in Memory + */ 
+ int getLevelsArrLengthInts() { final int memLengthInts; - final int outLengthInts; - final int[] levelsArr; - final int k = getK(); + switch (layout) { case FLOAT_EMPTY_COMPACT: - case DOUBLE_EMPTY_COMPACT: { - memLengthInts = 0; - outLengthInts = 2; - levelsArr = new int[] {k, k}; - break; - } + case DOUBLE_EMPTY_COMPACT: case FLOAT_SINGLE_COMPACT: - case DOUBLE_SINGLE_COMPACT: { - memLengthInts = 0; - outLengthInts = 2; - levelsArr = new int[] {k - 1, k}; - break; - } + case DOUBLE_SINGLE_COMPACT: { memLengthInts = 0; break; } case FLOAT_FULL_COMPACT: - case DOUBLE_FULL_COMPACT: { - memLengthInts = getNumLevels(); - outLengthInts = getNumLevels() + 1; - levelsArr = new int[outLengthInts]; - wmem.getIntArray(dataStartBytes, levelsArr, 0, memLengthInts); - final int itemCapacity = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); - levelsArr[getNumLevels()] = itemCapacity; - break; - } + case DOUBLE_FULL_COMPACT: { memLengthInts = getNumLevels(); break; } case FLOAT_UPDATABLE: - case DOUBLE_UPDATABLE: { - memLengthInts = getNumLevels() + 1; - outLengthInts = memLengthInts; - levelsArr = new int[outLengthInts]; - wmem.getIntArray(dataStartBytes, levelsArr, 0, outLengthInts); - break; - } - default: return null; + case DOUBLE_UPDATABLE: { memLengthInts = getNumLevels() + 1; break; } + default: return 0; //can't get here } - return levelsArr; - } - - @Override - int getLevelsArrayAt(final int index) { - return wmem.getInt(dataStartBytes + index * Integer.BYTES); + return memLengthInts; } @Override @@ -139,14 +154,14 @@ int getNumLevels() { @Override void incN() { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } long n = extractN(wmem); insertN(wmem, ++n); } @Override void incNumLevels() { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } int numLevels = extractNumLevels(wmem); insertNumLevels(wmem, ++numLevels); } @@ -158,91 +173,32 @@ boolean isLevelZeroSorted() { 
@Override void setDyMinK(final int dyMinK) { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } insertDyMinK(wmem, dyMinK); } @Override - void setLevelsArray(final int[] levels) { - if (compact) { kllDirectSketchThrow(30); } - final int lengthInts = getLevelsArrLengthInts(); - wmem.putIntArray(dataStartBytes, levels, 0, lengthInts); - } - - @Override - void setLevelsArrayAt(final int index, final int value) { - if (compact) { kllDirectSketchThrow(30); } - wmem.putInt(dataStartBytes + index * Integer.BYTES, value); - } - - @Override - void setLevelsArrayAtMinusEq(final int index, final int minusEq) { - if (compact) { kllDirectSketchThrow(30); } - final int old = wmem.getInt(dataStartBytes + index * Integer.BYTES); - wmem.putInt(dataStartBytes + index * Integer.BYTES, old - minusEq); - } - - @Override - void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - if (compact) { kllDirectSketchThrow(30); } - final int old = wmem.getInt(dataStartBytes + index * Integer.BYTES); - wmem.putInt(dataStartBytes + index * Integer.BYTES, old + plusEq); + void updateLevelsArray(final int[] levels) { + if (!updatable) { kllDirectSketchThrow(30); } + levelsWmem.putIntArray(0, levels, 0, levels.length); } @Override void setLevelZeroSorted(final boolean sorted) { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } insertLevelZeroSortedFlag(wmem, sorted); } @Override void setN(final long n) { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } insertN(wmem, n); } + @Override void setNumLevels(final int numLevels) { - if (compact) { kllDirectSketchThrow(30); } + if (!updatable) { kllDirectSketchThrow(30); } insertNumLevels(wmem, numLevels); } - - int getItemsDataStartBytes() { - return dataStartBytes + getLevelsArrLengthInts() * Integer.BYTES; - } - - int getItemsArrLengthItems() { - if (compact) { return getNumRetained(); } - return 
getLevelsArrayAt(getNumLevels()); - } - - /** - * For determining the actual length of the array as stored in Memory - * @return the actual length of the array as stored in Memory - */ - int getLevelsArrLengthInts() { - final int memLengthInts; - - switch (layout) { - case FLOAT_EMPTY_COMPACT: - case DOUBLE_EMPTY_COMPACT: - case FLOAT_SINGLE_COMPACT: - case DOUBLE_SINGLE_COMPACT: { memLengthInts = 0; break; } - case FLOAT_FULL_COMPACT: - case DOUBLE_FULL_COMPACT: { memLengthInts = getNumLevels(); break; } - case FLOAT_UPDATABLE: - case DOUBLE_UPDATABLE: { memLengthInts = getNumLevels() + 1; break; } - default: return 0; //can't get here - } - return memLengthInts; - } - - - private static void kllDirectSketchThrow(final int errNo) { - String msg = ""; - switch (errNo) { - case 30: msg = "Sketch Memory is immutable, cannot write."; break; - } - throw new SketchesArgumentException(msg); - } } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index b22afd90b..997521ea5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -21,49 +21,22 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; -import 
static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; - -import java.util.Arrays; - -import org.apache.datasketches.Family; + import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; -import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllDoublesSketch extends KllHeapSketch { +public final class KllDoublesSketch extends KllHeapSketch { // Specific to the doubles sketch - private double[] items_; // the continuous array of double items - private double minValue_; - private double maxValue_; + private double[] doubleItems_; // the continuous array of double items + private double minDoubleValue_; + private double maxDoubleValue_; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. @@ -80,64 +53,19 @@ public KllDoublesSketch() { */ public KllDoublesSketch(final int k) { super(k, SketchType.DOUBLE_SKETCH); - items_ = new double[k]; - minValue_ = Double.NaN; - maxValue_ = Double.NaN; + doubleItems_ = new double[k]; + minDoubleValue_ = Double.NaN; + maxDoubleValue_ = Double.NaN; } /** * Private heapify constructor. * @param mem Memory object that contains data serialized by this sketch. - * @param memChk the MemoryCheck object + * @param memVal the MemoryCheck object */ - private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { - super(memChk.k, SketchType.DOUBLE_SKETCH); - setLevelZeroSorted(memChk.level0Sorted); - final int k = getK(); - if (memChk.empty) { - setNumLevels(1); - setLevelsArray(new int[] {k, k}); - setLevelZeroSorted(false); - setDyMinK(k); - items_ = new double[k]; - minValue_ = Double.NaN; - maxValue_ = Double.NaN; - } else if (memChk.singleItem) { - setN(1); - setDyMinK(k); - setNumLevels(1); - setLevelsArray(new int[getNumLevels() + 1]); - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - setLevelsArrayAt(0, itemCapacity - 1); - setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ - items_ = new double[itemCapacity]; - items_[getLevelsArrayAt(0)] = mem.getDouble(DATA_START_ADR_SINGLE_ITEM); - minValue_ = items_[getLevelsArrayAt(0)]; - maxValue_ = items_[getLevelsArrayAt(0)]; - } else { - setN(memChk.n); - 
setDyMinK(memChk.dyMinK); - setNumLevels(memChk.numLevels); - setLevelsArray(new int[getNumLevels() + 1]); - int offset = DATA_START_ADR_DOUBLE; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - if (memChk.updatable) { - // If updatable the last integer in levels_ IS serialized. - mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels() + 1); //load levels_ - offset += (getNumLevels() + 1) * Integer.BYTES; - } else { - // If compact the last integer in levels_ is not serialized. - mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels()); //load levels_ - offset += getNumLevels() * Integer.BYTES; - setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ - } - minValue_ = mem.getDouble(offset); - offset += Double.BYTES; - maxValue_ = mem.getDouble(offset); - offset += Double.BYTES; - items_ = new double[itemCapacity]; - mem.getDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - } + private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { + super(memVal.k, SketchType.DOUBLE_SKETCH); + buildHeapKllSketchFromMemory(memVal); } /** @@ -150,15 +78,13 @@ private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) { //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. //The verified Memory is then passed to the actual private heapify constructor. public static KllDoublesSketch heapify(final Memory mem) { - final MemoryCheck memChk = new MemoryCheck(mem); + final MemoryValidate memChk = new MemoryValidate(mem); if (!memChk.doublesSketch) { throw new SketchesArgumentException("Memory object is not a KllDoublesSketch."); } return new KllDoublesSketch(mem, memChk); } - // public functions - /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). 
@@ -181,7 +107,7 @@ public static KllDoublesSketch heapify(final Memory mem) { * array. */ public double[] getCDF(final double[] splitPoints) { - return getPmfOrCdf(splitPoints, true); + return getDoublesPmfOrCdf(splitPoints, true); } /** @@ -190,9 +116,7 @@ public double[] getCDF(final double[] splitPoints) { * * @return the max value of the stream */ - public double getMaxValue() { - return maxValue_; - } + public double getMaxValue() { return getMaxDoubleValue(); } /** * Returns the min value of the stream. @@ -200,9 +124,7 @@ public double getMaxValue() { * * @return the min value of the stream */ - public double getMinValue() { - return minValue_; - } + public double getMinValue() { return getMinDoubleValue(); } /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream @@ -226,7 +148,7 @@ public double getMinValue() { * splitPoint, with the exception that the last interval will include maximum value. */ public double[] getPMF(final double[] splitPoints) { - return getPmfOrCdf(splitPoints, false); + return getDoublesPmfOrCdf(splitPoints, false); } /** @@ -248,27 +170,7 @@ public double[] getPMF(final double[] splitPoints) { * @return the approximation to the value at the given fraction */ public double getQuantile(final double fraction) { - if (isEmpty()) { return Double.NaN; } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (isCompatible()) { - if (fraction == 0.0) { return minValue_; } - if (fraction == 1.0) { return maxValue_; } - } - final KllDoublesQuantileCalculator quant = getQuantileCalculator(); - return quant.getQuantile(fraction); - } - - /** - * Gets the upper bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. 
- * @param fraction the given normalized rank as a fraction - * @return the upper bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. - */ - public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getDoublesQuantile(fraction); } /** @@ -301,24 +203,7 @@ public double getQuantileLowerBound(final double fraction) { * array. */ public double[] getQuantiles(final double[] fractions) { - if (isEmpty()) { return null; } - KllDoublesQuantileCalculator quant = null; - final double[] quantiles = new double[fractions.length]; - for (int i = 0; i < fractions.length; i++) { - final double fraction = fractions[i]; - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (fraction == 0.0 && isCompatible()) { quantiles[i] = minValue_; } - else if (fraction == 1.0 && isCompatible()) { quantiles[i] = maxValue_; } - else { - if (quant == null) { - quant = getQuantileCalculator(); - } - quantiles[i] = quant.getQuantile(fraction); - } - } - return quantiles; + return getDoublesQuantiles(fractions); } /** @@ -340,6 +225,17 @@ public double[] getQuantiles(final int numEvenlySpaced) { return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); } + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
+ */ + public double getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + /** * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, * inclusive. @@ -353,31 +249,14 @@ public double[] getQuantiles(final int numEvenlySpaced) { * @return an approximate rank of the given value */ public double getRank(final double value) { - if (isEmpty()) { return Double.NaN; } - int level = 0; - int weight = 1; - long total = 0; - while (level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - for (int i = fromIndex; i < toIndex; i++) { - if (items_[i] < value) { - total += weight; - } else if (level > 0 || isLevelZeroSorted()) { - break; // levels above 0 are sorted, no point comparing further - } - } - level++; - weight *= 2; - } - return (double) total / getN(); + return getDoubleRank(value); } /** * @return the iterator for this class */ public KllDoublesSketchIterator iterator() { - return new KllDoublesSketchIterator(items_, getLevelsArray(), getNumLevels()); + return new KllDoublesSketchIterator(getDoubleItemsArray(), getLevelsArray(), getNumLevels()); } /** @@ -385,164 +264,22 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllDoublesSketch other) { - if (other == null || other.isEmpty()) { return; } - final long finalN = getN() + other.getN(); - //update this sketch with level0 items from the other sketch - for (int i = other.getLevelsArrayAt(0); i < other.getLevelsArrayAt(1); i++) { - update(other.items_[i]); - } - if (other.getNumLevels() >= 2) { //now merge other levels if they exist - mergeHigherLevels(other, finalN); - } - //update min, max values, n - if (Double.isNaN(minValue_) || other.minValue_ < minValue_) { minValue_ = other.minValue_; } - if (Double.isNaN(maxValue_) || 
other.maxValue_ > maxValue_) { maxValue_ = other.maxValue_; } - setN(finalN); - - assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - if (other.isEstimationMode()) { - setDyMinK(min(getDyMinK(), other.getDyMinK())); - } + mergeDouble(other); } @Override public byte[] toByteArray() { - final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; - final WritableMemory wmem = WritableMemory.writableWrap(bytes); - final boolean singleItem = getN() == 1; - final boolean empty = isEmpty(); - //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) - (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_DOUBLE)); - wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) ( - (empty ? EMPTY_BIT_MASK : 0) - | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) - | DOUBLES_SKETCH_BIT_MASK); - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) getK()); - wmem.putByte(M_BYTE_ADR, (byte) M); - if (empty) { return bytes; } - //load data - int offset = DATA_START_ADR_SINGLE_ITEM; - if (!singleItem) { - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); - offset = DATA_START_ADR_DOUBLE; - // the last integer in levels_ is not serialized because it can be derived - final int len = getLevelsArray().length - 1; - wmem.putIntArray(offset, getLevelsArray(), 0, len); - offset += len * Integer.BYTES; - wmem.putDouble(offset, minValue_); - offset += Double.BYTES; - wmem.putDouble(offset, maxValue_); - offset += Double.BYTES; - } - wmem.putDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - return bytes; + return toGenericCompactByteArray(); } @Override - public byte[] toUpdatableByteArray() { - final int k = getK(); 
- final int numBytes = getCurrentUpdatableSerializedSizeBytes(); - final byte[] bytes = new byte[numBytes]; - final WritableMemory wmem = WritableMemory.writableWrap(bytes); - //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_DOUBLE); - wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) ( - (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | DOUBLES_SKETCH_BIT_MASK - | UPDATABLE_BIT_MASK); - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k); - wmem.putByte(M_BYTE_ADR, (byte) M); - //load data - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); - int offset = DATA_START_ADR_DOUBLE; - // the last integer in levels_ IS serialized - final int len = getLevelsArray().length; - wmem.putIntArray(offset, getLevelsArray(), 0, len); - offset += len * Integer.BYTES; - wmem.putDouble(offset, minValue_); - offset += Double.BYTES; - wmem.putDouble(offset, maxValue_); - offset += Double.BYTES; - wmem.putDoubleArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - return bytes; + public String toString(final boolean withLevels, final boolean withData) { + return toGenericString(withLevels, withData); } @Override - public String toString(final boolean withLevels, final boolean withData) { - final int k = getK(); - final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); - final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); - final StringBuilder sb = new StringBuilder(); - sb.append(Util.LS).append("### KLL Doubles Sketch summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); - sb.append(" M : ").append(M).append(Util.LS); - sb.append(" N : 
").append(getN()).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); - sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); - sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); - sb.append(" Min Value : ").append(minValue_).append(Util.LS); - sb.append(" Max Value : ").append(maxValue_).append(Util.LS); - sb.append("### End sketch summary").append(Util.LS); - - if (withLevels) { - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < getNumLevels(); level++) { - sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": ") - .append(KllHelper.levelCapacity(k, getNumLevels(), level, M)) - .append(", ").append(KllHelper.currentLevelSize(level, getNumLevels(), getLevelsArray())).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); - } - - if (withData) { - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (getLevelsArrayAt(0) > 0) { - sb.append(" Garbage:" + Util.LS); - for (int i = 0; i < getLevelsArrayAt(0); i++) { - sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); - } - } - int level = 0; - while (level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - if (fromIndex 
< toIndex) { - sb.append(" level[").append(level).append("]: offset: " + getLevelsArrayAt(level) + " wt: " + (1 << level)); - sb.append(Util.LS); - } - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - } - return sb.toString(); + public byte[] toUpdatableByteArray() { + return toGenericUpdatableByteArray(); } /** @@ -550,274 +287,56 @@ public String toString(final boolean withLevels, final boolean withData) { * * @param value an item from a stream of items. NaNs are ignored. */ - public void update(final double value) { - if (Double.isNaN(value)) { return; } - if (isEmpty()) { - minValue_ = value; - maxValue_ = value; - } else { - if (value < minValue_) { minValue_ = value; } - if (value > maxValue_) { maxValue_ = value; } - } - if (getLevelsArrayAt(0) == 0) { - compressWhileUpdating(); - } - incN(); - setLevelZeroSorted(false); - final int nextPos = getLevelsArrayAt(0) - 1; - assert getLevelsArrayAt(0) >= 0; - setLevelsArrayAt(0, nextPos); - items_[nextPos] = value; + public void update(final double value) { //possibly move proxy + updateDouble(value); } - // Restricted Methods + @Override //Used internally + double[] getDoubleItemsArray() { return doubleItems_; } - private KllDoublesQuantileCalculator getQuantileCalculator() { - sortLevelZero(); // sort in the sketch to reuse if possible - return new KllDoublesQuantileCalculator(items_, getLevelsArray(), getNumLevels(), getN()); - } + @Override //Dummy + float[] getFloatItemsArray() { return null; } - private double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf) { - if (isEmpty()) { return null; } - KllDoublesHelper.validateDoubleValues(splitPoints); - final double[] buckets = new double[splitPoints.length + 1]; - int level = 0; - int weight = 1; - while 
(level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - if (level == 0 && !isLevelZeroSorted()) { - incrementBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } else { - incrementBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } - level++; - weight *= 2; - } - // normalize and, if CDF, convert to cumulative - if (isCdf) { - double subtotal = 0; - for (int i = 0; i < buckets.length; i++) { - subtotal += buckets[i]; - buckets[i] = subtotal / getN(); - } - } else { - for (int i = 0; i < buckets.length; i++) { - buckets[i] /= getN(); - } - } - return buckets; - } - - private void incrementBucketsUnsortedLevel(final int fromIndex, final int toIndex, - final int weight, final double[] splitPoints, final double[] buckets) { - for (int i = fromIndex; i < toIndex; i++) { - int j; - for (j = 0; j < splitPoints.length; j++) { - if (items_[i] < splitPoints[j]) { - break; - } - } - buckets[j] += weight; - } - } - - private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, - final int weight, final double[] splitPoints, final double[] buckets) { - int i = fromIndex; - int j = 0; - while (i < toIndex && j < splitPoints.length) { - if (items_[i] < splitPoints[j]) { - buckets[j] += weight; // this sample goes into this bucket - i++; // move on to next sample and see whether it also goes into this bucket - } else { - j++; // no more samples for this bucket - } - } - // now either i == toIndex (we are out of samples), or - // j == numSplitPoints (we are out of buckets, but there are more samples remaining) - // we only need to do something in the latter case - if (j == splitPoints.length) { - buckets[j] += weight * (toIndex - i); - } - } - - // The following code is only valid in the special case of exactly reaching capacity while updating. - // It cannot be used while merging, while reducing k, or anything else. 
- private void compressWhileUpdating() { - final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - - // It is important to add the new top level right here. Be aware that this next operation - // grows the items array, shifts the items data and the level boundaries of the data. - // It also grows the levels array and increments numLevels_. - if (level == getNumLevels() - 1) { - addEmptyTopLevelToCompletelyFullSketch(); - } - - final int rawBeg = getLevelsArrayAt(level); - final int rawEnd = getLevelsArrayAt(level + 1); - // +2 is OK because we already added a new top level if necessary - final int popAbove = getLevelsArrayAt(level + 2) - rawEnd; - final int rawPop = rawEnd - rawBeg; - final boolean oddPop = isOdd(rawPop); - final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; - final int adjPop = oddPop ? rawPop - 1 : rawPop; - final int halfAdjPop = adjPop / 2; - - // level zero might not be sorted, so we must sort it if we wish to compact it - if (level == 0) { - Arrays.sort(items_, adjBeg, adjBeg + adjPop); - } - if (popAbove == 0) { - KllDoublesHelper.randomlyHalveUpDoubles(items_, adjBeg, adjPop, random); - } else { - KllDoublesHelper.randomlyHalveDownDoubles(items_, adjBeg, adjPop, random); - KllDoublesHelper.mergeSortedDoubleArrays( - items_, adjBeg, halfAdjPop, - items_, rawEnd, popAbove, - items_, adjBeg + halfAdjPop); - } - setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above - - if (oddPop) { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item - items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy - } else { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty - } - - // verify that we freed up halfAdjPop array slots just below the current level - assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; - - // finally, we need to shift up the data in the levels below - // so 
that the freed-up space can be used by level zero - if (level > 0) { - final int amount = rawBeg - getLevelsArrayAt(0); - System.arraycopy(items_, getLevelsArrayAt(0), items_, getLevelsArrayAt(0) + halfAdjPop, amount); - for (int lvl = 0; lvl < level; lvl++) { - setLevelsArrayAtPlusEq(lvl, halfAdjPop); - } - } - } - - /** - * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom - */ - private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalItemsCap = getLevelsArrayAt(getNumLevels()); - - // make sure that we are following a certain growth scheme - assert getLevelsArrayAt(0) == 0; //definition of full - assert items_.length == curTotalItemsCap; - - //this is a little out of sequence so that we can pre-compute the total required increase in space - final int deltaItemsCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); - final int newTotalItemsCap = curTotalItemsCap + deltaItemsCap; - - // Check if growing the levels arr if required. 
- // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it - final boolean growLevelsArr = getLevelsArray().length < getNumLevels() + 2; - - //int totalDeltaSpaceRequired = deltaItemsCap * Double.BYTES; - //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } - //insert memory space management here - - if (growLevelsArr) { - setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); //grow levels arr by one - } - - final double[] newBuf = new double[newTotalItemsCap]; - - // copy (and shift) the current data into the new buffer - System.arraycopy(items_, getLevelsArrayAt(0), newBuf, getLevelsArrayAt(0) + deltaItemsCap, curTotalItemsCap); - items_ = newBuf; //grow the items arr - - // This loop updates all level indices excluding the "extra" index at the top - for (int level = 0; level <= getNumLevels(); level++) { - setLevelsArrayAtPlusEq(level,deltaItemsCap); - } - - assert getLevelsArrayAt(getNumLevels()) == newTotalItemsCap; - - incNumLevels(); - setLevelsArrayAt(getNumLevels(), newTotalItemsCap); // initialize the new "extra" index at the top + double[] getItems() { + return getDoubleItemsArray(); } - private void sortLevelZero() { - if (!isLevelZeroSorted()) { - Arrays.sort(items_, getLevelsArrayAt(0), getLevelsArrayAt(1)); - setLevelZeroSorted(true); - } - } + @Override //Used internally + double getMaxDoubleValue() { return maxDoubleValue_; } - private void mergeHigherLevels(final KllDoublesSketch other, final long finalN) { - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); - final double[] workbuf = new double[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; + @Override //Dummy + float getMaxFloatValue() { return (float) maxDoubleValue_; } - final int provisionalNumLevels = 
max(getNumLevels(), other.getNumLevels()); + @Override //Used internally + double getMinDoubleValue() { return minDoubleValue_; } - populateWorkArrays(other, workbuf, worklevels, provisionalNumLevels); + @Override //Dummy + float getMinFloatValue() { return (float) minDoubleValue_; } - // notice that workbuf is being used as both the input and output here - final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - final int finalNumLevels = result[0]; - final int finalCapacity = result[1]; - final int finalPop = result[2]; + @Override //Used internally + void setDoubleItemsArray(final double[] doubleItems) { doubleItems_ = doubleItems; } - assert finalNumLevels <= ub; // ub may be much bigger + @Override //Dummy + void setFloatItemsArray(final float[] floatItems) { } - // now we need to transfer the results back into the "self" sketch - final double[] newbuf = finalCapacity == items_.length ? 
items_ : new double[finalCapacity]; - final int freeSpaceAtBottom = finalCapacity - finalPop; - System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); - final int theShift = freeSpaceAtBottom - outlevels[0]; + @Override //Used internally + void setMaxDoubleValue(final double value) { maxDoubleValue_ = value; } - if (getLevelsArray().length < finalNumLevels + 1) { - setLevelsArray(new int[finalNumLevels + 1]); - } + @Override //Dummy + void setMaxFloatValue(final float value) { } - for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index - setLevelsArrayAt(lvl, outlevels[lvl] + theShift); - } + @Override //Used internally + void setMinDoubleValue(final double value) { minDoubleValue_ = value; } - items_ = newbuf; - setNumLevels(finalNumLevels); - } + // for testing - private void populateWorkArrays(final KllDoublesSketch other, final double[] workbuf, - final int[] worklevels, final int provisionalNumLevels) { - worklevels[0] = 0; - - // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), getLevelsArray()); - System.arraycopy(items_, getLevelsArrayAt(0), workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; - - for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), getLevelsArray()); - final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), other.getLevelsArray()); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; - - if (selfPop > 0 && otherPop == 0) { - System.arraycopy(items_, getLevelsArrayAt(lvl), workbuf, worklevels[lvl], selfPop); - } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(other.items_, other.getLevelsArrayAt(lvl), workbuf, worklevels[lvl], otherPop); - } else if (selfPop > 0 && otherPop > 0) { - KllDoublesHelper.mergeSortedDoubleArrays(items_, getLevelsArrayAt(lvl), 
selfPop, other.items_, - other.getLevelsArrayAt(lvl), otherPop, workbuf, worklevels[lvl]); - } - } - } + @Override //Dummy + void setMinFloatValue(final float value) { } - // for testing + @Override + void updateLevelsArray(final int[] levels) { + // TODO Auto-generated method stub - double[] getItems() { - return items_; } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 22894d312..f4fc507c0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,49 +21,22 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; -import static 
org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; - -import java.util.Arrays; - -import org.apache.datasketches.Family; + import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; -import org.apache.datasketches.kll.KllPreambleUtil.MemoryCheck; import org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -// (leave blank) /** * Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll} */ -public class KllFloatsSketch extends KllHeapSketch { +public final class KllFloatsSketch extends KllHeapSketch { // Specific to the floats sketch - private float[] items_; // the continuous array of float items - private float minValue_; - private float maxValue_; + private float[] floatItems_; // the continuous array of float items + private float minFloatValue_; + private float maxFloatValue_; /** * Heap constructor with the default k = 200, which has a rank error of about 1.65%. @@ -80,64 +53,19 @@ public KllFloatsSketch() { */ public KllFloatsSketch(final int k) { super(k, SketchType.FLOAT_SKETCH); - items_ = new float[k]; - minValue_ = Float.NaN; - maxValue_ = Float.NaN; + floatItems_ = new float[k]; + minFloatValue_ = Float.NaN; + maxFloatValue_ = Float.NaN; } /** * Private heapify constructor. * @param mem Memory object that contains data serialized by this sketch. - * @param memChk the MemoryCheck object + * @param memVal the MemoryCheck object */ - private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { - super(memChk.k, SketchType.FLOAT_SKETCH); - setLevelZeroSorted(memChk.level0Sorted); - final int k = getK(); - if (memChk.empty) { - setNumLevels(1); - setLevelsArray(new int[] {k, k}); - setLevelZeroSorted(false); - setDyMinK(k); - items_ = new float[k]; - minValue_ = Float.NaN; - maxValue_ = Float.NaN; - } else if (memChk.singleItem) { - setN(1); - setDyMinK(k); - setNumLevels(1); - setLevelsArray(new int[getNumLevels() + 1]); - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - setLevelsArrayAt(0,itemCapacity - 1); - setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ - items_ = new float[itemCapacity]; - items_[getLevelsArrayAt(0)] = mem.getFloat(DATA_START_ADR_SINGLE_ITEM); - minValue_ = items_[getLevelsArrayAt(0)]; - maxValue_ = items_[getLevelsArrayAt(0)]; - } else { - setN(memChk.n); - setDyMinK(memChk.dyMinK); - 
setNumLevels(memChk.numLevels); - setLevelsArray(new int[getNumLevels() + 1]); - int offset = DATA_START_ADR_FLOAT; - final int itemCapacity = KllHelper.computeTotalItemCapacity(k, M, getNumLevels()); - if (memChk.updatable) { - // If updatable the last integer in levels_ IS serialized. - mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels() + 1); //load levels_ - offset += (getNumLevels() + 1) * Integer.BYTES; - } else { - // If compact the last integer in levels_ is not serialized. - mem.getIntArray(offset, getLevelsArray(), 0, getNumLevels()); //load levels_ - offset += getNumLevels() * Integer.BYTES; - setLevelsArrayAt(getNumLevels(), itemCapacity); //load the last integer in levels_ - } - minValue_ = mem.getFloat(offset); - offset += Float.BYTES; - maxValue_ = mem.getFloat(offset); - offset += Float.BYTES; - items_ = new float[itemCapacity]; - mem.getFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - } + private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { + super(memVal.k, SketchType.FLOAT_SKETCH); + buildHeapKllSketchFromMemory(memVal); } /** @@ -150,15 +78,13 @@ private KllFloatsSketch(final Memory mem, final MemoryCheck memChk) { //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. //The verified Memory is then passed to the actual private heapify constructor. public static KllFloatsSketch heapify(final Memory mem) { - final MemoryCheck memChk = new MemoryCheck(mem); - if (memChk.doublesSketch) { + final MemoryValidate memVal = new MemoryValidate(mem); + if (memVal.doublesSketch) { throw new SketchesArgumentException("Memory object is not a KllFloatsSketch."); } - return new KllFloatsSketch(mem, memChk); + return new KllFloatsSketch(mem, memVal); } - // public functions - /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). 
@@ -181,18 +107,40 @@ public static KllFloatsSketch heapify(final Memory mem) { * array. */ public double[] getCDF(final float[] splitPoints) { - return getPmfOrCdf(splitPoints, true); + return getFloatsPmfOrCdf(splitPoints, true); } + @Override //Used internally + float[] getFloatItemsArray() { return floatItems_; } + + @Override //Used internally + void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } + + @Override //Dummy + double[] getDoubleItemsArray() { return null; } + + @Override //Dummy + void setDoubleItemsArray(final double[] doubleItems) { } + + @Override //Dummy + double getMaxDoubleValue() { return maxFloatValue_; } + + @Override //Used internally + float getMaxFloatValue() { return maxFloatValue_; } + /** * Returns the max value of the stream. * If the sketch is empty this returns NaN. * * @return the max value of the stream */ - public float getMaxValue() { - return maxValue_; - } + public float getMaxValue() { return maxFloatValue_; } + + @Override //Dummy + double getMinDoubleValue() { return minFloatValue_; } + + @Override //Used internally + float getMinFloatValue() { return minFloatValue_; } /** * Returns the min value of the stream. @@ -200,9 +148,19 @@ public float getMaxValue() { * * @return the min value of the stream */ - public float getMinValue() { - return minValue_; - } + public float getMinValue() { return minFloatValue_; } + + @Override //Dummy + void setMaxDoubleValue(final double value) { } + + @Override //Used internally + void setMaxFloatValue(final float value) { maxFloatValue_ = value; } + + @Override //Dummy + void setMinDoubleValue(final double value) { } + + @Override //Used internally + void setMinFloatValue(final float value) { minFloatValue_ = value; } /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream @@ -226,7 +184,7 @@ public float getMinValue() { * splitPoint, with the exception that the last interval will include maximum value. 
*/ public double[] getPMF(final float[] splitPoints) { - return getPmfOrCdf(splitPoints, false); + return getFloatsPmfOrCdf(splitPoints, false); } /** @@ -248,38 +206,7 @@ public double[] getPMF(final float[] splitPoints) { * @return the approximation to the value at the given fraction */ public float getQuantile(final double fraction) { - if (isEmpty()) { return Float.NaN; } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (isCompatible()) { - if (fraction == 0.0) { return minValue_; } - if (fraction == 1.0) { return maxValue_; } - } - final KllFloatsQuantileCalculator quant = getQuantileCalculator(); - return quant.getQuantile(fraction); - } - - /** - * Gets the upper bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. - * @param fraction the given normalized rank as a fraction - * @return the upper bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. - */ - public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); - } - - /** - * Gets the lower bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. - * @param fraction the given normalized rank as a fraction - * @return the lower bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. - */ - public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getFloatsQuantile(fraction); } /** @@ -301,24 +228,7 @@ public float getQuantileLowerBound(final double fraction) { * array. 
*/ public float[] getQuantiles(final double[] fractions) { - if (isEmpty()) { return null; } - KllFloatsQuantileCalculator quant = null; - final float[] quantiles = new float[fractions.length]; - for (int i = 0; i < fractions.length; i++) { - final double fraction = fractions[i]; - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (fraction == 0.0 && isCompatible()) { quantiles[i] = minValue_; } - else if (fraction == 1.0 && isCompatible()) { quantiles[i] = maxValue_; } - else { - if (quant == null) { - quant = getQuantileCalculator(); - } - quantiles[i] = quant.getQuantile(fraction); - } - } - return quantiles; + return getFloatsQuantiles(fractions); } /** @@ -340,6 +250,28 @@ public float[] getQuantiles(final int numEvenlySpaced) { return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); } + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
+ */ + public float getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + /** * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, * inclusive. @@ -353,31 +285,14 @@ public float[] getQuantiles(final int numEvenlySpaced) { * @return an approximate rank of the given value */ public double getRank(final float value) { - if (isEmpty()) { return Double.NaN; } - int level = 0; - int weight = 1; - long total = 0; - while (level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - for (int i = fromIndex; i < toIndex; i++) { - if (items_[i] < value) { - total += weight; - } else if (level > 0 || isLevelZeroSorted()) { - break; // levels above 0 are sorted, no point comparing further - } - } - level++; - weight *= 2; - } - return (double) total / getN(); + return getFloatRank(value); } /** * @return the iterator for this class */ public KllFloatsSketchIterator iterator() { - return new KllFloatsSketchIterator(items_, getLevelsArray(), getNumLevels()); + return new KllFloatsSketchIterator(getFloatItemsArray(), getLevelsArray(), getNumLevels()); } /** @@ -385,164 +300,22 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - if (other == null || other.isEmpty()) { return; } - final long finalN = getN() + other.getN(); - //update this sketch with level0 items from the other sketch - for (int i = other.getLevelsArrayAt(0); i < other.getLevelsArrayAt(1); i++) { - update(other.items_[i]); - } - if (other.getNumLevels() >= 2) { //now merge other levels if they exist - mergeHigherLevels(other, finalN); - } - //update min, max values, n - if (Float.isNaN(minValue_) || other.minValue_ < minValue_) { minValue_ = other.minValue_; } - if (Float.isNaN(maxValue_) || other.maxValue_ > 
maxValue_) { maxValue_ = other.maxValue_; } - setN(finalN); - - assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - if (other.isEstimationMode()) { - setDyMinK(min(getDyMinK(), other.getDyMinK())); - } + mergeFloat(other); } @Override public byte[] toByteArray() { - final byte[] bytes = new byte[getCurrentCompactSerializedSizeBytes()]; - final WritableMemory wmem = WritableMemory.writableWrap(bytes); - final boolean singleItem = getN() == 1; - final boolean empty = isEmpty(); - //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) - (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FLOAT)); - wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) ( - (empty ? EMPTY_BIT_MASK : 0) - | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | (singleItem ? SINGLE_ITEM_BIT_MASK : 0)); - // (leave blank) - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) getK()); - wmem.putByte(M_BYTE_ADR, (byte) M); - if (empty) { return bytes; } - //load data - int offset = DATA_START_ADR_SINGLE_ITEM; - if (!singleItem) { - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); - offset = DATA_START_ADR_FLOAT; - // the last integer in levels_ is not serialized because it can be derived - final int len = getLevelsArray().length - 1; - wmem.putIntArray(offset, getLevelsArray(), 0, len); - offset += len * Integer.BYTES; - wmem.putFloat(offset, minValue_); - offset += Float.BYTES; - wmem.putFloat(offset, maxValue_); - offset += Float.BYTES; - } - wmem.putFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - return bytes; + return toGenericCompactByteArray(); } @Override public byte[] toUpdatableByteArray() { - final int k = getK(); - final int numBytes = 
getCurrentUpdatableSerializedSizeBytes(); - final byte[] bytes = new byte[numBytes]; - final WritableMemory wmem = WritableMemory.writableWrap(bytes); - //load the preamble - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_FLOAT); - wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_EMPTY_FULL); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - final byte flags = (byte) ( - (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | UPDATABLE_BIT_MASK); - // (leave blank) - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) k); - wmem.putByte(M_BYTE_ADR, (byte) M); - //load data - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); - int offset = DATA_START_ADR_FLOAT; - // the last integer in levels_ IS serialized - final int len = getLevelsArray().length; - wmem.putIntArray(offset, getLevelsArray(), 0, len); - offset += len * Integer.BYTES; - wmem.putFloat(offset, minValue_); - offset += Float.BYTES; - wmem.putFloat(offset, maxValue_); - offset += Float.BYTES; - wmem.putFloatArray(offset, items_, getLevelsArrayAt(0), getNumRetained()); - return bytes; + return toGenericUpdatableByteArray(); } @Override public String toString(final boolean withLevels, final boolean withData) { - final int k = getK(); - final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); - final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); - final StringBuilder sb = new StringBuilder(); - sb.append(Util.LS).append("### KLL Floats Sketch summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); - sb.append(" M : ").append(M).append(Util.LS); - sb.append(" N : ").append(getN()).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsison PMF : 
").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); - sb.append(" Capacity Items : ").append(items_.length).append(Util.LS); - sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); - sb.append(" Min Value : ").append(minValue_).append(Util.LS); - sb.append(" Max Value : ").append(maxValue_).append(Util.LS); - sb.append("### End sketch summary").append(Util.LS); - - if (withLevels) { - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < getNumLevels(); level++) { - sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": ") - .append(KllHelper.levelCapacity(k, getNumLevels(), level, M)) - .append(", ").append(KllHelper.currentLevelSize(level, getNumLevels(), getLevelsArray())).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(getLevelsArrayAt(level)).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); - } - - if (withData) { - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (getLevelsArrayAt(0) > 0) { - sb.append(" Garbage:" + Util.LS); - for (int i = 0; i < getLevelsArrayAt(0); i++) { - sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); - } - } - int level = 0; - while (level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + getLevelsArrayAt(level) + " wt: " + (1 << 
level)); - sb.append(Util.LS); - } - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(items_[i]).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + getLevelsArrayAt(level) + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - } - return sb.toString(); + return toGenericString(withLevels, withData); } /** @@ -551,273 +324,19 @@ public String toString(final boolean withLevels, final boolean withData) { * @param value an item from a stream of items. NaNs are ignored. */ public void update(final float value) { - if (Float.isNaN(value)) { return; } - if (isEmpty()) { - minValue_ = value; - maxValue_ = value; - } else { - if (value < minValue_) { minValue_ = value; } - if (value > maxValue_) { maxValue_ = value; } - } - if (getLevelsArrayAt(0) == 0) { - compressWhileUpdating(); - } - incN(); - setLevelZeroSorted(false); - final int nextPos = getLevelsArrayAt(0) - 1; - assert getLevelsArrayAt(0) >= 0; - setLevelsArrayAt(0, nextPos); - items_[nextPos] = value; - } - - // Restricted Methods - - private KllFloatsQuantileCalculator getQuantileCalculator() { - sortLevelZero(); // sort in the sketch to reuse if possible - return new KllFloatsQuantileCalculator(items_, getLevelsArray(), getNumLevels(), getN()); - } - - private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) { - if (isEmpty()) { return null; } - KllFloatsHelper.validateFloatValues(splitPoints); - final double[] buckets = new double[splitPoints.length + 1]; - int level = 0; - int weight = 1; - while (level < getNumLevels()) { - final int fromIndex = getLevelsArrayAt(level); - final int toIndex = getLevelsArrayAt(level + 1); // exclusive - if (level == 0 && !isLevelZeroSorted()) { - incrementBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } else { - incrementBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } - level++; - weight *= 2; - } - 
// normalize and, if CDF, convert to cumulative - if (isCdf) { - double subtotal = 0; - for (int i = 0; i < buckets.length; i++) { - subtotal += buckets[i]; - buckets[i] = subtotal / getN(); - } - } else { - for (int i = 0; i < buckets.length; i++) { - buckets[i] /= getN(); - } - } - return buckets; - } - - private void incrementBucketsUnsortedLevel(final int fromIndex, final int toIndex, - final int weight, final float[] splitPoints, final double[] buckets) { - for (int i = fromIndex; i < toIndex; i++) { - int j; - for (j = 0; j < splitPoints.length; j++) { - if (items_[i] < splitPoints[j]) { - break; - } - } - buckets[j] += weight; - } - } - - private void incrementBucketsSortedLevel(final int fromIndex, final int toIndex, - final int weight, final float[] splitPoints, final double[] buckets) { - int i = fromIndex; - int j = 0; - while (i < toIndex && j < splitPoints.length) { - if (items_[i] < splitPoints[j]) { - buckets[j] += weight; // this sample goes into this bucket - i++; // move on to next sample and see whether it also goes into this bucket - } else { - j++; // no more samples for this bucket - } - } - // now either i == toIndex (we are out of samples), or - // j == numSplitPoints (we are out of buckets, but there are more samples remaining) - // we only need to do something in the latter case - if (j == splitPoints.length) { - buckets[j] += weight * (toIndex - i); - } - } - - // The following code is only valid in the special case of exactly reaching capacity while updating. - // It cannot be used while merging, while reducing k, or anything else. - private void compressWhileUpdating() { - final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - - // It is important to add the new top level right here. Be aware that this next operation - // grows the items array, shifts the items data and the level boundaries of the data. - // It also grows the levels array and increments numLevels_. 
- if (level == getNumLevels() - 1) { - addEmptyTopLevelToCompletelyFullSketch(); - } - - final int rawBeg = getLevelsArrayAt(level); - final int rawEnd = getLevelsArrayAt(level + 1); - // +2 is OK because we already added a new top level if necessary - final int popAbove = getLevelsArrayAt(level + 2) - rawEnd; - final int rawPop = rawEnd - rawBeg; - final boolean oddPop = isOdd(rawPop); - final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; - final int adjPop = oddPop ? rawPop - 1 : rawPop; - final int halfAdjPop = adjPop / 2; - - // level zero might not be sorted, so we must sort it if we wish to compact it - if (level == 0) { - Arrays.sort(items_, adjBeg, adjBeg + adjPop); - } - if (popAbove == 0) { - KllFloatsHelper.randomlyHalveUpFloats(items_, adjBeg, adjPop, random); - } else { - KllFloatsHelper.randomlyHalveDownFloats(items_, adjBeg, adjPop, random); - KllFloatsHelper.mergeSortedFloatArrays( - items_, adjBeg, halfAdjPop, - items_, rawEnd, popAbove, - items_, adjBeg + halfAdjPop); - } - setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above - - if (oddPop) { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item - items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy - } else { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty - } - - // verify that we freed up halfAdjPop array slots just below the current level - assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; - - // finally, we need to shift up the data in the levels below - // so that the freed-up space can be used by level zero - if (level > 0) { - final int amount = rawBeg - getLevelsArrayAt(0); - System.arraycopy(items_, getLevelsArrayAt(0), items_, getLevelsArrayAt(0) + halfAdjPop, amount); - for (int lvl = 0; lvl < level; lvl++) { - setLevelsArrayAtPlusEq(lvl, halfAdjPop); - } - } - } - - /** - * This grows the levels arr by 1 (if needed) and increases the 
capacity of the items array at the bottom - */ - private void addEmptyTopLevelToCompletelyFullSketch() { - final int curTotalItemsCap = getLevelsArrayAt(getNumLevels()); - - // make sure that we are following a certain growth scheme - assert getLevelsArrayAt(0) == 0; //definition of full - assert items_.length == curTotalItemsCap; - - //this is a little out of sequence so that we can pre-compute the total required increase in space - final int deltaItemsCap = KllHelper.levelCapacity(getK(), getNumLevels() + 1, 0, M); - final int newTotalItemsCap = curTotalItemsCap + deltaItemsCap; - - // Check if growing the levels arr if required. - // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it - final boolean growLevelsArr = getLevelsArray().length < getNumLevels() + 2; - - //int totalDeltaSpaceRequired = deltaItemsCap * Float.BYTES; - //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } - //insert memory space management here - - if (growLevelsArr) { - setLevelsArray(KllHelper.growIntArray(getLevelsArray(), getNumLevels() + 2)); //grow levels arr by one - } - - final float[] itemsBuf = new float[newTotalItemsCap]; - - // copy (and shift) the current data into the new buffer - System.arraycopy(items_, getLevelsArrayAt(0), itemsBuf, getLevelsArrayAt(0) + deltaItemsCap, curTotalItemsCap); - items_ = itemsBuf; //grow the items arr - - // This loop updates all level indices excluding the "extra" index at the top - for (int level = 0; level <= getNumLevels(); level++) { - setLevelsArrayAtPlusEq(level, deltaItemsCap); - } - - assert getLevelsArrayAt(getNumLevels()) == newTotalItemsCap; - - incNumLevels(); - setLevelsArrayAt(getNumLevels(), newTotalItemsCap); // initialize the new "extra" index at the top - } - - private void sortLevelZero() { - if (!isLevelZeroSorted()) { - Arrays.sort(items_, getLevelsArrayAt(0), getLevelsArrayAt(1)); - setLevelZeroSorted(true); - } + updateFloat(value); } - private void mergeHigherLevels(final 
KllFloatsSketch other, final long finalN) { - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); - final float[] workbuf = new float[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; - - final int provisionalNumLevels = max(getNumLevels(), other.getNumLevels()); - - populateWorkArrays(other, workbuf, worklevels, provisionalNumLevels); - - // notice that workbuf is being used as both the input and output here - final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - final int finalNumLevels = result[0]; - final int finalCapacity = result[1]; - final int finalPop = result[2]; - - assert finalNumLevels <= ub; // ub may be much bigger - - // now we need to transfer the results back into the "self" sketch - final float[] newbuf = finalCapacity == items_.length ? 
items_ : new float[finalCapacity]; - final int freeSpaceAtBottom = finalCapacity - finalPop; - System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); - final int theShift = freeSpaceAtBottom - outlevels[0]; - - if (getLevelsArray().length < finalNumLevels + 1) { - setLevelsArray(new int[finalNumLevels + 1]); - } - - for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index - setLevelsArrayAt(lvl, outlevels[lvl] + theShift); - } - - items_ = newbuf; - setNumLevels(finalNumLevels); - } + // for testing - private void populateWorkArrays(final KllFloatsSketch other, final float[] workbuf, - final int[] worklevels, final int provisionalNumLevels) { - worklevels[0] = 0; - - // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), getLevelsArray()); - System.arraycopy(items_, getLevelsArrayAt(0), workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; - - for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), getLevelsArray()); - final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), other.getLevelsArray()); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; - - if (selfPop > 0 && otherPop == 0) { - System.arraycopy(items_, getLevelsArrayAt(lvl), workbuf, worklevels[lvl], selfPop); - } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(other.items_, other.getLevelsArrayAt(lvl), workbuf, worklevels[lvl], otherPop); - } else if (selfPop > 0 && otherPop > 0) { - KllFloatsHelper.mergeSortedFloatArrays(items_, getLevelsArrayAt(lvl), selfPop, other.items_, - other.getLevelsArrayAt(lvl), otherPop, workbuf, worklevels[lvl]); - } - } + float[] getItems() { + return getFloatItemsArray(); } - // for testing + @Override + void updateLevelsArray(final int[] levels) { + // TODO Auto-generated method stub - float[] 
getItems() { - return items_; } } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 27aacaef9..4491f9ffa 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -26,8 +26,8 @@ abstract class KllHeapSketch extends KllSketch { /* * Data is stored in items_. * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence, levels_ must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed, except for level 0, + * Hence, levels_ array must contain (numLevels_ + 1) indices. + * The valid portion of items_ is completely packed and sorted, except for level 0, * which is filled from the top down. * * Invariants: @@ -56,6 +56,7 @@ abstract class KllHeapSketch extends KllSketch { super(sketchType); KllHelper.checkK(k); this.k = k; + n_ = 0; dyMinK_ = k; numLevels_ = 1; levels_ = new int[] {k, k}; @@ -72,24 +73,17 @@ public long getN() { return n_; } - @Override - public int getNumRetained() { - return levels_[numLevels_] - levels_[0]; - } - @Override int getDyMinK() { return dyMinK_; } @Override - int[] getLevelsArray() { - return levels_; - } + String getLayout() { return "HEAP"; } @Override - int getLevelsArrayAt(final int index) { - return levels_[index]; + int[] getLevelsArray() { + return levels_; } @Override @@ -113,28 +107,18 @@ boolean isLevelZeroSorted() { } @Override - void setDyMinK(final int dyMinK) { - dyMinK_ = dyMinK; - } - - @Override - void setLevelsArray(final int[] levels) { - this.levels_ = levels; + public boolean isUpdatable() { + return true; } @Override - void setLevelsArrayAt(final int index, final int value) { - this.levels_[index] = value; - } - - @Override - void setLevelsArrayAtMinusEq(final int index, final int minusEq) { - this.levels_[index] -= minusEq; + void setDyMinK(final int dyMinK) { + dyMinK_ 
= dyMinK; } @Override - void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - this.levels_[index] += plusEq; + void setLevelsArray(final int[] levelsArr) { + levels_ = levelsArr; } @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 3e22ce3ee..df7233040 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -19,11 +19,8 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.Family.idToFamily; import static org.apache.datasketches.Util.zeroPad; -import org.apache.datasketches.Family; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -152,6 +149,7 @@ private KllPreambleUtil() {} // Other static values static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format + static final byte SERIAL_VERSION_UPDATABLE = 3; // static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item static final int PREAMBLE_INTS_FLOAT = 5; // not empty nor single item, full preamble float static final int PREAMBLE_INTS_DOUBLE = 6; // not empty nor single item, full preamble double @@ -190,11 +188,11 @@ static String toString(final byte[] byteArr) { * @return the summary string. 
*/ static String toString(final Memory mem) { - return null; //memoryToString(mem); + return memoryToString(mem); } static String memoryToString(final Memory mem) { - final MemoryCheck memChk = new MemoryCheck(mem); + final MemoryValidate memChk = new MemoryValidate(mem); final int flags = memChk.flags & 0XFF; final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); @@ -246,241 +244,6 @@ static String memoryToString(final Memory mem) { return sb.toString(); } - static class MemoryCheck { - // first 8 bytes - final int preInts; // = extractPreInts(srcMem); - final int serVer; - final int familyID; - final String famName; - final int flags; - final boolean empty; - final boolean level0Sorted; - final boolean singleItem; - final boolean doublesSketch; - final boolean updatable; - final int k; - final int m; - - Layout layout; - // next 8 bytes, depending on the Layout, the next fields may be filled with assumed values. - long n; - // next 4 bytes - int dyMinK; - int numLevels; - // derived - int dataStart; - int[] levels; - int itemsStart; - int memItemsCap; - int sketchBytes; - - MemoryCheck(final Memory srcMem) { - preInts = extractPreInts(srcMem); - serVer = extractSerVer(srcMem); - familyID = extractFamilyID(srcMem); - flags = extractFlags(srcMem); - empty = (flags & EMPTY_BIT_MASK) > 0; - level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; - singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; - updatable = (flags & UPDATABLE_BIT_MASK) > 0; - k = extractK(srcMem); - m = extractM(srcMem); - - KllHelper.checkK(k); - if (m != 8) { memoryCheckThrow(7, m); } - if (familyID != Family.KLL.getID()) { memoryCheckThrow(0, familyID); } - famName = idToFamily(familyID).toString(); - if (famName != "KLL") { memoryCheckThrow(23, 0); } - - final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); - if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); } - - switch (checkFlags) { - case 0: { //FloatFullCompact or FloatUpdatable (full) - if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - n = extractN(srcMem); - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_FLOAT; - levels = new int[numLevels + 1]; - if (updatable) { - layout = Layout.FLOAT_UPDATABLE; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; - } else { - layout = Layout.FLOAT_FULL_COMPACT; - srcMem.getIntArray(dataStart, levels, 0, numLevels); - levels[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); - itemsStart = dataStart + (levels.length - 1) * Integer.BYTES; - memItemsCap = levels[numLevels] - levels[0]; - sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; - } - break; - } - case 1: { //FloatEmptyCompact or FloatUpdatable (empty) - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - if (updatable) { - layout = Layout.FLOAT_UPDATABLE; //empty - n = extractN(srcMem); - if (n != 0) { memoryCheckThrow(21, (int) n); } - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_FLOAT; - levels = new int[numLevels + 1]; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + memItemsCap * Float.BYTES; - } else { - layout = Layout.FLOAT_EMPTY_COMPACT; - n = 0; - dyMinK = k; - numLevels = 1; - dataStart = 
DATA_START_ADR_SINGLE_ITEM; //ignore if empty - levels = new int[] {k, k}; - itemsStart = dataStart; - memItemsCap = 0; - sketchBytes = itemsStart; - } - break; - } - case 4: { //FloatSingleCompact or FloatUpdatable (single) - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } - if (updatable) { - layout = Layout.FLOAT_UPDATABLE; - n = extractN(srcMem); - if (n != 1) { memoryCheckThrow(22, (int)n); } - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_FLOAT; - levels = new int[numLevels + 1]; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + (memItemsCap + 2) * Float.BYTES; - } else { - layout = Layout.FLOAT_SINGLE_COMPACT; - n = 1; - dyMinK = k; - numLevels = 1; - levels = new int[] {k - 1, k}; - dataStart = DATA_START_ADR_SINGLE_ITEM; - itemsStart = dataStart; - memItemsCap = 1; - sketchBytes = itemsStart + memItemsCap * Float.BYTES; - } - break; - } - case 8: { //DoubleFullCompact or DoubleUpdatable (full) - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - n = extractN(srcMem); - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_DOUBLE; - levels = new int[numLevels + 1]; - if (updatable) { - layout = Layout.DOUBLE_UPDATABLE; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + (memItemsCap + 2) * Double.BYTES; - } else { - layout = Layout.DOUBLE_FULL_COMPACT; - srcMem.getIntArray(dataStart, levels, 0, numLevels); - levels[numLevels] = 
KllHelper.computeTotalItemCapacity(k, m, numLevels); - itemsStart = dataStart + (levels.length - 1) * Integer.BYTES; - memItemsCap = levels[numLevels] - levels[0]; - sketchBytes = itemsStart + (memItemsCap + 2) * Double.BYTES; - } - break; - } - case 9: { //DoubleEmptyCompact or DoubleUpdatable (empty) - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } - if (updatable) { - layout = Layout.DOUBLE_UPDATABLE; //empty - n = extractN(srcMem); - if (n != 0) { memoryCheckThrow(21, (int) n); } - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_DOUBLE; - levels = new int[numLevels + 1]; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + memItemsCap * Double.BYTES; - } else { - layout = Layout.DOUBLE_EMPTY_COMPACT; - n = 0; - dyMinK = k; - numLevels = 1; - dataStart = DATA_START_ADR_SINGLE_ITEM; //ignore if empty - levels = new int[] {k, k}; - itemsStart = dataStart; - memItemsCap = 0; - sketchBytes = itemsStart; - } - break; - } - case 12: { //DoubleSingleCompact or DoubleUpdatable (single) - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } - if (updatable) { - layout = Layout.DOUBLE_UPDATABLE; - n = extractN(srcMem); - if (n != 1) { memoryCheckThrow(22, (int)n); } - dyMinK = extractDyMinK(srcMem); - numLevels = extractNumLevels(srcMem); - dataStart = DATA_START_ADR_DOUBLE; - levels = new int[numLevels + 1]; - srcMem.getIntArray(dataStart, levels, 0, numLevels + 1); - itemsStart = dataStart + levels.length * Integer.BYTES; - memItemsCap = KllHelper.computeTotalItemCapacity(k, m, numLevels); - sketchBytes = itemsStart + memItemsCap * Double.BYTES; - } else { - 
layout = Layout.DOUBLE_SINGLE_COMPACT; - n = 1; - dyMinK = k; - numLevels = 1; - levels = new int[] {k - 1, k}; - dataStart = DATA_START_ADR_SINGLE_ITEM; - itemsStart = dataStart; - memItemsCap = 1; - sketchBytes = itemsStart + memItemsCap * Double.BYTES; - } - break; - } - default: break; //can't happen - } - } - - private static void memoryCheckThrow(final int errNo, final int value) { - String msg = ""; - switch (errNo) { - case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; - case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; - case 21: msg = "N != 0 and empty bit is set. N: " + value; break; - case 22: msg = "N != 1 and single item bit is set. 
N: " + value; break; - case 23: msg = "Family name is not KLL"; break; - } - throw new SketchesArgumentException(msg); - } - } - static int extractPreInts(final Memory mem) { return mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0XFF; } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index ea399de19..5111d31a8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -26,21 +26,52 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.round; +import static org.apache.datasketches.Util.isOdd; import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; +import static 
org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; +import java.util.Arrays; import java.util.Random; +import org.apache.datasketches.Family; +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.WritableMemory; + +/** + * These methods are used by both direct and on-heap as well as Double and Float type sketches. + * + * @author lrhodes + */ abstract class KllSketch { static final Random random = new Random(); static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. @@ -51,6 +82,8 @@ abstract class KllSketch { KllSketch.sketchType = sketchType; } + //Static methods + /** * Gets the approximate value of k to use given epsilon, the normalized rank error. * @param epsilon the normalized rank error between zero and one. 
@@ -119,11 +152,17 @@ static int getSerializedSizeBytes(final int numLevels, final int numRetained, fi } } + final static boolean isCompatible() { + return compatible; + } + + //Public Non-static methods + /** * Returns the current compact number of bytes this sketch would require to store. * @return the current compact number of bytes this sketch would require to store. */ - public int getCurrentCompactSerializedSizeBytes() { + public final int getCurrentCompactSerializedSizeBytes() { return KllSketch.getSerializedSizeBytes(getNumLevels(), getNumRetained(), sketchType, false); } @@ -131,7 +170,7 @@ public int getCurrentCompactSerializedSizeBytes() { * Returns the current updatable number of bytes this sketch would require to store. * @return the current updatable number of bytes this sketch would require to store. */ - public int getCurrentUpdatableSerializedSizeBytes() { + public final int getCurrentUpdatableSerializedSizeBytes() { final int itemCap = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); return KllSketch.getSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); } @@ -152,11 +191,13 @@ public int getCurrentUpdatableSerializedSizeBytes() { * Gets the approximate rank error of this sketch normalized as a fraction between zero and one. * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * The epsilon value returned is a best fit to 99 percentile empirically measured max error in + * thousands of trials * @return if pmf is true, returns the normalized rank error for the getPMF() function. * Otherwise, it is the "single-sided" normalized rank error for all the other queries. 
* @see KllDoublesSketch */ - public double getNormalizedRankError(final boolean pmf) { + public final double getNormalizedRankError(final boolean pmf) { return KllHelper.getNormalizedRankError(getDyMinK(), pmf); } @@ -164,8 +205,8 @@ public double getNormalizedRankError(final boolean pmf) { * Returns the number of retained items (samples) in the sketch. * @return the number of retained items (samples) in the sketch */ - public int getNumRetained() { - return getLevelsArrayAt(getLevelsArrayAt(getNumLevels()) - getLevelsArrayAt(0)); + public final int getNumRetained() { + return getLevelsArray()[getNumLevels()] - getLevelsArray()[0]; } /** @@ -182,7 +223,7 @@ public int getSerializedSizeBytes() { * Returns true if this sketch is empty. * @return empty flag */ - public boolean isEmpty() { + public final boolean isEmpty() { return getN() == 0; } @@ -190,10 +231,12 @@ public boolean isEmpty() { * Returns true if this sketch is in estimation mode. * @return estimation mode flag */ - public boolean isEstimationMode() { + public final boolean isEstimationMode() { return getNumLevels() > 1; } + public abstract boolean isUpdatable(); + /** * Returns serialized sketch in a compact byte array form. * @return serialized sketch in a compact byte array form. 
@@ -201,7 +244,7 @@ public boolean isEstimationMode() { public abstract byte[] toByteArray(); @Override - public String toString() { + public final String toString() { return toString(false, false); } @@ -219,13 +262,261 @@ public String toString() { */ public abstract byte[] toUpdatableByteArray(); - //Restricted Methods + //package-private non-static methods + + final void buildHeapKllSketchFromMemory(final MemoryValidate memVal) { + final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final boolean updatable = memVal.updatable; + setLevelZeroSorted(memVal.level0Sorted); + setN(memVal.n); + setDyMinK(memVal.dyMinK); + setNumLevels(memVal.numLevels); + final int[] myLevelsArr = new int[getNumLevels() + 1]; + + if (updatable) { + memVal.levelsWmem.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); + setLevelsArray(myLevelsArr); + if (doubleType) { + setMinDoubleValue(memVal.minMaxWmem.getDouble(0)); + setMaxDoubleValue(memVal.minMaxWmem.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsWmem.getCapacity() / Double.BYTES; + final double[] myItemsArr = new double[itemsCap]; + memVal.itemsWmem.getDoubleArray(0, myItemsArr, 0, itemsCap); + setDoubleItemsArray(myItemsArr); + } else { //float + setMinFloatValue(memVal.minMaxWmem.getFloat(0)); + setMaxFloatValue(memVal.minMaxWmem.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsWmem.getCapacity() / Float.BYTES; + final float[] myItemsArr = new float[itemsCap]; + memVal.itemsWmem.getFloatArray(0, myItemsArr, 0, itemsCap); + setFloatItemsArray(myItemsArr); + } + } else { //compact + memVal.levelsMem.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); + setLevelsArray(myLevelsArr); + if (doubleType) { + setMinDoubleValue(memVal.minMaxMem.getDouble(0)); + setMaxDoubleValue(memVal.minMaxMem.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsMem.getCapacity() / Double.BYTES; + final double[] myItemsArr = new double[itemsCap]; + memVal.itemsMem.getDoubleArray(0, myItemsArr, 0, 
itemsCap); + setDoubleItemsArray(myItemsArr); + } else { //float + setMinFloatValue(memVal.minMaxMem.getFloat(0)); + setMaxFloatValue(memVal.minMaxMem.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsMem.getCapacity() / Float.BYTES; + final float[] myItemsArr = new float[itemsCap]; + memVal.itemsMem.getFloatArray(0, myItemsArr, 0, itemsCap); + setFloatItemsArray(myItemsArr); + } + } + } + + /** + * @return full size of internal items array including garbage; for a floats sketch this will be null. + */ + abstract double[] getDoubleItemsArray(); + + final double getDoubleRank(final double value) { + if (isEmpty()) { return Double.NaN; } + int level = 0; + int weight = 1; + long total = 0; + final double[] myDoubleItemsArr = getDoubleItemsArray(); + final int[] myLevelsArr = getLevelsArray(); + while (level < getNumLevels()) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + for (int i = fromIndex; i < toIndex; i++) { + if (myDoubleItemsArr[i] < value) { + total += weight; + } else if (level > 0 || isLevelZeroSorted()) { + break; // levels above 0 are sorted, no point comparing further + } + } + level++; + weight *= 2; + } + return (double) total / getN(); + } + + final double[] getDoublesPmfOrCdf(final double[] splitPoints, final boolean isCdf) { + if (isEmpty()) { return null; } + KllDoublesHelper.validateDoubleValues(splitPoints); + final double[] buckets = new double[splitPoints.length + 1]; + final int myNumLevels = getNumLevels(); + final int[] myLevelsArr = getLevelsArray(); + int level = 0; + int weight = 1; + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (level == 0 && !isLevelZeroSorted()) { + incrementDoublesBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); + } else { + incrementDoublesBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); + } + level++; + 
weight *= 2; + } + // normalize and, if CDF, convert to cumulative + if (isCdf) { + double subtotal = 0; + for (int i = 0; i < buckets.length; i++) { + subtotal += buckets[i]; + buckets[i] = subtotal / getN(); + } + } else { + for (int i = 0; i < buckets.length; i++) { + buckets[i] /= getN(); + } + } + return buckets; + } + + final double getDoublesQuantile(final double fraction) { + if (isEmpty()) { return Double.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (isCompatible()) { + if (fraction == 0.0) { return getMinDoubleValue(); } + if (fraction == 1.0) { return getMaxDoubleValue(); } + } + final KllDoublesQuantileCalculator quant = getDoublesQuantileCalculator(); + return quant.getQuantile(fraction); + } + + final double[] getDoublesQuantiles(final double[] fractions) { + if (isEmpty()) { return null; } + KllDoublesQuantileCalculator quant = null; + final double[] quantiles = new double[fractions.length]; + for (int i = 0; i < fractions.length; i++) { + final double fraction = fractions[i]; + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (fraction == 0.0 && isCompatible()) { quantiles[i] = getMinDoubleValue(); } + else if (fraction == 1.0 && isCompatible()) { quantiles[i] = getMaxDoubleValue(); } + else { + if (quant == null) { + quant = getDoublesQuantileCalculator(); + } + quantiles[i] = quant.getQuantile(fraction); + } + } + return quantiles; + } abstract int getDyMinK(); + /** + * @return full size of internal items array including garbage; for a doubles sketch this will be null. 
+ */ + abstract float[] getFloatItemsArray(); + + final double getFloatRank(final float value) { + if (isEmpty()) { return Double.NaN; } + int level = 0; + int weight = 1; + long total = 0; + final float[] myFloatItemsArr = getFloatItemsArray(); + final int[] myLevelsArr = getLevelsArray(); + while (level < getNumLevels()) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + for (int i = fromIndex; i < toIndex; i++) { + if (myFloatItemsArr[i] < value) { + total += weight; + } else if (level > 0 || isLevelZeroSorted()) { + break; // levels above 0 are sorted, no point comparing further + } + } + level++; + weight *= 2; + } + return (double) total / getN(); + } + + final double[] getFloatsPmfOrCdf(final float[] splitPoints, final boolean isCdf) { + if (isEmpty()) { return null; } + KllFloatsHelper.validateFloatValues(splitPoints); + final double[] buckets = new double[splitPoints.length + 1]; + final int myNumLevels = getNumLevels(); + final int[] myLevelsArr = getLevelsArray(); + int level = 0; + int weight = 1; + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (level == 0 && !isLevelZeroSorted()) { + incrementFloatBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); + } else { + incrementFloatBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); + } + level++; + weight *= 2; + } + // normalize and, if CDF, convert to cumulative + if (isCdf) { + double subtotal = 0; + for (int i = 0; i < buckets.length; i++) { + subtotal += buckets[i]; + buckets[i] = subtotal / getN(); + } + } else { + for (int i = 0; i < buckets.length; i++) { + buckets[i] /= getN(); + } + } + return buckets; + } + + final float getFloatsQuantile(final double fraction) { + if (isEmpty()) { return Float.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero 
nor greater than 1.0"); + } + if (isCompatible()) { + if (fraction == 0.0) { return getMinFloatValue(); } + if (fraction == 1.0) { return getMaxFloatValue(); } + } + final KllFloatsQuantileCalculator quant = getFloatsQuantileCalculator(); + return quant.getQuantile(fraction); + } + + final float[] getFloatsQuantiles(final double[] fractions) { + if (isEmpty()) { return null; } + KllFloatsQuantileCalculator quant = null; + final float[] quantiles = new float[fractions.length]; + for (int i = 0; i < fractions.length; i++) { + final double fraction = fractions[i]; + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (fraction == 0.0 && isCompatible()) { quantiles[i] = getMinFloatValue(); } + else if (fraction == 1.0 && isCompatible()) { quantiles[i] = getMaxFloatValue(); } + else { + if (quant == null) { + quant = getFloatsQuantileCalculator(); + } + quantiles[i] = quant.getQuantile(fraction); + } + } + return quantiles; + } + + abstract String getLayout(); + abstract int[] getLevelsArray(); - abstract int getLevelsArrayAt(int index); + abstract double getMaxDoubleValue(); + + abstract float getMaxFloatValue(); + + abstract double getMinDoubleValue(); + + abstract float getMinFloatValue(); abstract int getNumLevels(); @@ -233,26 +524,846 @@ public String toString() { abstract void incNumLevels(); - boolean isCompatible() { - return compatible; + abstract boolean isLevelZeroSorted(); + + final void mergeDouble(final KllDoublesSketch other) { + if (other == null || other.isEmpty()) { return; } + final long finalN = getN() + other.getN(); + //update this sketch with level0 items from the other sketch + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + updateDouble(otherDoubleItemsArr[i]); + } + if (other.getNumLevels() >= 2) { //now merge 
other levels if they exist + mergeDoubleHigherLevels(other, finalN); + } + //update min, max values, n + final double myMin = getMinDoubleValue(); + final double otherMin = other.getMinDoubleValue(); + final double myMax = getMaxDoubleValue(); + final double otherMax = other.getMaxDoubleValue(); + if (Double.isNaN(myMin) || otherMin < myMin) { setMinDoubleValue(otherMin); } + if (Double.isNaN(myMax) || otherMax > myMax) { setMaxDoubleValue(otherMax); } + setN(finalN); + + assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); + if (other.isEstimationMode()) { + setDyMinK(min(getDyMinK(), other.getDyMinK())); + } } - abstract boolean isLevelZeroSorted(); + final void mergeFloat(final KllFloatsSketch other) { + if (other == null || other.isEmpty()) { return; } + final long finalN = getN() + other.getN(); + //update this sketch with level0 items from the other sketch + final float[] otherFloatItemsArr = other.getFloatItemsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + updateFloat(otherFloatItemsArr[i]); + } + if (other.getNumLevels() >= 2) { //now merge other levels if they exist + mergeFloatHigherLevels(other, finalN); + } + //update min, max values, n + final float myMin = getMinFloatValue(); + final float otherMin = other.getMinFloatValue(); + final float myMax = getMaxFloatValue(); + final float otherMax = other.getMaxFloatValue(); + if (Float.isNaN(myMin) || otherMin < myMin) { setMinFloatValue(otherMin); } + if (Float.isNaN(myMax) || otherMax > myMax) { setMaxFloatValue(otherMax); } + setN(finalN); - abstract void setDyMinK(int dyMinK); + assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); + if (other.isEstimationMode()) { + setDyMinK(min(getDyMinK(), other.getDyMinK())); + } + } - abstract void setLevelsArray(int[] levels); + abstract void setDoubleItemsArray(double[] floatItems); - abstract void setLevelsArrayAt(int index, 
int value); + abstract void setDyMinK(int dyMinK); + + abstract void setFloatItemsArray(float[] floatItems); - abstract void setLevelsArrayAtMinusEq(int index, int minusEq); + //Only for internal changes to the array, NOT for changing its size + abstract void updateLevelsArray(int[] levels); - abstract void setLevelsArrayAtPlusEq(int index, int plusEq); + abstract void setLevelsArray(int[] levelsArr); abstract void setLevelZeroSorted(boolean sorted); + abstract void setMaxDoubleValue(double value); + + abstract void setMaxFloatValue(float value); + + abstract void setMinDoubleValue(double value); + + abstract void setMinFloatValue(float value); + abstract void setN(long n); abstract void setNumLevels(int numLevels); + final byte[] toGenericCompactByteArray() { //From Heap Only + final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final byte[] byteArr = new byte[getCurrentCompactSerializedSizeBytes()]; + final WritableMemory wmem = WritableMemory.writableWrap(byteArr); + final boolean singleItem = getN() == 1; + final boolean empty = isEmpty(); + //load the preamble + if (doubleType) { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) + (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_DOUBLE)); + } else { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) + (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FLOAT)); + } + wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); + byte flags = (byte) ( + (empty ? EMPTY_BIT_MASK : 0) + | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) + | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0)); + + flags |= (byte) (doubleType ? 
DOUBLES_SKETCH_BIT_MASK : 0); + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) getK()); + wmem.putByte(M_BYTE_ADR, (byte) M); + if (empty) { return byteArr; } + + //load data + int offset = DATA_START_ADR_SINGLE_ITEM; + final int[] myLevelsArr = getLevelsArray(); + if (!singleItem) { + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); + offset = (doubleType) ? DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; + // the last integer in levels_ is not serialized because it can be derived + final int len = myLevelsArr.length - 1; + wmem.putIntArray(offset, myLevelsArr, 0, len); + offset += len * Integer.BYTES; + if (doubleType) { + wmem.putDouble(offset, getMinDoubleValue()); + offset += Double.BYTES; + wmem.putDouble(offset, getMaxDoubleValue()); + offset += Double.BYTES; + wmem.putDoubleArray(offset, getDoubleItemsArray(), myLevelsArr[0], getNumRetained()); + } else { + wmem.putFloat(offset, getMinFloatValue()); + offset += Float.BYTES; + wmem.putFloat(offset, getMaxFloatValue()); + offset += Float.BYTES; + wmem.putFloatArray(offset, getFloatItemsArray(), myLevelsArr[0], getNumRetained()); + } + } else { //single item + if (doubleType) { + final double value = getDoubleItemsArray()[myLevelsArr[0]]; + wmem.putDouble(offset, value); + } else { + final float value = getFloatItemsArray()[myLevelsArr[0]]; + wmem.putFloat(offset, value); + } + } + return byteArr; + } + + @SuppressWarnings("null") + final String toGenericString(final boolean withLevels, final boolean withData) { + final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final int k = getK(); + final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); + final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); + final StringBuilder sb = new StringBuilder(); + final String skType = (doubleType) ? 
"Doubles" : "Floats"; + sb.append(Util.LS).append("### KLL ").append(skType).append("Sketch summary:").append(Util.LS); + sb.append(" K : ").append(k).append(Util.LS); + sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); + sb.append(" M : ").append(M).append(Util.LS); + sb.append(" N : ").append(getN()).append(Util.LS); + sb.append(" Epsilon : ").append(epsPct).append(Util.LS); + sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); + sb.append(" Empty : ").append(isEmpty()).append(Util.LS); + sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); + sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); + sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); + final int cap = (doubleType) ? getDoubleItemsArray().length : getFloatItemsArray().length; + sb.append(" Capacity Items : ").append(cap).append(Util.LS); + sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); + sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); + if (doubleType) { + sb.append(" Min Value : ").append(getMinDoubleValue()).append(Util.LS); + sb.append(" Max Value : ").append(getMaxDoubleValue()).append(Util.LS); + } else { + sb.append(" Min Value : ").append(getMinFloatValue()).append(Util.LS); + sb.append(" Max Value : ").append(getMaxFloatValue()).append(Util.LS); + } + sb.append("### End sketch summary").append(Util.LS); + + final int myNumLevels = getNumLevels(); + final int[] myLevelsArr = getLevelsArray(); + double[] myDoubleItemsArr = null; + float[] myFloatItemsArr = null; + if (doubleType) { + myDoubleItemsArr = getDoubleItemsArray(); + } else { + myFloatItemsArr = getFloatItemsArray(); + } + + if (withLevels) { + sb.append("### KLL levels array:").append(Util.LS) + .append(" level, offset: nominal capacity, actual size").append(Util.LS); + int level = 0; + for ( ; level < myNumLevels; level++) { + sb.append(" ").append(level).append(", 
").append(myLevelsArr[level]).append(": ") + .append(KllHelper.levelCapacity(k, myNumLevels, level, M)) + .append(", ").append(KllHelper.currentLevelSize(level, myNumLevels, myLevelsArr)).append(Util.LS); + } + sb.append(" ").append(level).append(", ").append(myLevelsArr[level]).append(": (Exclusive)") + .append(Util.LS); + sb.append("### End levels array").append(Util.LS); + } + + if (withData) { + sb.append("### KLL items data {index, item}:").append(Util.LS); + if (myLevelsArr[0] > 0) { + sb.append(" Garbage:" + Util.LS); + if (doubleType) { + for (int i = 0; i < myLevelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(myDoubleItemsArr[i]).append(Util.LS); + } + } else { + for (int i = 0; i < myLevelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(myFloatItemsArr[i]).append(Util.LS); + } + } + } + int level = 0; + if (doubleType) { + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (fromIndex < toIndex) { + sb.append(" level[").append(level).append("]: offset: " + myLevelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); + } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(myDoubleItemsArr[i]).append(Util.LS); + } + level++; + } + } + else { + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (fromIndex <= toIndex) { + sb.append(" level[").append(level).append("]: offset: " + myLevelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); + } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(myFloatItemsArr[i]).append(Util.LS); + } + level++; + } + } + sb.append(" level[" + level + "]: offset: " + myLevelsArr[level] + " (Exclusive)"); + sb.append(Util.LS); + sb.append("### End items data").append(Util.LS); + } + return sb.toString(); + } + + final byte[] 
toGenericUpdatableByteArray() { + final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final byte[] byteArr = new byte[getCurrentUpdatableSerializedSizeBytes()]; + final WritableMemory wmem = WritableMemory.writableWrap(byteArr); + final boolean singleItem = getN() == 1; + final boolean empty = isEmpty(); + //load the preamble + if (doubleType) { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_DOUBLE); //ignore empty, singleItem + } else { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_FLOAT); //ignore empty, singleItem + } + wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_UPDATABLE); + wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); + byte flags = (byte) ( + (empty ? EMPTY_BIT_MASK : 0) //set but not used + | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) //set but not used + | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) + | UPDATABLE_BIT_MASK); + flags |= (byte) (doubleType ? DOUBLES_SKETCH_BIT_MASK : 0); + wmem.putByte(FLAGS_BYTE_ADR, flags); + wmem.putShort(K_SHORT_ADR, (short) getK()); + wmem.putByte(M_BYTE_ADR, (byte) M); + //load data + wmem.putLong(N_LONG_ADR, getN()); + wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); + int offset = (doubleType) ? 
DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; + // the last integer in levels_ IS serialized + final int[] myLevelsArr = getLevelsArray(); + final int len = myLevelsArr.length; + wmem.putIntArray(offset, myLevelsArr, 0, len); + offset += len * Integer.BYTES; + if (doubleType) { + wmem.putDouble(offset, getMinDoubleValue()); + offset += Double.BYTES; + wmem.putDouble(offset, getMaxDoubleValue()); + offset += Double.BYTES; + final double[] doubleItemsArr = getDoubleItemsArray(); + wmem.putDoubleArray(offset, doubleItemsArr, 0, doubleItemsArr.length); + } else { + wmem.putFloat(offset, getMinFloatValue()); + offset += Float.BYTES; + wmem.putFloat(offset, getMaxFloatValue()); + offset += Float.BYTES; + final float[] floatItemsArr = getFloatItemsArray(); + wmem.putFloatArray(offset, floatItemsArr, 0, floatItemsArr.length); + } + return byteArr; + } + + final void updateDouble(final double value) { + if (Double.isNaN(value)) { return; } + if (isEmpty()) { + setMinDoubleValue(value); + setMaxDoubleValue(value); + } else { + if (value < getMinDoubleValue()) { setMinDoubleValue(value); } + if (value > getMaxDoubleValue()) { setMaxDoubleValue(value); } + } + int[] myLevelsArr = getLevelsArray(); + double[] myDoubleItemsArr = getDoubleItemsArray(); + if (myLevelsArr[0] == 0) { + compressWhileUpdatingDoublesSketch(); + } + myLevelsArr = getLevelsArray(); //refresh + myDoubleItemsArr = getDoubleItemsArray(); + incN(); + setLevelZeroSorted(false); + final int nextPos = myLevelsArr[0] - 1; + assert myLevelsArr[0] >= 0; + myLevelsArr[0] = nextPos; + myDoubleItemsArr[nextPos] = value; + } + + final void updateFloat(final float value) { + if (Float.isNaN(value)) { return; } + if (isEmpty()) { + setMinFloatValue(value); + setMaxFloatValue(value); + } else { + if (value < getMinFloatValue()) { setMinFloatValue(value); } + if (value > getMaxFloatValue()) { setMaxFloatValue(value); } + } + int[] myLevelsArr = getLevelsArray(); + float[] myFloatItemsArr = getFloatItemsArray(); + if 
(myLevelsArr[0] == 0) { + compressWhileUpdatingFloatsSketch(); + } + myLevelsArr = getLevelsArray(); //refresh + myFloatItemsArr = getFloatItemsArray(); + incN(); + setLevelZeroSorted(false); + final int nextPos = myLevelsArr[0] - 1; + assert myLevelsArr[0] >= 0; + myLevelsArr[0] = nextPos; + myFloatItemsArr[nextPos] = value; + } + + //Private non-static methods + + /** + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom + */ + private void addEmptyTopLevelToCompletelyFullDoublesSketch() { + final int[] myCurLevelsArr = getLevelsArray(); + final double[] myCurDoubleItemsArr = getDoubleItemsArray(); + final int myCurNumLevels = getNumLevels(); + final int myCurTotalItemsCap = myCurLevelsArr[myCurNumLevels]; + final int[] myNewLevelsArr; + final double[] myNewDoubleItemsArr; + final int myNewNumLevels; + final int myNewTotalItemsCap; + + // make sure that we are following a certain growth scheme + assert myCurLevelsArr[0] == 0; //definition of full + assert myCurDoubleItemsArr.length == myCurTotalItemsCap; + + //this is a little out of sequence so that we can pre-compute the total required increase in space + final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, M); + myNewTotalItemsCap = myCurTotalItemsCap + deltaItemsCap; + + // Check if growing the levels arr if required. + // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it + final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; + + //INSERT SPACE MANAGEMENT HERE + //int totalDeltaSpaceRequired = deltaItemsCap * Double.BYTES; + //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } + // ... + + // GROW LEVELS ARRAY + if (growLevelsArr) { + //grow levels arr by one and copy the old data to the new array, extra space at the top. 
+ myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); + assert myNewLevelsArr.length == myCurLevelsArr.length + 1; + myNewNumLevels = myCurNumLevels + 1; + incNumLevels(); //increment the class member + } else { + myNewLevelsArr = myCurLevelsArr; + myNewNumLevels = myCurNumLevels; + } + // This loop updates all level indices EXCLUDING the "extra" index at the top + for (int level = 0; level <= myNewNumLevels - 1; level++) { + myNewLevelsArr[level] += deltaItemsCap; + } + myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCap; // initialize the new "extra" index at the top + setLevelsArray(myNewLevelsArr); + + // GROW ITEMS ARRAY + myNewDoubleItemsArr = new double[myNewTotalItemsCap]; + // copy and shift the current data into the new array + System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCap); + //Update the items array + setDoubleItemsArray(myNewDoubleItemsArr); + } + + /** + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom + */ + private void addEmptyTopLevelToCompletelyFullFloatsSketch() { + final int[] myCurLevelsArr = getLevelsArray(); + final float[] myCurFloatItemsArr = getFloatItemsArray(); + final int myCurNumLevels = getNumLevels(); + final int myCurTotalItemsCap = myCurLevelsArr[myCurNumLevels]; + final int[] myNewLevelsArr; + final float[] myNewFloatItemsArr; + final int myNewNumLevels; + final int myNewTotalItemsCap; + + // make sure that we are following a certain growth scheme + assert myCurLevelsArr[0] == 0; //definition of full + assert myCurFloatItemsArr.length == myCurTotalItemsCap; + + //this is a little out of sequence so that we can pre-compute the total required increase in space + final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, M); + myNewTotalItemsCap = myCurTotalItemsCap + deltaItemsCap; + + // Check if growing the levels arr if required. 
+ // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it + final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; + + //INSERT SPACE MANAGEMENT HERE + //int totalDeltaSpaceRequired = deltaItemsCap * Float.BYTES; + //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } + // ... + + // GROW LEVELS ARRAY + if (growLevelsArr) { + //grow levels arr by one and copy the old data to the new array, extra space at the top. + myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); + assert myNewLevelsArr.length == myCurLevelsArr.length + 1; + myNewNumLevels = myCurNumLevels + 1; + incNumLevels(); //increment the class member + } else { + myNewLevelsArr = myCurLevelsArr; + myNewNumLevels = myCurNumLevels; + } + // This loop updates all level indices EXCLUDING the "extra" index at the top + for (int level = 0; level <= myNewNumLevels - 1; level++) { + myNewLevelsArr[level] += deltaItemsCap; + } + myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCap; // initialize the new "extra" index at the top + setLevelsArray(myNewLevelsArr); + + // GROW ITEMS ARRAY + myNewFloatItemsArr = new float[myNewTotalItemsCap]; + // copy and shift the current items data into the new array + System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCap); + //Update the items array + setFloatItemsArray(myNewFloatItemsArr); + } + + // The following code is only valid in the special case of exactly reaching capacity while updating. + // It cannot be used while merging, while reducing k, or anything else. + private void compressWhileUpdatingDoublesSketch() { + final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); + + // It is important to add the new top level right here. Be aware that this next operation + // grows the items array, shifts the items data and the level boundaries of the data. + // It also grows the levels array and increments numLevels_. 
+ if (level == getNumLevels() - 1) { + addEmptyTopLevelToCompletelyFullDoublesSketch(); + } + final int[] myLevelsArr = getLevelsArray(); //new levels arr + final int rawBeg = myLevelsArr[level]; + final int rawEnd = myLevelsArr[level + 1]; + // +2 is OK because we already added a new top level if necessary + final int popAbove = myLevelsArr[level + 2] - rawEnd; + final int rawPop = rawEnd - rawBeg; + final boolean oddPop = isOdd(rawPop); + final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; + final int adjPop = oddPop ? rawPop - 1 : rawPop; + final int halfAdjPop = adjPop / 2; + + // level zero might not be sorted, so we must sort it if we wish to compact it + + final double[] myDoubleItemsArr = getDoubleItemsArray(); + if (level == 0) { + Arrays.sort(myDoubleItemsArr, adjBeg, adjBeg + adjPop); + } + if (popAbove == 0) { + KllDoublesHelper.randomlyHalveUpDoubles(myDoubleItemsArr, adjBeg, adjPop, random); + } else { + KllDoublesHelper.randomlyHalveDownDoubles(myDoubleItemsArr, adjBeg, adjPop, random); + KllDoublesHelper.mergeSortedDoubleArrays( + myDoubleItemsArr, adjBeg, halfAdjPop, + myDoubleItemsArr, rawEnd, popAbove, + myDoubleItemsArr, adjBeg + halfAdjPop); + } + myLevelsArr[level + 1] -= halfAdjPop; // adjust boundaries of the level above + + if (oddPop) { + myLevelsArr[level] = myLevelsArr[level + 1] - 1; // the current level now contains one item + myDoubleItemsArr[myLevelsArr[level]] = myDoubleItemsArr[rawBeg]; // namely this leftover guy + } else { + myLevelsArr[level] = myLevelsArr[level + 1]; // the current level is now empty + } + + // verify that we freed up halfAdjPop array slots just below the current level + assert myLevelsArr[level] == rawBeg + halfAdjPop; + + // finally, we need to shift up the data in the levels below + // so that the freed-up space can be used by level zero + if (level > 0) { + final int amount = rawBeg - myLevelsArr[0]; + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], + myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); 
+ for (int lvl = 0; lvl < level; lvl++) { + myLevelsArr[lvl] += halfAdjPop; + } + } + } + + // The following code is only valid in the special case of exactly reaching capacity while updating. + // It cannot be used while merging, while reducing k, or anything else. + private void compressWhileUpdatingFloatsSketch() { + final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); + + // It is important to add the new top level right here. Be aware that this next operation + // grows the items array, shifts the items data and the level boundaries of the data. + // It also grows the levels array and increments numLevels_. + if (level == getNumLevels() - 1) { + addEmptyTopLevelToCompletelyFullFloatsSketch(); + } + final int[] myLevelsArr = getLevelsArray(); //new levels arr + final int rawBeg = myLevelsArr[level]; + final int rawEnd = myLevelsArr[level + 1]; + // +2 is OK because we already added a new top level if necessary + final int popAbove = myLevelsArr[level + 2] - rawEnd; + final int rawPop = rawEnd - rawBeg; + final boolean oddPop = isOdd(rawPop); + final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; + final int adjPop = oddPop ? 
rawPop - 1 : rawPop; + final int halfAdjPop = adjPop / 2; + + // level zero might not be sorted, so we must sort it if we wish to compact it + + final float[] myFloatItemsArr = getFloatItemsArray(); + if (level == 0) { + Arrays.sort(myFloatItemsArr, adjBeg, adjBeg + adjPop); + } + if (popAbove == 0) { + KllFloatsHelper.randomlyHalveUpFloats(myFloatItemsArr, adjBeg, adjPop, random); + } else { + KllFloatsHelper.randomlyHalveDownFloats(myFloatItemsArr, adjBeg, adjPop, random); + KllFloatsHelper.mergeSortedFloatArrays( + myFloatItemsArr, adjBeg, halfAdjPop, + myFloatItemsArr, rawEnd, popAbove, + myFloatItemsArr, adjBeg + halfAdjPop); + } + myLevelsArr[level + 1] -= halfAdjPop; // adjust boundaries of the level above + + if (oddPop) { + myLevelsArr[level] = myLevelsArr[level + 1] - 1; // the current level now contains one item + myFloatItemsArr[myLevelsArr[level]] = myFloatItemsArr[rawBeg]; // namely this leftover guy + } else { + myLevelsArr[level] = myLevelsArr[level + 1]; // the current level is now empty + } + + // verify that we freed up halfAdjPop array slots just below the current level + assert myLevelsArr[level] == rawBeg + halfAdjPop; + + // finally, we need to shift up the data in the levels below + // so that the freed-up space can be used by level zero + if (level > 0) { + final int amount = rawBeg - myLevelsArr[0]; + System.arraycopy(myFloatItemsArr, myLevelsArr[0], + myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); + for (int lvl = 0; lvl < level; lvl++) { + myLevelsArr[lvl] += halfAdjPop; + } + } + } + + private KllDoublesQuantileCalculator getDoublesQuantileCalculator() { + final int[] myLevelsArr = getLevelsArray(); + final double[] myDoubleItemsArr = getDoubleItemsArray(); + if (!isLevelZeroSorted()) { + Arrays.sort(getDoubleItemsArray(), myLevelsArr[0], myLevelsArr[1]); + setLevelZeroSorted(true); + } + return new KllDoublesQuantileCalculator(myDoubleItemsArr, myLevelsArr, getNumLevels(), getN()); + } + + private KllFloatsQuantileCalculator 
getFloatsQuantileCalculator() { + final int[] myLevelsArr = getLevelsArray(); + final float[] myFloatItemsArr = getFloatItemsArray(); + if (!isLevelZeroSorted()) { + Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]); + setLevelZeroSorted(true); + } + return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, getNumLevels(), getN()); + } + + private void incrementDoublesBucketsSortedLevel(final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = getDoubleItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket + } else { + j++; // no more samples for this bucket + } + } + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } + } + + private void incrementDoublesBucketsUnsortedLevel(final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = getDoubleItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; + } + } + + private void incrementFloatBucketsSortedLevel(final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = getFloatItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myFloatItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this 
sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket + } else { + j++; // no more samples for this bucket + } + } + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } + } + + private void incrementFloatBucketsUnsortedLevel(final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = getFloatItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myFloatItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; + } + } + + private void mergeDoubleHigherLevels(final KllDoublesSketch other, final long finalN) { + final int myCurNumLevels = getNumLevels(); + final int myCurLevelsArrLength = getLevelsArray().length; + final int myCurItemsArrLength = getDoubleItemsArray().length; + + final int tmpSpaceNeeded = getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); + final double[] workbuf = new double[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + + populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output here + final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int finalNumLevels = result[0]; + final int finalCapacity = result[1]; + final int finalPop = result[2]; + + 
assert finalNumLevels <= ub; // ub may be much bigger + + // now we need to transfer the results back into the "self" sketch + final double[] newbuf = finalCapacity == myCurItemsArrLength + ? getDoubleItemsArray() : new double[finalCapacity]; + final int freeSpaceAtBottom = finalCapacity - finalPop; + System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + final int finalLevelsArrLen; + if (myCurLevelsArrLength < finalNumLevels + 1) { + finalLevelsArrLen = finalNumLevels + 1; + } else { finalLevelsArrLen = myCurLevelsArrLength; } + + final int[] myFinalLevelsArr = new int[finalLevelsArrLen]; + + for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index + myFinalLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY MANAGEMENT +// final int itemsDeltaBytes = (newbuf.length - myCurItemsArrLength) * Double.BYTES; +// final int levelsDeltaBytes = finalLevelsArrLen * Integer.BYTES; +// final int totalDeltaBytes = itemsDeltaBytes + levelsDeltaBytes; + + setLevelsArray(myFinalLevelsArr); + setDoubleItemsArray(newbuf); + setNumLevels(finalNumLevels); + } + + private void mergeFloatHigherLevels(final KllFloatsSketch other, final long finalN) { + final int myCurNumLevels = getNumLevels(); + final int myCurLevelsArrLength = getLevelsArray().length; + final int myCurItemsArrLength = getFloatItemsArray().length; + + final int tmpSpaceNeeded = getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); + final float[] workbuf = new float[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + + populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the 
input and output here + final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int finalNumLevels = result[0]; + final int finalCapacity = result[1]; + final int finalPop = result[2]; + + assert finalNumLevels <= ub; // ub may be much bigger + + // now we need to transfer the results back into the "self" sketch + final float[] newbuf = finalCapacity == myCurItemsArrLength + ? getFloatItemsArray() : new float[finalCapacity]; + final int freeSpaceAtBottom = finalCapacity - finalPop; + System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + final int finalLevelsArrLen; + if (myCurLevelsArrLength < finalNumLevels + 1) { + finalLevelsArrLen = finalNumLevels + 1; + } else { finalLevelsArrLen = myCurLevelsArrLength; } + + final int[] myFinalLevelsArr = new int[finalLevelsArrLen]; + + for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index + myFinalLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY MANAGEMENT +// final int itemsDeltaBytes = (newbuf.length - myCurItemsArrLength) * Float.BYTES; +// final int levelsDeltaBytes = finalLevelsArrLen * Integer.BYTES; +// final int totalDeltaBytes = itemsDeltaBytes + levelsDeltaBytes; + + setLevelsArray(myFinalLevelsArr); + setFloatItemsArray(newbuf); + setNumLevels(finalNumLevels); + } + + private void populateDoubleWorkArrays(final KllDoublesSketch other, final double[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + worklevels[0] = 0; + final int[] myLevelsArr = getLevelsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + final double[] myDoubleItemsArr = getDoubleItemsArray(); + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = 
KllHelper.currentLevelSize(0, getNumLevels(),myLevelsArr); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if (selfPop > 0 && otherPop == 0) { + System.arraycopy(myDoubleItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + KllDoublesHelper.mergeSortedDoubleArrays(myDoubleItemsArr, myLevelsArr[lvl], selfPop, otherDoubleItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } + } + } + + private void populateFloatWorkArrays(final KllFloatsSketch other, final float[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + worklevels[0] = 0; + final int[] myLevelsArr = getLevelsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + final float[] myFloatItemsArr = getFloatItemsArray(); + final float[] otherFloatItemsArr = other.getFloatItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), myLevelsArr); + System.arraycopy( myFloatItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if 
(selfPop > 0 && otherPop == 0) { + System.arraycopy( myFloatItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + KllFloatsHelper.mergeSortedFloatArrays( myFloatItemsArr, myLevelsArr[lvl], selfPop, otherFloatItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } + } + } + } diff --git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java new file mode 100644 index 000000000..1cbad775b --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.Family.idToFamily; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.extractFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.extractK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractM; +import static org.apache.datasketches.kll.KllPreambleUtil.extractN; +import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.extractPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.extractSerVer; + +import 
org.apache.datasketches.Family; +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllPreambleUtil.Layout; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; + +final class MemoryValidate { + // first 8 bytes + final int preInts; // = extractPreInts(srcMem); + final int serVer; + final int familyID; + final String famName; + final int flags; + boolean empty; + boolean singleItem; + final boolean level0Sorted; + final boolean doublesSketch; + final boolean updatable; + final int k; + final int m; + + Layout layout; + // depending on the layout, the next 8-16 bytes of the preamble, may be filled with assumed values. + // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1, etc. + long n; + // next 4 bytes + int dyMinK; + int numLevels; + // derived + int memItemsCap; //capacity of Items array for exporting and for Updatable form + int memItemsRetained; //actual items retained in Compact form + int sketchBytes; + Memory levelsMem; //if sk = empty or single, this is derived + Memory minMaxMem; //if sk = empty or single, this is derived + Memory itemsMem; //if sk = empty or single, this is derived + WritableMemory levelsWmem; + WritableMemory minMaxWmem; + WritableMemory itemsWmem; + + MemoryValidate(final Memory srcMem) { + preInts = extractPreInts(srcMem); + serVer = extractSerVer(srcMem); + + familyID = extractFamilyID(srcMem); + if (familyID != Family.KLL.getID()) { memoryCheckThrow(0, familyID); } + famName = idToFamily(familyID).toString(); + if (!"KLL".equals(famName)) { memoryCheckThrow(23, 0); } //compare by value, not by reference identity + + flags = extractFlags(srcMem); + empty = (flags & EMPTY_BIT_MASK) > 0; + level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; + singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; + doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; + updatable = (flags & UPDATABLE_BIT_MASK) > 0; + k = extractK(srcMem); + KllHelper.checkK(k); + m = 
extractM(srcMem);
+    if (m != 8) { memoryCheckThrow(7, m); }
+
+    if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); }
+    else { compactMemoryValidate(srcMem); }
+
+  }
+
+  /**
+   * Validates the preamble of a compact serialized image and loads the sketch state from it.
+   *
+   * <p>The switch key is built from three booleans: empty (1), singleItem (4) and
+   * doublesSketch (8). For each valid combination this checks preInts and serVer, sets the
+   * layout, and fills n, dyMinK, numLevels, levelsMem, minMaxMem, itemsMem, memItemsCap,
+   * memItemsRetained and sketchBytes. Levels and items are copied into new heap arrays;
+   * for the FULL layouts minMaxMem is a region of srcMem.</p>
+   *
+   * @param srcMem the compact image to validate and read
+   * @throws SketchesArgumentException (via memoryCheckThrow) if empty and singleItem are both
+   *     set, or if preInts or serVer does not match the detected layout
+   */
+  void compactMemoryValidate(final Memory srcMem) {
+    final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0);
+    if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); }
+
+    switch (checkFlags) {
+      case 0: { //Float Compact FULL
+        if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); }
+        if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); }
+        layout = Layout.FLOAT_FULL_COMPACT;
+        n = extractN(srcMem);
+        dyMinK = extractDyMinK(srcMem);
+        numLevels = extractNumLevels(srcMem);
+        int offset = DATA_START_ADR_FLOAT;
+        // LEVELS MEM
+        final int[] myLevelsArr = new int[numLevels + 1];
+        srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //copies all except the last one
+        myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one
+        levelsMem = Memory.wrap(myLevelsArr); //separate from srcMem,
+        offset += levelsMem.getCapacity() - Integer.BYTES; // but one larger than srcMem
+        // MIN/MAX MEM
+        minMaxMem = srcMem.region(offset, 2 * Float.BYTES);
+        offset += minMaxMem.getCapacity();
+        // ITEMS MEM
+        memItemsCap = myLevelsArr[numLevels];
+        memItemsRetained = memItemsCap - myLevelsArr[0];
+        final float[] myItemsArr = new float[memItemsCap];
+        srcMem.getFloatArray(offset, myItemsArr, myLevelsArr[0], memItemsRetained);
+        itemsMem = Memory.wrap(myItemsArr);
+        sketchBytes = offset + memItemsRetained * Float.BYTES;
+        break;
+      }
+      case 1: { //Float Compact EMPTY
+        if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); }
+        if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); }
+        layout = Layout.FLOAT_EMPTY_COMPACT;
+        n = 0; //assumed
+        dyMinK = k; //assumed
+        numLevels = 1; //assumed
+
+        // LEVELS MEM
+        levelsMem = Memory.wrap(new int[] {k, k});
+        // MIN/MAX MEM
+        minMaxMem = Memory.wrap(new float[] {Float.NaN, Float.NaN});
+        // ITEMS MEM
+        memItemsCap = k;
+        memItemsRetained = 0;
+        itemsMem = Memory.wrap(new float[k]);
+        sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty
+        break;
+      }
+      case 4: { //Float Compact SINGLE
+        //error 3 is the "Single Item Bit -> PreInts" message; error 1 is the empty-bit one
+        if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(3, preInts); }
+        if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); }
+        layout = Layout.FLOAT_SINGLE_COMPACT;
+        n = 1;
+        dyMinK = k;
+        numLevels = 1;
+
+        // LEVELS MEM
+        levelsMem = Memory.wrap(new int[] {k - 1, k});
+        final float minMax = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM);
+        // MIN/MAX MEM
+        minMaxMem = Memory.wrap(new float[] {minMax, minMax});
+        // ITEMS MEM
+        memItemsCap = k;
+        memItemsRetained = 1;
+        final float[] myFloatItems = new float[k];
+        myFloatItems[k - 1] = minMax;
+        itemsMem = Memory.wrap(myFloatItems);
+        sketchBytes = DATA_START_ADR_SINGLE_ITEM + Float.BYTES;
+        break;
+      }
+      case 8: { //Double Compact FULL
+        if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); }
+        if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); }
+        layout = Layout.DOUBLE_FULL_COMPACT;
+        n = extractN(srcMem);
+        dyMinK = extractDyMinK(srcMem);
+        numLevels = extractNumLevels(srcMem);
+        int offset = DATA_START_ADR_DOUBLE;
+        // LEVELS MEM
+        final int[] myLevelsArr = new int[numLevels + 1];
+        srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //all except the last one
+        myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one
+        levelsMem = Memory.wrap(myLevelsArr); //separate from srcMem
+        offset += levelsMem.getCapacity() - Integer.BYTES;
+        // MIN/MAX MEM
+        minMaxMem = srcMem.region(offset, 2 * Double.BYTES);
+        offset += minMaxMem.getCapacity();
+        // ITEMS MEM
+        memItemsCap = myLevelsArr[numLevels];
+        memItemsRetained = memItemsCap - myLevelsArr[0];
+        final double[] myItemsArr = new double[memItemsCap];
+        srcMem.getDoubleArray(offset, myItemsArr, myLevelsArr[0], memItemsRetained);
+        itemsMem = Memory.wrap(myItemsArr);
+        sketchBytes = offset + memItemsRetained * Double.BYTES;
+        break;
+      }
+      case 9: { //Double Compact EMPTY
+        if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); }
+        if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); }
+        layout = Layout.DOUBLE_EMPTY_COMPACT;
+        n = 0;
+        dyMinK = k;
+        numLevels = 1;
+
+        // LEVELS MEM
+        levelsMem = Memory.wrap(new int[] {k, k});
+        // MIN/MAX MEM
+        minMaxMem = Memory.wrap(new double[] {Double.NaN, Double.NaN});
+        // ITEMS MEM
+        memItemsCap = k;
+        memItemsRetained = 0;
+        itemsMem = Memory.wrap(new double[k]);
+        sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty
+        break;
+      }
+      case 12: { //Double Compact SINGLE
+        //error 3 is the "Single Item Bit -> PreInts" message; error 1 is the empty-bit one
+        if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(3, preInts); }
+        if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); }
+        layout = Layout.DOUBLE_SINGLE_COMPACT;
+        n = 1;
+        dyMinK = k;
+        numLevels = 1;
+
+        // LEVELS MEM
+        levelsMem = Memory.wrap(new int[] {k - 1, k});
+        final double minMax = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM);
+        // MIN/MAX MEM
+        minMaxMem = Memory.wrap(new double[] {minMax, minMax});
+        // ITEMS MEM
+        memItemsCap = k;
+        memItemsRetained = 1;
+        final double[] myDoubleItems = new double[k];
+        myDoubleItems[k - 1] = minMax;
+        itemsMem = Memory.wrap(myDoubleItems);
+        sketchBytes = DATA_START_ADR_SINGLE_ITEM + Double.BYTES;
+        break;
+      }
+      default: break; //can't happen
+    }
+  }
+
+  /**
+   * Validates the preamble of an updatable image and maps the sketch state onto it.
+   *
+   * <p>Only doublesSketch selects the case here; empty and singleItem are recomputed from n.
+   * The levels, min/max and items fields become writable regions of wSrcMem rather than
+   * copies, and sketchBytes is the offset just past the items region.</p>
+   *
+   * @param wSrcMem the updatable image to validate and map
+   * @throws SketchesArgumentException (via memoryCheckThrow) if preInts or serVer does not
+   *     match the float/double updatable layout
+   */
+  void updatableMemoryValidate(final WritableMemory wSrcMem) {
+    final int checkFlags = (doublesSketch ? 8 : 0);
+    //NOTE(review): checkFlags is only ever 0 or 8 here, so (checkFlags & 5) is always 0
+    // and this check can never fire. Confirm whether the raw flags were meant instead.
+    if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); }
+    //System.out.println(KllPreambleUtil.memoryToString(wSrcMem));
+
+    switch (checkFlags) {
+      case 0: { //Float Updatable FULL
+        if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); }
+        if (serVer != SERIAL_VERSION_UPDATABLE) { memoryCheckThrow(10, serVer); }
+        layout = Layout.FLOAT_UPDATABLE;
+        n = extractN(wSrcMem);
+        empty = n == 0;
+        singleItem = n == 1;
+        dyMinK = extractDyMinK(wSrcMem);
+        numLevels = extractNumLevels(wSrcMem);
+        int offset = DATA_START_ADR_FLOAT;
+        //LEVELS
+        levelsWmem = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES);
+        offset += (int)levelsWmem.getCapacity();
+        //MIN/MAX
+        minMaxWmem = wSrcMem.writableRegion(offset, 2 * Float.BYTES);
+        offset += (int)minMaxWmem.getCapacity();
+        //ITEMS
+        memItemsCap = levelsWmem.getInt(numLevels * Integer.BYTES);
+        itemsWmem = wSrcMem.writableRegion(offset, memItemsCap * Float.BYTES);
+        offset += itemsWmem.getCapacity();
+        sketchBytes = offset;
+        break;
+      }
+
+      case 8: { //Double Updatable FULL
+        if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); }
+        if (serVer != SERIAL_VERSION_UPDATABLE) { memoryCheckThrow(10, serVer); }
+        layout = Layout.DOUBLE_UPDATABLE;
+        n = extractN(wSrcMem);
+        empty = n == 0;
+        singleItem = n == 1;
+        dyMinK = extractDyMinK(wSrcMem);
+        numLevels = extractNumLevels(wSrcMem);
+
+        int offset = DATA_START_ADR_DOUBLE;
+        //LEVELS
+        levelsWmem = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES);
+        offset += (int)levelsWmem.getCapacity();
+        //MIN/MAX
+        minMaxWmem = wSrcMem.writableRegion(offset, 2 * Double.BYTES);
+        offset += (int)minMaxWmem.getCapacity();
+        //ITEMS
+        memItemsCap = levelsWmem.getInt(numLevels * Integer.BYTES);
+        itemsWmem = wSrcMem.writableRegion(offset, memItemsCap * Double.BYTES);
+        offset += itemsWmem.getCapacity();
+        sketchBytes = offset;
+        break;
+      }
+      default: break; //can't happen
+    }
+  }
+
+//  @SuppressWarnings("unused")
+//  private
static void printMemInts(final Memory mem) {
+//    final int capInts = (int)(mem.getCapacity() / 4);
+//    for (int i = 0; i < capInts; i++) {
+//      System.out.println(mem.getInt(i * 4));
+//    }
+//  }
+//
+//  @SuppressWarnings("unused")
+//  private static void printMemFloats(final Memory mem) {
+//    final int capFlts = (int)(mem.getCapacity() / 4);
+//    for (int i = 0; i < capFlts; i++) {
+//      System.out.println(mem.getFloat(i * 4));
+//    }
+//  }
+
+
+  /**
+   * Builds the message for the given validation error number and always throws.
+   *
+   * @param errNo selects the message; see the cases below
+   * @param value the offending value, appended to most messages
+   * @throws SketchesArgumentException always
+   */
+  private static void memoryCheckThrow(final int errNo, final int value) {
+    String msg = "";
+    switch (errNo) {
+      case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break;
+      case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break;
+      case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break;
+      case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break;
+      case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break;
+      case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break;
+      case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break;
+      case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break;
+      case 8: msg = "The dynamic MinK must be equal to K, NOT: " + value; break;
+      case 9: msg = "numLevels must be one, NOT: " + value; break;
+      case 10: msg = "Updatable Bit: 1 -> SerVer: " + SERIAL_VERSION_UPDATABLE + ", NOT: " + value; break;
+      case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break;
+      case 21: msg = "N != 0 and empty bit is set. N: " + value; break;
+      case 22: msg = "N != 1 and single item bit is set. N: " + value; break;
+      case 23: msg = "Family name is not KLL"; break;
+      //an unmapped errNo used to throw with an empty message; say what was passed instead
+      default: msg = "Unknown error number: " + errNo + ", value: " + value; break;
+    }
+    throw new SketchesArgumentException(msg);
+  }
+
+}
+
diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index ecbafc6ea..6f68c467c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -19,7 +19,7 @@ package org.apache.datasketches.kll; -//import static org.apache.datasketches.Util.getResourceBytes; +//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; @@ -89,7 +89,8 @@ public void oneItem() { @Test public void manyItemsEstimationMode() { final KllDoublesSketch sketch = new KllDoublesSketch(); - final int n = 1000000; + final int n = 1_000_000; + for (int i = 0; i < n; i++) { sketch.update(i); assertEquals(sketch.getN(), i + 1); @@ -165,7 +166,7 @@ public void merge() { } assertEquals(sketch1.getMinValue(), 0.0); - assertEquals(sketch1.getMaxValue(), (n - 1)*1.0); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0); assertEquals(sketch2.getMinValue(), n * 1.0); assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 87d992c51..50e429956 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -89,7 +89,8 @@ public void oneItem() { @Test public void manyItemsEstimationMode() { final KllFloatsSketch sketch = new KllFloatsSketch(); - final int n = 1000000; + final int n = 1_000_000; + for (int i =
0; i < n; i++) { sketch.update(i); assertEquals(sketch.getN(), i + 1); diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 5e806a546..aaf88d5e6 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -21,6 +21,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import java.util.Objects; @@ -162,105 +163,455 @@ private static void show(final KllDoublesSketch sk, int limit) { } @Test - public void checkMemoryToStringDoubleCompact() { + public void checkGrowLevels() { KllDoublesSketch sk = new KllDoublesSketch(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray()[2], 33); + } + + @Test + public void checkSketchInitializeDoubleHeap() { + int k = 20; //don't change this + KllDoublesSketch sk; + + println("#### CASE: DOUBLE FULL HEAP"); + sk = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + 
assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE HEAP EMPTY"); + sk = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE HEAP SINGLE"); + sk = new KllDoublesSketch(k); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyCompactMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k 
+ 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + sk2 
= new KllDoublesSketch(k); + sk2.update(1); + println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyUpdatableMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + 
assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + 
assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + //@Test //TODO Work on Direct + public void checkSketchInitializeDirectDoubleUpdatableMem() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + 
assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringDoubleCompact() { + int k = 20; // don't change this + KllDoublesSketch sk; KllDoublesSketch sk2; byte[] compBytes; byte[] compBytes2; WritableMemory wmem; String s; - for (int i = 1; i <= 21; i++) { sk.update(i); } - println(sk.toString(true, true)); - - println("CASE 0: DOUBLE_FULL_COMPACT"); + println("#### CASE: DOUBLE FULL COMPACT"); + sk = new 
KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(compBytes, compBytes2); - println("CASE 1: DOUBLE_EMPTY_COMPACT"); + println("#### CASE: DOUBLE EMPTY COMPACT"); sk = new KllDoublesSketch(20); compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(compBytes, compBytes2); - println("CASE 4: DOUBLE_SINGLE_COMPACT"); + println("#### CASE: DOUBLE SINGLE COMPACT"); sk = new KllDoublesSketch(20); sk.update(1); compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); println(s); assertEquals(compBytes, compBytes2); } @Test public void checkMemoryToStringDoubleUpdatable() { - KllDoublesSketch sk = new KllDoublesSketch(20); + int k = 20; //don't change this + KllDoublesSketch sk; KllDoublesSketch sk2; byte[] upBytes; byte[] upBytes2; WritableMemory wmem; String s; - for (int i = 1; i <= 21; i++) { sk.update(i); } - println(sk.toString(true, true)); - - println("CASE 0: DOUBLE_UPDATABLE"); + println("#### CASE: DOUBLE FULL UPDATABLE"); + sk = new KllDoublesSketch(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(upBytes, upBytes2); - println("CASE 1: DOUBLE_UPDATABLE (empty)"); - sk = new KllDoublesSketch(20); + println("#### CASE: DOUBLE EMPTY UPDATABLE"); + sk = new KllDoublesSketch(k); upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); println(s); assertEquals(upBytes, upBytes2); - println("CASE 4: DOUBLE_UPDATABLE (single)"); - sk = new KllDoublesSketch(20); + println("#### CASE: DOUBLE SINGLE UPDATABL"); + sk = new KllDoublesSketch(k); sk.update(1); upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllDoublesSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(upBytes, upBytes2); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 0643a5f8d..81437d957 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -21,6 +21,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import java.util.Objects; @@ -162,105 +163,369 @@ private static void show(final KllFloatsSketch sk, int limit) { } @Test - public void checkMemoryToStringFloatCompact() { + public void checkGrowLevels() { KllFloatsSketch sk = new KllFloatsSketch(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray()[2], 33); + } + + @Test + public void checkSketchInitializeFloatHeap() { + int k = 20; //don't change this + KllFloatsSketch sk; + + println("#### CASE: FLOAT FULL HEAP"); + sk = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, 
true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT HEAP EMPTY"); + sk = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT HEAP SINGLE"); + sk = new KllFloatsSketch(k); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + 
assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyCompactMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + 
assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyUpdatableMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: FLOAT FULL HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + 
compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + 
//println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isUpdatable()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringFloatCompact() { + int k = 20; //don't change this + KllFloatsSketch sk; KllFloatsSketch sk2; byte[] compBytes; byte[] compBytes2; WritableMemory wmem; String s; - for (int i = 1; i <= 21; i++) { sk.update(i); } - println(sk.toString(true, true)); - - println("CASE 0: FLOAT_FULL_COMPACT"); + println("#### CASE: FLOAT FULL COMPACT"); + sk = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); println(s); assertEquals(compBytes, compBytes2); - println("CASE 1: FLOAT_EMPTY_COMPACT"); - sk = new KllFloatsSketch(20); + println("#### CASE: FLOAT EMPTY COMPACT"); + sk = new KllFloatsSketch(k); compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(compBytes, compBytes2); - println("CASE 4: FLOAT_SINGLE_COMPACT"); - sk = new KllFloatsSketch(20); + println("#### CASE: FLOAT SINGLE COMPACT"); + sk = new KllFloatsSketch(k); sk.update(1); compBytes = sk.toByteArray(); wmem = WritableMemory.writableWrap(compBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); compBytes2 = sk2.toByteArray(); wmem = WritableMemory.writableWrap(compBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); println(s); assertEquals(compBytes, compBytes2); } @Test public void checkMemoryToStringFloatUpdatable() { - KllFloatsSketch sk = new KllFloatsSketch(20); + int k = 20; //don't change this + KllFloatsSketch sk; KllFloatsSketch sk2; byte[] upBytes; byte[] upBytes2; WritableMemory wmem; String s; - for (int i = 1; i <= 21; i++) { sk.update(i); } - println(sk.toString(true, true)); - - println("CASE 0: FLOAT_UPDATABLE"); + println("#### CASE: FLOAT FULL UPDATABLE"); + sk = new KllFloatsSketch(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(upBytes, upBytes2); - println("CASE 1: FLOAT_UPDATABLE (empty)"); - sk = new KllFloatsSketch(20); + println("#### CASE: FLOAT EMPTY UPDATABLE"); + sk = new KllFloatsSketch(k); upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); println(s); assertEquals(upBytes, upBytes2); - println("CASE 4: FLOAT_UPDATABLE (single)"); - sk = new KllFloatsSketch(20); + println("#### CASE: FLOAT SINGLE UPDATABLE"); + sk = new KllFloatsSketch(k); sk.update(1); upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); println(s); sk2 = KllFloatsSketch.heapify(wmem); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); println(s); assertEquals(upBytes, upBytes2); } diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 3fef29680..8a587554c 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -173,7 +173,7 @@ under the License. - + From 60cbfaabd43ab33eb542148eb4dbd8fbef98a264 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 25 Mar 2022 14:30:54 -0700 Subject: [PATCH 11/31] Interim 11. Almost done. KllDirectDoublesSketch working and tested. Next: Create the KllDirectFloatsSketch and tests. 
--- .../kll/KllDirectDoublesSketch.java | 319 +++-- .../datasketches/kll/KllDirectSketch.java | 236 +++- .../datasketches/kll/KllDoublesSketch.java | 56 +- .../datasketches/kll/KllFloatsSketch.java | 171 +-- .../datasketches/kll/KllHeapSketch.java | 51 +- .../apache/datasketches/kll/KllHelper.java | 49 +- .../datasketches/kll/KllPreambleUtil.java | 2 - .../apache/datasketches/kll/KllSketch.java | 1181 ++++++++++------- .../datasketches/kll/MemoryValidate.java | 301 ++--- .../kll/KllDirectDoublesSketchTest.java | 595 +++++++++ .../kll/KllDoublesSketchTest.java | 32 +- .../kll/KllDoublesValidationTest.java | 2 +- .../datasketches/kll/KllFloatsSketchTest.java | 43 +- .../kll/KllFloatsValidationTest.java | 2 +- .../datasketches/kll/KllHelperTest.java | 80 +- .../datasketches/kll/MemoryValidateTest.java | 166 +++ .../kll/MiscDirectDoublesTest.java | 444 +++++++ .../datasketches/kll/MiscDoublesTest.java | 132 +- .../datasketches/kll/MiscFloatsTest.java | 44 +- 19 files changed, 2708 insertions(+), 1198 deletions(-) create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 368d2697e..99da03077 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -19,136 +19,233 @@ package org.apache.datasketches.kll; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import static java.lang.Math.max; +import static java.lang.Math.min; + +import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; /** - * Please refer to the documentation in the 
package-info:
- * {@link org.apache.datasketches.kll} + * This class implements an off-heap doubles KllSketch via a WritableMemory instance of the sketch. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

+ * + * @author Lee Rhodes, Kevin Lang */ -public class KllDirectDoublesSketch extends KllDirectSketch { +public final class KllDirectDoublesSketch extends KllDirectSketch { - - public KllDirectDoublesSketch(final WritableMemory wmem) { - super(wmem, SketchType.DOUBLE_SKETCH); + /** + * + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + */ + public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) { + super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr); } //public int getNumRetained() - @SuppressWarnings("unused") + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of splitPoints (values). + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *

If the sketch is empty this returns null.

+ * + * @param splitPoints an array of m unique, monotonically increasing double values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 double values, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. + */ public double[] getCDF(final double[] splitPoints) { - return null; + return getDoublesPmfOrCdf(splitPoints, true); + } + + /** + * Returns the max value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the max value of the stream + */ + public double getMaxValue() { + return getMaxDoubleValue(); + } + + /** + * Returns the min value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the min value of the stream + */ + public double getMinValue() { + return getMinDoubleValue(); + } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of splitPoints (values). + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function. + * + *

If the sketch is empty this returns null.

+ * + * @param splitPoints an array of m unique, monotonically increasing double values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right + * splitPoint, with the exception that the last interval will include maximum value. + */ + public double[] getPMF(final double[] splitPoints) { + return getDoublesPmfOrCdf(splitPoints, false); + } + + /** + * Returns an approximation to the value of the data item + * that would be preceded by the given fraction of a hypothetical sorted + * version of the input stream so far. + * + *

We note that this method has a fairly large overhead (microseconds instead of nanoseconds) + * so it should not be called multiple times to get different quantiles from the same + * sketch. Instead use getQuantiles(), which pays the overhead only once. + * + *

If the sketch is empty this returns NaN. + * + * @param fraction the specified fractional position in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * If fraction = 0.0, the true minimum value of the stream is returned. + * If fraction = 1.0, the true maximum value of the stream is returned. + * + * @return the approximation to the value at the given fraction + */ + public double getQuantile(final double fraction) { + return getDoublesQuantile(fraction); + } + + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public double getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + + /** + * This is a more efficient multiple-query version of getQuantile(). + * + *

This returns an array that could have been generated by using getQuantile() with many + * different fractional ranks, but doing so would be very inefficient. + * This method incurs the internal set-up overhead once and obtains multiple quantile values in + * a single query. It is strongly recommended that this method be used instead of multiple calls + * to getQuantile(). + * + *

If the sketch is empty this returns null. + * + * @param fractions given array of fractional positions in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * These fractions must be in the interval [0.0, 1.0], inclusive. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public double[] getQuantiles(final double[] fractions) { + return getDoublesQuantiles(fractions); + } + + /** + * This is also a more efficient multiple-query version of getQuantile() and allows the caller to + * specify the number of evenly spaced fractional ranks. + * + *

If the sketch is empty this returns null. + * + * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks. + * This must be a positive integer greater than 0. A value of 1 will return the min value. + * A value of 2 will return the min and the max value. A value of 3 will return the min, + * the median and the max value, etc. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public double[] getQuantiles(final int numEvenlySpaced) { + if (isEmpty()) { return null; } + return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); + } + + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public double getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + + /** + * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, + * inclusive. + * + *

The resulting approximation has a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *

If the sketch is empty this returns NaN.

+ * + * @param value to be ranked + * @return an approximate rank of the given value + */ + public double getRank(final double value) { + return getDoubleRank(value); + } + + /** + * @return the iterator for this class + */ + public KllDoublesSketchIterator iterator() { + return new KllDoublesSketchIterator(getDoubleItemsArray(), getLevelsArray(), getNumLevels()); + } + + /** + * Merges another sketch into this one. + * @param other sketch to merge into this one + */ + public void merge(final KllSketch other) { + if (!other.isDirect()) { kllSketchThrow(32); } + if (!other.isDoublesSketch()) { kllSketchThrow(33); } + mergeDoubleImpl(other); } @Override public byte[] toByteArray() { - return null; + return toCompactByteArrayImpl(); } @Override public String toString(final boolean withLevels, final boolean withData) { - return null; - } - - @Override - public byte[] toUpdatableByteArray() { - return null; - } - - @Override - double[] getDoubleItemsArray() { - return null; - } - - @Override - float[] getFloatItemsArray() { - return null; - } - - @Override - double getMaxDoubleValue() { - return 0; - } - - @Override - float getMaxFloatValue() { - return 0; - } - - @Override - double getMinDoubleValue() { - return 0; - } - - @Override - float getMinFloatValue() { - return 0; - } - - @Override - void setDoubleItemsArray(final double[] floatItems) { - } - - @Override - void setFloatItemsArray(final float[] floatItems) { - } - - @Override - void setMaxDoubleValue(final double value) { - } - - @Override - void setMaxFloatValue(final float value) { + return toStringImpl(withLevels, withData); } - @Override - void setMinDoubleValue(final double value) { - } - - @Override - void setMinFloatValue(final float value) { + public void update(final double value) { + updateDouble(value); } - @Override - void setLevelsArray(final int[] levelsArr) { - - } - - //int getDyMinK - - //int[] getLevelsArray - - //int getLevelsArrayAt() - - //int getNumLevels - - //void incN() - - //void 
incNumLevels() - - //boolean isLevelZeroSorted() - - //void setDyMinK() - - //void updateLevelsArray() - - //void setLevelsArrayAt() - - //void setLevelsArrayAtMinusEq() - - //void setLevelsArrayAtPlusEq() - - //void setLevelZeroSorted() - - //void setN() - - //void setNumLevels() - - //int getItemsDataStartBytes() - - //int getItemsArrLengthItems() - - //int getLevelsArrLengthints() - - } - diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index ed37ec853..be181249b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -28,56 +28,39 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.insertN; import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; -//import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.kll.KllPreambleUtil.Layout; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; + abstract class KllDirectSketch extends KllSketch { //All these members are constant for the life of this object. 
If the WritableMemory changes, it will require //rebuilding this class - final WritableMemory wmem; final Layout layout; final boolean updatable; - final int numLevels_; - final int memItemsCap; - final int sketchBytes; - final WritableMemory levelsWmem; - final WritableMemory minMaxWmem; - final WritableMemory itemsWmem; - DefaultMemoryRequestServer defaultMemReqSvr = null; - + WritableMemory levelsArrUpdatable; + WritableMemory minMaxArrUpdatable; + WritableMemory itemsArrUpdatable; /** * For the direct sketches it is important that the methods implemented here are designed to work dynamically * as the sketch grows off-heap. - * @param wmem the current WritableMemory * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory */ - KllDirectSketch(final WritableMemory wmem, final SketchType sketchType) { - super(sketchType); + KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr) { + super(sketchType, wmem, memReqSvr); final MemoryValidate memVal = new MemoryValidate(wmem); - this.wmem = wmem; layout = memVal.layout; updatable = memVal.updatable; - numLevels_ = memVal.numLevels; - memItemsCap = memVal.memItemsCap; - sketchBytes = memVal.sketchBytes; - levelsWmem = memVal.levelsWmem; - minMaxWmem = memVal.minMaxWmem; - itemsWmem = memVal.itemsWmem; - defaultMemReqSvr = updatable ? 
new DefaultMemoryRequestServer() : null; - } - - private static void kllDirectSketchThrow(final int errNo) { - String msg = ""; - switch (errNo) { - case 30: msg = "Sketch Memory is immutable, cannot write."; break; - } - throw new SketchesArgumentException(msg); + if (!updatable) { kllSketchThrow(31); } + levelsArrUpdatable = memVal.levelsArrUpdatable; + minMaxArrUpdatable = memVal.minMaxArrUpdatable; + itemsArrUpdatable = memVal.itemsArrUpdatable; } @Override @@ -86,65 +69,83 @@ public int getK() { } @Override - public long getN() { - return extractN(wmem); + double getMaxDoubleValue() { + return minMaxArrUpdatable.getDouble(Double.BYTES); } @Override - public boolean isUpdatable() { - return updatable; + float getMaxFloatValue() { + return minMaxArrUpdatable.getFloat(Float.BYTES); } @Override - public abstract byte[] toByteArray(); + double getMinDoubleValue() { + return minMaxArrUpdatable.getDouble(0); + } @Override - public abstract String toString(final boolean withLevels, final boolean withData); + float getMinFloatValue() { + return minMaxArrUpdatable.getFloat(0); + } @Override - public abstract byte[] toUpdatableByteArray(); + public long getN() { + return extractN(wmem); + } + + @Override + double[] getDoubleItemsArray() { + if (sketchType == FLOATS_SKETCH) { return null; } + final int items = getItemsArrLengthItems(); + final double[] itemsArr = new double[items]; + itemsArrUpdatable.getDoubleArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + double getDoubleItemsArrayAt(final int index) { + if (sketchType == FLOATS_SKETCH) { return Double.NaN; } + return itemsArrUpdatable.getDouble(index * Double.BYTES); + } @Override int getDyMinK() { return extractDyMinK(wmem); } - int getItemsArrLengthItems() { - if (updatable) { return getLevelsArray()[getNumLevels()]; } - return getNumRetained(); + @Override + float[] getFloatItemsArray() { + if (sketchType == DOUBLES_SKETCH) { return null; } + final int items = getItemsArrLengthItems(); + final 
float[] itemsArr = new float[items]; + itemsArrUpdatable.getFloatArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + float getFloatItemsArrayAt(final int index) { + if (sketchType == DOUBLES_SKETCH) { return Float.NaN; } + return itemsArrUpdatable.getFloat(index * Float.BYTES); } + int getItemsArrLengthItems() { + return getLevelsArray()[getNumLevels()]; + } @Override String getLayout() { return layout.toString(); } @Override int[] getLevelsArray() { - final int cap = getNumLevels() + 1; - final int[] myLevelsArr = new int[cap]; - levelsWmem.getIntArray(0, myLevelsArr, 0, cap); + final int numInts = getNumLevels() + 1; + final int[] myLevelsArr = new int[numInts]; + levelsArrUpdatable.getIntArray(0, myLevelsArr, 0, numInts); return myLevelsArr; } - /** - * For determining the actual length of the array as stored in Memory - * @return the actual length of the array as stored in Memory - */ - int getLevelsArrLengthInts() { - final int memLengthInts; - - switch (layout) { - case FLOAT_EMPTY_COMPACT: - case DOUBLE_EMPTY_COMPACT: - case FLOAT_SINGLE_COMPACT: - case DOUBLE_SINGLE_COMPACT: { memLengthInts = 0; break; } - case FLOAT_FULL_COMPACT: - case DOUBLE_FULL_COMPACT: { memLengthInts = getNumLevels(); break; } - case FLOAT_UPDATABLE: - case DOUBLE_UPDATABLE: { memLengthInts = getNumLevels() + 1; break; } - default: return 0; //can't get here - } - return memLengthInts; + @Override + int getLevelsArrayAt(final int index) { + return levelsArrUpdatable.getInt(index * Integer.BYTES); } @Override @@ -154,14 +155,14 @@ int getNumLevels() { @Override void incN() { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } long n = extractN(wmem); insertN(wmem, ++n); } @Override void incNumLevels() { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } int numLevels = extractNumLevels(wmem); insertNumLevels(wmem, ++numLevels); } @@ -171,34 +172,123 @@ boolean isLevelZeroSorted() { return 
extractLevelZeroSortedFlag(wmem); } + @Override + void setDoubleItemsArray(final double[] doubleItems) { + if (!updatable) { kllSketchThrow(30); } + itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); + } + + @Override + void setDoubleItemsArrayAt(final int index, final double value) { + itemsArrUpdatable.putDouble(index * Double.BYTES, value); + } + @Override void setDyMinK(final int dyMinK) { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } insertDyMinK(wmem, dyMinK); } @Override - void updateLevelsArray(final int[] levels) { - if (!updatable) { kllDirectSketchThrow(30); } - levelsWmem.putIntArray(0, levels, 0, levels.length); + void setFloatItemsArray(final float[] floatItems) { + if (!updatable) { kllSketchThrow(30); } + itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); + } + + @Override + void setFloatItemsArrayAt(final int index, final float value) { + itemsArrUpdatable.putFloat(index * Float.BYTES, value); + } + + @Override + void setItemsArrayUpdatable(final WritableMemory itemsMem) { + itemsArrUpdatable = itemsMem; + } + + @Override + void setLevelsArray(final int[] levelsArr) { + if (!updatable) { kllSketchThrow(30); } + levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); + } + + @Override + void setLevelsArrayAt(final int index, final int value) { + levelsArrUpdatable.putInt(index * Integer.BYTES, value); + } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + final int offset = index * Integer.BYTES; + final int curV = levelsArrUpdatable.getInt(offset); + levelsArrUpdatable.putInt(offset, curV - minusEq); + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + final int offset = index * Integer.BYTES; + final int curV = levelsArrUpdatable.getInt(offset); + levelsArrUpdatable.putInt(offset, curV + plusEq); + } + + @Override + void setLevelsArrayUpdatable(final WritableMemory levelsMem) { + 
levelsArrUpdatable = levelsMem; } @Override void setLevelZeroSorted(final boolean sorted) { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } insertLevelZeroSortedFlag(wmem, sorted); } + @Override + void setMaxDoubleValue(final double value) { + if (!updatable) { kllSketchThrow(30); } + minMaxArrUpdatable.putDouble(Double.BYTES, value); + } + + @Override + void setMaxFloatValue(final float value) { + if (!updatable) { kllSketchThrow(30); } + minMaxArrUpdatable.putFloat(Float.BYTES, value); + } + + @Override + void setMinDoubleValue(final double value) { + if (!updatable) { kllSketchThrow(30); } + minMaxArrUpdatable.putDouble(0, value); + } + + @Override + void setMinFloatValue(final float value) { + if (!updatable) { kllSketchThrow(30); } + minMaxArrUpdatable.putFloat(0, value); + } + + @Override + void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { + minMaxArrUpdatable = minMaxMem; + } + @Override void setN(final long n) { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } insertN(wmem, n); } @Override void setNumLevels(final int numLevels) { - if (!updatable) { kllDirectSketchThrow(30); } + if (!updatable) { kllSketchThrow(30); } insertNumLevels(wmem, numLevels); } + + @Override + public byte[] toUpdatableByteArray() { + final int bytes = (int) wmem.getCapacity(); + final byte[] byteArr = new byte[bytes]; + wmem.getByteArray(0, byteArr, 0, bytes); + return byteArr; + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 997521ea5..6ccd34ff9 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -24,12 +24,15 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import org.apache.datasketches.SketchesArgumentException; -import 
org.apache.datasketches.kll.KllPreambleUtil.SketchType; import org.apache.datasketches.memory.Memory; /** - * Please refer to the documentation in the package-info:
- * {@link org.apache.datasketches.kll} + * This class implements an on-heap doubles KllSketch. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

+ * + * @author Lee Rhodes, Kevin Lang */ public final class KllDoublesSketch extends KllHeapSketch { @@ -52,7 +55,7 @@ public KllDoublesSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllDoublesSketch(final int k) { - super(k, SketchType.DOUBLE_SKETCH); + super(k, SketchType.DOUBLES_SKETCH); doubleItems_ = new double[k]; minDoubleValue_ = Double.NaN; maxDoubleValue_ = Double.NaN; @@ -64,7 +67,7 @@ public KllDoublesSketch(final int k) { * @param memVal the MemoryCheck object */ private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { - super(memVal.k, SketchType.DOUBLE_SKETCH); + super(memVal.k, SketchType.DOUBLES_SKETCH); buildHeapKllSketchFromMemory(memVal); } @@ -263,23 +266,10 @@ public KllDoublesSketchIterator iterator() { * Merges another sketch into this one. * @param other sketch to merge into this one */ - public void merge(final KllDoublesSketch other) { - mergeDouble(other); - } - - @Override - public byte[] toByteArray() { - return toGenericCompactByteArray(); - } - - @Override - public String toString(final boolean withLevels, final boolean withData) { - return toGenericString(withLevels, withData); - } - - @Override - public byte[] toUpdatableByteArray() { - return toGenericUpdatableByteArray(); + public void merge(final KllSketch other) { + if (other.isDirect()) { kllSketchThrow(35); } + if (!other.isDoublesSketch()) { kllSketchThrow(33); } + mergeDoubleImpl(other); } /** @@ -294,12 +284,14 @@ public void update(final double value) { //possibly move proxy @Override //Used internally double[] getDoubleItemsArray() { return doubleItems_; } + @Override + double getDoubleItemsArrayAt(final int index) { return doubleItems_[index]; } + @Override //Dummy float[] getFloatItemsArray() { return null; } - double[] getItems() { - return getDoubleItemsArray(); - } + @Override //Dummy + float getFloatItemsArrayAt(final int index) { return Float.NaN; } @Override //Used internally double 
getMaxDoubleValue() { return maxDoubleValue_; } @@ -316,9 +308,15 @@ public void update(final double value) { //possibly move proxy @Override //Used internally void setDoubleItemsArray(final double[] doubleItems) { doubleItems_ = doubleItems; } + @Override //Used internally + void setDoubleItemsArrayAt(final int index, final double value) { doubleItems_[index] = value; } + @Override //Dummy void setFloatItemsArray(final float[] floatItems) { } + @Override //Dummy + void setFloatItemsArrayAt(final int index, final float value) { } + @Override //Used internally void setMaxDoubleValue(final double value) { maxDoubleValue_ = value; } @@ -328,15 +326,7 @@ void setMaxFloatValue(final float value) { } @Override //Used internally void setMinDoubleValue(final double value) { minDoubleValue_ = value; } - // for testing - @Override //Dummy void setMinFloatValue(final float value) { } - @Override - void updateLevelsArray(final int[] levels) { - // TODO Auto-generated method stub - - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index f4fc507c0..b4955e306 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,15 +21,20 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.Memory; /** - * Please refer to the documentation in the package-info:
- * {@link org.apache.datasketches.kll} + * This class implements an on-heap floats KllSketch. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

+ * + * @author Lee Rhodes, Kevin Lang */ public final class KllFloatsSketch extends KllHeapSketch { @@ -52,7 +57,7 @@ public KllFloatsSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllFloatsSketch(final int k) { - super(k, SketchType.FLOAT_SKETCH); + super(k, SketchType.FLOATS_SKETCH); floatItems_ = new float[k]; minFloatValue_ = Float.NaN; maxFloatValue_ = Float.NaN; @@ -64,7 +69,7 @@ public KllFloatsSketch(final int k) { * @param memVal the MemoryCheck object */ private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { - super(memVal.k, SketchType.FLOAT_SKETCH); + super(memVal.k, SketchType.FLOATS_SKETCH); buildHeapKllSketchFromMemory(memVal); } @@ -85,6 +90,21 @@ public static KllFloatsSketch heapify(final Memory mem) { return new KllFloatsSketch(mem, memVal); } + /** + * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter + * k and stream length. This method can be used if allocation of storage + * is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. + */ + @Deprecated + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); + return lvlStats.getCompactBytes(); + } + /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). 
@@ -110,37 +130,13 @@ public double[] getCDF(final float[] splitPoints) { return getFloatsPmfOrCdf(splitPoints, true); } - @Override //Used internally - float[] getFloatItemsArray() { return floatItems_; } - - @Override //Used internally - void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } - - @Override //Dummy - double[] getDoubleItemsArray() { return null; } - - @Override //Dummy - void setDoubleItemsArray(final double[] doubleItems) { } - - @Override //Dummy - double getMaxDoubleValue() { return maxFloatValue_; } - - @Override //Used internally - float getMaxFloatValue() { return maxFloatValue_; } - /** * Returns the max value of the stream. * If the sketch is empty this returns NaN. * * @return the max value of the stream */ - public float getMaxValue() { return maxFloatValue_; } - - @Override //Dummy - double getMinDoubleValue() { return minFloatValue_; } - - @Override //Used internally - float getMinFloatValue() { return minFloatValue_; } + public float getMaxValue() { return getMaxFloatValue(); } /** * Returns the min value of the stream. 
@@ -148,19 +144,7 @@ void setDoubleItemsArray(final double[] doubleItems) { } * * @return the min value of the stream */ - public float getMinValue() { return minFloatValue_; } - - @Override //Dummy - void setMaxDoubleValue(final double value) { } - - @Override //Used internally - void setMaxFloatValue(final float value) { maxFloatValue_ = value; } - - @Override //Dummy - void setMinDoubleValue(final double value) { } - - @Override //Used internally - void setMinFloatValue(final float value) { minFloatValue_ = value; } + public float getMinValue() { return getMinFloatValue(); } /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream @@ -209,6 +193,17 @@ public float getQuantile(final double fraction) { return getFloatsQuantile(fraction); } + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + /** * This is a more efficient multiple-query version of getQuantile(). * @@ -261,17 +256,6 @@ public float getQuantileUpperBound(final double fraction) { return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); } - /** - * Gets the lower bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. - * @param fraction the given normalized rank as a fraction - * @return the lower bound of the value interval in which the true quantile of the given rank - * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
- */ - public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); - } - /** * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, * inclusive. @@ -288,6 +272,16 @@ public double getRank(final float value) { return getFloatRank(value); } + /** + * Returns the current number of compact bytes this FloatsSketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); + } + /** * @return the iterator for this class */ @@ -300,22 +294,9 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - mergeFloat(other); - } - - @Override - public byte[] toByteArray() { - return toGenericCompactByteArray(); - } - - @Override - public byte[] toUpdatableByteArray() { - return toGenericUpdatableByteArray(); - } - - @Override - public String toString(final boolean withLevels, final boolean withData) { - return toGenericString(withLevels, withData); + if (other.isDirect()) { kllSketchThrow(35); } + if (!other.isFloatsSketch()) { kllSketchThrow(34); } + mergeFloatImpl(other); } /** @@ -327,16 +308,52 @@ public void update(final float value) { updateFloat(value); } - // for testing + @Override //Dummy + double[] getDoubleItemsArray() { return null; } + + @Override //Dummy + double getDoubleItemsArrayAt(final int index) { return Double.NaN; } - float[] getItems() { - return getFloatItemsArray(); - } + @Override //Used internally + float[] getFloatItemsArray() { return floatItems_; } + + @Override //Used internally + float getFloatItemsArrayAt(final int index) { return floatItems_[index]; } + + @Override //Dummy + double getMaxDoubleValue() { return 
maxFloatValue_; } + + @Override //Used internally + float getMaxFloatValue() { return maxFloatValue_; } + + @Override //Dummy + double getMinDoubleValue() { return minFloatValue_; } + + @Override //Used internally + float getMinFloatValue() { return minFloatValue_; } + + @Override //Dummy + void setDoubleItemsArray(final double[] doubleItems) { } + + @Override //Dummy + void setDoubleItemsArrayAt(final int index, final double value) { } + + @Override //Used internally + void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } @Override - void updateLevelsArray(final int[] levels) { - // TODO Auto-generated method stub + void setFloatItemsArrayAt(final int index, final float value) { floatItems_[index] = value; } - } + @Override //Dummy + void setMaxDoubleValue(final double value) { } + + @Override //Used internally + void setMaxFloatValue(final float value) { maxFloatValue_ = value; } + + @Override //Dummy + void setMinDoubleValue(final double value) { } + + @Override //Used internally + void setMinFloatValue(final float value) { minFloatValue_ = value; } } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 4491f9ffa..f871ac12f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -19,7 +19,7 @@ package org.apache.datasketches.kll; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.WritableMemory; abstract class KllHeapSketch extends KllSketch { @@ -51,9 +51,10 @@ abstract class KllHeapSketch extends KllSketch { /** * Heap constructor. * @param k configured size of sketch. 
Range [m, 2^16] + * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH */ KllHeapSketch(final int k, final SketchType sketchType) { - super(sketchType); + super(sketchType, null, null); KllHelper.checkK(k); this.k = k; n_ = 0; @@ -63,27 +64,30 @@ abstract class KllHeapSketch extends KllSketch { isLevelZeroSorted_ = false; } + @Override + int getDyMinK() { + return dyMinK_; + } + @Override public int getK() { return k; } @Override - public long getN() { - return n_; - } + String getLayout() { return "HEAP"; } @Override - int getDyMinK() { - return dyMinK_; + int[] getLevelsArray() { + return levels_; } @Override - String getLayout() { return "HEAP"; } + int getLevelsArrayAt(final int index) { return levels_[index]; } @Override - int[] getLevelsArray() { - return levels_; + public long getN() { + return n_; } @Override @@ -106,26 +110,43 @@ boolean isLevelZeroSorted() { return isLevelZeroSorted_; } - @Override - public boolean isUpdatable() { - return true; - } - @Override void setDyMinK(final int dyMinK) { dyMinK_ = dyMinK; } + @Override + void setItemsArrayUpdatable(final WritableMemory itemsMem) { } //dummy + @Override void setLevelsArray(final int[] levelsArr) { levels_ = levelsArr; } + @Override + void setLevelsArrayAt(final int index, final int value) { levels_[index] = value; } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + levels_[index] -= minusEq; + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + levels_[index] += plusEq; + } + + @Override + void setLevelsArrayUpdatable(final WritableMemory levelsMem) { } //dummy + @Override void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted_ = sorted; } + @Override + void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { } //dummy + @Override void setN(final long n) { n_ = n; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 
1ca58638c..2192d86af 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -25,26 +25,11 @@ import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.kll.KllSketch.SketchType; class KllHelper { static final String LS = System.getProperty("line.separator"); - /** - * Copy the old array into a new larger array. - * The extra space is at the top. - * @param oldArr the given old array with data - * @param newLen the new length larger than the oldArr.length. - * @return the new array - */ - static int[] growIntArray(final int[] oldArr, final int newLen) { - final int oldLen = oldArr.length; - assert newLen > oldLen; - final int[] newArr = new int[newLen]; - System.arraycopy(oldArr, 0, newArr, 0, oldLen); - return newArr; - } - /** * Returns very conservative upper bound of the number of levels based on n. 
* @param n the length of the stream @@ -72,8 +57,8 @@ static LevelStats getLevelStats(final int k, final int m, final int numLevels, int cumN = 0; int cumCap = 0; if (printDetail) { - System.out.println("Total Levels: " + numLevels); - System.out.printf("%6s%12s%8s%16s\n", "Level","Wt","Cap","N"); + println("Total Levels: " + numLevels); + printf("%6s%12s%8s%16s\n", "Level","Wt","Cap","N"); } for (int level = 0; level < numLevels; level++) { final long levelCap = levelCapacity(k, numLevels, level, m); @@ -81,19 +66,19 @@ static LevelStats getLevelStats(final int k, final int m, final int numLevels, cumN += maxNAtLevel; cumCap += (int)levelCap; if (printDetail) { - System.out.printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); + printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); } } final int compactBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, false); final int updatableBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, true); if (printDetail) { - System.out.printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); - System.out.println(" COMPACT BYTES: " + compactBytes); - System.out.println(" UPDATABLE BYTES: " + updatableBytes); - System.out.println(""); + printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); + println(" COMPACT BYTES: " + compactBytes); + println(" UPDATABLE BYTES: " + updatableBytes); + println(""); } final LevelStats lvlStats = new LevelStats(cumN, numLevels, cumCap, compactBytes, updatableBytes); - if (printSummary) { System.out.println(lvlStats.toString()); } + if (printSummary) { println(lvlStats.toString()); } return lvlStats; } @@ -130,6 +115,8 @@ public String toString() { public int getNumLevels() { return numLevels; } public int getMaxCap() { return maxCap; } + + public int getUpdatableBytes() { return updatableBytes; } } /** @@ -276,6 +263,20 @@ static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) return levels[numLevels] - 
levels[1]; } + /** + * Println Object o + * @param o object to print + */ + static void println(final Object o) { + //System.out.println(o.toString()); + } + /** + * @param fmt format + * @param args arguments + */ + static void printf(final String fmt, final Object ... args) { + //System.out.printf(fmt, args); //Disable + } } diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index df7233040..8b553d035 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -166,8 +166,6 @@ enum Layout { DOUBLE_FULL_COMPACT, DOUBLE_EMPTY_COMPACT, DOUBLE_SINGLE_COMPACT, FLOAT_UPDATABLE, DOUBLE_UPDATABLE } - enum SketchType { FLOAT_SKETCH, DOUBLE_SKETCH } - /** * Returns a human readable string summary of the internal state of the given byte array. * Used primarily in testing. diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 5111d31a8..fb4252cd8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -32,29 +32,29 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.DY_MIN_K_SHORT_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.FAMILY_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.FLAGS_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.K_SHORT_ADR; -import static 
org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; -import static org.apache.datasketches.kll.KllPreambleUtil.M_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.NUM_LEVELS_BYTE_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_BYTE_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; +import static org.apache.datasketches.kll.KllPreambleUtil.insertDoubleSketchFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertEmptyFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.insertK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.insertM; +import static org.apache.datasketches.kll.KllPreambleUtil.insertN; +import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; 
+import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.insertSingleItemFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.insertUpdatableFlag; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import java.util.Arrays; import java.util.Random; @@ -63,25 +63,46 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllHelper.LevelStats; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; - /** - * These methods are used by both direct and on-heap as well as Double and Float type sketches. + * This class is the root of the KLL sketch class hierarchy. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

* - * @author lrhodes + * @author Lee Rhodes, Kevin Lang */ -abstract class KllSketch { +public abstract class KllSketch { static final Random random = new Random(); static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch - static SketchType sketchType; + SketchType sketchType; + WritableMemory wmem; + MemoryRequestServer memReqSvr; + boolean direct; - KllSketch(final SketchType sketchType) { - KllSketch.sketchType = sketchType; + /** + * + * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH + * @param wmem the current WritableMemory or null + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + */ + KllSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr) { + this.sketchType = sketchType; + this.wmem = wmem; + if (wmem != null) { + this.direct = true; + this.memReqSvr = memReqSvr; + } else { + this.direct = false; + this.memReqSvr = null; + } } +public enum SketchType { FLOATS_SKETCH, DOUBLES_SKETCH } + //Static methods /** @@ -91,8 +112,10 @@ abstract class KllSketch { * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function * returns the value of k assuming the input epsilon is the desired "single-sided" * epsilon for all the other queries. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

* @return the value of k given a value of epsilon. - * @see KllDoublesSketch */ // constants were derived as the best fit to 99 percentile empirically measured max error in // thousands of trials @@ -109,15 +132,17 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { } /** - * Returns upper bound on the compact serialized size of a sketch given a parameter k and stream - * length. This method can be used if allocation of storage is necessary beforehand. + * Returns upper bound on the serialized size of a KllSketch given the following parameters. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length - * @return upper bound on the compact serialized size + * @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH + * @param updatable true if updatable form, otherwise the standard compact form. + * @return upper bound on the serialized size of a KllSketch. */ - public static int getMaxSerializedSizeBytes(final int k, final long n) { + public static int getMaxSerializedSizeBytes(final int k, final long n, + final SketchType sketchType, final boolean updatable) { final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, sketchType); - return lvlStats.getCompactBytes(); + return updatable ? lvlStats.getUpdatableBytes() : lvlStats.getCompactBytes(); } /** @@ -133,19 +158,19 @@ public static double getNormalizedRankError(final int k, final boolean pmf) { return KllHelper.getNormalizedRankError(k, pmf); } - static int getSerializedSizeBytes(final int numLevels, final int numRetained, final SketchType sketchType, - final boolean updatable) { + static int getSerializedSizeBytes(final int numLevels, final int numRetained, + final SketchType sketchType, final boolean updatable) { int levelsBytes = 0; if (!updatable) { if (numRetained == 0) { return N_LONG_ADR; } if (numRetained == 1) { - return DATA_START_ADR_SINGLE_ITEM + (sketchType == DOUBLE_SKETCH ? 
Double.BYTES : Float.BYTES); + return DATA_START_ADR_SINGLE_ITEM + (sketchType == DOUBLES_SKETCH ? Double.BYTES : Float.BYTES); } levelsBytes = numLevels * Integer.BYTES; } else { levelsBytes = (numLevels + 1) * Integer.BYTES; } - if (sketchType == DOUBLE_SKETCH) { + if (sketchType == DOUBLES_SKETCH) { return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; //+2 is for min & max } else { return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; @@ -156,6 +181,20 @@ final static boolean isCompatible() { return compatible; } + final static void kllSketchThrow(final int errNo) { + String msg = ""; + switch (errNo) { + case 30: msg = "Given sketch Memory is immutable, cannot write."; break; + case 31: msg = "Given sketch Memory is immutable and incompatible."; break; + case 32: msg = "Given sketch must be of type Direct."; break; + case 33: msg = "Given sketch must be of type Double."; break; + case 34: msg = "Given sketch must be of type Float."; break; + case 35: msg = "Given sketch must not be of type Direct."; break; + } + throw new SketchesArgumentException(msg); + } + + //Public Non-static methods /** @@ -195,10 +234,12 @@ public final int getCurrentUpdatableSerializedSizeBytes() { * thousands of trials * @return if pmf is true, returns the normalized rank error for the getPMF() function. * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @see KllDoublesSketch + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

*/ public final double getNormalizedRankError(final boolean pmf) { - return KllHelper.getNormalizedRankError(getDyMinK(), pmf); + return getNormalizedRankError(getDyMinK(), pmf); } /** @@ -210,13 +251,16 @@ public final int getNumRetained() { } /** - * Returns the number of bytes this sketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use getCurrentCompactSerializedSizeBytes() + * This returns the WritableMemory for Direct type sketches, + * otherwise returns null. + * @return the WritableMemory for Direct type sketches, otherwise null. */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); + public WritableMemory getWritableMemory() { + return wmem; + } + + public final boolean isDirect() { + return direct; } /** @@ -235,17 +279,17 @@ public final boolean isEstimationMode() { return getNumLevels() > 1; } - public abstract boolean isUpdatable(); - /** * Returns serialized sketch in a compact byte array form. * @return serialized sketch in a compact byte array form. */ - public abstract byte[] toByteArray(); + public byte[] toByteArray() { + return toCompactByteArrayImpl(); + } @Override - public final String toString() { - return toString(false, false); + public final String toString() { //TODO set back to false, false + return toString(true, true); } /** @@ -254,18 +298,22 @@ public final String toString() { * @param withData if true include sketch data * @return string representation of sketch summary */ - public abstract String toString(final boolean withLevels, final boolean withData); + public String toString(final boolean withLevels, final boolean withData) { + return toStringImpl(withLevels, withData); + } /** * Returns serialized sketch in an updatable byte array form. * @return serialized sketch in an updatable byte array form. 
*/ - public abstract byte[] toUpdatableByteArray(); + public byte[] toUpdatableByteArray() { + return toUpdatableByteArrayImpl(); + } //package-private non-static methods final void buildHeapKllSketchFromMemory(final MemoryValidate memVal) { - final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final boolean doubleType = (sketchType == DOUBLES_SKETCH); final boolean updatable = memVal.updatable; setLevelZeroSorted(memVal.level0Sorted); setN(memVal.n); @@ -274,39 +322,39 @@ final void buildHeapKllSketchFromMemory(final MemoryValidate memVal) { final int[] myLevelsArr = new int[getNumLevels() + 1]; if (updatable) { - memVal.levelsWmem.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); + memVal.levelsArrUpdatable.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); setLevelsArray(myLevelsArr); if (doubleType) { - setMinDoubleValue(memVal.minMaxWmem.getDouble(0)); - setMaxDoubleValue(memVal.minMaxWmem.getDouble(Double.BYTES)); - final int itemsCap = (int)memVal.itemsWmem.getCapacity() / Double.BYTES; + setMinDoubleValue(memVal.minMaxArrUpdatable.getDouble(0)); + setMaxDoubleValue(memVal.minMaxArrUpdatable.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Double.BYTES; final double[] myItemsArr = new double[itemsCap]; - memVal.itemsWmem.getDoubleArray(0, myItemsArr, 0, itemsCap); + memVal.itemsArrUpdatable.getDoubleArray(0, myItemsArr, 0, itemsCap); setDoubleItemsArray(myItemsArr); } else { //float - setMinFloatValue(memVal.minMaxWmem.getFloat(0)); - setMaxFloatValue(memVal.minMaxWmem.getFloat(Float.BYTES)); - final int itemsCap = (int)memVal.itemsWmem.getCapacity() / Float.BYTES; + setMinFloatValue(memVal.minMaxArrUpdatable.getFloat(0)); + setMaxFloatValue(memVal.minMaxArrUpdatable.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Float.BYTES; final float[] myItemsArr = new float[itemsCap]; - memVal.itemsWmem.getFloatArray(0, myItemsArr, 0, itemsCap); + 
memVal.itemsArrUpdatable.getFloatArray(0, myItemsArr, 0, itemsCap); setFloatItemsArray(myItemsArr); } } else { //compact - memVal.levelsMem.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); + memVal.levelsArrCompact.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); setLevelsArray(myLevelsArr); if (doubleType) { - setMinDoubleValue(memVal.minMaxMem.getDouble(0)); - setMaxDoubleValue(memVal.minMaxMem.getDouble(Double.BYTES)); - final int itemsCap = (int)memVal.itemsMem.getCapacity() / Double.BYTES; + setMinDoubleValue(memVal.minMaxArrCompact.getDouble(0)); + setMaxDoubleValue(memVal.minMaxArrCompact.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Double.BYTES; final double[] myItemsArr = new double[itemsCap]; - memVal.itemsMem.getDoubleArray(0, myItemsArr, 0, itemsCap); + memVal.itemsArrCompact.getDoubleArray(0, myItemsArr, 0, itemsCap); setDoubleItemsArray(myItemsArr); } else { //float - setMinFloatValue(memVal.minMaxMem.getFloat(0)); - setMaxFloatValue(memVal.minMaxMem.getFloat(Float.BYTES)); - final int itemsCap = (int)memVal.itemsMem.getCapacity() / Float.BYTES; + setMinFloatValue(memVal.minMaxArrCompact.getFloat(0)); + setMaxFloatValue(memVal.minMaxArrCompact.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Float.BYTES; final float[] myItemsArr = new float[itemsCap]; - memVal.itemsMem.getFloatArray(0, myItemsArr, 0, itemsCap); + memVal.itemsArrCompact.getFloatArray(0, myItemsArr, 0, itemsCap); setFloatItemsArray(myItemsArr); } } @@ -506,10 +554,16 @@ final float[] getFloatsQuantiles(final double[] fractions) { return quantiles; } + abstract double getDoubleItemsArrayAt(int index); + + abstract float getFloatItemsArrayAt(int index); + abstract String getLayout(); abstract int[] getLevelsArray(); + abstract int getLevelsArrayAt(int index); + abstract double getMaxDoubleValue(); abstract float getMaxFloatValue(); @@ -524,10 +578,73 @@ final float[] getFloatsQuantiles(final 
double[] fractions) { abstract void incNumLevels(); + boolean isDoublesSketch() { return sketchType == DOUBLES_SKETCH; } + + boolean isFloatsSketch() { return sketchType != DOUBLES_SKETCH; } + abstract boolean isLevelZeroSorted(); - final void mergeDouble(final KllDoublesSketch other) { - if (other == null || other.isEmpty()) { return; } + /** + * This method is for direct Double and Float sketches only and does the following: + *
  • Allocates a new WritableMemory of the required size
  • + *
  • Copies over the preamble as is (20 or 24 bytes)
  • + *
  • Creates new memory regions for Levels Array, Min/Max Array, Items Array, but + * does not fill them. They may contain garbage.
  • + *
+ * The caller is responsible for filling these regions and updating the preamble. + * @param sketch The current sketch that needs to be expanded. + * @param newLevelsArrLen the element length of the new Levels array. + * @param newItemsArrLen the element length of the new Items array. + * @return the new expanded memory with preamble. + */ + static WritableMemory memorySpaceMgmt( + final KllSketch sketch, + final int newLevelsArrLen, + final int newItemsArrLen) { + final SketchType sketchType = sketch.sketchType; + final WritableMemory oldWmem = sketch.wmem; + final int typeBytes; + final int startAdr; + + if (sketchType == DOUBLES_SKETCH) { + typeBytes = Double.BYTES; + startAdr = DATA_START_ADR_DOUBLE; + } else { + typeBytes = Float.BYTES; + startAdr = DATA_START_ADR_FLOAT; + } + int totalSketchBytes = startAdr; + totalSketchBytes += newLevelsArrLen * Integer.BYTES; + totalSketchBytes += 2 * typeBytes; + totalSketchBytes += newItemsArrLen * typeBytes; + final WritableMemory newWmem; + + if (totalSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory + newWmem = sketch.memReqSvr.request(oldWmem, totalSketchBytes); + oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble + } + else { //Expand in current memory + newWmem = oldWmem; + } + + int offset = startAdr; + //LEVELS ARR + int lengthBytes = newLevelsArrLen * Integer.BYTES; + sketch.setLevelsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); // + offset += lengthBytes; + //MIN MAX ARR + lengthBytes = 2 * typeBytes; + sketch.setMinMaxArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); + offset += lengthBytes; + //ITEMS ARR + lengthBytes = newItemsArrLen * typeBytes; + sketch.setItemsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); + assert totalSketchBytes <= newWmem.getCapacity(); + return newWmem; + } + + final void mergeDoubleImpl(final KllSketch other) { + if (other.isEmpty()) { return; } final long finalN = getN() + other.getN(); //update this sketch with 
level0 items from the other sketch final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); @@ -535,26 +652,95 @@ final void mergeDouble(final KllDoublesSketch other) { for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { updateDouble(otherDoubleItemsArr[i]); } - if (other.getNumLevels() >= 2) { //now merge other levels if they exist - mergeDoubleHigherLevels(other, finalN); - } - //update min, max values, n + // after the level 0 update, we capture the key mutable variables final double myMin = getMinDoubleValue(); - final double otherMin = other.getMinDoubleValue(); final double myMax = getMaxDoubleValue(); - final double otherMax = other.getMaxDoubleValue(); - if (Double.isNaN(myMin) || otherMin < myMin) { setMinDoubleValue(otherMin); } - if (Double.isNaN(myMax) || otherMax > myMax) { setMaxDoubleValue(otherMax); } + final int myDyMinK = getDyMinK(); + + final int myCurNumLevels = getNumLevels(); + final int[] myCurLevelsArr = getLevelsArray(); + final double[] myCurDoubleItemsArr = getDoubleItemsArray(); + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final double[] myNewDoubleItemsArr; + + if (other.getNumLevels() > 1) { //now merge other levels if they exist + final int tmpSpaceNeeded = getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), otherLevelsArr); + final double[] workbuf = new double[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + + populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output + final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int targetItemCount = result[1]; 
//was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for the "self" sketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; //was finalNumLevels + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY (was newbuf) + myNewDoubleItemsArr = (targetItemCount == myCurDoubleItemsArr.length) + ? myCurDoubleItemsArr + : new double[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + //shift the new items array + System.arraycopy(workbuf, outlevels[0], myNewDoubleItemsArr, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (direct) { + wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewDoubleItemsArr.length); + } //End direct + + } else { + myNewNumLevels = myCurNumLevels; + myNewLevelsArr = myCurLevelsArr; + myNewDoubleItemsArr = myCurDoubleItemsArr; + } + + //Update Preamble: setN(finalN); + if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
+ setDyMinK(min(myDyMinK, other.getDyMinK())); + } + //Update min, max values + final double otherMin = other.getMinDoubleValue(); + final double otherMax = other.getMaxDoubleValue(); + if (Double.isNaN(myMin) || otherMin <= myMin) { setMinDoubleValue(otherMin); } + if (Double.isNaN(myMax) || otherMax >= myMax) { setMaxDoubleValue(otherMax); } + + //Update numLevels, levelsArray, items + setNumLevels(myNewNumLevels); + setLevelsArray(myNewLevelsArr); + setDoubleItemsArray(myNewDoubleItemsArr); assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - if (other.isEstimationMode()) { - setDyMinK(min(getDyMinK(), other.getDyMinK())); - } } - final void mergeFloat(final KllFloatsSketch other) { - if (other == null || other.isEmpty()) { return; } + final void mergeFloatImpl(final KllSketch other) { + if (other.isEmpty()) { return; } final long finalN = getN() + other.getN(); //update this sketch with level0 items from the other sketch final float[] otherFloatItemsArr = other.getFloatItemsArray(); @@ -562,35 +748,115 @@ final void mergeFloat(final KllFloatsSketch other) { for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { updateFloat(otherFloatItemsArr[i]); } - if (other.getNumLevels() >= 2) { //now merge other levels if they exist - mergeFloatHigherLevels(other, finalN); - } - //update min, max values, n + // after the level 0 update, we capture the key mutable variables final float myMin = getMinFloatValue(); - final float otherMin = other.getMinFloatValue(); final float myMax = getMaxFloatValue(); + final int myDyMinK = getDyMinK(); + + final int myCurNumLevels = getNumLevels(); + final int[] myCurLevelsArr = getLevelsArray(); + final float[] myCurFloatItemsArr = getFloatItemsArray(); + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final float[] myNewFloatItemsArr; + + if (other.getNumLevels() > 1) { //now merge other levels if they exist + final int tmpSpaceNeeded = getNumRetained() + + 
KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), otherLevelsArr); + final float[] workbuf = new float[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + + populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output + final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, + worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for the "self" sketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; //was finalNumLevels + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY (was newbuf) + myNewFloatItemsArr = (targetItemCount == myCurFloatItemsArr.length) + ? 
myCurFloatItemsArr + : new float[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + //shift the new items array + System.arraycopy(workbuf, outlevels[0], myNewFloatItemsArr, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (direct) { + wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewFloatItemsArr.length); + } //End direct + + } else { + myNewNumLevels = myCurNumLevels; + myNewLevelsArr = myCurLevelsArr; + myNewFloatItemsArr = myCurFloatItemsArr; + } + + //Update Preamble: + setN(finalN); + if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
+ setDyMinK(min(myDyMinK, other.getDyMinK())); + } + + //Update min, max values + final float otherMin = other.getMinFloatValue(); final float otherMax = other.getMaxFloatValue(); if (Float.isNaN(myMin) || otherMin < myMin) { setMinFloatValue(otherMin); } if (Float.isNaN(myMax) || otherMax > myMax) { setMaxFloatValue(otherMax); } - setN(finalN); + //Update numLevels, levelsArray, items + setNumLevels(myNewNumLevels); + setLevelsArray(myNewLevelsArr); + setFloatItemsArray(myNewFloatItemsArr); assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - if (other.isEstimationMode()) { - setDyMinK(min(getDyMinK(), other.getDyMinK())); - } } abstract void setDoubleItemsArray(double[] floatItems); + abstract void setDoubleItemsArrayAt(int index, double value); + abstract void setDyMinK(int dyMinK); abstract void setFloatItemsArray(float[] floatItems); - //Only for internal changes to the array, NOT for changing its size - abstract void updateLevelsArray(int[] levels); + abstract void setFloatItemsArrayAt(int index, float value); + + abstract void setItemsArrayUpdatable(WritableMemory itemsMem); abstract void setLevelsArray(int[] levelsArr); + abstract void setLevelsArrayAt(int index, int value); + + abstract void setLevelsArrayAtPlusEq(int index, int plusEq); + + abstract void setLevelsArrayAtMinusEq(int index, int minusEq); + + abstract void setLevelsArrayUpdatable(WritableMemory levelsMem); + abstract void setLevelZeroSorted(boolean sorted); abstract void setMaxDoubleValue(double value); @@ -601,49 +867,41 @@ final void mergeFloat(final KllFloatsSketch other) { abstract void setMinFloatValue(float value); + abstract void setMinMaxArrayUpdatable(WritableMemory minMaxMem); + abstract void setN(long n); abstract void setNumLevels(int numLevels); - final byte[] toGenericCompactByteArray() { //From Heap Only - final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final byte[] toCompactByteArrayImpl() { final byte[] byteArr = new 
byte[getCurrentCompactSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(byteArr); - final boolean singleItem = getN() == 1; - final boolean empty = isEmpty(); - //load the preamble - if (doubleType) { - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) - (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_DOUBLE)); - } else { - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) - (empty || singleItem ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FLOAT)); - } - wmem.putByte(SER_VER_BYTE_ADR, singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - byte flags = (byte) ( - (empty ? EMPTY_BIT_MASK : 0) - | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) - | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0)); - - flags |= (byte) (doubleType ? DOUBLES_SKETCH_BIT_MASK : 0); - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) getK()); - wmem.putByte(M_BYTE_ADR, (byte) M); - if (empty) { return byteArr; } + loadFirst8Bytes(this, wmem, false); + if (getN() == 0) { return byteArr; } //empty + final boolean doubleType = (sketchType == DOUBLES_SKETCH); //load data int offset = DATA_START_ADR_SINGLE_ITEM; final int[] myLevelsArr = getLevelsArray(); - if (!singleItem) { - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); + if (getN() == 1) { //single item + if (doubleType) { + wmem.putDouble(offset, getDoubleItemsArray()[myLevelsArr[0]]); + } else { + wmem.putFloat(offset, getFloatItemsArray()[myLevelsArr[0]]); + } + } else { // n > 1 + //remainder of preamble after first 8 bytes + insertN(wmem, getN()); + insertDyMinK(wmem, getDyMinK()); + insertNumLevels(wmem, getNumLevels()); offset = (doubleType) ? 
DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; - // the last integer in levels_ is not serialized because it can be derived + + //LOAD LEVELS ARR the last integer in levels_ is NOT serialized final int len = myLevelsArr.length - 1; wmem.putIntArray(offset, myLevelsArr, 0, len); offset += len * Integer.BYTES; + + //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY if (doubleType) { wmem.putDouble(offset, getMinDoubleValue()); offset += Double.BYTES; @@ -657,21 +915,40 @@ final byte[] toGenericCompactByteArray() { //From Heap Only offset += Float.BYTES; wmem.putFloatArray(offset, getFloatItemsArray(), myLevelsArr[0], getNumRetained()); } - } else { //single item - if (doubleType) { - final double value = getDoubleItemsArray()[myLevelsArr[0]]; - wmem.putDouble(offset, value); - } else { - final float value = getFloatItemsArray()[myLevelsArr[0]]; - wmem.putFloat(offset, value); - } } return byteArr; } + private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem, + final boolean updatable) { + final boolean empty = sk.getN() == 0; + final boolean lvlZeroSorted = sk.isLevelZeroSorted(); + final boolean singleItem = sk.getN() == 1; + final boolean doubleType = (sk.sketchType == DOUBLES_SKETCH); + final int preInts = + updatable + ? (doubleType ? PREAMBLE_INTS_DOUBLE : PREAMBLE_INTS_FLOAT) + : ((empty || singleItem) + ? PREAMBLE_INTS_EMPTY_SINGLE + : (doubleType) ? PREAMBLE_INTS_DOUBLE : PREAMBLE_INTS_FLOAT); + //load the preamble + insertPreInts(wmem, preInts); + final int server = updatable ? SERIAL_VERSION_UPDATABLE + : (singleItem ? 
SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); + insertSerVer(wmem, server); + insertFamilyID(wmem, Family.KLL.getID()); + insertEmptyFlag(wmem, empty); + insertLevelZeroSortedFlag(wmem, lvlZeroSorted); + insertSingleItemFlag(wmem, singleItem); + insertDoubleSketchFlag(wmem, doubleType); + insertUpdatableFlag(wmem, updatable); + insertK(wmem, sk.getK()); + insertM(wmem, M); + } + @SuppressWarnings("null") - final String toGenericString(final boolean withLevels, final boolean withData) { - final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final String toStringImpl(final boolean withLevels, final boolean withData) { + final boolean doubleType = (sketchType == DOUBLES_SKETCH); final int k = getK(); final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); @@ -710,106 +987,105 @@ final String toGenericString(final boolean withLevels, final boolean withData) { } else { myFloatItemsArr = getFloatItemsArray(); } - if (withLevels) { - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < myNumLevels; level++) { - sb.append(" ").append(level).append(", ").append(myLevelsArr[level]).append(": ") - .append(KllHelper.levelCapacity(k, myNumLevels, level, M)) - .append(", ").append(KllHelper.currentLevelSize(level, myNumLevels, myLevelsArr)).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(myLevelsArr[level]).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); + sb.append(outputLevels(k, myNumLevels, myLevelsArr)); } - if (withData) { - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (myLevelsArr[0] > 0) { - sb.append(" Garbage:" + Util.LS); - if (doubleType) { - for (int i = 0; i < myLevelsArr[0]; i++) { - sb.append(" ").append(i + ", 
").append(myDoubleItemsArr[i]).append(Util.LS); - } - } else { - for (int i = 0; i < myLevelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(myFloatItemsArr[i]).append(Util.LS); - } + sb.append(outputData(doubleType, myNumLevels, myLevelsArr, myFloatItemsArr, myDoubleItemsArr)); + } + return sb.toString(); + } + + static String outputLevels(final int k, final int numLevels, final int[] levelsArr) { + final StringBuilder sb = new StringBuilder(); + sb.append("### KLL levels array:").append(Util.LS) + .append(" level, offset: nominal capacity, actual size").append(Util.LS); + int level = 0; + for ( ; level < numLevels; level++) { + sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") + .append(KllHelper.levelCapacity(k, numLevels, level, M)) + .append(", ").append(KllHelper.currentLevelSize(level, numLevels, levelsArr)).append(Util.LS); + } + sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") + .append(Util.LS); + sb.append("### End levels array").append(Util.LS); + return sb.toString(); + } + + static String outputData(final boolean doubleType, final int numLevels, final int[] levelsArr, + final float[] floatItemsArr, final double[] doubleItemsArr) { + final StringBuilder sb = new StringBuilder(); + sb.append("### KLL items data {index, item}:").append(Util.LS); + if (levelsArr[0] > 0) { + sb.append(" Garbage:" + Util.LS); + if (doubleType) { + for (int i = 0; i < levelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); + } + } else { + for (int i = 0; i < levelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); } } - int level = 0; - if (doubleType) { - while (level < myNumLevels) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + myLevelsArr[level] + " wt: " + (1 << 
level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(myDoubleItemsArr[i]).append(Util.LS); - } - level++; + } + int level = 0; + if (doubleType) { + while (level < numLevels) { + final int fromIndex = levelsArr[level]; + final int toIndex = levelsArr[level + 1]; // exclusive + if (fromIndex < toIndex) { + sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); + } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); } + level++; } - else { - while (level < myNumLevels) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - if (fromIndex <= toIndex) { - sb.append(" level[").append(level).append("]: offset: " + myLevelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(myFloatItemsArr[i]).append(Util.LS); - } - level++; + } + else { + while (level < numLevels) { + final int fromIndex = levelsArr[level]; + final int toIndex = levelsArr[level + 1]; // exclusive + if (fromIndex <= toIndex) { + sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); + } + level++; } - sb.append(" level[" + level + "]: offset: " + myLevelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); } + sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); + sb.append(Util.LS); + sb.append("### End items data").append(Util.LS); + return sb.toString(); } - final byte[] toGenericUpdatableByteArray() { - final boolean doubleType = (sketchType == DOUBLE_SKETCH); + final byte[] 
toUpdatableByteArrayImpl() { final byte[] byteArr = new byte[getCurrentUpdatableSerializedSizeBytes()]; final WritableMemory wmem = WritableMemory.writableWrap(byteArr); - final boolean singleItem = getN() == 1; - final boolean empty = isEmpty(); - //load the preamble - if (doubleType) { - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_DOUBLE); //ignore empty, singleItem - } else { - wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) PREAMBLE_INTS_FLOAT); //ignore empty, singleItem - } - wmem.putByte(SER_VER_BYTE_ADR, SERIAL_VERSION_UPDATABLE); - wmem.putByte(FAMILY_BYTE_ADR, (byte) Family.KLL.getID()); - byte flags = (byte) ( - (empty ? EMPTY_BIT_MASK : 0) //set but not used - | (singleItem ? SINGLE_ITEM_BIT_MASK : 0) //set but not used - | (isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) - | UPDATABLE_BIT_MASK); - flags |= (byte) (doubleType ? DOUBLES_SKETCH_BIT_MASK : 0); - wmem.putByte(FLAGS_BYTE_ADR, flags); - wmem.putShort(K_SHORT_ADR, (short) getK()); - wmem.putByte(M_BYTE_ADR, (byte) M); + loadFirst8Bytes(this, wmem, true); + //remainder of preamble after first 8 bytes + insertN(wmem, getN()); + insertDyMinK(wmem, getDyMinK()); + insertNumLevels(wmem, getNumLevels()); + //load data - wmem.putLong(N_LONG_ADR, getN()); - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) getDyMinK()); - wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) getNumLevels()); + final boolean doubleType = (sketchType == DOUBLES_SKETCH); int offset = (doubleType) ? 
DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; - // the last integer in levels_ IS serialized + + //LOAD LEVELS ARRAY the last integer in levels_ IS serialized final int[] myLevelsArr = getLevelsArray(); final int len = myLevelsArr.length; wmem.putIntArray(offset, myLevelsArr, 0, len); offset += len * Integer.BYTES; + + //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY if (doubleType) { wmem.putDouble(offset, getMinDoubleValue()); offset += Double.BYTES; @@ -837,19 +1113,13 @@ final void updateDouble(final double value) { if (value < getMinDoubleValue()) { setMinDoubleValue(value); } if (value > getMaxDoubleValue()) { setMaxDoubleValue(value); } } - int[] myLevelsArr = getLevelsArray(); - double[] myDoubleItemsArr = getDoubleItemsArray(); - if (myLevelsArr[0] == 0) { - compressWhileUpdatingDoublesSketch(); - } - myLevelsArr = getLevelsArray(); //refresh - myDoubleItemsArr = getDoubleItemsArray(); + if (getLevelsArrayAt(0) == 0) { compressWhileUpdatingSketch(); } incN(); setLevelZeroSorted(false); - final int nextPos = myLevelsArr[0] - 1; - assert myLevelsArr[0] >= 0; - myLevelsArr[0] = nextPos; - myDoubleItemsArr[nextPos] = value; + final int nextPos = getLevelsArrayAt(0) - 1; + assert getLevelsArrayAt(0) >= 0; + setLevelsArrayAt(0, nextPos); + setDoubleItemsArrayAt(nextPos, value); } final void updateFloat(final float value) { @@ -861,109 +1131,63 @@ final void updateFloat(final float value) { if (value < getMinFloatValue()) { setMinFloatValue(value); } if (value > getMaxFloatValue()) { setMaxFloatValue(value); } } - int[] myLevelsArr = getLevelsArray(); - float[] myFloatItemsArr = getFloatItemsArray(); - if (myLevelsArr[0] == 0) { - compressWhileUpdatingFloatsSketch(); - } - myLevelsArr = getLevelsArray(); //refresh - myFloatItemsArr = getFloatItemsArray(); + + if (getLevelsArrayAt(0) == 0) { compressWhileUpdatingSketch(); } incN(); setLevelZeroSorted(false); - final int nextPos = myLevelsArr[0] - 1; - assert myLevelsArr[0] >= 0; - myLevelsArr[0] = nextPos; - 
myFloatItemsArr[nextPos] = value; + final int nextPos = getLevelsArrayAt(0) - 1; + assert getLevelsArrayAt(0) >= 0; + setLevelsArrayAt(0, nextPos); + setFloatItemsArrayAt(nextPos, value); } //Private non-static methods /** - * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array + * at the bottom. Only numLevels, the levels array and the items array are affected. */ - private void addEmptyTopLevelToCompletelyFullDoublesSketch() { + @SuppressWarnings("null") + private void addEmptyTopLevelToCompletelyFullSketch() { final int[] myCurLevelsArr = getLevelsArray(); - final double[] myCurDoubleItemsArr = getDoubleItemsArray(); final int myCurNumLevels = getNumLevels(); - final int myCurTotalItemsCap = myCurLevelsArr[myCurNumLevels]; - final int[] myNewLevelsArr; - final double[] myNewDoubleItemsArr; - final int myNewNumLevels; - final int myNewTotalItemsCap; - - // make sure that we are following a certain growth scheme - assert myCurLevelsArr[0] == 0; //definition of full - assert myCurDoubleItemsArr.length == myCurTotalItemsCap; - - //this is a little out of sequence so that we can pre-compute the total required increase in space - final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, M); - myNewTotalItemsCap = myCurTotalItemsCap + deltaItemsCap; + final int myCurTotalItemsCapacity = myCurLevelsArr[myCurNumLevels]; + double minDouble = Double.NaN; + double maxDouble = Double.NaN; + float minFloat = Float.NaN; + float maxFloat = Float.NaN; - // Check if growing the levels arr if required. 
- // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it - final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; + double[] myCurDoubleItemsArr = null; + float[] myCurFloatItemsArr = null; - //INSERT SPACE MANAGEMENT HERE - //int totalDeltaSpaceRequired = deltaItemsCap * Double.BYTES; - //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } - // ... - - // GROW LEVELS ARRAY - if (growLevelsArr) { - //grow levels arr by one and copy the old data to the new array, extra space at the top. - myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); - assert myNewLevelsArr.length == myCurLevelsArr.length + 1; - myNewNumLevels = myCurNumLevels + 1; - incNumLevels(); //increment the class member - } else { - myNewLevelsArr = myCurLevelsArr; - myNewNumLevels = myCurNumLevels; - } - // This loop updates all level indices EXCLUDING the "extra" index at the top - for (int level = 0; level <= myNewNumLevels - 1; level++) { - myNewLevelsArr[level] += deltaItemsCap; - } - myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCap; // initialize the new "extra" index at the top - setLevelsArray(myNewLevelsArr); - - // GROW ITEMS ARRAY - myNewDoubleItemsArr = new double[myNewTotalItemsCap]; - // copy and shift the current data into the new array - System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCap); - //Update the items array - setDoubleItemsArray(myNewDoubleItemsArr); - } - - /** - * This grows the levels arr by 1 (if needed) and increases the capacity of the items array at the bottom - */ - private void addEmptyTopLevelToCompletelyFullFloatsSketch() { - final int[] myCurLevelsArr = getLevelsArray(); - final float[] myCurFloatItemsArr = getFloatItemsArray(); - final int myCurNumLevels = getNumLevels(); - final int myCurTotalItemsCap = myCurLevelsArr[myCurNumLevels]; - final int[] myNewLevelsArr; - final float[] myNewFloatItemsArr; final int myNewNumLevels; - final int 
myNewTotalItemsCap; - - // make sure that we are following a certain growth scheme - assert myCurLevelsArr[0] == 0; //definition of full - assert myCurFloatItemsArr.length == myCurTotalItemsCap; + final int[] myNewLevelsArr; + final int myNewTotalItemsCapacity; + + float[] myNewFloatItemsArr = null; + double[] myNewDoubleItemsArr = null; + + if (sketchType == DOUBLES_SKETCH) { + minDouble = getMinDoubleValue(); + maxDouble = getMaxDoubleValue(); + myCurDoubleItemsArr = getDoubleItemsArray(); + //assert we are following a certain growth scheme + assert myCurDoubleItemsArr.length == myCurTotalItemsCapacity; + } else { //FLOATS_SKETCH + minFloat = getMinFloatValue(); + maxFloat = getMaxFloatValue(); + myCurFloatItemsArr = getFloatItemsArray(); + assert myCurFloatItemsArr.length == myCurTotalItemsCapacity; + } + assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme - //this is a little out of sequence so that we can pre-compute the total required increase in space final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, M); - myNewTotalItemsCap = myCurTotalItemsCap + deltaItemsCap; + myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap; // Check if growing the levels arr if required. // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; - //INSERT SPACE MANAGEMENT HERE - //int totalDeltaSpaceRequired = deltaItemsCap * Float.BYTES; - //if (growLevelsArr) { totalDeltaSpaceRequired += Integer.BYTES; } - // ... - // GROW LEVELS ARRAY if (growLevelsArr) { //grow levels arr by one and copy the old data to the new array, extra space at the top. 
@@ -979,90 +1203,51 @@ private void addEmptyTopLevelToCompletelyFullFloatsSketch() { for (int level = 0; level <= myNewNumLevels - 1; level++) { myNewLevelsArr[level] += deltaItemsCap; } - myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCap; // initialize the new "extra" index at the top - setLevelsArray(myNewLevelsArr); + myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCapacity; // initialize the new "extra" index at the top // GROW ITEMS ARRAY - myNewFloatItemsArr = new float[myNewTotalItemsCap]; - // copy and shift the current items data into the new array - System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCap); - //Update the items array - setFloatItemsArray(myNewFloatItemsArr); - } - - // The following code is only valid in the special case of exactly reaching capacity while updating. - // It cannot be used while merging, while reducing k, or anything else. - private void compressWhileUpdatingDoublesSketch() { - final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - - // It is important to add the new top level right here. Be aware that this next operation - // grows the items array, shifts the items data and the level boundaries of the data. - // It also grows the levels array and increments numLevels_. - if (level == getNumLevels() - 1) { - addEmptyTopLevelToCompletelyFullDoublesSketch(); - } - final int[] myLevelsArr = getLevelsArray(); //new levels arr - final int rawBeg = myLevelsArr[level]; - final int rawEnd = myLevelsArr[level + 1]; - // +2 is OK because we already added a new top level if necessary - final int popAbove = myLevelsArr[level + 2] - rawEnd; - final int rawPop = rawEnd - rawBeg; - final boolean oddPop = isOdd(rawPop); - final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; - final int adjPop = oddPop ? 
rawPop - 1 : rawPop; - final int halfAdjPop = adjPop / 2; - - // level zero might not be sorted, so we must sort it if we wish to compact it - - final double[] myDoubleItemsArr = getDoubleItemsArray(); - if (level == 0) { - Arrays.sort(myDoubleItemsArr, adjBeg, adjBeg + adjPop); - } - if (popAbove == 0) { - KllDoublesHelper.randomlyHalveUpDoubles(myDoubleItemsArr, adjBeg, adjPop, random); + if (sketchType == DOUBLES_SKETCH) { + myNewDoubleItemsArr = new double[myNewTotalItemsCapacity]; + // copy and shift the current data into the new array + System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } else { - KllDoublesHelper.randomlyHalveDownDoubles(myDoubleItemsArr, adjBeg, adjPop, random); - KllDoublesHelper.mergeSortedDoubleArrays( - myDoubleItemsArr, adjBeg, halfAdjPop, - myDoubleItemsArr, rawEnd, popAbove, - myDoubleItemsArr, adjBeg + halfAdjPop); + myNewFloatItemsArr = new float[myNewTotalItemsCapacity]; + // copy and shift the current items data into the new array + System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } - myLevelsArr[level + 1] -= halfAdjPop; // adjust boundaries of the level above - if (oddPop) { - myLevelsArr[level] = myLevelsArr[level + 1] - 1; // the current level now contains one item - myDoubleItemsArr[myLevelsArr[level]] = myDoubleItemsArr[rawBeg]; // namely this leftover guy - } else { - myLevelsArr[level] = myLevelsArr[level + 1]; // the current level is now empty + //MEMORY SPACE MANAGEMENT + if (direct) { + wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewTotalItemsCapacity); } - - // verify that we freed up halfAdjPop array slots just below the current level - assert myLevelsArr[level] == rawBeg + halfAdjPop; - - // finally, we need to shift up the data in the levels below - // so that the freed-up space can be used by level zero - if (level > 0) { - final int amount = rawBeg - myLevelsArr[0]; - 
System.arraycopy(myDoubleItemsArr, myLevelsArr[0], - myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); - for (int lvl = 0; lvl < level; lvl++) { - myLevelsArr[lvl] += halfAdjPop; - } + //update our sketch with new expanded spaces + setNumLevels(myNewNumLevels); + setLevelsArray(myNewLevelsArr); + if (sketchType == DOUBLES_SKETCH) { + setMinDoubleValue(minDouble); + setMaxDoubleValue(maxDouble); + setDoubleItemsArray(myNewDoubleItemsArr); + } else { //Float sketch + setMinFloatValue(minFloat); + setMaxFloatValue(maxFloat); + setFloatItemsArray(myNewFloatItemsArr); } } // The following code is only valid in the special case of exactly reaching capacity while updating. // It cannot be used while merging, while reducing k, or anything else. - private void compressWhileUpdatingFloatsSketch() { + @SuppressWarnings("null") + private void compressWhileUpdatingSketch() { final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); - - // It is important to add the new top level right here. Be aware that this next operation - // grows the items array, shifts the items data and the level boundaries of the data. - // It also grows the levels array and increments numLevels_. if (level == getNumLevels() - 1) { - addEmptyTopLevelToCompletelyFullFloatsSketch(); + //The level to compact is the top level, thus we need to add a level. + //Be aware that this operation grows the items array, + //shifts the items data and the level boundaries of the data, + //and grows the levels array and increments numLevels_. 
+ addEmptyTopLevelToCompletelyFullSketch(); } - final int[] myLevelsArr = getLevelsArray(); //new levels arr + + final int[] myLevelsArr = getLevelsArray(); final int rawBeg = myLevelsArr[level]; final int rawEnd = myLevelsArr[level + 1]; // +2 is OK because we already added a new top level if necessary @@ -1074,40 +1259,136 @@ private void compressWhileUpdatingFloatsSketch() { final int halfAdjPop = adjPop / 2; // level zero might not be sorted, so we must sort it if we wish to compact it + float[] myFloatItemsArr; + double[] myDoubleItemsArr; - final float[] myFloatItemsArr = getFloatItemsArray(); - if (level == 0) { - Arrays.sort(myFloatItemsArr, adjBeg, adjBeg + adjPop); - } - if (popAbove == 0) { - KllFloatsHelper.randomlyHalveUpFloats(myFloatItemsArr, adjBeg, adjPop, random); - } else { - KllFloatsHelper.randomlyHalveDownFloats(myFloatItemsArr, adjBeg, adjPop, random); - KllFloatsHelper.mergeSortedFloatArrays( - myFloatItemsArr, adjBeg, halfAdjPop, - myFloatItemsArr, rawEnd, popAbove, - myFloatItemsArr, adjBeg + halfAdjPop); + if (sketchType == DOUBLES_SKETCH) { + myFloatItemsArr = null; + myDoubleItemsArr = getDoubleItemsArray(); + if (level == 0) { + if (direct) { + myDoubleItemsArr = getDoubleItemsArray(); + Arrays.sort(myDoubleItemsArr, adjBeg, adjBeg + adjPop); + setDoubleItemsArray(myDoubleItemsArr); + } else { + Arrays.sort(getDoubleItemsArray(), adjBeg, adjBeg + adjPop); + } + } + if (popAbove == 0) { + if (direct) { + myDoubleItemsArr = getDoubleItemsArray(); + KllDoublesHelper.randomlyHalveUpDoubles(myDoubleItemsArr, adjBeg, adjPop, random); + setDoubleItemsArray(myDoubleItemsArr); + } else { + KllDoublesHelper.randomlyHalveUpDoubles(getDoubleItemsArray(), adjBeg, adjPop, random); + } + } else { + if (direct) { + myDoubleItemsArr = getDoubleItemsArray(); + KllDoublesHelper.randomlyHalveDownDoubles(myDoubleItemsArr, adjBeg, adjPop, random); + setDoubleItemsArray(myDoubleItemsArr); + } else { + 
KllDoublesHelper.randomlyHalveDownDoubles(getDoubleItemsArray(), adjBeg, adjPop, random); + } + if (direct ) { + myDoubleItemsArr = getDoubleItemsArray(); + KllDoublesHelper.mergeSortedDoubleArrays( + myDoubleItemsArr, adjBeg, halfAdjPop, + myDoubleItemsArr, rawEnd, popAbove, + myDoubleItemsArr, adjBeg + halfAdjPop); + setDoubleItemsArray(myDoubleItemsArr); + } else { + myDoubleItemsArr = getDoubleItemsArray(); + KllDoublesHelper.mergeSortedDoubleArrays( + myDoubleItemsArr, adjBeg, halfAdjPop, + myDoubleItemsArr, rawEnd, popAbove, + myDoubleItemsArr, adjBeg + halfAdjPop); + } + } + } else { //Float sketch + myFloatItemsArr = getFloatItemsArray(); + myDoubleItemsArr = null; + if (level == 0) { + if (direct) { + myFloatItemsArr = getFloatItemsArray(); + Arrays.sort(myFloatItemsArr, adjBeg, adjBeg + adjPop); + setFloatItemsArray(myFloatItemsArr); + } else { + Arrays.sort(getFloatItemsArray(), adjBeg, adjBeg + adjPop); + } + } + if (popAbove == 0) { + if (direct) { + myFloatItemsArr = getFloatItemsArray(); + KllFloatsHelper.randomlyHalveUpFloats(myFloatItemsArr, adjBeg, adjPop, random); + setFloatItemsArray(myFloatItemsArr); + } else { + KllFloatsHelper.randomlyHalveUpFloats(getFloatItemsArray(), adjBeg, adjPop, random); + } + } else { + if (direct) { + myFloatItemsArr = getFloatItemsArray(); + KllFloatsHelper.randomlyHalveDownFloats(myFloatItemsArr, adjBeg, adjPop, random); + setFloatItemsArray(myFloatItemsArr); + } else { + KllFloatsHelper.randomlyHalveDownFloats(getFloatItemsArray(), adjBeg, adjPop, random); + } + if (direct ) { + myFloatItemsArr = getFloatItemsArray(); + KllFloatsHelper.mergeSortedFloatArrays( + myFloatItemsArr, adjBeg, halfAdjPop, + myFloatItemsArr, rawEnd, popAbove, + myFloatItemsArr, adjBeg + halfAdjPop); + setFloatItemsArray(myFloatItemsArr); + } else { + myFloatItemsArr = getFloatItemsArray(); + KllFloatsHelper.mergeSortedFloatArrays( + myFloatItemsArr, adjBeg, halfAdjPop, + myFloatItemsArr, rawEnd, popAbove, + myFloatItemsArr, adjBeg + 
halfAdjPop); + } + } } - myLevelsArr[level + 1] -= halfAdjPop; // adjust boundaries of the level above + setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above if (oddPop) { - myLevelsArr[level] = myLevelsArr[level + 1] - 1; // the current level now contains one item - myFloatItemsArr[myLevelsArr[level]] = myFloatItemsArr[rawBeg]; // namely this leftover guy + setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item + if (sketchType == DOUBLES_SKETCH) { + setDoubleItemsArrayAt(getLevelsArrayAt(level), getDoubleItemsArrayAt(rawBeg)); // namely this leftover guy + } else { + setFloatItemsArrayAt(getLevelsArrayAt(level), getFloatItemsArrayAt(rawBeg)); // namely this leftover guy + } + } else { - myLevelsArr[level] = myLevelsArr[level + 1]; // the current level is now empty + setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty } // verify that we freed up halfAdjPop array slots just below the current level - assert myLevelsArr[level] == rawBeg + halfAdjPop; + assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; // finally, we need to shift up the data in the levels below // so that the freed-up space can be used by level zero if (level > 0) { - final int amount = rawBeg - myLevelsArr[0]; - System.arraycopy(myFloatItemsArr, myLevelsArr[0], - myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); + final int amount = rawBeg - getLevelsArrayAt(0); + if (sketchType == DOUBLES_SKETCH) { + if (direct) { + myDoubleItemsArr = getDoubleItemsArray(); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); + setDoubleItemsArray(myDoubleItemsArr); + } else { + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); + } + } else { + if (direct) { + myFloatItemsArr = getFloatItemsArray(); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, 
myLevelsArr[0] + halfAdjPop, amount); + setFloatItemsArray(myFloatItemsArr); + } else { + System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); + } + } for (int lvl = 0; lvl < level; lvl++) { - myLevelsArr[lvl] += halfAdjPop; + setLevelsArrayAtPlusEq(lvl, halfAdjPop); } } } @@ -1202,113 +1483,7 @@ private void incrementFloatBucketsUnsortedLevel(final int fromIndex, final int t } } - private void mergeDoubleHigherLevels(final KllDoublesSketch other, final long finalN) { - final int myCurNumLevels = getNumLevels(); - final int myCurLevelsArrLength = getLevelsArray().length; - final int myCurItemsArrLength = getDoubleItemsArray().length; - - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); - final double[] workbuf = new double[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; - - final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); - - populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); - - // notice that workbuf is being used as both the input and output here - final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - final int finalNumLevels = result[0]; - final int finalCapacity = result[1]; - final int finalPop = result[2]; - - assert finalNumLevels <= ub; // ub may be much bigger - - // now we need to transfer the results back into the "self" sketch - final double[] newbuf = finalCapacity == myCurItemsArrLength - ? 
getDoubleItemsArray() : new double[finalCapacity]; - final int freeSpaceAtBottom = finalCapacity - finalPop; - System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); - final int theShift = freeSpaceAtBottom - outlevels[0]; - - final int finalLevelsArrLen; - if (myCurLevelsArrLength < finalNumLevels + 1) { - finalLevelsArrLen = finalNumLevels + 1; - } else { finalLevelsArrLen = myCurLevelsArrLength; } - - final int[] myFinalLevelsArr = new int[finalLevelsArrLen]; - - for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index - myFinalLevelsArr[lvl] = outlevels[lvl] + theShift; - } - - //MEMORY MANAGEMENT -// final int itemsDeltaBytes = (newbuf.length - myCurItemsArrLength) * Double.BYTES; -// final int levelsDeltaBytes = finalLevelsArrLen * Integer.BYTES; -// final int totalDeltaBytes = itemsDeltaBytes + levelsDeltaBytes; - - setLevelsArray(myFinalLevelsArr); - setDoubleItemsArray(newbuf); - setNumLevels(finalNumLevels); - } - - private void mergeFloatHigherLevels(final KllFloatsSketch other, final long finalN) { - final int myCurNumLevels = getNumLevels(); - final int myCurLevelsArrLength = getLevelsArray().length; - final int myCurItemsArrLength = getFloatItemsArray().length; - - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), other.getLevelsArray()); - final float[] workbuf = new float[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; - - final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); - - populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); - - // notice that workbuf is being used as both the input and output here - final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - 
final int finalNumLevels = result[0]; - final int finalCapacity = result[1]; - final int finalPop = result[2]; - - assert finalNumLevels <= ub; // ub may be much bigger - - // now we need to transfer the results back into the "self" sketch - final float[] newbuf = finalCapacity == myCurItemsArrLength - ? getFloatItemsArray() : new float[finalCapacity]; - final int freeSpaceAtBottom = finalCapacity - finalPop; - System.arraycopy(workbuf, outlevels[0], newbuf, freeSpaceAtBottom, finalPop); - final int theShift = freeSpaceAtBottom - outlevels[0]; - - final int finalLevelsArrLen; - if (myCurLevelsArrLength < finalNumLevels + 1) { - finalLevelsArrLen = finalNumLevels + 1; - } else { finalLevelsArrLen = myCurLevelsArrLength; } - - final int[] myFinalLevelsArr = new int[finalLevelsArrLen]; - - for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index - myFinalLevelsArr[lvl] = outlevels[lvl] + theShift; - } - - //MEMORY MANAGEMENT -// final int itemsDeltaBytes = (newbuf.length - myCurItemsArrLength) * Float.BYTES; -// final int levelsDeltaBytes = finalLevelsArrLen * Integer.BYTES; -// final int totalDeltaBytes = itemsDeltaBytes + levelsDeltaBytes; - - setLevelsArray(myFinalLevelsArr); - setFloatItemsArray(newbuf); - setNumLevels(finalNumLevels); - } - - private void populateDoubleWorkArrays(final KllDoublesSketch other, final double[] workbuf, + private void populateDoubleWorkArrays(final KllSketch other, final double[] workbuf, final int[] worklevels, final int provisionalNumLevels) { worklevels[0] = 0; final int[] myLevelsArr = getLevelsArray(); @@ -1337,7 +1512,7 @@ private void populateDoubleWorkArrays(final KllDoublesSketch other, final double } } - private void populateFloatWorkArrays(final KllFloatsSketch other, final float[] workbuf, + private void populateFloatWorkArrays(final KllSketch other, final float[] workbuf, final int[] worklevels, final int provisionalNumLevels) { worklevels[0] = 0; final int[] myLevelsArr = getLevelsArray(); diff 
--git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java index 1cbad775b..fe1daafdd 100644 --- a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java @@ -24,26 +24,26 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractDoubleSketchFlag; import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractEmptyFlag; import static org.apache.datasketches.kll.KllPreambleUtil.extractFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.extractFlags; import static org.apache.datasketches.kll.KllPreambleUtil.extractK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; 
import static org.apache.datasketches.kll.KllPreambleUtil.extractM; import static org.apache.datasketches.kll.KllPreambleUtil.extractN; import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.extractPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.extractSerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.extractSingleItemFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.extractUpdatableFlag; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; @@ -65,6 +65,7 @@ final class MemoryValidate { final boolean updatable; final int k; final int m; + final int memCapacity; Layout layout; // depending on the layout, the next 8-16 bytes of the preamble, may be filled with assumed values. @@ -74,49 +75,49 @@ final class MemoryValidate { int dyMinK; int numLevels; // derived - int memItemsCap; //capacity of Items array for exporting and for Updatable form - int memItemsRetained; //actual items retained in Compact form + int capacityItems; //capacity of Items array for exporting and for Updatable form + int itemsRetained; //actual items retained in Compact form + int itemsArrStart; int sketchBytes; - Memory levelsMem; //if sk = empty or single, this is derived - Memory minMaxMem; //if sk = empty or single, this is derived - Memory itemsMem; //if sk = empty or single, this is derived - WritableMemory levelsWmem; - WritableMemory minMaxWmem; - WritableMemory itemsWmem; + Memory levelsArrCompact; //if sk = empty or single, this is derived + Memory minMaxArrCompact; //if sk = empty or single, this is derived + Memory itemsArrCompact; //if sk = empty or single, this is derived + WritableMemory levelsArrUpdatable; + WritableMemory minMaxArrUpdatable; + WritableMemory itemsArrUpdatable; MemoryValidate(final Memory srcMem) { + memCapacity = (int) srcMem.getCapacity(); preInts = extractPreInts(srcMem); serVer = 
extractSerVer(srcMem); familyID = extractFamilyID(srcMem); - if (familyID != Family.KLL.getID()) { memoryCheckThrow(0, familyID); } + if (familyID != Family.KLL.getID()) { memoryValidateThrow(0, familyID); } famName = idToFamily(familyID).toString(); - if (famName != "KLL") { memoryCheckThrow(23, 0); } - flags = extractFlags(srcMem); - empty = (flags & EMPTY_BIT_MASK) > 0; - level0Sorted = (flags & LEVEL_ZERO_SORTED_BIT_MASK) > 0; - singleItem = (flags & SINGLE_ITEM_BIT_MASK) > 0; - doublesSketch = (flags & DOUBLES_SKETCH_BIT_MASK) > 0; - updatable = (flags & UPDATABLE_BIT_MASK) > 0; + empty = extractEmptyFlag(srcMem); + level0Sorted = extractLevelZeroSortedFlag(srcMem); + singleItem = extractSingleItemFlag(srcMem); + doublesSketch = extractDoubleSketchFlag(srcMem); + updatable = extractUpdatableFlag(srcMem); k = extractK(srcMem); KllHelper.checkK(k); m = extractM(srcMem); - if (m != 8) { memoryCheckThrow(7, m); } + if (m != DEFAULT_M) { memoryValidateThrow(7, m); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(10, 0); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } - } void compactMemoryValidate(final Memory srcMem) { - final int checkFlags = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0); - if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); } - - switch (checkFlags) { + if (empty && singleItem) { memoryValidateThrow(20, 0); } + final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); + switch (sw) { case 0: { //Float Compact FULL - if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } + layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -126,64 +127,65 @@ void compactMemoryValidate(final Memory srcMem) { final int[] myLevelsArr = new int[numLevels + 1]; srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //copies all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one - levelsMem = Memory.wrap(myLevelsArr); //separate from srcMem, - offset += levelsMem.getCapacity() - Integer.BYTES; // but one larger than srcMem + levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem, + offset += levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem // MIN/MAX MEM - minMaxMem = srcMem.region(offset, 2 * Float.BYTES); - offset += minMaxMem.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2 * Float.BYTES); + offset += minMaxArrCompact.getCapacity(); // ITEMS MEM - memItemsCap = myLevelsArr[numLevels]; - memItemsRetained = memItemsCap - myLevelsArr[0]; - final float[] myItemsArr = new float[memItemsCap]; - srcMem.getFloatArray(offset, myItemsArr, myLevelsArr[0], memItemsRetained); - itemsMem = Memory.wrap(myItemsArr); - sketchBytes = offset + memItemsRetained * Float.BYTES; + itemsArrStart = offset; + capacityItems = myLevelsArr[numLevels]; + itemsRetained = capacityItems - myLevelsArr[0]; + final float[] myItemsArr = new float[capacityItems]; + srcMem.getFloatArray(offset, myItemsArr, myLevelsArr[0], itemsRetained); + itemsArrCompact = Memory.wrap(myItemsArr); + sketchBytes = offset + itemsRetained * Float.BYTES; break; } case 1: { //Float Compact EMPTY - if 
(preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.FLOAT_EMPTY_COMPACT; n = 0; //assumed dyMinK = k; //assumed numLevels = 1; //assumed - // LEVELS MEM - levelsMem = Memory.wrap(new int[] {k, k}); + levelsArrCompact = Memory.wrap(new int[] {k, k}); // MIN/MAX MEM - minMaxMem = Memory.wrap(new float[] {Float.NaN, Float.NaN}); + minMaxArrCompact = Memory.wrap(new float[] {Float.NaN, Float.NaN}); // ITEMS MEM - memItemsCap = k; - memItemsRetained = 0; - itemsMem = Memory.wrap(new float[k]); + capacityItems = k; + itemsRetained = 0; + itemsArrCompact = Memory.wrap(new float[k]); sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty + itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } case 4: { //Float Compact SINGLE - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; dyMinK = k; numLevels = 1; - // LEVELS MEM - levelsMem = Memory.wrap(new int[] {k - 1, k}); + levelsArrCompact = Memory.wrap(new int[] {k - 1, k}); final float minMax = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM); // MIN/MAX MEM - minMaxMem = Memory.wrap(new float[] {minMax, minMax}); + minMaxArrCompact = Memory.wrap(new float[] {minMax, minMax}); // ITEMS MEM - memItemsCap = k; - memItemsRetained = 1; + capacityItems = k; + itemsRetained = 1; final float[] myFloatItems = new float[k]; myFloatItems[k - 1] = minMax; - itemsMem = Memory.wrap(myFloatItems); + itemsArrCompact = Memory.wrap(myFloatItems); sketchBytes = 
DATA_START_ADR_SINGLE_ITEM + Float.BYTES; + itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } case 8: { //Double Compact FULL - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -193,59 +195,62 @@ void compactMemoryValidate(final Memory srcMem) { final int[] myLevelsArr = new int[numLevels + 1]; srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one - levelsMem = Memory.wrap(myLevelsArr); //separate from srcMem - offset += levelsMem.getCapacity() - Integer.BYTES; + levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem + offset += levelsArrCompact.getCapacity() - Integer.BYTES; // MIN/MAX MEM - minMaxMem = srcMem.region(offset, 2 * Double.BYTES); - offset += minMaxMem.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2 * Double.BYTES); + offset += minMaxArrCompact.getCapacity(); // ITEMS MEM - memItemsCap = myLevelsArr[numLevels]; - memItemsRetained = memItemsCap - myLevelsArr[0]; - final double[] myItemsArr = new double[memItemsCap]; - srcMem.getDoubleArray(offset, myItemsArr, myLevelsArr[0], memItemsRetained); - itemsMem = Memory.wrap(myItemsArr); - sketchBytes = offset + memItemsRetained * Double.BYTES; + itemsArrStart = offset; + capacityItems = myLevelsArr[numLevels]; + itemsRetained = capacityItems - myLevelsArr[0]; + final double[] myItemsArr = new double[capacityItems]; + srcMem.getDoubleArray(offset, myItemsArr, myLevelsArr[0], itemsRetained); + itemsArrCompact = Memory.wrap(myItemsArr); + sketchBytes = offset + itemsRetained * Double.BYTES; break; } case 9: { //Double 
Compact EMPTY - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryCheckThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; dyMinK = k; numLevels = 1; // LEVELS MEM - levelsMem = Memory.wrap(new int[] {k, k}); + levelsArrCompact = Memory.wrap(new int[] {k, k}); // MIN/MAX MEM - minMaxMem = Memory.wrap(new double[] {Double.NaN, Double.NaN}); + minMaxArrCompact = Memory.wrap(new double[] {Double.NaN, Double.NaN}); // ITEMS MEM - memItemsCap = k; - memItemsRetained = 0; - itemsMem = Memory.wrap(new double[k]); + capacityItems = k; + itemsRetained = 0; + itemsArrCompact = Memory.wrap(new double[k]); sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty + itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } case 12: { //Double Compact SINGLE - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryCheckThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryCheckThrow(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; dyMinK = k; numLevels = 1; // LEVELS MEM - levelsMem = Memory.wrap(new int[] {k - 1, k}); + levelsArrCompact = Memory.wrap(new int[] {k - 1, k}); final double minMax = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM); // MIN/MAX MEM - minMaxMem = Memory.wrap(new double[] {minMax, minMax}); + minMaxArrCompact = Memory.wrap(new double[] {minMax, minMax}); // ITEMS MEM - memItemsCap = k; - memItemsRetained = 1; + capacityItems = k; + itemsRetained = 1; final double[] myDoubleItems = new double[k]; myDoubleItems[k - 1] = minMax; - itemsMem = Memory.wrap(myDoubleItems); + itemsArrCompact = Memory.wrap(myDoubleItems); sketchBytes = 
DATA_START_ADR_SINGLE_ITEM + Double.BYTES; + itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } default: break; //can't happen @@ -253,98 +258,76 @@ void compactMemoryValidate(final Memory srcMem) { } void updatableMemoryValidate(final WritableMemory wSrcMem) { - final int checkFlags = (doublesSketch ? 8 : 0); - if ((checkFlags & 5) == 5) { memoryCheckThrow(20, flags); } - //System.out.println(KllPreambleUtil.memoryToString(wSrcMem)); - - switch (checkFlags) { - case 0: { //Float Updatable FULL - if (preInts != PREAMBLE_INTS_FLOAT) { memoryCheckThrow(6, preInts); } - if (serVer != SERIAL_VERSION_UPDATABLE) { memoryCheckThrow(10, serVer); } - layout = Layout.FLOAT_UPDATABLE; - n = extractN(wSrcMem); - empty = n == 0; - singleItem = n == 1; - dyMinK = extractDyMinK(wSrcMem); - numLevels = extractNumLevels(wSrcMem); - int offset = DATA_START_ADR_FLOAT; - //LEVELS - levelsWmem = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); - offset += (int)levelsWmem.getCapacity(); - //MIN/MAX - minMaxWmem = wSrcMem.writableRegion(offset, 2 * Float.BYTES); - offset += (int)minMaxWmem.getCapacity(); - //ITEMS - memItemsCap = levelsWmem.getInt(numLevels * Integer.BYTES); - itemsWmem = wSrcMem.writableRegion(offset, memItemsCap * Float.BYTES); - offset += itemsWmem.getCapacity(); - sketchBytes = offset; - break; - } - - case 8: { //Double Updatable FULL - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryCheckThrow(5, preInts); } - if (serVer != SERIAL_VERSION_UPDATABLE) { memoryCheckThrow(10, serVer); } - layout = Layout.DOUBLE_UPDATABLE; - n = extractN(wSrcMem); - empty = n == 0; - singleItem = n == 1; - dyMinK = extractDyMinK(wSrcMem); - numLevels = extractNumLevels(wSrcMem); + if (doublesSketch) { //Double Updatable FULL + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } + layout = Layout.DOUBLE_UPDATABLE; + n = extractN(wSrcMem); + empty = n == 0; //empty & singleItem are set for convenience + singleItem = n == 1; // there is no error 
checking on these bits + dyMinK = extractDyMinK(wSrcMem); + numLevels = extractNumLevels(wSrcMem); - int offset = DATA_START_ADR_DOUBLE; - //LEVELS - levelsWmem = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); - offset += (int)levelsWmem.getCapacity(); - //MIN/MAX - minMaxWmem = wSrcMem.writableRegion(offset, 2 * Double.BYTES); - offset += (int)minMaxWmem.getCapacity(); - //ITEMS - memItemsCap = levelsWmem.getInt(numLevels * Integer.BYTES); - itemsWmem = wSrcMem.writableRegion(offset, memItemsCap * Double.BYTES); - offset += itemsWmem.getCapacity(); - sketchBytes = offset; - break; - } - default: break; //can't happen + int offset = DATA_START_ADR_DOUBLE; + //LEVELS + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + offset += (int)levelsArrUpdatable.getCapacity(); + //MIN/MAX + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2 * Double.BYTES); + offset += (int)minMaxArrUpdatable.getCapacity(); + //ITEMS + capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + final int itemsArrBytes = capacityItems * Double.BYTES; + itemsArrStart = offset; + itemsArrStart = memCapacity - itemsArrBytes; + if (itemsArrStart < offset) { memoryValidateThrow(24, offset - itemsArrStart); } + itemsArrUpdatable = wSrcMem.writableRegion(itemsArrStart, itemsArrBytes); + sketchBytes = itemsArrStart + itemsArrBytes; + } + else { //Float Updatable FULL + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } + layout = Layout.FLOAT_UPDATABLE; + n = extractN(wSrcMem); + empty = n == 0; //empty & singleItem are set for convenience + singleItem = n == 1; // there is no error checking on these bits + dyMinK = extractDyMinK(wSrcMem); + numLevels = extractNumLevels(wSrcMem); + int offset = DATA_START_ADR_FLOAT; + //LEVELS + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + offset += (int)levelsArrUpdatable.getCapacity(); + //MIN/MAX + minMaxArrUpdatable = 
wSrcMem.writableRegion(offset, 2 * Float.BYTES); + offset += (int)minMaxArrUpdatable.getCapacity(); + //ITEMS + capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + final int itemsArrBytes = capacityItems * Float.BYTES; + itemsArrStart = offset; + itemsArrStart = memCapacity - itemsArrBytes; + if (itemsArrStart < offset) { memoryValidateThrow(24, offset - itemsArrStart); } + itemsArrUpdatable = wSrcMem.writableRegion(itemsArrStart, itemsArrBytes); + sketchBytes = itemsArrStart + itemsArrBytes; } } -// @SuppressWarnings("unused") -// private static void printMemInts(final Memory mem) { -// final int capInts = (int)(mem.getCapacity() / 4); -// for (int i = 0; i < capInts; i++) { -// System.out.println(mem.getInt(i * 4)); -// } -// } -// -// @SuppressWarnings("unused") -// private static void printMemFloats(final Memory mem) { -// final int capFlts = (int)(mem.getCapacity() / 4); -// for (int i = 0; i < capFlts; i++) { -// System.out.println(mem.getFloat(i * 4)); -// } -// } - - - private static void memoryCheckThrow(final int errNo, final int value) { + private static void memoryValidateThrow(final int errNo, final int value) { String msg = ""; switch (errNo) { case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + //case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; 
case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; - case 8: msg = "The dynamic MinK must be equal to K, NOT: " + value; break; - case 9: msg = "numLevels must be one, NOT: " + value; break; - case 10: msg = "Updatable Bit: 1 -> SerVer: " + SERIAL_VERSION_UPDATABLE + ", NOT: " + value; break; + //case 8: msg = "The dynamic MinK must be equal to K, NOT: " + value; break; + //case 9: msg = "numLevels must be one, NOT: " + value; break; + case 10: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; - case 21: msg = "N != 0 and empty bit is set. N: " + value; break; - case 22: msg = "N != 1 and single item bit is set. N: " + value; break; - case 23: msg = "Family name is not KLL"; break; + //case 21: msg = "N != 0 and empty bit is set. N: " + value; break; + //case 22: msg = "N != 1 and single item bit is set. N: " + value; break; + //case 23: msg = "Family name is not KLL"; break; + case 24: msg = "Given Memory has insufficient capacity. Need " + value + " bytes."; break; } throw new SketchesArgumentException(msg); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java new file mode 100644 index 000000000..572b3b1ba --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -0,0 +1,595 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + + +@SuppressWarnings("javadoc") +public class KllDirectDoublesSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(Double.NaN); // this must not change anything + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + 
assertTrue(Double.isNaN(sketch.getRank(0))); + assertTrue(Double.isNaN(sketch.getMinValue())); + assertTrue(Double.isNaN(sketch.getMaxValue())); + assertTrue(Double.isNaN(sketch.getQuantile(0.5))); + assertNull(sketch.getQuantiles(new double[] {0})); + assertNull(sketch.getPMF(new double[] {0})); + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneItem() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1), 0.0); + assertEquals(sketch.getRank(2), 1.0); + assertEquals(sketch.getMinValue(), 1f); + assertEquals(sketch.getMaxValue(), 1f); + assertEquals(sketch.getQuantile(0.5), 1f); + } + + @Test + public void manyItemsEstimationMode() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new double[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinValue(), 0f); // min value is exact + 
assertEquals(sketch.getQuantile(0), 0f); // min value is exact + assertEquals(sketch.getMaxValue(), n - 1f); // max value is exact + assertEquals(sketch.getQuantile(1), n - 1f); // max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = fractions[i]; + } + final double[] quantiles = sketch.getQuantiles(fractions); + final double[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final double quantile = sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + final int n = 1000; + final double[] values = new double[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i * 1.0); + sketch2.update((2 * n - i - 1) * 1.0); + } + + 
assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0); + + assertEquals(sketch2.getMinValue(), n * 1.0); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0); + assertEquals(sketch1.getQuantile(0.5), n * 1.0, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinValue(), 0.0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + + assertEquals(sketch2.getMinValue(), n); + assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + 
final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1.0F); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0); + for (int i = 1; i <= 1_000_000; i++) { + sketch1.update(i); + } + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 1_000_000F); + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = 
SketchesArgumentException.class) + public void kTooSmall() { + final KllDirectDoublesSketch sketch1 = getDDSketch(MIN_K - 1, 0); + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + final KllDirectDoublesSketch sketch1 = getDDSketch(MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllDirectDoublesSketch sketch = getDDSketch(MIN_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), MIN_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllDirectDoublesSketch sketch = getDDSketch(MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmpty() { //compact serialize then heapify using KllDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); + final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectDoublesSketch sketch2 = new 
KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeOneItem() { //compact serialize then heapify using KllDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES); + } + + @Test + public void serializeDeserializeOneItem2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectDoublesSketch sketch2 = new KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + 
assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES); + } + + @Test + public void serializeDeserialize() { //compact serialize then heapify using KllDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + final byte[] bytes = sketch1.toByteArray(); + final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserialize2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectDoublesSketch sketch2 = new KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + 
assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(0); + sketch.getCDF(new double[] {1, 0}); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void nanSplitPoint() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(0); + sketch.getCDF(new double[] {Double.NaN}); + } + + @Test + public void getQuantiles() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(1); + sketch.update(2); + sketch.update(3); + final double[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); + final double[] quantiles2 = sketch.getQuantiles(3); + assertEquals(quantiles1, quantiles2); + assertEquals(quantiles1[0], 1f); + assertEquals(quantiles1[1], 2f); + assertEquals(quantiles1[2], 3f); + } + + //@Test + public void checkSimpleMergeDirect() { //used for troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllDoublesSketch sk1 = new KllDoublesSketch(k); + KllDoublesSketch sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); + WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); + KllDirectDoublesSketch dsk1 = new KllDirectDoublesSketch(wmem1, new DefaultMemoryRequestServer()); + KllDirectDoublesSketch dsk2 = new KllDirectDoublesSketch(wmem2, new DefaultMemoryRequestServer()); + println("BEFORE MERGE"); + 
println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectDoubleUpdatableMem() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); 
+ assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isDirect()); + assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isDoublesSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isFloatsSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllDoublesSketch sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isDirect()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isDoublesSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isFloatsSketch()); + assertTrue(KllSketch.isCompatible()); + } + + + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { + KllDoublesSketch sk = new 
KllDoublesSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + return ddsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 6f68c467c..a171350d1 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -20,7 +20,6 @@ package org.apache.datasketches.kll; //import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.testng.Assert.assertEquals; @@ -93,8 +92,8 @@ public void manyItemsEstimationMode() { for (int i = 0; i < n; i++) { sketch.update(i); - assertEquals(sketch.getN(), i + 1); } + assertEquals(sketch.getN(), n); // test getRank for (int i = 0; i < n; i++) { @@ -130,7 +129,7 @@ public void manyItemsEstimationMode() { assertTrue(previousQuantile <= quantile); previousQuantile = quantile; } -} + } @Test public void getRankGetCdfGetPmfConsistency() { @@ -384,33 +383,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getMaxSerializedSizeBytes() { - final int sizeBytes = - KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); - assertEquals(sizeBytes, 5708); - } - - @Test - public void checkUbOnNumLevels() { - 
assertEquals(KllHelper.ubOnNumLevels(0), 1); - } - - @Test - public void checkIntCapAux() { - int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); - assertEquals(lvlCap, 8); - lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); - assertEquals(lvlCap, 10); - } - - @Test - public void checkSuperLargeKandLevels() { - //This is beyond what the sketch can be configured for. - final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); - assertEquals(size, 1_610_612_846); - } - @Test public void getQuantiles() { final KllDoublesSketch sketch = new KllDoublesSketch(); diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java index 64bf8e353..61d33f44b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java @@ -218,7 +218,7 @@ public void checkTestResults() { int numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); int[] levels = sketch.getLevelsArray(); - long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); + long hashedSamples = simpleHashOfSubArray(sketch.getDoubleItemsArray(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; assert correctResultsWithReset[(7 * testI) + 5] == numSamples; diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 50e429956..3a10d4030 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static 
org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.apache.datasketches.Util.getResourceBytes; @@ -384,33 +383,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getMaxSerializedSizeBytes() { - final int sizeBytes = - KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30); - assertEquals(sizeBytes, 2908); - } - - @Test - public void checkUbOnNumLevels() { - assertEquals(KllHelper.ubOnNumLevels(0), 1); - } - - @Test - public void checkIntCapAux() { - int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); - assertEquals(lvlCap, 8); - lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); - assertEquals(lvlCap, 10); - } - - @Test - public void checkSuperLargeKandLevels() { - //This is beyond what the sketch can be configured for. - final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); - assertEquals(size, 1_610_612_846); - } - @Test public void getQuantiles() { final KllFloatsSketch sketch = new KllFloatsSketch(); @@ -425,4 +397,19 @@ public void getQuantiles() { assertEquals(quantiles1[2], 3f); } + @SuppressWarnings("deprecation") + @Test + public void checkDeprecatedMethods() { + final int k = 200; + final int n = 200; + int bytes = KllFloatsSketch.getMaxSerializedSizeBytes(k, n); //assumed float before + assertEquals(bytes, 832); + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + final byte[] byteArr = sk.toByteArray(); + assertEquals(byteArr.length, 832); + bytes = sk.getSerializedSizeBytes(); //defaults to compact + assertEquals(bytes, 832); + } + } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java index 39f5e6267..9d3227c5b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java @@ -219,7 +219,7 @@ public void checkTestResults() { int 
numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); int[] levels = sketch.getLevelsArray(); - long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); + long hashedSamples = simpleHashOfSubArray(sketch.getFloatItemsArray(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; assert correctResultsWithReset[(7 * testI) + 5] == numSamples; diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 97d09cbd5..0fd08ad1d 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -21,11 +21,13 @@ import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllHelper.getLevelStats; -import static org.apache.datasketches.kll.KllPreambleUtil.SketchType.DOUBLE_SKETCH; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.testng.Assert.assertEquals; import org.apache.datasketches.kll.KllHelper.LevelStats; -import org.apache.datasketches.kll.KllPreambleUtil.SketchType; +import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; @@ -37,16 +39,27 @@ public void testGetAllLevelStats() { long n = 1L << 30; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, DOUBLE_SKETCH); + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, DOUBLES_SKETCH); assertEquals(lvlStats.getCompactBytes(), 5708); } + @Test + public void checkGetKFromEps() { + final int k = DEFAULT_K; + final double eps = KllHelper.getNormalizedRankError(k, false); + 
final double epsPmf = KllHelper.getNormalizedRankError(k, true); + final int kEps = KllSketch.getKFromEpsilon(eps, false); + final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); + assertEquals(kEps, k); + assertEquals(kEpsPmf, k); + } + @Test //convert two false below to true for visual checking public void getStatsAtNumLevels() { int k = 200; int m = 8; int numLevels = 23; - LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, DOUBLE_SKETCH); + LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, DOUBLES_SKETCH); assertEquals(lvlStats.getCompactBytes(), 5708); } @@ -87,22 +100,73 @@ public void checkUpdatableSerDe() { assertEquals(sk2.getNumRetained(), retained); } + @Test + public void getMaxCompactFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, FLOATS_SKETCH, false); + assertEquals(sizeBytes, 2908); + } + + @Test + public void getMaxUpdatableFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, FLOATS_SKETCH, true); + assertEquals(sizeBytes, 2912); + } + + + @Test + public void getMaxCompactDoublesSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, DOUBLES_SKETCH, false); + assertEquals(sizeBytes, 5708); + } + + @Test + public void getMaxUpdatableDoubleSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, DOUBLES_SKETCH, true); + assertEquals(sizeBytes, 5712); + } + + @Test + public void checkUbOnNumLevels() { + assertEquals(KllHelper.ubOnNumLevels(0), 1); + } + + @Test + public void checkIntCapAux() { + int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); + assertEquals(lvlCap, 8); + lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); + assertEquals(lvlCap, 10); + } + + @Test + public void checkSuperLargeKandLevels() { + //This is beyond what the sketch can be configured for. 
+ final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); + assertEquals(size, 1_610_612_846); + } + + //Experimental - //@Test //convert two false below to true for visual checking + @Test public void testGetAllLevelStats2() { long n = 533; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, DOUBLE_SKETCH); + LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, DOUBLES_SKETCH); + assertEquals(lvlStats.getNumLevels(), 2); + assertEquals(lvlStats.getMaxCap(), 333); + } - //@Test + @Test public void getStatsAtNumLevels2() { int k = 20; int m = 8; int numLevels = 2; - LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, DOUBLE_SKETCH); + LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, DOUBLES_SKETCH); + assertEquals(lvlStats.getNumLevels(), 2); + assertEquals(lvlStats.getMaxCap(), 33); } /** diff --git a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java new file mode 100644 index 000000000..143570d3b --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllPreambleUtil.*; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +@SuppressWarnings("unused") +public class MemoryValidateTest { + + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFamily() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFamilyID(wmem, 14); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertSerVer(wmem, 4); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidEmptyAndSingle() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 5); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidUpdatableAndSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 16); + insertSerVer(wmem, 2); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidPreIntsAndSingle() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + 
WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 16); + insertSerVer(wmem, 2); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSerVerAndSingle2() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 4); + insertSerVer(wmem, 1); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidPreIntsAndSingle2() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 4); + insertPreInts(wmem, 1); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidPreIntsAndDouble() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 8); + insertPreInts(wmem, 6); + insertSerVer(wmem, 2); + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidDoubleCompactAndSingle() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 12); //double & single + insertPreInts(wmem, 2);//should be 2 + insertSerVer(wmem, 1); //should be 2 + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidDoubleUpdatableAndSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + 
insertSerVer(wmem, 3); + insertFlags(wmem, 24); //double & updatable + insertPreInts(wmem, 5);//should be 6 + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFloatFullAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 0); //float full + insertSerVer(wmem, 2); //should be 1 + insertPreInts(wmem, 5);//should be 5 + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFloatUpdatableFullAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 16); //float updatable full + insertSerVer(wmem, 3); //should be 3 + insertPreInts(wmem, 6);//should be 5 + MemoryValidate memVal = new MemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidDoubleCompactSingleAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + insertFlags(wmem, 12); //double & single + insertPreInts(wmem, 5);//should be 2 + insertSerVer(wmem, 2); //should be 2 + MemoryValidate memVal = new MemoryValidate(wmem); + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java new file mode 100644 index 000000000..60e2fd8cd --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -0,0 +1,444 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.util.Objects; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class MiscDirectDoublesTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + + @Test + public void checkBounds() { + final KllDirectDoublesSketch sk = getDDSketch(200, 0); + for (int i = 0; i < 1000; i++) { + sk.update(i); + } + final double eps = sk.getNormalizedRankError(false); + final double est = sk.getQuantile(0.5); + final double ub = sk.getQuantileUpperBound(0.5); + final double lb = sk.getQuantileLowerBound(0.5); + assertEquals(ub, sk.getQuantile(.5 + eps)); + assertEquals(lb, sk.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + } + + @Test + public void checkMisc() { + final KllDirectDoublesSketch sk = getDDSketch(8, 0); + assertTrue(Objects.isNull(sk.getQuantiles(10))); + //sk.toString(true, true); + for (int i = 0; i 
< 20; i++) { sk.update(i); } + //sk.toString(true, true); + //sk.toByteArray(); + final double[] items = sk.getDoubleItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + //@Test //enable static println(..) for visual checking + public void visualCheckToString() { + final KllDirectDoublesSketch sk = getDDSketch(20, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllDirectDoublesSketch sk2 = getDDSketch(20, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + @Test + public void viewCompactions() { + final KllDirectDoublesSketch sk = getDDSketch(20, 0); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDirectDoublesSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeDoubleHeap() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + + //println("#### CASE: DOUBLE FULL HEAP"); + sk = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), 
"DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + //assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP EMPTY"); + sk = getDDSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + //assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + //assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP SINGLE"); + sk = getDDSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + //assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void 
checkSketchInitializeDoubleHeapifyCompactMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + //assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + //assertEquals(sk.getMaxFloatValue(), Float.NaN); + 
assertEquals(sk.getMinDoubleValue(), Double.NaN); + //assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + //assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyUpdatableMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + 
assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + //assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + //assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + //assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getFloatItemsArray())); + 
assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + //assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinDoubleValue(), 1.0); + //assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringDoubleUpdatable() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: DOUBLE FULL UPDATABLE"); + sk = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE EMPTY UPDATABLE"); + sk = getDDSketch(k, 0); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE SINGLE UPDATABL"); + sk = getDDSketch(k, 0); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDirectDoublesSketch sk1 = getDDSketch(k, 0); + KllDirectDoublesSketch sk2 = getDDSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxValue(), 121.0); + assertEquals(sk1.getMinValue(), 1.0); + } + + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { + KllDoublesSketch sk = new KllDoublesSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + return ddsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 
aaf88d5e6..af5234c18 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -19,15 +19,14 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import java.util.Objects; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.SketchesArgumentException; import org.testng.annotations.Test; /** @@ -37,17 +36,6 @@ public class MiscDoublesTest { static final String LS = System.getProperty("line.separator"); - @Test - public void checkGetKFromEps() { - final int k = DEFAULT_K; - final double eps = KllHelper.getNormalizedRankError(k, false); - final double epsPmf = KllHelper.getNormalizedRankError(k, true); - final int kEps = KllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); - assertEquals(kEps, k); - assertEquals(kEpsPmf, k); - } - @Test public void checkBounds() { final KllDoublesSketch kll = new KllDoublesSketch(); //default k = 200 @@ -115,14 +103,14 @@ public void checkMisc() { for (int i = 0; i < 20; i++) { sk.update(i); } sk.toString(true, true); sk.toByteArray(); - final double[] items = sk.getItems(); + final double[] items = sk.getDoubleItemsArray(); assertEquals(items.length, 16); final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); assertEquals(sk.getNumLevels(), 2); } - @Test //enable static println(..) for visual checking + //@Test //enable static println(..) 
for visual checking public void visualCheckToString() { final KllDoublesSketch sketch = new KllDoublesSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } @@ -185,7 +173,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); @@ -206,7 +193,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -228,7 +214,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -263,7 +248,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); @@ -288,7 +272,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -314,7 +297,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 
1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -349,7 +331,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); @@ -374,7 +355,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -400,93 +380,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); - assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); - assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), 1.0); - assertEquals(sk.getMaxFloatValue(), 1.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); - assertEquals(sk.getNumLevels(), 1); - assertFalse(sk.isLevelZeroSorted()); - } - - //@Test //TODO Work on Direct - public void checkSketchInitializeDirectDoubleUpdatableMem() { - int k = 20; //don't change this - KllDirectDoublesSketch sk; - KllDoublesSketch sk2; - byte[] compBytes; - WritableMemory wmem; - - println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE"); - sk2 = new KllDoublesSketch(k); - for (int i = 1; i <= k + 
1; i++) { sk2.update(i); } - //println(sk2.toString(true, true)); - compBytes = sk2.toUpdatableByteArray(); - wmem = WritableMemory.writableWrap(compBytes); - println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem); - assertEquals(sk.getK(), k); - assertEquals(sk.getN(), k + 1); - assertEquals(sk.getNumRetained(), 11); - assertFalse(sk.isEmpty()); - assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); - assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); - assertEquals(sk.getLevelsArray().length, 3); - assertEquals(sk.getMaxDoubleValue(), 21.0); - assertEquals(sk.getMaxFloatValue(), 21.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); - assertEquals(sk.getNumLevels(), 2); - assertFalse(sk.isLevelZeroSorted()); - - println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); - sk2 = new KllDoublesSketch(k); - //println(sk.toString(true, true)); - compBytes = sk2.toUpdatableByteArray(); - wmem = WritableMemory.writableWrap(compBytes); - println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem); - assertEquals(sk.getK(), k); - assertEquals(sk.getN(), 0); - assertEquals(sk.getNumRetained(), 0); - assertTrue(sk.isEmpty()); - assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); - assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); - assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), Double.NaN); - assertEquals(sk.getMaxFloatValue(), Float.NaN); - assertEquals(sk.getMinDoubleValue(), Double.NaN); - assertEquals(sk.getMinFloatValue(), Float.NaN); - assertEquals(sk.getNumLevels(), 1); - assertFalse(sk.isLevelZeroSorted()); - - println("#### CASE: DOUBLE SINGLE 
HEAPIFIED FROM UPDATABLE"); - sk2 = new KllDoublesSketch(k); - sk2.update(1); - //println(sk.toString(true, true)); - compBytes = sk2.toUpdatableByteArray(); - wmem = WritableMemory.writableWrap(compBytes); - println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem); - assertEquals(sk.getK(), k); - assertEquals(sk.getN(), 1); - assertEquals(sk.getNumRetained(), 1); - assertFalse(sk.isEmpty()); - assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); @@ -616,6 +509,25 @@ public void checkMemoryToStringDoubleUpdatable() { assertEquals(upBytes, upBytes2); } + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDoublesSketch sk1 = new KllDoublesSketch(k); + KllDoublesSketch sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + } + @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 81437d957..6dc4160f3 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -37,17 +36,6 @@ public class MiscFloatsTest { static final String LS = System.getProperty("line.separator"); - @Test - public void checkGetKFromEps() { - final int k = 
DEFAULT_K; - final double eps = KllHelper.getNormalizedRankError(k, false); - final double epsPmf = KllHelper.getNormalizedRankError(k, true); - final int kEps = KllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); - assertEquals(kEps, k); - assertEquals(kEpsPmf, k); - } - @Test public void checkBounds() { final KllFloatsSketch kll = new KllFloatsSketch(); //default k = 200 @@ -115,14 +103,14 @@ public void checkMisc() { for (int i = 0; i < 20; i++) { sk.update(i); } sk.toString(true, true); sk.toByteArray(); - final float[] items = sk.getItems(); + final float[] items = sk.getFloatItemsArray(); assertEquals(items.length, 16); final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); assertEquals(sk.getNumLevels(), 2); } - @Test //enable static println(..) for visual checking + //@Test //enable static println(..) for visual checking public void visualCheckToString() { final KllFloatsSketch sketch = new KllFloatsSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } @@ -185,7 +173,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); @@ -206,7 +193,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -228,7 +214,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); 
assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -263,7 +248,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); @@ -288,7 +272,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -314,7 +297,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -349,7 +331,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); @@ -374,7 +355,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -400,7 +380,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); 
assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertTrue(sk.isUpdatable()); assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); @@ -530,6 +509,25 @@ public void checkMemoryToStringFloatUpdatable() { assertEquals(upBytes, upBytes2); } + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 43; + KllFloatsSketch sk1 = new KllFloatsSketch(k); + KllFloatsSketch sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + } + @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); From 5e3dd401fa7f90b0213e48b825849b6b04faf237 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 25 Mar 2022 19:24:56 -0700 Subject: [PATCH 12/31] Iteration 11, ready for review. 
--- .../kll/KllDirectDoublesSketch.java | 2 - .../kll/KllDirectFloatsSketch.java | 249 ++++++++ .../apache/datasketches/kll/KllSketch.java | 40 +- .../apache/datasketches/kll/package-info.java | 28 +- .../KllDirectDoublesSketchIteratorTest.java | 78 +++ .../kll/KllDirectDoublesSketchTest.java | 36 +- .../KllDirectFloatsSketchIteratorTest.java | 78 +++ .../kll/KllDirectFloatsSketchTest.java | 594 ++++++++++++++++++ .../kll/KllDoublesSketchTest.java | 34 +- .../kll/MiscDirectDoublesTest.java | 21 +- .../kll/MiscDirectFloatsTest.java | 425 +++++++++++++ 11 files changed, 1519 insertions(+), 66 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 99da03077..45adf4a20 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -44,8 +44,6 @@ public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServ super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr); } - //public int getNumRetained() - /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). 
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java new file mode 100644 index 000000000..9eb52f302 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.max; +import static java.lang.Math.min; + +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements an off-heap floats KllSketch via a WritableMemory instance of the sketch. + * + *

Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}

+ * + * @author Lee Rhodes, Kevin Lang + */ +public class KllDirectFloatsSketch extends KllDirectSketch { + + /** + * + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + */ + public KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) { + super(SketchType.FLOATS_SKETCH, wmem, memReqSvr); + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *

If the sketch is empty this returns null.

+ * + * @param splitPoints an array of m unique, monotonically increasing float values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 double values, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. + */ + public double[] getCDF(final float[] splitPoints) { + return getFloatsPmfOrCdf(splitPoints, true); + } + + /** + * Returns the max value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the max value of the stream + */ + public float getMaxValue() { + return getMaxFloatValue(); + } + + /** + * Returns the min value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the min value of the stream + */ + public float getMinValue() { + return getMinFloatValue(); + } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of splitPoints (values). + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function. + * + *

If the sketch is empty this returns null.

+ * + * @param splitPoints an array of m unique, monotonically increasing float values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right + * splitPoint, with the exception that the last interval will include maximum value. + */ + public double[] getPMF(final float[] splitPoints) { + return getFloatsPmfOrCdf(splitPoints, false); + } + + /** + * Returns an approximation to the value of the data item + * that would be preceded by the given fraction of a hypothetical sorted + * version of the input stream so far. + * + *

We note that this method has a fairly large overhead (microseconds instead of nanoseconds) + * so it should not be called multiple times to get different quantiles from the same + * sketch. Instead use getQuantiles(), which pays the overhead only once. + * + *

If the sketch is empty this returns NaN. + * + * @param fraction the specified fractional position in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * If fraction = 0.0, the true minimum value of the stream is returned. + * If fraction = 1.0, the true maximum value of the stream is returned. + * + * @return the approximation to the value at the given fraction + */ + public float getQuantile(final double fraction) { + return getFloatsQuantile(fraction); + } + + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + + /** + * This is a more efficient multiple-query version of getQuantile(). + * + *

This returns an array that could have been generated by using getQuantile() with many + * different fractional ranks, but would be very inefficient. + * This method incurs the internal set-up overhead once and obtains multiple quantile values in + * a single query. It is strongly recommended that this method be used instead of multiple calls + * to getQuantile(). + * + *

If the sketch is empty this returns null. + * + * @param fractions given array of fractional positions in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * These fractions must be in the interval [0.0, 1.0], inclusive. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public float[] getQuantiles(final double[] fractions) { + return getFloatsQuantiles(fractions); + } + + /** + * This is also a more efficient multiple-query version of getQuantile() and allows the caller to + * specify the number of evenly spaced fractional ranks. + * + *

If the sketch is empty this returns null. + * + * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks. + * This must be a positive integer greater than 0. A value of 1 will return the min value. + * A value of 2 will return the min and the max value. A value of 3 will return the min, + * the median and the max value, etc. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public float[] getQuantiles(final int numEvenlySpaced) { + if (isEmpty()) { return null; } + return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); + } + + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + } + + /** + * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, + * inclusive. + * + *

The resulting approximation has a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *

If the sketch is empty this returns NaN.

+ * + * @param value to be ranked + * @return an approximate rank of the given value + */ + public double getRank(final float value) { + return getFloatRank(value); + } + + /** + * @return the iterator for this class + */ + public KllFloatsSketchIterator iterator() { + return new KllFloatsSketchIterator(getFloatItemsArray(), getLevelsArray(), getNumLevels()); + } + + /** + * Merges another sketch into this one. + * @param other sketch to merge into this one + */ + public void merge(final KllSketch other) { + if (!other.isDirect()) { kllSketchThrow(32); } + if (!other.isFloatsSketch()) { kllSketchThrow(34); } + mergeFloatImpl(other); + } + + @Override + public byte[] toByteArray() { + return toCompactByteArrayImpl(); + } + + @Override + public String toString(final boolean withLevels, final boolean withData) { + return toStringImpl(withLevels, withData); + } + + public void update(final float value) { + updateFloat(value); + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index fb4252cd8..aeb67d2f0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -729,8 +729,8 @@ final void mergeDoubleImpl(final KllSketch other) { //Update min, max values final double otherMin = other.getMinDoubleValue(); final double otherMax = other.getMaxDoubleValue(); - if (Double.isNaN(myMin) || otherMin <= myMin) { setMinDoubleValue(otherMin); } - if (Double.isNaN(myMax) || otherMax >= myMax) { setMaxDoubleValue(otherMax); } + setMinDoubleValue(resolveDoubleMinValue(myMin, otherMin)); + setMaxDoubleValue(resolveDoubleMaxValue(myMax, otherMax)); //Update numLevels, levelsArray, items setNumLevels(myNewNumLevels); @@ -739,6 +739,21 @@ final void mergeDoubleImpl(final KllSketch other) { assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); } + private static double resolveDoubleMinValue(final 
double myMin, final double otherMin) { + if (Double.isNaN(myMin) && Double.isNaN(otherMin)) { return Double.NaN; } + if (Double.isNaN(myMin)) { return otherMin; } + if (Double.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + + private static double resolveDoubleMaxValue(final double myMax, final double otherMax) { + if (Double.isNaN(myMax) && Double.isNaN(otherMax)) { return Double.NaN; } + if (Double.isNaN(myMax)) { return otherMax; } + if (Double.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + final void mergeFloatImpl(final KllSketch other) { if (other.isEmpty()) { return; } final long finalN = getN() + other.getN(); @@ -825,8 +840,8 @@ final void mergeFloatImpl(final KllSketch other) { //Update min, max values final float otherMin = other.getMinFloatValue(); final float otherMax = other.getMaxFloatValue(); - if (Float.isNaN(myMin) || otherMin < myMin) { setMinFloatValue(otherMin); } - if (Float.isNaN(myMax) || otherMax > myMax) { setMaxFloatValue(otherMax); } + setMinFloatValue(resolveFloatMinValue(myMin, otherMin)); + setMaxFloatValue(resolveFloatMaxValue(myMax, otherMax)); //Update numLevels, levelsArray, items setNumLevels(myNewNumLevels); @@ -835,6 +850,21 @@ final void mergeFloatImpl(final KllSketch other) { assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); } + private static float resolveFloatMinValue(final float myMin, final float otherMin) { + if (Float.isNaN(myMin) && Float.isNaN(otherMin)) { return Float.NaN; } + if (Float.isNaN(myMin)) { return otherMin; } + if (Float.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + + private static float resolveFloatMaxValue(final float myMax, final float otherMax) { + if (Float.isNaN(myMax) && Float.isNaN(otherMax)) { return Float.NaN; } + if (Float.isNaN(myMax)) { return otherMax; } + if (Float.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + abstract void setDoubleItemsArray(double[] 
floatItems); abstract void setDoubleItemsArrayAt(int index, double value); @@ -1381,7 +1411,7 @@ private void compressWhileUpdatingSketch() { } else { if (direct) { myFloatItemsArr = getFloatItemsArray(); - System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); + System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); setFloatItemsArray(myFloatItemsArr); } else { System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); diff --git a/src/main/java/org/apache/datasketches/kll/package-info.java b/src/main/java/org/apache/datasketches/kll/package-info.java index 141c25f8a..6dcc6c20d 100644 --- a/src/main/java/org/apache/datasketches/kll/package-info.java +++ b/src/main/java/org/apache/datasketches/kll/package-info.java @@ -35,16 +35,17 @@ * *

The normalized rank (rank) of any specific value is defined as its * absolute rank divided by N. - * Thus, the normalized rank is a value between zero and one. + * Thus, the normalized rank is a value in the interval [0.0, 1.0), exclusive. * In the documentation and Javadocs for this sketch absolute rank is never used so any * reference to just rank should be interpreted to mean normalized rank. * *

This sketch is configured with a parameter k, which affects the size of the sketch * and its estimation error. * - *

The estimation error is commonly called epsilon (or eps) and is a fraction - * between zero and one. Larger values of k result in smaller values of epsilon. - * Epsilon is always with respect to the rank and cannot be applied to the + *

In the research literature, the estimation error is commonly called epsilon + * (or eps) and is a fraction between zero and one. + * Larger values of k result in smaller values of epsilon. + * The epsilon error is always with respect to the rank and cannot be applied to the * corresponding values. * *

The relationship between the normalized rank and the corresponding values can be viewed @@ -147,6 +148,25 @@ *

  • Then vlo ≤ v ≤ vhi, with 99% confidence.
  • * * + *

    The current implementations of the KLL sketch in the DataSketches Java library component include:

    + * + *
      + *
    • KllFloatsSketch: This operates on the Java heap and uses the Java float primitive for the + * smallest possible size. It can be serialized to a compact, immutable form or to an updatable form suitable for + * use by the Kll Direct sketches.
    • + *
    • KllDoublesSketch: This operates on the Java heap and uses the Java double primitive for a much + * larger range of numeric values, and is larger as a result. It can be serialized to a compact, immutable form or + * to an updatable form suitable for use by the Kll Direct sketches.
    • + *
    • KllDirectFloatsSketch: This is intended to operate off-heap and performs all of its operations in one + * contiguous chunk of memory. It uses the Java float primitive for the smallest possible size off-heap.
    • + *
    • KllDirectDoublesSketch: This is intended to operate off-heap and performs all of its operations in one + * contiguous chunk of memory. It uses the Java double primitive for a much larger range of numeric values, + * and is larger as a result.
    • + *
    + * + *

    Please visit our website: DataSketches Home Page for more + * information.

    + * * @author Kevin Lang * @author Alexander Saydakov * @author Lee Rhodes diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java new file mode 100644 index 000000000..bb325a44e --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class KllDirectDoublesSketchIteratorTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void emptySketch() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + KllDoublesSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + sketch.update(0); + KllDoublesSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getValue(), 0f); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + final KllDirectDoublesSketch sketch = getDDSketch(200, 0); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + KllDoublesSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { + KllDoublesSketch sk = new KllDoublesSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + return ddsk; + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 572b3b1ba..a12a50cac 100644 --- 
a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -84,9 +84,9 @@ public void oneItem() { assertEquals(sketch.getNumRetained(), 1); assertEquals(sketch.getRank(1), 0.0); assertEquals(sketch.getRank(2), 1.0); - assertEquals(sketch.getMinValue(), 1f); - assertEquals(sketch.getMaxValue(), 1f); - assertEquals(sketch.getQuantile(0.5), 1f); + assertEquals(sketch.getMinValue(), 1.0); + assertEquals(sketch.getMaxValue(), 1.0); + assertEquals(sketch.getQuantile(0.5), 1.0); } @Test @@ -209,8 +209,8 @@ public void mergeLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2 * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), 2.0 * n - 1.0); assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); } @@ -230,17 +230,17 @@ public void mergeEmptyLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256); //merge the other way sketch2.merge(sketch1); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256); } @Test @@ -266,7 +266,7 @@ public void mergeMinMinValueFromOther() { sketch1.update(1); sketch2.update(2); sketch2.merge(sketch1); - 
assertEquals(sketch2.getMinValue(), 1.0F); + assertEquals(sketch2.getMinValue(), 1.0); } @Test @@ -277,8 +277,8 @@ public void mergeMinAndMaxFromOther() { sketch1.update(i); } sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 1F); - assertEquals(sketch2.getMaxValue(), 1_000_000F); + assertEquals(sketch2.getMinValue(), 1); + assertEquals(sketch2.getMaxValue(), 1_000_000); } @SuppressWarnings("unused") @@ -436,12 +436,12 @@ public void getQuantiles() { final double[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); final double[] quantiles2 = sketch.getQuantiles(3); assertEquals(quantiles1, quantiles2); - assertEquals(quantiles1[0], 1f); - assertEquals(quantiles1[1], 2f); - assertEquals(quantiles1[2], 3f); + assertEquals(quantiles1[0], 1.0); + assertEquals(quantiles1[1], 2.0); + assertEquals(quantiles1[2], 3.0); } - //@Test + @Test public void checkSimpleMergeDirect() { //used for troubleshooting int k = 20; int n1 = 21; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java new file mode 100644 index 000000000..60ea42c6a --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class KllDirectFloatsSketchIteratorTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void emptySketch() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + KllFloatsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(0); + KllFloatsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getValue(), 0f); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + KllFloatsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + 
WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + return dfsk; + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java new file mode 100644 index 000000000..f7ce55533 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -0,0 +1,594 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class KllDirectFloatsSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(Float.NaN); // this must not change anything + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + assertTrue(Double.isNaN(sketch.getRank(0))); + assertTrue(Float.isNaN(sketch.getMinValue())); + assertTrue(Float.isNaN(sketch.getMaxValue())); + assertTrue(Float.isNaN(sketch.getQuantile(0.5))); + assertNull(sketch.getQuantiles(new double[] {0})); + assertNull(sketch.getPMF(new float[] {0})); + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void 
getQuantileInvalidArg() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneItem() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1), 0.0); + assertEquals(sketch.getRank(2), 1.0); + assertEquals(sketch.getMinValue(), 1f); + assertEquals(sketch.getMaxValue(), 1f); + assertEquals(sketch.getQuantile(0.5), 1f); + } + + @Test + public void manyItemsEstimationMode() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new float[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinValue(), 0f); // min value is exact + assertEquals(sketch.getQuantile(0), 0f); // min value is exact + assertEquals(sketch.getMaxValue(), n - 1f); // max value is exact + assertEquals(sketch.getQuantile(1), n - 1f); // max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = 
fractions[i]; + } + final float[] quantiles = sketch.getQuantiles(fractions); + final float[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final double quantile = sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + final int n = 1000; + final float[] values = new float[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i * 1.0F); + sketch2.update((2 * n - i - 1) * 1.0F); + } + + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0); + + assertEquals(sketch2.getMinValue(), n * 1.0); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0F); + assertEquals(sketch1.getQuantile(0.5), n * 1.0F, n * PMF_EPS_FOR_K_256); + } + + 
@Test + public void mergeLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinValue(), 0.0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + + assertEquals(sketch2.getMinValue(), n); + assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + 
assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1.0F); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllDirectFloatsSketch sketch1 = getDFSketch(8, 0); //was 200 + final KllDirectFloatsSketch sketch2 = getDFSketch(8, 0); //was 200 + for (int i = 1; i <= 9; i++) { //was 1_000_000 + sketch1.update(i); + } + //System.out.println(sketch1.toString(true, true)); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 9F); //was 1_000_000 + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + final KllDirectFloatsSketch sketch1 = getDFSketch(MIN_K - 1, 0); + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + final KllDirectFloatsSketch sketch1 = getDFSketch(MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllDirectFloatsSketch sketch = 
getDFSketch(MIN_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), MIN_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllDirectFloatsSketch sketch = getDFSketch(MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmpty() { //compact serialize then heapify using KllFloatsSketch + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectFloatsSketch + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + 
assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeOneItem() { //compact serialize then heapify using KllFloatsSketch + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); + } + + @Test + public void serializeDeserializeOneItem2() { //updatable serialize then new (loaded) KllDirectFloatsSketch + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); + } + + @Test + public void serializeDeserialize() { //compact serialize then heapify using KllFloatsSketch + final KllDirectFloatsSketch sketch1 
= getDFSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserialize2() { //updatable serialize then new (loaded) KllDirectFloatsSketch + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(0); + sketch.getCDF(new float[] {1, 0}); + } 
+ + @Test(expectedExceptions = SketchesArgumentException.class) + public void nanSplitPoint() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(0); + sketch.getCDF(new float[] {Float.NaN}); + } + + @Test + public void getQuantiles() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.update(2); + sketch.update(3); + final float[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); + final float[] quantiles2 = sketch.getQuantiles(3); + assertEquals(quantiles1, quantiles2); + assertEquals(quantiles1[0], 1f); + assertEquals(quantiles1[1], 2f); + assertEquals(quantiles1[2], 3f); + } + + @Test + public void checkSimpleMergeDirect() { //used for troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllFloatsSketch sk1 = new KllFloatsSketch(k); + KllFloatsSketch sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); + WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); + KllDirectFloatsSketch dsk1 = new KllDirectFloatsSketch(wmem1, new DefaultMemoryRequestServer()); + KllDirectFloatsSketch dsk2 = new KllDirectFloatsSketch(wmem2, new DefaultMemoryRequestServer()); + println("BEFORE MERGE"); + println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectDoubleUpdatableMem() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + 
//println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectFloatsSketch(wmem, new DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0); + assertEquals(sk.getMinFloatValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectFloatsSketch(wmem, new DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = new KllDirectFloatsSketch(wmem, new 
DefaultMemoryRequestServer()); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isDirect()); + assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isFloatsSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isDoublesSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllFloatsSketch sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isDirect()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isFloatsSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isDoublesSketch()); + assertTrue(KllSketch.isCompatible()); + } + + + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); 
//disable here + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index a171350d1..081903be8 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -80,9 +80,9 @@ public void oneItem() { assertEquals(sketch.getNumRetained(), 1); assertEquals(sketch.getRank(1), 0.0); assertEquals(sketch.getRank(2), 1.0); - assertEquals(sketch.getMinValue(), 1f); - assertEquals(sketch.getMaxValue(), 1f); - assertEquals(sketch.getQuantile(0.5), 1f); + assertEquals(sketch.getMinValue(), 1.0); + assertEquals(sketch.getMaxValue(), 1.0); + assertEquals(sketch.getQuantile(0.5), 1.0); } @Test @@ -193,7 +193,7 @@ public void mergeLowerK() { assertEquals(sketch1.getMaxValue(), n - 1f); assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + assertEquals(sketch2.getMaxValue(), 2f * n - 1.0); assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -205,8 +205,8 @@ public void mergeLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2 * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), 2f * n - 1.0); assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); } @@ -226,17 +226,17 @@ public void mergeEmptyLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), 
n / 2.0, n / 2 * PMF_EPS_FOR_K_256); //merge the other way sketch2.merge(sketch1); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256); } @Test @@ -262,7 +262,7 @@ public void mergeMinMinValueFromOther() { sketch1.update(1); sketch2.update(2); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 1.0F); + assertEquals(sketch2.getMinValue(), 1.0); } @Test @@ -273,8 +273,8 @@ public void mergeMinAndMaxFromOther() { } final KllDoublesSketch sketch2 = new KllDoublesSketch(); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 1F); - assertEquals(sketch2.getMaxValue(), 1_000_000F); + assertEquals(sketch2.getMinValue(), 1); + assertEquals(sketch2.getMaxValue(), 1_000_000); } @SuppressWarnings("unused") @@ -392,9 +392,9 @@ public void getQuantiles() { final double[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); final double[] quantiles2 = sketch.getQuantiles(3); assertEquals(quantiles1, quantiles2); - assertEquals(quantiles1[0], 1f); - assertEquals(quantiles1[1], 2f); - assertEquals(quantiles1[2], 3f); + assertEquals(quantiles1[0], 1.0); + assertEquals(quantiles1[1], 2.0); + assertEquals(quantiles1[2], 3.0); } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java index 60e2fd8cd..f3b6d7d82 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -33,7 +33,6 @@ public class MiscDirectDoublesTest { static final String LS = System.getProperty("line.separator"); private static final DefaultMemoryRequestServer memReqSvr = new 
DefaultMemoryRequestServer(); - @Test public void checkBounds() { final KllDirectDoublesSketch sk = getDDSketch(200, 0); @@ -81,7 +80,7 @@ public void visualCheckToString() { println(LS + s2); } - @Test + //@Test public void viewCompactions() { final KllDirectDoublesSketch sk = getDDSketch(20, 0); show(sk, 20); @@ -125,9 +124,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - //assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -145,9 +142,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - //assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); - //assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -166,9 +161,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - //assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } @@ -200,9 +193,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - //assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); 
assertFalse(sk.isLevelZeroSorted()); @@ -224,9 +215,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - //assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); - //assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -249,9 +238,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - //assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } @@ -283,9 +270,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - //assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -307,9 +292,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - //assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); - //assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -332,9 +315,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - 
//assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - //assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java new file mode 100644 index 000000000..27094c5a2 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.util.Objects; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class MiscDirectFloatsTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllDirectFloatsSketch sk = getDFSketch(200, 0); + for (int i = 0; i < 1000; i++) { + sk.update(i); + } + final double eps = sk.getNormalizedRankError(false); + final float est = sk.getQuantile(0.5); + final float ub = sk.getQuantileUpperBound(0.5); + final float lb = sk.getQuantileLowerBound(0.5); + assertEquals(ub, sk.getQuantile(.5 + eps)); + assertEquals(lb, sk.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + } + + @Test + public void checkMisc() { + final KllDirectFloatsSketch sk = getDFSketch(8, 0); + assertTrue(Objects.isNull(sk.getQuantiles(10))); + //sk.toString(true, true); + for (int i = 0; i < 20; i++) { sk.update(i); } + //sk.toString(true, true); + //sk.toByteArray(); + final float[] items = sk.getFloatItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { + final KllDirectFloatsSketch sk = getDFSketch(20, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllDirectFloatsSketch sk2 = getDFSketch(20, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + //@Test + public void viewCompactions() { + final KllDirectFloatsSketch sk = getDFSketch(20, 0); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDirectFloatsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeFloatHeap() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + + //println("#### CASE: DOUBLE FULL HEAP"); + sk = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP EMPTY"); + sk = getDFSketch(k, 0); + 
//println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP SINGLE"); + sk = getDFSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyCompactMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + 
assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0f); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + 
assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyUpdatableMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), 
"HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getDyMinK(), k); + assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLayout(), "HEAP"); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringFloatUpdatable() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: DOUBLE FULL UPDATABLE"); + sk = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE EMPTY UPDATABLE"); + sk = getDFSketch(k, 0); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE SINGLE UPDATABL"); + sk = getDFSketch(k, 0); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDirectFloatsSketch sk1 = getDFSketch(k, 0); + KllDirectFloatsSketch sk2 = getDFSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxValue(), 121.0F); + assertEquals(sk1.getMinValue(), 1.0F); + } + + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + //println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} + From 7407207060210d9f046b74a91eaef25e16b8d384 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 26 Mar 2022 13:07:24 -0700 Subject: [PATCH 13/31] Fixed the LGTM issues detected on the last push. 
--- .../kll/KllDirectDoublesSketch.java | 18 ++- .../kll/KllDirectFloatsSketch.java | 18 ++- .../datasketches/kll/KllDirectSketch.java | 76 ++++++------- .../datasketches/kll/KllDoublesHelper.java | 103 +++++++++--------- .../kll/KllDoublesQuantileCalculator.java | 71 ++++++------ .../datasketches/kll/KllDoublesSketch.java | 19 ++-- .../kll/KllDoublesSketchIterator.java | 41 ++++--- .../datasketches/kll/KllFloatsHelper.java | 102 +++++++++-------- .../kll/KllFloatsQuantileCalculator.java | 70 ++++++------ .../datasketches/kll/KllFloatsSketch.java | 68 ++++++------ .../kll/KllFloatsSketchIterator.java | 40 +++---- .../datasketches/kll/KllHeapSketch.java | 42 +++---- .../apache/datasketches/kll/KllHelper.java | 2 +- .../apache/datasketches/kll/KllSketch.java | 31 +++++- .../datasketches/kll/MemoryValidate.java | 32 ++++-- 15 files changed, 381 insertions(+), 352 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 45adf4a20..6d0911c8e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -60,10 +60,10 @@ public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServ * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. 
+ * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { return getDoublesPmfOrCdf(splitPoints, true); @@ -105,8 +105,9 @@ public double getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -242,6 +243,11 @@ public String toString(final boolean withLevels, final boolean withData) { return toStringImpl(withLevels, withData); } + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. + */ public void update(final double value) { updateDouble(value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 9eb52f302..7a7d928ee 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -60,10 +60,10 @@ public KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServe * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. 
The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final float[] splitPoints) { return getFloatsPmfOrCdf(splitPoints, true); @@ -105,8 +105,9 @@ public float getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -242,6 +243,11 @@ public String toString(final boolean withLevels, final boolean withData) { return toStringImpl(withLevels, withData); } + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. 
+ */ public void update(final float value) { updateFloat(value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index be181249b..b3ec22c2b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -35,10 +35,13 @@ import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; - +/** + * This class implements all the methods for the Direct (off-heap) sketches that are independent + * of the sketch type (float or double). + */ abstract class KllDirectSketch extends KllSketch { - //All these members are constant for the life of this object. If the WritableMemory changes, it will require - //rebuilding this class + //All these members are constant for the life of this object. If the WritableMemory changes, + // it may require rebuilding this class final Layout layout; final boolean updatable; WritableMemory levelsArrUpdatable; @@ -69,28 +72,16 @@ public int getK() { } @Override - double getMaxDoubleValue() { - return minMaxArrUpdatable.getDouble(Double.BYTES); - } - - @Override - float getMaxFloatValue() { - return minMaxArrUpdatable.getFloat(Float.BYTES); - } - - @Override - double getMinDoubleValue() { - return minMaxArrUpdatable.getDouble(0); - } - - @Override - float getMinFloatValue() { - return minMaxArrUpdatable.getFloat(0); + public long getN() { + return extractN(wmem); } @Override - public long getN() { - return extractN(wmem); + public byte[] toUpdatableByteArray() { + final int bytes = (int) wmem.getCapacity(); + final byte[] byteArr = new byte[bytes]; + wmem.getByteArray(0, byteArr, 0, bytes); + return byteArr; } @Override @@ -105,7 +96,7 @@ public long getN() { @Override double getDoubleItemsArrayAt(final int index) { if (sketchType == FLOATS_SKETCH) { return Double.NaN; } - return itemsArrUpdatable.getDouble(index * 
Double.BYTES); + return itemsArrUpdatable.getDouble((long)index * Double.BYTES); } @Override @@ -125,7 +116,7 @@ float[] getFloatItemsArray() { @Override float getFloatItemsArrayAt(final int index) { if (sketchType == DOUBLES_SKETCH) { return Float.NaN; } - return itemsArrUpdatable.getFloat(index * Float.BYTES); + return itemsArrUpdatable.getFloat((long)index * Float.BYTES); } int getItemsArrLengthItems() { @@ -145,7 +136,27 @@ int[] getLevelsArray() { @Override int getLevelsArrayAt(final int index) { - return levelsArrUpdatable.getInt(index * Integer.BYTES); + return levelsArrUpdatable.getInt((long)index * Integer.BYTES); + } + + @Override + double getMaxDoubleValue() { + return minMaxArrUpdatable.getDouble(Double.BYTES); + } + + @Override + float getMaxFloatValue() { + return minMaxArrUpdatable.getFloat(Float.BYTES); + } + + @Override + double getMinDoubleValue() { + return minMaxArrUpdatable.getDouble(0); + } + + @Override + float getMinFloatValue() { + return minMaxArrUpdatable.getFloat(0); } @Override @@ -180,7 +191,7 @@ void setDoubleItemsArray(final double[] doubleItems) { @Override void setDoubleItemsArrayAt(final int index, final double value) { - itemsArrUpdatable.putDouble(index * Double.BYTES, value); + itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); } @Override @@ -197,7 +208,7 @@ void setFloatItemsArray(final float[] floatItems) { @Override void setFloatItemsArrayAt(final int index, final float value) { - itemsArrUpdatable.putFloat(index * Float.BYTES, value); + itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); } @Override @@ -213,7 +224,7 @@ void setLevelsArray(final int[] levelsArr) { @Override void setLevelsArrayAt(final int index, final int value) { - levelsArrUpdatable.putInt(index * Integer.BYTES, value); + levelsArrUpdatable.putInt((long)index * Integer.BYTES, value); } @Override @@ -276,19 +287,10 @@ void setN(final long n) { insertN(wmem, n); } - @Override void setNumLevels(final int numLevels) { if (!updatable) { 
kllSketchThrow(30); } insertNumLevels(wmem, numLevels); } - @Override - public byte[] toUpdatableByteArray() { - final int bytes = (int) wmem.getCapacity(); - final byte[] byteArr = new byte[bytes]; - wmem.getByteArray(0, byteArr, 0, bytes); - return byteArr; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 125419e77..9049775a1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -34,54 +34,6 @@ */ class KllDoublesHelper { - /** - * Checks the sequential validity of the given array of double values. - * They must be unique, monotonically increasing and not NaN. - * @param values the given array of values - */ - static void validateDoubleValues(final double[] values) { - for (int i = 0; i < values.length; i++) { - if (!Double.isFinite(values[i])) { - throw new SketchesArgumentException("Values must be finite"); - } - if (i < values.length - 1 && values[i] >= values[i + 1]) { - throw new SketchesArgumentException( - "Values must be unique and monotonically increasing"); - } - } - } - - static void mergeSortedDoubleArrays( - final double[] bufA, final int startA, final int lenA, - final double[] bufB, final int startB, final int lenB, - final double[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; - - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; - } else { - bufC[c] = bufB[b]; - b++; - } - } - assert a == limA; - assert b == limB; - } - /** * Compression algorithm used to merge higher levels. *

    Here is what we do for each level:

    @@ -193,20 +145,47 @@ static int[] generalDoublesCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } + static void mergeSortedDoubleArrays( + final double[] bufA, final int startA, final int lenA, + final double[] bufB, final int startB, final int lenB, + final double[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + //This must be modified for validation static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { assert isEven(length); @@ -233,9 +212,26 @@ static void randomlyHalveUpDoubles(final double[] buf, final int start, final in } } + /** + * Checks the sequential validity of the given array of double values. + * They must be unique, monotonically increasing and not NaN. 
+ * @param values the given array of values + */ + static void validateDoubleValues(final double[] values) { + for (int i = 0; i < values.length; i++) { + if (!Double.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } + } + } + /* * The following must be enabled for use with the KllDoublesValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ @@ -248,4 +244,3 @@ static void randomlyHalveUpDoubles(final double[] buf, final int start, final in // } } - diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java index ba269836f..91453549a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllDoublesQuantileCalculator { numLevels_ = 0; //not used by test } - double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private double approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final double[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final double[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,5 +132,39 @@ private static void tandemMerge( } } -} + double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
+ final long pos = QuantilesHelper.posOfPhi(phi, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private double approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + private void populateFromSketch(final double[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 6ccd34ff9..595967892 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -26,6 +26,8 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; +//intentional space +//intentional space /** * This class implements an on-heap doubles KllSketch. * @@ -78,8 +80,8 @@ private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. - //The verified Memory is then passed to the actual private heapify constructor. 
+ //To simplify the code, the MemoryValidate class does nearly all the validity checking. + //The validated Memory is then passed to the actual private heapify constructor. public static KllDoublesSketch heapify(final Memory mem) { final MemoryValidate memChk = new MemoryValidate(mem); if (!memChk.doublesSketch) { @@ -104,10 +106,10 @@ public static KllDoublesSketch heapify(final Memory mem) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { return getDoublesPmfOrCdf(splitPoints, true); @@ -145,8 +147,9 @@ public double[] getCDF(final double[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. 
*/ diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java index 6d4c2044a..3835cd0b4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java @@ -39,6 +39,26 @@ public class KllDoublesSketchIterator { isInitialized_ = false; } + /** + * Gets a value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return value from the current entry + */ + public double getValue() { + return items_[i_]; + } + + /** + * Gets a weight for the value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return weight for the value from the current entry + */ + public long getWeight() { + return weight_; + } + /** * Advancing the iterator and checking existence of the next entry * is combined here for efficiency. This results in an undefined @@ -69,25 +89,4 @@ public boolean next() { return true; } - /** - * Gets a value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return value from the current entry - */ - public double getValue() { - return items_[i_]; - } - - /** - * Gets a weight for the value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). 
- * @return weight for the value from the current entry - */ - public long getWeight() { - return weight_; - } - } - diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 79e730ef1..6f15baf3c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -34,54 +34,6 @@ */ class KllFloatsHelper { - /** - * Checks the sequential validity of the given array of float values. - * They must be unique, monotonically increasing and not NaN. - * @param values the given array of values - */ - static void validateFloatValues(final float[] values) { - for (int i = 0; i < values.length; i++) { - if (!Float.isFinite(values[i])) { - throw new SketchesArgumentException("Values must be finite"); - } - if (i < values.length - 1 && values[i] >= values[i + 1]) { - throw new SketchesArgumentException( - "Values must be unique and monotonically increasing"); - } - } - } - - static void mergeSortedFloatArrays( - final float[] bufA, final int startA, final int lenA, - final float[] bufB, final int startB, final int lenB, - final float[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; - - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; - } else { - bufC[c] = bufB[b]; - b++; - } - } - assert a == limA; - assert b == limB; - } - /** * Compression algorithm used to merge higher levels. *

    Here is what we do for each level:

    @@ -193,20 +145,47 @@ static int[] generalFloatsCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } + static void mergeSortedFloatArrays( + final float[] bufA, final int startA, final int lenA, + final float[] bufB, final int startB, final int lenB, + final float[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + //This must be modified for validation static void randomlyHalveDownFloats(final float[] buf, final int start, final int length, final Random random) { assert isEven(length); @@ -233,9 +212,26 @@ static void randomlyHalveUpFloats(final float[] buf, final int start, final int } } + /** + * Checks the sequential validity of the given array of float values. + * They must be unique, monotonically increasing and not NaN. 
+ * @param values the given array of values + */ + static void validateFloatValues(final float[] values) { + for (int i = 0; i < values.length; i++) { + if (!Float.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } + } + } + /* * The following must be enabled for use with the KllFloatsValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java index 97f628e29..0fee4046e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllFloatsQuantileCalculator { numLevels_ = 0; //not used by test } - float getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private float approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final float[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final float[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,4 +132,39 @@ private static void tandemMerge( } } + float getQuantile(final double phi) { //phi is normalized rank [0,1]. 
+ final long pos = QuantilesHelper.posOfPhi(phi, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private float approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + + private void populateFromSketch(final float[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index b4955e306..321c653d2 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -80,8 +80,8 @@ private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. - //The verified Memory is then passed to the actual private heapify constructor. + //To simplify the code, the MemoryValidate class does nearly all the validity checking. + //The validated Memory is then passed to the actual private heapify constructor. 
public static KllFloatsSketch heapify(final Memory mem) { final MemoryValidate memVal = new MemoryValidate(mem); if (memVal.doublesSketch) { @@ -90,21 +90,6 @@ public static KllFloatsSketch heapify(final Memory mem) { return new KllFloatsSketch(mem, memVal); } - /** - * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter - * k and stream length. This method can be used if allocation of storage - * is necessary beforehand. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. - */ - @Deprecated - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); - return lvlStats.getCompactBytes(); - } - /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). @@ -121,10 +106,10 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. 
*/ public double[] getCDF(final float[] splitPoints) { return getFloatsPmfOrCdf(splitPoints, true); @@ -162,8 +147,9 @@ public double[] getCDF(final float[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -272,16 +258,6 @@ public double getRank(final float value) { return getFloatRank(value); } - /** - * Returns the current number of compact bytes this FloatsSketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * @return the iterator for this class */ @@ -356,4 +332,30 @@ void setMinDoubleValue(final double value) { } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } + /** + * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter + * k and stream length. This method can be used if allocation of storage + * is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. 
+ */ + @Deprecated + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); + return lvlStats.getCompactBytes(); + } + + + /** + * Returns the current number of compact bytes this FloatsSketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java index 2642bd210..c2c7e4508 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java @@ -39,6 +39,26 @@ public class KllFloatsSketchIterator { isInitialized_ = false; } + /** + * Gets a value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return value from the current entry + */ + public float getValue() { + return items_[i_]; + } + + /** + * Gets a weight for the value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return weight for the value from the current entry + */ + public long getWeight() { + return weight_; + } + /** * Advancing the iterator and checking existence of the next entry * is combined here for efficiency. This results in an undefined @@ -69,24 +89,4 @@ public boolean next() { return true; } - /** - * Gets a value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). 
- * @return value from the current entry - */ - public float getValue() { - return items_[i_]; - } - - /** - * Gets a weight for the value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return weight for the value from the current entry - */ - public long getWeight() { - return weight_; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index f871ac12f..297f16770 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -21,25 +21,13 @@ import org.apache.datasketches.memory.WritableMemory; +/** + * This class implements all the methods for the heap sketches that are independent + * of the sketch type (float or double). + * + * @author lrhodes + */ abstract class KllHeapSketch extends KllSketch { - - /* - * Data is stored in items_. - * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence, levels_ array must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed and sorted, except for level 0, - * which is filled from the top down. - * - * Invariants: - * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. - * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, - * so there is room for least 1 more item in level zero. - * 3) There are no gaps except at the bottom, so if levels_[0] = 0, - * the sketch is exactly filled to capacity and must be compacted. - * 4) Sum of weights of all retained items == N. - * 5) curTotalCap = items_.length = levels_[numLevels_]. - */ - private long n_; // number of items input into this sketch. private final int k; // configured value of K. private int dyMinK_; // dynamic minK for error estimation after merging with different k. 
@@ -65,13 +53,18 @@ abstract class KllHeapSketch extends KllSketch { } @Override - int getDyMinK() { - return dyMinK_; + public int getK() { + return k; } @Override - public int getK() { - return k; + public long getN() { + return n_; + } + + @Override + int getDyMinK() { + return dyMinK_; } @Override @@ -85,11 +78,6 @@ int[] getLevelsArray() { @Override int getLevelsArrayAt(final int index) { return levels_[index]; } - @Override - public long getN() { - return n_; - } - @Override int getNumLevels() { return numLevels_; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 2192d86af..cf947785a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -54,7 +54,7 @@ static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, static LevelStats getLevelStats(final int k, final int m, final int numLevels, final boolean printDetail, final boolean printSummary, final SketchType sketchType) { - int cumN = 0; + long cumN = 0; int cumCap = 0; if (printDetail) { println("Total Levels: " + numLevels); diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index aeb67d2f0..67c30e34f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -66,8 +66,32 @@ import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +/* + * Sampled stream data (floats or doubles) is stored as arrays or Memory objects of items. + * This array is partitioned into sections called levels and the indices into the array of items + * is tracked by a small integer array called levels or levels array. + * The data for level i lies in positions levelsArray[i] through levelsArray[i + 1] - 1 inclusive. 
+ * Hence, the levelsArray must contain (numLevels + 1) indices. + * The valid portion of items array is completely packed and sorted, except for level 0, + * which is filled from the top down. Any items below the index levelsArray[0] is garbage and will be + * overwritten by subsequent updates. + * + * Invariants: + * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. + * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, + * so there is room for least 1 more item in level zero. + * 3) There are no gaps except at the bottom, so if levels_[0] = 0, + * the sketch is exactly filled to capacity and must be compacted or the itemsArray and levelsArray + * must be expanded to include more levels. + * 4) Sum of weights of all retained items == N. + * 5) Current total item capacity = itemsArray.length = levelsArray[numLevels]. + */ + + /** - * This class is the root of the KLL sketch class hierarchy. + * This class is the root of the KLL sketch class hierarchy. It includes the public API that is independent + * of either sketch type (float or double) and independent of whether the sketch is targeted for use on the + * heap or Direct (off-heap. * *

    Please refer to the documentation in the package-info:
    * {@link org.apache.datasketches.kll}

    @@ -190,6 +214,7 @@ final static void kllSketchThrow(final int errNo) { case 33: msg = "Given sketch must be of type Double."; break; case 34: msg = "Given sketch must be of type Float."; break; case 35: msg = "Given sketch must not be of type Direct."; break; + default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); } @@ -288,8 +313,8 @@ public byte[] toByteArray() { } @Override - public final String toString() { //TODO set back to false, false - return toString(true, true); + public final String toString() { + return toString(false, false); } /** diff --git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java index fe1daafdd..c932a9891 100644 --- a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java @@ -51,6 +51,13 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; +/** + * This class performs all the error checking of an incoming Memory object and extracts the key fields in the process. + * This is used by all sketches that read or import Memory objects. 
+ * + * @author lrhodes + * + */ final class MemoryValidate { // first 8 bytes final int preInts; // = extractPreInts(srcMem); @@ -128,10 +135,10 @@ void compactMemoryValidate(final Memory srcMem) { srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //copies all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem, - offset += levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem + offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2 * Float.BYTES); - offset += minMaxArrCompact.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2L * Float.BYTES); + offset += (int)minMaxArrCompact.getCapacity(); // ITEMS MEM itemsArrStart = offset; capacityItems = myLevelsArr[numLevels]; @@ -196,10 +203,10 @@ void compactMemoryValidate(final Memory srcMem) { srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem - offset += levelsArrCompact.getCapacity() - Integer.BYTES; + offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2 * Double.BYTES); - offset += minMaxArrCompact.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2L * Double.BYTES); + offset += (int)minMaxArrCompact.getCapacity(); // ITEMS MEM itemsArrStart = offset; capacityItems = myLevelsArr[numLevels]; @@ -269,13 +276,13 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { int offset = DATA_START_ADR_DOUBLE; //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * 
Integer.BYTES); offset += (int)levelsArrUpdatable.getCapacity(); //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2 * Double.BYTES); + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Double.BYTES); offset += (int)minMaxArrUpdatable.getCapacity(); //ITEMS - capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Double.BYTES; itemsArrStart = offset; itemsArrStart = memCapacity - itemsArrBytes; @@ -293,13 +300,13 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { numLevels = extractNumLevels(wSrcMem); int offset = DATA_START_ADR_FLOAT; //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * Integer.BYTES); offset += (int)levelsArrUpdatable.getCapacity(); //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2 * Float.BYTES); + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Float.BYTES); offset += (int)minMaxArrUpdatable.getCapacity(); //ITEMS - capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Float.BYTES; itemsArrStart = offset; itemsArrStart = memCapacity - itemsArrBytes; @@ -328,6 +335,7 @@ private static void memoryValidateThrow(final int errNo, final int value) { //case 22: msg = "N != 1 and single item bit is set. N: " + value; break; //case 23: msg = "Family name is not KLL"; break; case 24: msg = "Given Memory has insufficient capacity. 
Need " + value + " bytes."; break; + default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); } From 8fa793031a0484e8abf4246310d046e085759da8 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 26 Mar 2022 13:07:24 -0700 Subject: [PATCH 14/31] Fixed the LGTM issues detected on the last push. --- .../kll/KllDirectDoublesSketch.java | 18 ++- .../kll/KllDirectFloatsSketch.java | 18 ++- .../datasketches/kll/KllDirectSketch.java | 76 ++++++------- .../datasketches/kll/KllDoublesHelper.java | 103 +++++++++--------- .../kll/KllDoublesQuantileCalculator.java | 71 ++++++------ .../datasketches/kll/KllDoublesSketch.java | 19 ++-- .../kll/KllDoublesSketchIterator.java | 41 ++++--- .../datasketches/kll/KllFloatsHelper.java | 102 +++++++++-------- .../kll/KllFloatsQuantileCalculator.java | 70 ++++++------ .../datasketches/kll/KllFloatsSketch.java | 68 ++++++------ .../kll/KllFloatsSketchIterator.java | 40 +++---- .../datasketches/kll/KllHeapSketch.java | 42 +++---- .../apache/datasketches/kll/KllHelper.java | 2 +- .../apache/datasketches/kll/KllSketch.java | 31 +++++- .../datasketches/kll/MemoryValidate.java | 32 ++++-- 15 files changed, 381 insertions(+), 352 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 45adf4a20..6d0911c8e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -60,10 +60,10 @@ public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServ * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. 
The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { return getDoublesPmfOrCdf(splitPoints, true); @@ -105,8 +105,9 @@ public double getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -242,6 +243,11 @@ public String toString(final boolean withLevels, final boolean withData) { return toStringImpl(withLevels, withData); } + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. 
+ */ public void update(final double value) { updateDouble(value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 9eb52f302..7a7d928ee 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -60,10 +60,10 @@ public KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServe * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final float[] splitPoints) { return getFloatsPmfOrCdf(splitPoints, true); @@ -105,8 +105,9 @@ public float getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. 
* The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -242,6 +243,11 @@ public String toString(final boolean withLevels, final boolean withData) { return toStringImpl(withLevels, withData); } + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. + */ public void update(final float value) { updateFloat(value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index be181249b..b3ec22c2b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -35,10 +35,13 @@ import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; - +/** + * This class implements all the methods for the Direct (off-heap) sketches that are independent + * of the sketch type (float or double). + */ abstract class KllDirectSketch extends KllSketch { - //All these members are constant for the life of this object. If the WritableMemory changes, it will require - //rebuilding this class + //All these members are constant for the life of this object. 
If the WritableMemory changes, + // it may require rebuilding this class final Layout layout; final boolean updatable; WritableMemory levelsArrUpdatable; @@ -69,28 +72,16 @@ public int getK() { } @Override - double getMaxDoubleValue() { - return minMaxArrUpdatable.getDouble(Double.BYTES); - } - - @Override - float getMaxFloatValue() { - return minMaxArrUpdatable.getFloat(Float.BYTES); - } - - @Override - double getMinDoubleValue() { - return minMaxArrUpdatable.getDouble(0); - } - - @Override - float getMinFloatValue() { - return minMaxArrUpdatable.getFloat(0); + public long getN() { + return extractN(wmem); } @Override - public long getN() { - return extractN(wmem); + public byte[] toUpdatableByteArray() { + final int bytes = (int) wmem.getCapacity(); + final byte[] byteArr = new byte[bytes]; + wmem.getByteArray(0, byteArr, 0, bytes); + return byteArr; } @Override @@ -105,7 +96,7 @@ public long getN() { @Override double getDoubleItemsArrayAt(final int index) { if (sketchType == FLOATS_SKETCH) { return Double.NaN; } - return itemsArrUpdatable.getDouble(index * Double.BYTES); + return itemsArrUpdatable.getDouble((long)index * Double.BYTES); } @Override @@ -125,7 +116,7 @@ float[] getFloatItemsArray() { @Override float getFloatItemsArrayAt(final int index) { if (sketchType == DOUBLES_SKETCH) { return Float.NaN; } - return itemsArrUpdatable.getFloat(index * Float.BYTES); + return itemsArrUpdatable.getFloat((long)index * Float.BYTES); } int getItemsArrLengthItems() { @@ -145,7 +136,27 @@ int[] getLevelsArray() { @Override int getLevelsArrayAt(final int index) { - return levelsArrUpdatable.getInt(index * Integer.BYTES); + return levelsArrUpdatable.getInt((long)index * Integer.BYTES); + } + + @Override + double getMaxDoubleValue() { + return minMaxArrUpdatable.getDouble(Double.BYTES); + } + + @Override + float getMaxFloatValue() { + return minMaxArrUpdatable.getFloat(Float.BYTES); + } + + @Override + double getMinDoubleValue() { + return minMaxArrUpdatable.getDouble(0); + 
} + + @Override + float getMinFloatValue() { + return minMaxArrUpdatable.getFloat(0); } @Override @@ -180,7 +191,7 @@ void setDoubleItemsArray(final double[] doubleItems) { @Override void setDoubleItemsArrayAt(final int index, final double value) { - itemsArrUpdatable.putDouble(index * Double.BYTES, value); + itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); } @Override @@ -197,7 +208,7 @@ void setFloatItemsArray(final float[] floatItems) { @Override void setFloatItemsArrayAt(final int index, final float value) { - itemsArrUpdatable.putFloat(index * Float.BYTES, value); + itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); } @Override @@ -213,7 +224,7 @@ void setLevelsArray(final int[] levelsArr) { @Override void setLevelsArrayAt(final int index, final int value) { - levelsArrUpdatable.putInt(index * Integer.BYTES, value); + levelsArrUpdatable.putInt((long)index * Integer.BYTES, value); } @Override @@ -276,19 +287,10 @@ void setN(final long n) { insertN(wmem, n); } - @Override void setNumLevels(final int numLevels) { if (!updatable) { kllSketchThrow(30); } insertNumLevels(wmem, numLevels); } - @Override - public byte[] toUpdatableByteArray() { - final int bytes = (int) wmem.getCapacity(); - final byte[] byteArr = new byte[bytes]; - wmem.getByteArray(0, byteArr, 0, bytes); - return byteArr; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 125419e77..9049775a1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -34,54 +34,6 @@ */ class KllDoublesHelper { - /** - * Checks the sequential validity of the given array of double values. - * They must be unique, monotonically increasing and not NaN. 
- * @param values the given array of values - */ - static void validateDoubleValues(final double[] values) { - for (int i = 0; i < values.length; i++) { - if (!Double.isFinite(values[i])) { - throw new SketchesArgumentException("Values must be finite"); - } - if (i < values.length - 1 && values[i] >= values[i + 1]) { - throw new SketchesArgumentException( - "Values must be unique and monotonically increasing"); - } - } - } - - static void mergeSortedDoubleArrays( - final double[] bufA, final int startA, final int lenA, - final double[] bufB, final int startB, final int lenB, - final double[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; - - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; - } else { - bufC[c] = bufB[b]; - b++; - } - } - assert a == limA; - assert b == limB; - } - /** * Compression algorithm used to merge higher levels. *

    Here is what we do for each level:

    @@ -193,20 +145,47 @@ static int[] generalDoublesCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } + static void mergeSortedDoubleArrays( + final double[] bufA, final int startA, final int lenA, + final double[] bufB, final int startB, final int lenB, + final double[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + //This must be modified for validation static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { assert isEven(length); @@ -233,9 +212,26 @@ static void randomlyHalveUpDoubles(final double[] buf, final int start, final in } } + /** + * Checks the sequential validity of the given array of double values. + * They must be unique, monotonically increasing and not NaN. 
+ * @param values the given array of values + */ + static void validateDoubleValues(final double[] values) { + for (int i = 0; i < values.length; i++) { + if (!Double.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } + } + } + /* * The following must be enabled for use with the KllDoublesValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ @@ -248,4 +244,3 @@ static void randomlyHalveUpDoubles(final double[] buf, final int start, final in // } } - diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java index ba269836f..91453549a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllDoublesQuantileCalculator { numLevels_ = 0; //not used by test } - double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private double approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final double[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final double[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,5 +132,39 @@ private static void tandemMerge( } } -} + double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
+ final long pos = QuantilesHelper.posOfPhi(phi, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private double approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + private void populateFromSketch(final double[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 6ccd34ff9..595967892 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -26,6 +26,8 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; +//intentional space +//intentional space /** * This class implements an on-heap doubles KllSketch. * @@ -78,8 +80,8 @@ private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. - //The verified Memory is then passed to the actual private heapify constructor. 
+ //To simplify the code, the MemoryValidate class does nearly all the validity checking. + //The validated Memory is then passed to the actual private heapify constructor. public static KllDoublesSketch heapify(final Memory mem) { final MemoryValidate memChk = new MemoryValidate(mem); if (!memChk.doublesSketch) { @@ -104,10 +106,10 @@ public static KllDoublesSketch heapify(final Memory mem) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { return getDoublesPmfOrCdf(splitPoints, true); @@ -145,8 +147,9 @@ public double[] getCDF(final double[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. 
*/ diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java index 6d4c2044a..3835cd0b4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java @@ -39,6 +39,26 @@ public class KllDoublesSketchIterator { isInitialized_ = false; } + /** + * Gets a value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return value from the current entry + */ + public double getValue() { + return items_[i_]; + } + + /** + * Gets a weight for the value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return weight for the value from the current entry + */ + public long getWeight() { + return weight_; + } + /** * Advancing the iterator and checking existence of the next entry * is combined here for efficiency. This results in an undefined @@ -69,25 +89,4 @@ public boolean next() { return true; } - /** - * Gets a value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return value from the current entry - */ - public double getValue() { - return items_[i_]; - } - - /** - * Gets a weight for the value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). 
- * @return weight for the value from the current entry - */ - public long getWeight() { - return weight_; - } - } - diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 79e730ef1..6f15baf3c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -34,54 +34,6 @@ */ class KllFloatsHelper { - /** - * Checks the sequential validity of the given array of float values. - * They must be unique, monotonically increasing and not NaN. - * @param values the given array of values - */ - static void validateFloatValues(final float[] values) { - for (int i = 0; i < values.length; i++) { - if (!Float.isFinite(values[i])) { - throw new SketchesArgumentException("Values must be finite"); - } - if (i < values.length - 1 && values[i] >= values[i + 1]) { - throw new SketchesArgumentException( - "Values must be unique and monotonically increasing"); - } - } - } - - static void mergeSortedFloatArrays( - final float[] bufA, final int startA, final int lenA, - final float[] bufB, final int startB, final int lenB, - final float[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; - - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; - } else { - bufC[c] = bufB[b]; - b++; - } - } - assert a == limA; - assert b == limB; - } - /** * Compression algorithm used to merge higher levels. *

    Here is what we do for each level:

    @@ -193,20 +145,47 @@ static int[] generalFloatsCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } + static void mergeSortedFloatArrays( + final float[] bufA, final int startA, final int lenA, + final float[] bufB, final int startB, final int lenB, + final float[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + //This must be modified for validation static void randomlyHalveDownFloats(final float[] buf, final int start, final int length, final Random random) { assert isEven(length); @@ -233,9 +212,26 @@ static void randomlyHalveUpFloats(final float[] buf, final int start, final int } } + /** + * Checks the sequential validity of the given array of float values. + * They must be unique, monotonically increasing and not NaN. 
+ * @param values the given array of values + */ + static void validateFloatValues(final float[] values) { + for (int i = 0; i < values.length; i++) { + if (!Float.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } + } + } + /* * The following must be enabled for use with the KllFloatsValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java index 97f628e29..0fee4046e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllFloatsQuantileCalculator { numLevels_ = 0; //not used by test } - float getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private float approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final float[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final float[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,4 +132,39 @@ private static void tandemMerge( } } + float getQuantile(final double phi) { //phi is normalized rank [0,1]. 
+ final long pos = QuantilesHelper.posOfPhi(phi, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private float approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + + private void populateFromSketch(final float[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index b4955e306..321c653d2 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -80,8 +80,8 @@ private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the PreambleUtil.MemoryCheck does nearly all the validity checking. - //The verified Memory is then passed to the actual private heapify constructor. + //To simplify the code, the MemoryValidate class does nearly all the validity checking. + //The validated Memory is then passed to the actual private heapify constructor. 
public static KllFloatsSketch heapify(final Memory mem) { final MemoryValidate memVal = new MemoryValidate(mem); if (memVal.doublesSketch) { @@ -90,21 +90,6 @@ public static KllFloatsSketch heapify(final Memory mem) { return new KllFloatsSketch(mem, memVal); } - /** - * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter - * k and stream length. This method can be used if allocation of storage - * is necessary beforehand. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. - */ - @Deprecated - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); - return lvlStats.getCompactBytes(); - } - /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). @@ -121,10 +106,10 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values, which are a consecutive approximation to the CDF - * of the input stream given the splitPoints. The value at array position j of the returned - * CDF array is the sum of the returned values in positions 0 through j of the returned PMF - * array. + * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. 
*/ public double[] getCDF(final float[] splitPoints) { return getFloatsPmfOrCdf(splitPoints, true); @@ -162,8 +147,9 @@ public double[] getCDF(final float[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles each of which is an approximation - * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right * splitPoint, with the exception that the last interval will include maximum value. */ @@ -272,16 +258,6 @@ public double getRank(final float value) { return getFloatRank(value); } - /** - * Returns the current number of compact bytes this FloatsSketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - /** * @return the iterator for this class */ @@ -356,4 +332,30 @@ void setMinDoubleValue(final double value) { } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } + /** + * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter + * k and stream length. This method can be used if allocation of storage + * is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. 
+ */ + @Deprecated + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); + return lvlStats.getCompactBytes(); + } + + + /** + * Returns the current number of compact bytes this FloatsSketch would require to store. + * @return the number of bytes this sketch would require to store. + * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} + */ + @Deprecated + public int getSerializedSizeBytes() { + return getCurrentCompactSerializedSizeBytes(); + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java index 2642bd210..c2c7e4508 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java @@ -39,6 +39,26 @@ public class KllFloatsSketchIterator { isInitialized_ = false; } + /** + * Gets a value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return value from the current entry + */ + public float getValue() { + return items_[i_]; + } + + /** + * Gets a weight for the value from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return weight for the value from the current entry + */ + public long getWeight() { + return weight_; + } + /** * Advancing the iterator and checking existence of the next entry * is combined here for efficiency. This results in an undefined @@ -69,24 +89,4 @@ public boolean next() { return true; } - /** - * Gets a value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). 
- * @return value from the current entry - */ - public float getValue() { - return items_[i_]; - } - - /** - * Gets a weight for the value from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return weight for the value from the current entry - */ - public long getWeight() { - return weight_; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index f871ac12f..297f16770 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -21,25 +21,13 @@ import org.apache.datasketches.memory.WritableMemory; +/** + * This class implements all the methods for the heap sketches that are independent + * of the sketch type (float or double). + * + * @author lrhodes + */ abstract class KllHeapSketch extends KllSketch { - - /* - * Data is stored in items_. - * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence, levels_ array must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed and sorted, except for level 0, - * which is filled from the top down. - * - * Invariants: - * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. - * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, - * so there is room for least 1 more item in level zero. - * 3) There are no gaps except at the bottom, so if levels_[0] = 0, - * the sketch is exactly filled to capacity and must be compacted. - * 4) Sum of weights of all retained items == N. - * 5) curTotalCap = items_.length = levels_[numLevels_]. - */ - private long n_; // number of items input into this sketch. private final int k; // configured value of K. private int dyMinK_; // dynamic minK for error estimation after merging with different k. 
@@ -65,13 +53,18 @@ abstract class KllHeapSketch extends KllSketch { } @Override - int getDyMinK() { - return dyMinK_; + public int getK() { + return k; } @Override - public int getK() { - return k; + public long getN() { + return n_; + } + + @Override + int getDyMinK() { + return dyMinK_; } @Override @@ -85,11 +78,6 @@ int[] getLevelsArray() { @Override int getLevelsArrayAt(final int index) { return levels_[index]; } - @Override - public long getN() { - return n_; - } - @Override int getNumLevels() { return numLevels_; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 2192d86af..cf947785a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -54,7 +54,7 @@ static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, static LevelStats getLevelStats(final int k, final int m, final int numLevels, final boolean printDetail, final boolean printSummary, final SketchType sketchType) { - int cumN = 0; + long cumN = 0; int cumCap = 0; if (printDetail) { println("Total Levels: " + numLevels); diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index aeb67d2f0..67c30e34f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -66,8 +66,32 @@ import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +/* + * Sampled stream data (floats or doubles) is stored as arrays or Memory objects of items. + * This array is partitioned into sections called levels and the indices into the array of items + * is tracked by a small integer array called levels or levels array. + * The data for level i lies in positions levelsArray[i] through levelsArray[i + 1] - 1 inclusive. 
+ * Hence, the levelsArray must contain (numLevels + 1) indices. + * The valid portion of items array is completely packed and sorted, except for level 0, + * which is filled from the top down. Any items below the index levelsArray[0] is garbage and will be + * overwritten by subsequent updates. + * + * Invariants: + * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. + * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, + * so there is room for least 1 more item in level zero. + * 3) There are no gaps except at the bottom, so if levels_[0] = 0, + * the sketch is exactly filled to capacity and must be compacted or the itemsArray and levelsArray + * must be expanded to include more levels. + * 4) Sum of weights of all retained items == N. + * 5) Current total item capacity = itemsArray.length = levelsArray[numLevels]. + */ + + /** - * This class is the root of the KLL sketch class hierarchy. + * This class is the root of the KLL sketch class hierarchy. It includes the public API that is independent + * of either sketch type (float or double) and independent of whether the sketch is targeted for use on the + * heap or Direct (off-heap. * *

    Please refer to the documentation in the package-info:
    * {@link org.apache.datasketches.kll}

    @@ -190,6 +214,7 @@ final static void kllSketchThrow(final int errNo) { case 33: msg = "Given sketch must be of type Double."; break; case 34: msg = "Given sketch must be of type Float."; break; case 35: msg = "Given sketch must not be of type Direct."; break; + default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); } @@ -288,8 +313,8 @@ public byte[] toByteArray() { } @Override - public final String toString() { //TODO set back to false, false - return toString(true, true); + public final String toString() { + return toString(false, false); } /** diff --git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java index fe1daafdd..c932a9891 100644 --- a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java @@ -51,6 +51,13 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; +/** + * This class performs all the error checking of an incoming Memory object and extracts the key fields in the process. + * This is used by all sketches that read or import Memory objects. 
+ * + * @author lrhodes + * + */ final class MemoryValidate { // first 8 bytes final int preInts; // = extractPreInts(srcMem); @@ -128,10 +135,10 @@ void compactMemoryValidate(final Memory srcMem) { srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //copies all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem, - offset += levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem + offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2 * Float.BYTES); - offset += minMaxArrCompact.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2L * Float.BYTES); + offset += (int)minMaxArrCompact.getCapacity(); // ITEMS MEM itemsArrStart = offset; capacityItems = myLevelsArr[numLevels]; @@ -196,10 +203,10 @@ void compactMemoryValidate(final Memory srcMem) { srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem - offset += levelsArrCompact.getCapacity() - Integer.BYTES; + offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2 * Double.BYTES); - offset += minMaxArrCompact.getCapacity(); + minMaxArrCompact = srcMem.region(offset, 2L * Double.BYTES); + offset += (int)minMaxArrCompact.getCapacity(); // ITEMS MEM itemsArrStart = offset; capacityItems = myLevelsArr[numLevels]; @@ -269,13 +276,13 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { int offset = DATA_START_ADR_DOUBLE; //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * 
Integer.BYTES); offset += (int)levelsArrUpdatable.getCapacity(); //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2 * Double.BYTES); + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Double.BYTES); offset += (int)minMaxArrUpdatable.getCapacity(); //ITEMS - capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Double.BYTES; itemsArrStart = offset; itemsArrStart = memCapacity - itemsArrBytes; @@ -293,13 +300,13 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { numLevels = extractNumLevels(wSrcMem); int offset = DATA_START_ADR_FLOAT; //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1) * Integer.BYTES); + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * Integer.BYTES); offset += (int)levelsArrUpdatable.getCapacity(); //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2 * Float.BYTES); + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Float.BYTES); offset += (int)minMaxArrUpdatable.getCapacity(); //ITEMS - capacityItems = levelsArrUpdatable.getInt(numLevels * Integer.BYTES); + capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Float.BYTES; itemsArrStart = offset; itemsArrStart = memCapacity - itemsArrBytes; @@ -328,6 +335,7 @@ private static void memoryValidateThrow(final int errNo, final int value) { //case 22: msg = "N != 1 and single item bit is set. N: " + value; break; //case 23: msg = "Family name is not KLL"; break; case 24: msg = "Given Memory has insufficient capacity. 
Need " + value + " bytes."; break; + default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); } From 0cd2d4301bd7ac893972e7d680eb889944f9b28f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 26 Mar 2022 15:31:39 -0700 Subject: [PATCH 15/31] Remove multiple empty lines --- src/main/java/org/apache/datasketches/kll/KllSketch.java | 8 ++------ tools/SketchesCheckstyle.xml | 1 + 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 67c30e34f..0c4da7abc 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -67,9 +67,9 @@ import org.apache.datasketches.memory.WritableMemory; /* - * Sampled stream data (floats or doubles) is stored as arrays or Memory objects of items. + * Sampled stream data (floats or doubles) is stored as an array or as part of a Memory object. * This array is partitioned into sections called levels and the indices into the array of items - * is tracked by a small integer array called levels or levels array. + * are tracked by a small integer array called levels or levels array. * The data for level i lies in positions levelsArray[i] through levelsArray[i + 1] - 1 inclusive. * Hence, the levelsArray must contain (numLevels + 1) indices. * The valid portion of items array is completely packed and sorted, except for level 0, @@ -87,7 +87,6 @@ * 5) Current total item capacity = itemsArray.length = levelsArray[numLevels]. */ - /** * This class is the root of the KLL sketch class hierarchy. 
It includes the public API that is independent * of either sketch type (float or double) and independent of whether the sketch is targeted for use on the @@ -219,7 +218,6 @@ final static void kllSketchThrow(final int errNo) { throw new SketchesArgumentException(msg); } - //Public Non-static methods /** @@ -778,7 +776,6 @@ private static double resolveDoubleMaxValue(final double myMax, final double oth return max(myMax, otherMax); } - final void mergeFloatImpl(final KllSketch other) { if (other.isEmpty()) { return; } final long finalN = getN() + other.getN(); @@ -889,7 +886,6 @@ private static float resolveFloatMaxValue(final float myMax, final float otherMa return max(myMax, otherMax); } - abstract void setDoubleItemsArray(double[] floatItems); abstract void setDoubleItemsArrayAt(int index, double value); diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 8a587554c..fe398312f 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -352,6 +352,7 @@ under the License. + From 0f3e3b32cbfb1e300118d868510a83230075c4cf Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 26 Mar 2022 17:17:24 -0700 Subject: [PATCH 16/31] Fixed a small issue with getSerializedSizeBytes(). 
--- .../datasketches/kll/KllDoublesSketch.java | 2 -- .../datasketches/kll/KllFloatsSketch.java | 28 --------------- .../apache/datasketches/kll/KllSketch.java | 35 ++++++++++++++++--- .../datasketches/kll/KllFloatsSketchTest.java | 4 +-- .../kll/MiscDirectDoublesTest.java | 15 ++++++-- .../kll/MiscDirectFloatsTest.java | 17 +++++++-- 6 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 595967892..2fc4b0ad8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -26,8 +26,6 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; -//intentional space -//intentional space /** * This class implements an on-heap doubles KllSketch. * diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 321c653d2..442c6c73d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,11 +21,9 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.Memory; /** @@ -332,30 +330,4 @@ void setMinDoubleValue(final double value) { } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } - /** - * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter - * k and stream length. 
This method can be used if allocation of storage - * is necessary beforehand. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param n stream length - * @return upper bound on the compact serialized size - * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. - */ - @Deprecated - public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); - return lvlStats.getCompactBytes(); - } - - - /** - * Returns the current number of compact bytes this FloatsSketch would require to store. - * @return the number of bytes this sketch would require to store. - * @deprecated use {@link KllSketch#getCurrentCompactSerializedSizeBytes()} - */ - @Deprecated - public int getSerializedSizeBytes() { - return getCurrentCompactSerializedSizeBytes(); - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 0c4da7abc..c26ea9d29 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -154,6 +154,21 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { return max(MIN_K, min(MAX_K, k)); } + /** + * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter + * k and stream length. This method can be used if allocation of storage + * is necessary beforehand. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param n stream length + * @return upper bound on the compact serialized size + * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. 
+ */ + @Deprecated + public static int getMaxSerializedSizeBytes(final int k, final long n) { + final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); + return lvlStats.getCompactBytes(); + } + /** * Returns upper bound on the serialized size of a KllSketch given the following parameters. * @param k parameter that controls size of the sketch and accuracy of estimates @@ -181,12 +196,22 @@ public static double getNormalizedRankError(final int k, final boolean pmf) { return KllHelper.getNormalizedRankError(k, pmf); } - static int getSerializedSizeBytes(final int numLevels, final int numRetained, + /** + * Returns the current number of bytes this Sketch would require if serialized. + * @return the number of bytes this sketch would require if serialized. + */ + public int getSerializedSizeBytes() { + return (direct) + ? getCurrentUpdatableSerializedSizeBytes() + : getCurrentCompactSerializedSizeBytes(); + } + + static int getSerializedSizeBytes(final int numLevels, final int numItems, final SketchType sketchType, final boolean updatable) { int levelsBytes = 0; if (!updatable) { - if (numRetained == 0) { return N_LONG_ADR; } - if (numRetained == 1) { + if (numItems == 0) { return N_LONG_ADR; } + if (numItems == 1) { return DATA_START_ADR_SINGLE_ITEM + (sketchType == DOUBLES_SKETCH ? 
Double.BYTES : Float.BYTES); } levelsBytes = numLevels * Integer.BYTES; @@ -194,9 +219,9 @@ static int getSerializedSizeBytes(final int numLevels, final int numRetained, levelsBytes = (numLevels + 1) * Integer.BYTES; } if (sketchType == DOUBLES_SKETCH) { - return DATA_START_ADR_DOUBLE + levelsBytes + (numRetained + 2) * Double.BYTES; //+2 is for min & max + return DATA_START_ADR_DOUBLE + levelsBytes + (numItems + 2) * Double.BYTES; //+2 is for min & max } else { - return DATA_START_ADR_FLOAT + levelsBytes + (numRetained + 2) * Float.BYTES; + return DATA_START_ADR_FLOAT + levelsBytes + (numItems + 2) * Float.BYTES; } } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 3a10d4030..94f1c6a5f 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -402,13 +402,13 @@ public void getQuantiles() { public void checkDeprecatedMethods() { final int k = 200; final int n = 200; - int bytes = KllFloatsSketch.getMaxSerializedSizeBytes(k, n); //assumed float before + int bytes = KllSketch.getMaxSerializedSizeBytes(k, n); //assumed float before assertEquals(bytes, 832); KllFloatsSketch sk = new KllFloatsSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } final byte[] byteArr = sk.toByteArray(); assertEquals(byteArr.length, 832); - bytes = sk.getSerializedSizeBytes(); //defaults to compact + bytes = sk.getSerializedSizeBytes(); assertEquals(bytes, 832); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java index f3b6d7d82..6c7f3d1b9 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -399,12 +399,24 @@ public void checkSimpleMerge() { assertEquals(sk1.getMinValue(), 
1.0); } + @Test + public void checkSizes() { + KllDirectDoublesSketch sk = getDDSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = sk.toUpdatableByteArray(); + int size1 = sk.getCurrentUpdatableSerializedSizeBytes(); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.getCurrentCompactSerializedSizeBytes(); + assertEquals(size2, byteArr2.length); + } + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { KllDoublesSketch sk = new KllDoublesSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); return ddsk; } @@ -422,4 +434,3 @@ static void println(final String s) { } } - diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java index 27094c5a2..22bbb953b 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -399,19 +399,31 @@ public void checkSimpleMerge() { assertEquals(sk1.getMinValue(), 1.0F); } + @Test + public void checkSizes() { + KllDirectFloatsSketch sk = getDFSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = sk.toUpdatableByteArray(); + int size1 = sk.getCurrentUpdatableSerializedSizeBytes(); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.getCurrentCompactSerializedSizeBytes(); + assertEquals(size2, byteArr2.length); + } + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { KllFloatsSketch sk = new KllFloatsSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = 
sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); return dfsk; } @Test public void printlnTest() { - //println("PRINTING: " + this.getClass().getName()); + println("PRINTING: " + this.getClass().getName()); } /** @@ -422,4 +434,3 @@ static void println(final String s) { } } - From 0cc7eac0d634768dc75ac976774f2e9417f98c7b Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 27 Mar 2022 14:32:47 -0700 Subject: [PATCH 17/31] Improved the KllHelper class to make it useful internally as well as to interested users. --- .../apache/datasketches/kll/KllHelper.java | 373 ++++++++++-------- .../apache/datasketches/kll/KllSketch.java | 11 +- .../datasketches/kll/KllHelperTest.java | 26 +- 3 files changed, 231 insertions(+), 179 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index cf947785a..931e6131f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -21,102 +21,169 @@ import static java.lang.Math.pow; import static org.apache.datasketches.Util.floorPowerOf2; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllSketch.M; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.kll.KllSketch.SketchType; -class KllHelper { - static final String LS = System.getProperty("line.separator"); +/** + * This class provides some useful sketch analysis tools that are used internally and also can be used by + * 
interested users to understand the internal structure of the sketch as well as the growth properties of the + * sketch given a stream length. + * + * @author lrhodes + * + */ +public class KllHelper { - /** - * Returns very conservative upper bound of the number of levels based on n. - * @param n the length of the stream - * @return floor( log_2(n) ) - */ - static int ubOnNumLevels(final long n) { - return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); + public static class GrowthStats { + SketchType sketchType; + int k; + long givenN; + long maxN; + int numLevels; + int maxItems; + int compactBytes; + int updatableBytes; } - static LevelStats getAllLevelStatsGivenN(final int k, final int m, final long n, - final boolean printDetail, final boolean printSummaries, final SketchType sketchType) { - long cumN; - int numLevels = 0; - LevelStats lvlStats; - do { - numLevels++; - lvlStats = getLevelStats(k, m, numLevels, printDetail, printSummaries, sketchType); - cumN = lvlStats.getMaxN(); - } while (cumN < n); - return lvlStats; + public static class LevelStats { + long n; + int numLevels; + int items; + + LevelStats(final long n, final int numLevels, final int items) { + this.n = n; + this.numLevels = numLevels; + this.items = items; + } } - static LevelStats getLevelStats(final int k, final int m, final int numLevels, - final boolean printDetail, final boolean printSummary, final SketchType sketchType) { + /** + * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index + */ + private static final long[] powersOfThree = + new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441, + 1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467, + 3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L, + 847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L, + 205891132094649L}; + + /** + * Given K and numLevels, this computes and optionally prints the structure of the sketch 
when the given + * number of levels are completely filled. + * @param k the given sketch parameter + * @param numLevels the given number of levels of the sketch + * @param printSketchStructure if true will print the details of the sketch structure at the given numLevels. + * @return LevelStats with the final summary of the sketch's cumulative N, + * and cumulative items at the given numLevels. + */ + public static LevelStats getFinalSketchStatsAtNumLevels( + final int k, + final int numLevels, + final boolean printSketchStructure) { + int cumItems = 0; long cumN = 0; - int cumCap = 0; - if (printDetail) { - println("Total Levels: " + numLevels); - printf("%6s%12s%8s%16s\n", "Level","Wt","Cap","N"); + if (printSketchStructure) { + println("SKETCH STRUCTURE:"); + println("Given K : " + k); + println("Given NumLevels: " + numLevels); + printf("%6s %8s %12s %18s %18s\n", "Level", "Items", "CumItems", "N at Level", "CumN"); } for (int level = 0; level < numLevels; level++) { - final long levelCap = levelCapacity(k, numLevels, level, m); - final long maxNAtLevel = levelCap << level; - cumN += maxNAtLevel; - cumCap += (int)levelCap; - if (printDetail) { - printf("%6d%,12d%8d%,16d\n", level, 1 << level, levelCap, maxNAtLevel); + final LevelStats lvlStats = getLevelCapacityItems(k, numLevels, level); + cumItems += lvlStats.items; + cumN += lvlStats.n; + if (printSketchStructure) { + printf("%6d %,8d %,12d %,18d %,18d\n", level, lvlStats.items, cumItems, lvlStats.n, cumN); } } - final int compactBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, false); - final int updatableBytes = KllSketch.getSerializedSizeBytes(numLevels, cumCap, sketchType, true); - if (printDetail) { - printf(" TOTALS%10s %8d%,16d\n", "", cumCap, cumN); - println(" COMPACT BYTES: " + compactBytes); - println(" UPDATABLE BYTES: " + updatableBytes); - println(""); - } - final LevelStats lvlStats = new LevelStats(cumN, numLevels, cumCap, compactBytes, updatableBytes); - if 
(printSummary) { println(lvlStats.toString()); } - return lvlStats; + return new LevelStats(cumN, numLevels, cumItems); } - static class LevelStats { - private long maxN; - private int compactBytes; - private int updatableBytes; - private int numLevels; - private int maxCap; - - LevelStats(final long maxN, final int numLevels, final int maxCap, final int compactBytes, - final int updatableBytes) { - this.maxN = maxN; - this.numLevels = numLevels; - this.maxCap = maxCap; - this.compactBytes = compactBytes; - this.updatableBytes = updatableBytes; - } - - @Override - public String toString() { - final String[] hdr = {"NumLevels", "MaxCap", "MaxN", "TotCompactBytes", "TotUpdatableBytes"}; - final StringBuilder sb = new StringBuilder(); - sb.append("Level Stats Summary:" + LS); - sb.append(String.format("%10s %10s %14s %17s %17s" + LS, (Object[]) hdr)); - sb.append(String.format("%10d %10d %14d %17d %17d" + LS, numLevels, maxCap, maxN, compactBytes, updatableBytes)); - return sb.toString(); + /** + * Given k, n, and the sketch type, this computes (and optionally prints) the growth scheme for a sketch as it + * grows large enough to accommodate a stream length of n items. + * @param k the given sketch parameter + * @param n the desired stream length + * @param sketchType the given sketch type (DOUBLES_SKETCH or FLOATS_SKETCH) + * @param printGrowthScheme if true the entire growth scheme of the sketch will be printed. 
+ * @return GrowthStats with the final values of the growth scheme + */ + public static GrowthStats getGrowthSchemeForGivenN( + final int k, + final long n, + final SketchType sketchType, + final boolean printGrowthScheme) { + int numLevels = 0; + LevelStats lvlStats; + final GrowthStats gStats = new GrowthStats(); + gStats.k = k; + gStats.givenN = n; + gStats.sketchType = sketchType; + if (printGrowthScheme) { + println("GROWTH SCHEME:"); + println("Given SketchType: " + sketchType.toString()); + println("Given K : " + k); + println("Given N : " + n); + printf("%10s %10s %20s %13s %15s\n", "NumLevels", "MaxItems", "MaxN", "CompactBytes", "UpdatableBytes"); } + int compactBytes; + int updatableBytes; + do { + numLevels++; + lvlStats = getFinalSketchStatsAtNumLevels(k, numLevels, false); + final int maxItems = lvlStats.items; + final long maxN = lvlStats.n; + if (sketchType == DOUBLES_SKETCH) { + compactBytes = maxItems * Double.BYTES + numLevels * Integer.BYTES + 2 * Double.BYTES + DATA_START_ADR_DOUBLE; + updatableBytes = compactBytes + Integer.BYTES; + } else { + compactBytes = maxItems * Float.BYTES + numLevels * Integer.BYTES + 2 * Float.BYTES + DATA_START_ADR_FLOAT; + updatableBytes = compactBytes + Integer.BYTES; + } + if (printGrowthScheme) { + printf("%10d %,10d %,20d %,13d %,15d\n", numLevels, maxItems, maxN, compactBytes, updatableBytes); + } + } while (lvlStats.n < n); + gStats.maxN = lvlStats.n; + gStats.numLevels = lvlStats.numLevels; + gStats.maxItems = lvlStats.items; + gStats.compactBytes = compactBytes; + gStats.updatableBytes = updatableBytes; + return gStats; + } - public long getMaxN() { return maxN; } - - public int getCompactBytes() { return compactBytes; } - - public int getNumLevels() { return numLevels; } - - public int getMaxCap() { return maxCap; } + /** + * Given k, numLevels, this computes the item capacity of a single level. 
+ * @param k the given sketch parameter + * @param numLevels the given number of levels of the sketch + * @param level the specific level to compute its item capacity + * @return LevelStats with the computed N and items for the given level. + */ + public static LevelStats getLevelCapacityItems( + final int k, + final int numLevels, + final int level) { + final int items = KllHelper.levelCapacity(k, numLevels, level, M); + final long n = (long)items << level; + return new LevelStats(n, numLevels, items); + } - public int getUpdatableBytes() { return updatableBytes; } + /** + * Checks the validity of the given value k + * @param k must be greater than 7 and less than 65536. + */ + static void checkK(final int k) { + if (k < MIN_K || k > MAX_K) { + throw new SketchesArgumentException( + "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); + } } /** @@ -136,6 +203,53 @@ static int computeTotalItemCapacity(final int k, final int m, final int numLevel return (int) total; } + static int currentLevelSize(final int level, final int numLevels, final int[] levels) { + if (level >= numLevels) { return 0; } + return levels[level + 1] - levels[level]; + } + + /** + * Finds the first level starting with level 0 that exceeds its nominal capacity + * @param k configured size of sketch. Range [m, 2^16] + * @param m minimum level size. Default is 8. + * @param numLevels one-based number of current levels + * @return level to compact + */ + static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { + int level = 0; + while (true) { + assert level < numLevels; + final int pop = levels[level + 1] - levels[level]; + final int cap = KllHelper.levelCapacity(k, numLevels, level, m); + if (pop >= cap) { + return level; + } + level++; + } + } + + /** + * Gets the normalized rank error given k and pmf. + * Static method version of the getNormalizedRankError(boolean). 
+ * @param k the configuration parameter + * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * @return if pmf is true, the normalized rank error for the getPMF() function. + * Otherwise, it is the "single-sided" normalized rank error for all the other queries. + * @see KllDoublesSketch + */ + // constants were derived as the best fit to 99 percentile empirically measured max error in + // thousands of trials + static double getNormalizedRankError(final int k, final boolean pmf) { + return pmf + ? 2.446 / pow(k, 0.9433) + : 2.296 / pow(k, 0.9723); + } + + static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) { + return levels[numLevels] - levels[1]; + } + /** * Returns the item capacity of a specific level. * @param k the accuracy parameter of the sketch. Because of the Java limits on array sizes, @@ -154,6 +268,25 @@ static int levelCapacity(final int k, final int numLevels, final int level, fina return (int) Math.max(m, intCapAux(k, depth)); } + static long sumTheSampleWeights(final int num_levels, final int[] levels) { + long total = 0; + long weight = 1; + for (int i = 0; i < num_levels; i++) { + total += weight * (levels[i + 1] - levels[i]); + weight *= 2; + } + return total; + } + + /** + * Returns very conservative upper bound of the number of levels based on n. + * @param n the length of the stream + * @return floor( log_2(n) ) + */ + static int ubOnNumLevels(final long n) { + return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); + } + /** * Computes the actual item capacity of a given level given its depth index. 
* If the depth of levels exceeds 30, this uses a folding technique to accurately compute the @@ -186,97 +319,19 @@ private static long intCapAuxAux(final long k, final int depth) { } /** - * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index - */ - private static final long[] powersOfThree = - new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441, - 1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467, - 3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L, - 847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L, - 205891132094649L}; - - static long sumTheSampleWeights(final int num_levels, final int[] levels) { - long total = 0; - long weight = 1; - for (int i = 0; i < num_levels; i++) { - total += weight * (levels[i + 1] - levels[i]); - weight *= 2; - } - return total; - } - - /** - * Gets the normalized rank error given k and pmf. - * Static method version of the getNormalizedRankError(boolean). - * @param k the configuration parameter - * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @return if pmf is true, the normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @see KllDoublesSketch - */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials - static double getNormalizedRankError(final int k, final boolean pmf) { - return pmf - ? 2.446 / pow(k, 0.9433) - : 2.296 / pow(k, 0.9723); - } - - /** - * Checks the validity of the given value k - * @param k must be greater than 7 and less than 65536. 
+ * @param fmt format + * @param args arguments */ - static void checkK(final int k) { - if (k < MIN_K || k > MAX_K) { - throw new SketchesArgumentException( - "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); - } - } - - /** - * Finds the first level starting with level 0 that exceeds its nominal capacity - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - * @param numLevels one-based number of current levels - * @return level to compact - */ - static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { - int level = 0; - while (true) { - assert level < numLevels; - final int pop = levels[level + 1] - levels[level]; - final int cap = KllHelper.levelCapacity(k, numLevels, level, m); - if (pop >= cap) { - return level; - } - level++; - } - } - - static int currentLevelSize(final int level, final int numLevels, final int[] levels) { - if (level >= numLevels) { return 0; } - return levels[level + 1] - levels[level]; - } - - static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) { - return levels[numLevels] - levels[1]; + private static void printf(final String fmt, final Object ... args) { + System.out.printf(fmt, args); //Disable } /** * Println Object o * @param o object to print */ - static void println(final Object o) { - //System.out.println(o.toString()); - } - - /** - * @param fmt format - * @param args arguments - */ - static void printf(final String fmt, final Object ... 
args) { - //System.out.printf(fmt, args); //Disable + private static void println(final Object o) { + System.out.println(o.toString()); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index c26ea9d29..b74ebcfaa 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -27,7 +27,6 @@ import static java.lang.Math.min; import static java.lang.Math.round; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; @@ -55,6 +54,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertSingleItemFlag; import static org.apache.datasketches.kll.KllPreambleUtil.insertUpdatableFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import java.util.Arrays; import java.util.Random; @@ -62,7 +62,6 @@ import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.Util; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -165,8 +164,8 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { */ @Deprecated public static int getMaxSerializedSizeBytes(final int k, final long n) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, SketchType.FLOATS_SKETCH); - return lvlStats.getCompactBytes(); + final KllHelper.GrowthStats gStats = 
KllHelper.getGrowthSchemeForGivenN(k, n, FLOATS_SKETCH, false); + return gStats.compactBytes; } /** @@ -179,8 +178,8 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { */ public static int getMaxSerializedSizeBytes(final int k, final long n, final SketchType sketchType, final boolean updatable) { - final LevelStats lvlStats = getAllLevelStatsGivenN(k, M, n, false, false, sketchType); - return updatable ? lvlStats.getUpdatableBytes() : lvlStats.getCompactBytes(); + final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, sketchType, false); + return updatable ? gStats.updatableBytes : gStats.compactBytes; } /** diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 0fd08ad1d..887d0f05f 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -19,14 +19,11 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN; -import static org.apache.datasketches.kll.KllHelper.getLevelStats; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.testng.Assert.assertEquals; -import org.apache.datasketches.kll.KllHelper.LevelStats; import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; @@ -39,8 +36,8 @@ public void testGetAllLevelStats() { long n = 1L << 30; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, false, false, DOUBLES_SKETCH); - assertEquals(lvlStats.getCompactBytes(), 5708); + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, DOUBLES_SKETCH, false); + 
assertEquals(gStats.compactBytes, 5708); } @Test @@ -54,13 +51,14 @@ public void checkGetKFromEps() { assertEquals(kEpsPmf, k); } - @Test //convert two false below to true for visual checking + @Test public void getStatsAtNumLevels() { int k = 200; int m = 8; int numLevels = 23; - LevelStats lvlStats = getLevelStats(k, m, numLevels, false, false, DOUBLES_SKETCH); - assertEquals(lvlStats.getCompactBytes(), 5708); + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, numLevels, false); + assertEquals(lvlStats.items, 697); + assertEquals(lvlStats.n, 1257766904); } @Test @@ -153,9 +151,9 @@ public void testGetAllLevelStats2() { long n = 533; int k = 200; int m = 8; - LevelStats lvlStats = getAllLevelStatsGivenN(k, m, n, true, true, DOUBLES_SKETCH); - assertEquals(lvlStats.getNumLevels(), 2); - assertEquals(lvlStats.getMaxCap(), 333); + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, DOUBLES_SKETCH, false); + assertEquals(gStats.numLevels, 2); + assertEquals(gStats.maxItems, 333); } @@ -164,9 +162,9 @@ public void getStatsAtNumLevels2() { int k = 20; int m = 8; int numLevels = 2; - LevelStats lvlStats = getLevelStats(k, m, numLevels, true, true, DOUBLES_SKETCH); - assertEquals(lvlStats.getNumLevels(), 2); - assertEquals(lvlStats.getMaxCap(), 33); + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, numLevels, false); + assertEquals(lvlStats.numLevels, 2); + assertEquals(lvlStats.items, 33); } /** From 70205c55c750651bb1a0900951fadf5ad5f4c6b4 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 29 Mar 2022 11:39:52 -0700 Subject: [PATCH 18/31] Enabled the ability to grow into a larger configured Memory to reduce the number of calls to the MemoryRequestServer. 
--- .../kll/KllDirectDoublesSketch.java | 77 +++++++++- .../kll/KllDirectFloatsSketch.java | 79 ++++++++++- .../datasketches/kll/KllDirectSketch.java | 20 ++- .../datasketches/kll/KllDoublesSketch.java | 26 +++- .../datasketches/kll/KllFloatsSketch.java | 26 +++- .../datasketches/kll/KllHeapSketch.java | 21 +-- .../apache/datasketches/kll/KllHelper.java | 43 ++++-- .../datasketches/kll/KllPreambleUtil.java | 35 +++-- .../apache/datasketches/kll/KllSketch.java | 121 +++++++++------- .../datasketches/kll/MemoryValidate.java | 36 ++--- .../KllDirectDoublesSketchIteratorTest.java | 2 +- .../kll/KllDirectDoublesSketchTest.java | 32 ++--- .../KllDirectFloatsSketchIteratorTest.java | 2 +- .../kll/KllDirectFloatsSketchTest.java | 32 ++--- .../kll/KllDoublesSketchTest.java | 26 ++-- .../datasketches/kll/KllFloatsSketchTest.java | 8 +- .../datasketches/kll/KllHelperTest.java | 131 ++++++++++-------- .../datasketches/kll/MemoryValidateTest.java | 56 ++++---- .../kll/MiscDirectDoublesTest.java | 28 ++-- .../kll/MiscDirectFloatsTest.java | 28 ++-- .../datasketches/kll/MiscDoublesTest.java | 11 +- .../datasketches/kll/MiscFloatsTest.java | 21 ++- 22 files changed, 538 insertions(+), 323 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 6d0911c8e..547aff0bb 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -21,7 +21,23 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; +import static 
org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.insertK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertM; +import static org.apache.datasketches.kll.KllPreambleUtil.insertN; +import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -36,12 +52,67 @@ public final class KllDirectDoublesSketch extends KllDirectSketch { /** - * + * The actual constructor. * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @param memVal the MemoryValadate object + */ + private KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, + final MemoryValidate memVal) { + super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr, memVal); + } + + /** + * Wrap a sketch around the given source Memory containing sketch data that originated from + * this sketch. + * @param srcMem a WritableMemory that contains data. 
+ * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return instance of this sketch + */ + public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { + final MemoryValidate memVal = new MemoryValidate(srcMem); + return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal); + } + + /** + * Create a new instance of this sketch using default M. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch + */ + public static KllDirectDoublesSketch newInstance(final int k, final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + return newInstance(k, DEFAULT_M, dstMem, memReqSvr); + } + + /** + * Create a new instance of this sketch. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width. 
+ * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch */ - public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) { - super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr); + public static KllDirectDoublesSketch newInstance(final int k, final int m, final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + insertPreInts(dstMem, PREAMBLE_INTS_DOUBLE); + insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); + insertFamilyID(dstMem, Family.KLL.getID()); + insertFlags(dstMem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); + insertK(dstMem, k); + insertM(dstMem, m); + insertN(dstMem, 0); + insertDyMinK(dstMem, k); + insertNumLevels(dstMem, 1); + int offset = DATA_START_ADR_DOUBLE; + dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); + offset += 2 * Integer.BYTES; + dstMem.putDoubleArray(offset, new double[] {Double.NaN, Double.NaN}, 0, 2); + offset += 2 * Double.BYTES; + dstMem.putDoubleArray(offset, new double[k], 0, k); + final MemoryValidate memVal = new MemoryValidate(dstMem); + return new KllDirectDoublesSketch(dstMem, memReqSvr, memVal); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 7a7d928ee..97c214a01 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -21,10 +21,26 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static 
org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.insertK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertM; +import static org.apache.datasketches.kll.KllPreambleUtil.insertN; +import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +//Intentional /** * This class implements an off-heap floats KllSketch via a WritableMemory instance of the sketch. * @@ -33,15 +49,70 @@ * * @author Lee Rhodes, Kevin Lang */ -public class KllDirectFloatsSketch extends KllDirectSketch { +public final class KllDirectFloatsSketch extends KllDirectSketch { /** - * + * The actual constructor * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @param memVal the MemoryValadate object + */ + private KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, + final MemoryValidate memVal) { + super(SketchType.FLOATS_SKETCH, wmem, memReqSvr, memVal); + } + + /** + * Wrap a sketch around the given source Memory containing sketch data that originated from + * this sketch. + * @param srcMem a WritableMemory that contains data. 
+ * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return instance of this sketch + */ + public static KllDirectFloatsSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { + final MemoryValidate memVal = new MemoryValidate(srcMem); + return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal); + } + + /** + * Create a new instance of this sketch using default M. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch + */ + public static KllDirectFloatsSketch newInstance(final int k, final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + return newInstance(k, DEFAULT_M, dstMem, memReqSvr); + } + + /** + * Create a new instance of this sketch. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width. 
+ * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch */ - public KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) { - super(SketchType.FLOATS_SKETCH, wmem, memReqSvr); + public static KllDirectFloatsSketch newInstance(final int k, final int m, final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + insertPreInts(dstMem, PREAMBLE_INTS_FLOAT); + insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); + insertFamilyID(dstMem, Family.KLL.getID()); + insertFlags(dstMem, UPDATABLE_BIT_MASK); + insertK(dstMem, k); + insertM(dstMem, m); + insertN(dstMem, 0); + insertDyMinK(dstMem, k); + insertNumLevels(dstMem, 1); + int offset = DATA_START_ADR_FLOAT; + dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); + offset += 2 * Integer.BYTES; + dstMem.putFloatArray(offset, new float[] {Float.NaN, Float.NaN}, 0, 2); + offset += 2 * Float.BYTES; + dstMem.putFloatArray(offset, new float[k], 0, k); + final MemoryValidate memVal = new MemoryValidate(dstMem); + return new KllDirectFloatsSketch(dstMem, memReqSvr, memVal); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index b3ec22c2b..ef1009819 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -22,6 +22,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; import static org.apache.datasketches.kll.KllPreambleUtil.extractK; import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.extractM; import static org.apache.datasketches.kll.KllPreambleUtil.extractN; import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; 
import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; @@ -31,7 +32,6 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import org.apache.datasketches.kll.KllPreambleUtil.Layout; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -42,8 +42,7 @@ abstract class KllDirectSketch extends KllSketch { //All these members are constant for the life of this object. If the WritableMemory changes, // it may require rebuilding this class - final Layout layout; - final boolean updatable; + final boolean updatable = true; WritableMemory levelsArrUpdatable; WritableMemory minMaxArrUpdatable; WritableMemory itemsArrUpdatable; @@ -55,12 +54,9 @@ abstract class KllDirectSketch extends KllSketch { * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory */ - KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr) { + KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr, + final MemoryValidate memVal) { super(sketchType, wmem, memReqSvr); - final MemoryValidate memVal = new MemoryValidate(wmem); - layout = memVal.layout; - updatable = memVal.updatable; - if (!updatable) { kllSketchThrow(31); } levelsArrUpdatable = memVal.levelsArrUpdatable; minMaxArrUpdatable = memVal.minMaxArrUpdatable; itemsArrUpdatable = memVal.itemsArrUpdatable; @@ -71,6 +67,11 @@ public int getK() { return extractK(wmem); } + @Override + public int getM() { + return extractM(wmem); + } + @Override public long getN() { return extractN(wmem); @@ -123,9 +124,6 @@ int getItemsArrLengthItems() { return getLevelsArray()[getNumLevels()]; } - @Override - String getLayout() { return layout.toString(); } - @Override int[] getLevelsArray() { final int numInts = 
getNumLevels() + 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 2fc4b0ad8..952d258ed 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -22,6 +22,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -42,20 +43,37 @@ public final class KllDoublesSketch extends KllHeapSketch { private double maxDoubleValue_; /** - * Heap constructor with the default k = 200, which has a rank error of about 1.65%. + * Heap constructor with the default k = 200, and DEFAULT_M of 8. + * This will have a rank error of about 1.65%. */ public KllDoublesSketch() { this(DEFAULT_K); } /** - * Heap constructor with a given parameter k. k can be any value between 8 and + * Heap constructor with a given parameter k. k can be any value between DEFAULT_M and * 65535, inclusive. The default k = 200 results in a normalized rank error of about * 1.65%. Higher values of K will have smaller error but the sketch will be larger (and slower). + * This constructor assumes the DEFAULT_M, which is 8. * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllDoublesSketch(final int k) { - super(k, SketchType.DOUBLES_SKETCH); + this(k, DEFAULT_M); + } + + /** + * Heap constructor with a given parameter k and m. + * k can be any value between DEFAULT_M and 65535, inclusive. + * The default k = 200 results in a normalized rank error of about 1.65%. + * Higher values of K will have smaller error but the sketch will be larger (and slower). + * The DEFAULT_M, which is 8 is recommended for the given parameter m. 
+ * Other values of m should be considered experimental as they have not been + * as well characterized. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width. + */ + public KllDoublesSketch(final int k, final int m) { + super(k, m, SketchType.DOUBLES_SKETCH); doubleItems_ = new double[k]; minDoubleValue_ = Double.NaN; maxDoubleValue_ = Double.NaN; @@ -67,7 +85,7 @@ public KllDoublesSketch(final int k) { * @param memVal the MemoryCheck object */ private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { - super(memVal.k, SketchType.DOUBLES_SKETCH); + super(memVal.k, memVal.m, SketchType.DOUBLES_SKETCH); buildHeapKllSketchFromMemory(memVal); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 442c6c73d..6faec6acf 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -22,6 +22,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -42,20 +43,37 @@ public final class KllFloatsSketch extends KllHeapSketch { private float maxFloatValue_; /** - * Heap constructor with the default k = 200, which has a rank error of about 1.65%. + * Heap constructor with the default k = 200, and DEFAULT_M of 8. + * This will have a rank error of about 1.65%. */ public KllFloatsSketch() { this(DEFAULT_K); } /** - * Heap constructor with a given parameter k. k can be any value between 8 and + * Heap constructor with a given parameter k. k can be any value between DEFAULT_M and * 65535, inclusive. 
The default k = 200 results in a normalized rank error of about * 1.65%. Higher values of K will have smaller error but the sketch will be larger (and slower). + * This constructor assumes the DEFAULT_M, which is 8. * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllFloatsSketch(final int k) { - super(k, SketchType.FLOATS_SKETCH); + this(k, DEFAULT_M); + } + + /** + * Heap constructor with a given parameter k and m. + * k can be any value between DEFAULT_M and 65535, inclusive. + * The default k = 200 results in a normalized rank error of about 1.65%. + * Higher values of K will have smaller error but the sketch will be larger (and slower). + * The DEFAULT_M, which is 8 is recommended for the given parameter m. + * Other values of m should be considered experimental as they have not been + * as well characterized. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width. + */ + public KllFloatsSketch(final int k, final int m) { + super(k, m, SketchType.FLOATS_SKETCH); floatItems_ = new float[k]; minFloatValue_ = Float.NaN; maxFloatValue_ = Float.NaN; @@ -67,7 +85,7 @@ public KllFloatsSketch(final int k) { * @param memVal the MemoryCheck object */ private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { - super(memVal.k, SketchType.FLOATS_SKETCH); + super(memVal.k, memVal.m, SketchType.FLOATS_SKETCH); buildHeapKllSketchFromMemory(memVal); } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 297f16770..94c295af0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -28,23 +28,26 @@ * @author lrhodes */ abstract class KllHeapSketch extends KllSketch { - private long n_; // number of items input into this sketch. 
private final int k; // configured value of K. + private final int m; // configured value of M. + private long n_; // number of items input into this sketch. private int dyMinK_; // dynamic minK for error estimation after merging with different k. - private int numLevels_; // one-based number of current levels. private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. private boolean isLevelZeroSorted_; /** * Heap constructor. - * @param k configured size of sketch. Range [m, 2^16] + * @param k user configured size of sketch. Range [m, 2^16] + * @param m user configured minimum level width * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH */ - KllHeapSketch(final int k, final SketchType sketchType) { + KllHeapSketch(final int k, final int m, final SketchType sketchType) { super(sketchType, null, null); - KllHelper.checkK(k); + KllHelper.checkM(m); + KllHelper.checkK(k, m); this.k = k; + this.m = m; n_ = 0; dyMinK_ = k; numLevels_ = 1; @@ -57,6 +60,11 @@ public int getK() { return k; } + @Override + public int getM() { + return m; + } + @Override public long getN() { return n_; @@ -67,9 +75,6 @@ int getDyMinK() { return dyMinK_; } - @Override - String getLayout() { return "HEAP"; } - @Override int[] getLevelsArray() { return levels_; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 931e6131f..ba06282e8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -24,8 +24,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; -import static org.apache.datasketches.kll.KllSketch.M; import static 
org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import org.apache.datasketches.SketchesArgumentException; @@ -44,6 +42,7 @@ public class KllHelper { public static class GrowthStats { SketchType sketchType; int k; + int m; long givenN; long maxN; int numLevels; @@ -75,9 +74,10 @@ public static class LevelStats { 205891132094649L}; /** - * Given K and numLevels, this computes and optionally prints the structure of the sketch when the given + * Given k, m, and numLevels, this computes and optionally prints the structure of the sketch when the given * number of levels are completely filled. - * @param k the given sketch parameter + * @param k the given user configured sketch parameter + * @param m the given user configured sketch parameter * @param numLevels the given number of levels of the sketch * @param printSketchStructure if true will print the details of the sketch structure at the given numLevels. * @return LevelStats with the final summary of the sketch's cumulative N, @@ -85,6 +85,7 @@ public static class LevelStats { */ public static LevelStats getFinalSketchStatsAtNumLevels( final int k, + final int m, final int numLevels, final boolean printSketchStructure) { int cumItems = 0; @@ -92,11 +93,12 @@ public static LevelStats getFinalSketchStatsAtNumLevels( if (printSketchStructure) { println("SKETCH STRUCTURE:"); println("Given K : " + k); + println("Given M : " + m); println("Given NumLevels: " + numLevels); printf("%6s %8s %12s %18s %18s\n", "Level", "Items", "CumItems", "N at Level", "CumN"); } for (int level = 0; level < numLevels; level++) { - final LevelStats lvlStats = getLevelCapacityItems(k, numLevels, level); + final LevelStats lvlStats = getLevelCapacityItems(k, m, numLevels, level); cumItems += lvlStats.items; cumN += lvlStats.n; if (printSketchStructure) { @@ -107,9 +109,10 @@ public static LevelStats getFinalSketchStatsAtNumLevels( } /** - * Given k, n, and the sketch type, this computes (and optionally prints) the growth scheme 
for a sketch as it + * Given k, m, n, and the sketch type, this computes (and optionally prints) the growth scheme for a sketch as it * grows large enough to accommodate a stream length of n items. - * @param k the given sketch parameter + * @param k the given user configured sketch parameter + * @param m the given user configured sketch parameter * @param n the desired stream length * @param sketchType the given sketch type (DOUBLES_SKETCH or FLOATS_SKETCH) * @param printGrowthScheme if true the entire growth scheme of the sketch will be printed. @@ -117,6 +120,7 @@ public static LevelStats getFinalSketchStatsAtNumLevels( */ public static GrowthStats getGrowthSchemeForGivenN( final int k, + final int m, final long n, final SketchType sketchType, final boolean printGrowthScheme) { @@ -124,12 +128,14 @@ public static GrowthStats getGrowthSchemeForGivenN( LevelStats lvlStats; final GrowthStats gStats = new GrowthStats(); gStats.k = k; + gStats.m = m; gStats.givenN = n; gStats.sketchType = sketchType; if (printGrowthScheme) { println("GROWTH SCHEME:"); println("Given SketchType: " + sketchType.toString()); println("Given K : " + k); + println("Given M : " + m); println("Given N : " + n); printf("%10s %10s %20s %13s %15s\n", "NumLevels", "MaxItems", "MaxN", "CompactBytes", "UpdatableBytes"); } @@ -137,7 +143,7 @@ public static GrowthStats getGrowthSchemeForGivenN( int updatableBytes; do { numLevels++; - lvlStats = getFinalSketchStatsAtNumLevels(k, numLevels, false); + lvlStats = getFinalSketchStatsAtNumLevels(k, m, numLevels, false); final int maxItems = lvlStats.items; final long maxN = lvlStats.n; if (sketchType == DOUBLES_SKETCH) { @@ -160,17 +166,19 @@ public static GrowthStats getGrowthSchemeForGivenN( } /** - * Given k, numLevels, this computes the item capacity of a single level. - * @param k the given sketch parameter + * Given k, m, numLevels, this computes the item capacity of a single level. 
+ * @param k the given user sketch configuration parameter + * @param m the given user sketch configuration parameter * @param numLevels the given number of levels of the sketch * @param level the specific level to compute its item capacity * @return LevelStats with the computed N and items for the given level. */ public static LevelStats getLevelCapacityItems( final int k, + final int m, final int numLevels, final int level) { - final int items = KllHelper.levelCapacity(k, numLevels, level, M); + final int items = KllHelper.levelCapacity(k, numLevels, level, m); final long n = (long)items << level; return new LevelStats(n, numLevels, items); } @@ -179,10 +187,17 @@ public static LevelStats getLevelCapacityItems( * Checks the validity of the given value k * @param k must be greater than 7 and less than 65536. */ - static void checkK(final int k) { - if (k < MIN_K || k > MAX_K) { + static void checkK(final int k, final int m) { + if (k < m || k > MAX_K) { throw new SketchesArgumentException( - "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); + "K must be >= " + m + " and <= " + MAX_K + ": " + k); + } + } + + static void checkM(final int m) { + if (m < 2 || m > 8 || ((m & 1) == 1)) { + throw new SketchesArgumentException( + "M must be >= 2, <= 8 and even: " + m); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 8b553d035..61d1f37fd 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -51,15 +51,21 @@ * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| - * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<--------------data----------------| unused |numLevels|--dynamic-min K--------| + * || | | | 20 | 19 | 18 | 
17 | 16 | + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|--Dynamic-Min K--------| + * || | | | | | | | | + * ? ||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| + * || | | | | | | | | + * ? ||<-----Float Items Arr Start-------]|[<---------Min/Max Arr End----------------| + * || | | | | | | | | + * ? || | | | |[<-------Float Items Arr End--------------| * * Serialized float sketch layout, Empty (8 bytes) and Single Item (12 bytes): * Adr: * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 || |-------------------data-------------------| + * 1 || |-------------Single Item------------------| * * * @@ -70,16 +76,20 @@ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||--------------unused------------------------|numLevels|--dynamic-min K--------| - * || | 24 | - * 3 ||<---------------------------------data----------------------------------------| + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|--Dynamic-Min K--------| + * || | | | | | | | | + * ? ||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| + * || | | | | | | | | + * ? ||<----Double Items Arr Start-------]|[<---------Min/Max Arr End----------------| + * || | | | | | | | | + * ? 
|| | | | |[<------Double Items Arr End--------------| * * Serialized double sketch layout, Empty (8 bytes) and Single Item (16 bytes): * Adr: * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | * || | 8 | - * 1 ||----------------------------------data----------------------------------------| + * 1 ||------------------------------Single Item-------------------------------------| * * The structure of the data block depends on Layout: * @@ -92,7 +102,7 @@ * Followed by an array of Floats of length retainedItems() * * For DOUBLE_FULL_COMPACT - * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 24 with a length of numLevels integers; + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of numLevels integers; * Followed by Double Min_Value, then Double Max_Value * Followed by an array of Doubles of length retainedItems() * @@ -102,7 +112,7 @@ * Followed by an array of Floats of length KllHelper.computeTotalItemCapacity(...). * * For DOUBLE_UPDATABLE - * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 24 with a length of (numLevels + 1) integers; + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of (numLevels + 1) integers; * Followed by Double Min_Value, then Double Max_Value * Followed by an array of Doubles of length KllHelper.computeTotalItemCapacity(...). 
* @@ -120,8 +130,7 @@ private KllPreambleUtil() {} * The default value of K */ public static final int DEFAULT_K = 200; - static final int DEFAULT_M = 8; - static final int MIN_K = DEFAULT_M; + public static final int DEFAULT_M = 8; static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short // Preamble byte addresses @@ -144,7 +153,7 @@ private KllPreambleUtil() {} static final int DATA_START_ADR_FLOAT = 20; // float sketch, not single item // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch - static final int DATA_START_ADR_DOUBLE = 24; // double sketch, not single item + static final int DATA_START_ADR_DOUBLE = 20; // double sketch, not single item //TODO?? // Other static values static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format @@ -152,7 +161,7 @@ private KllPreambleUtil() {} static final byte SERIAL_VERSION_UPDATABLE = 3; // static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item static final int PREAMBLE_INTS_FLOAT = 5; // not empty nor single item, full preamble float - static final int PREAMBLE_INTS_DOUBLE = 6; // not empty nor single item, full preamble double + static final int PREAMBLE_INTS_DOUBLE = 5; // not empty nor single item, full preamble double // Flag bit masks static final int EMPTY_BIT_MASK = 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index b74ebcfaa..97c98fd3d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -27,12 +27,11 @@ import static java.lang.Math.min; import static java.lang.Math.round; import static org.apache.datasketches.Util.isOdd; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; 
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; @@ -98,8 +97,8 @@ */ public abstract class KllSketch { static final Random random = new Random(); - static final int M = DEFAULT_M; // configured minimum buffer "width", Must always be 8 for now. static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch + //final int M = DEFAULT_M; // configured minimum buffer "width", default is 8. SketchType sketchType; WritableMemory wmem; MemoryRequestServer memReqSvr; @@ -150,35 +149,38 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { final double krnd = round(kdbl); final double del = abs(krnd - kdbl); final int k = (int) (del < 1E-6 ? krnd : ceil(kdbl)); - return max(MIN_K, min(MAX_K, k)); + return max(2, min(MAX_K, k)); } /** * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter * k and stream length. This method can be used if allocation of storage - * is necessary beforehand. + * is necessary beforehand. This assumes the DEFAULT_M = 8 used in older sketches, it will not + * work with other values of m. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @return upper bound on the compact serialized size - * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. + * @deprecated use {@link #getMaxSerializedSizeBytes(int, int, long, SketchType, boolean)} instead. 
*/ @Deprecated public static int getMaxSerializedSizeBytes(final int k, final long n) { - final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, FLOATS_SKETCH, false); + final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, DEFAULT_M, n, FLOATS_SKETCH, false); return gStats.compactBytes; } /** * Returns upper bound on the serialized size of a KllSketch given the following parameters. * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the smallest value of k, and the smallest level width. + * If in doubt, use the default value of 8. * @param n stream length * @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH * @param updatable true if updatable form, otherwise the standard compact form. * @return upper bound on the serialized size of a KllSketch. */ - public static int getMaxSerializedSizeBytes(final int k, final long n, + public static int getMaxSerializedSizeBytes(final int k, final int m, final long n, final SketchType sketchType, final boolean updatable) { - final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, sketchType, false); + final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, m, n, sketchType, false); return updatable ? gStats.updatableBytes : gStats.compactBytes; } @@ -257,18 +259,24 @@ public final int getCurrentCompactSerializedSizeBytes() { * @return the current updatable number of bytes this sketch would require to store. 
*/ public final int getCurrentUpdatableSerializedSizeBytes() { - final int itemCap = KllHelper.computeTotalItemCapacity(getK(), M, getNumLevels()); + final int itemCap = KllHelper.computeTotalItemCapacity(getK(), getM(), getNumLevels()); return KllSketch.getSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); } /** - * Returns the parameter k - * @return parameter k + * Returns the user configured parameter k + * @return the user configured parameter k */ public abstract int getK(); /** - * Returns the length of the input stream. + * Returns the user configured parameter m + * @return the user configured parameter m + */ + public abstract int getM(); + + /** + * Returns the length of the input stream in items. * @return stream length */ public abstract long getN(); @@ -605,8 +613,6 @@ final float[] getFloatsQuantiles(final double[] fractions) { abstract float getFloatItemsArrayAt(int index); - abstract String getLayout(); - abstract int[] getLevelsArray(); abstract int getLevelsArrayAt(int index); @@ -627,14 +633,17 @@ final float[] getFloatsQuantiles(final double[] fractions) { boolean isDoublesSketch() { return sketchType == DOUBLES_SKETCH; } - boolean isFloatsSketch() { return sketchType != DOUBLES_SKETCH; } + boolean isFloatsSketch() { return sketchType == FLOATS_SKETCH; } abstract boolean isLevelZeroSorted(); /** * This method is for direct Double and Float sketches only and does the following: - *
    • Allocates a new WritableMemory of the required size
    • - *
    • Copies over the preamble as is (20 or 24 bytes)
    • + *
        + *
      • Determines if the required sketch bytes will fit in the current Memory. + * If so, it will stretch the positioning of the arrays to fit. Otherwise: + *
      • Allocates a new WritableMemory of the required size
      • + *
      • Copies over the preamble as is (20 bytes)
      • *
      • Creates new memory regions for Levels Array, Min/Max Array, Items Array, but * does not fill them. They may contain garbage.
      • *
      @@ -650,9 +659,9 @@ static WritableMemory memorySpaceMgmt( final int newItemsArrLen) { final SketchType sketchType = sketch.sketchType; final WritableMemory oldWmem = sketch.wmem; + final int typeBytes; final int startAdr; - if (sketchType == DOUBLES_SKETCH) { typeBytes = Double.BYTES; startAdr = DATA_START_ADR_DOUBLE; @@ -660,14 +669,14 @@ static WritableMemory memorySpaceMgmt( typeBytes = Float.BYTES; startAdr = DATA_START_ADR_FLOAT; } - int totalSketchBytes = startAdr; - totalSketchBytes += newLevelsArrLen * Integer.BYTES; - totalSketchBytes += 2 * typeBytes; - totalSketchBytes += newItemsArrLen * typeBytes; + int requiredSketchBytes = startAdr; + requiredSketchBytes += newLevelsArrLen * Integer.BYTES; + requiredSketchBytes += 2 * typeBytes; + requiredSketchBytes += newItemsArrLen * typeBytes; final WritableMemory newWmem; - if (totalSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory - newWmem = sketch.memReqSvr.request(oldWmem, totalSketchBytes); + if (requiredSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory + newWmem = sketch.memReqSvr.request(oldWmem, requiredSketchBytes); oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble } else { //Expand in current memory @@ -686,7 +695,7 @@ static WritableMemory memorySpaceMgmt( //ITEMS ARR lengthBytes = newItemsArrLen * typeBytes; sketch.setItemsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); - assert totalSketchBytes <= newWmem.getCapacity(); + assert requiredSketchBytes <= newWmem.getCapacity(); return newWmem; } @@ -725,7 +734,7 @@ final void mergeDoubleImpl(final KllSketch other) { populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output - final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), M, provisionalNumLevels, workbuf, + final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), getM(), provisionalNumLevels, workbuf, worklevels, workbuf, 
outlevels, isLevelZeroSorted(), random); final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels final int curItemCount = result[2]; //was finalPop @@ -835,7 +844,7 @@ final void mergeFloatImpl(final KllSketch other) { populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output - final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), M, provisionalNumLevels, workbuf, + final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), getM(), provisionalNumLevels, workbuf, worklevels, workbuf, outlevels, isLevelZeroSorted(), random); final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels final int curItemCount = result[2]; //was finalPop @@ -1018,38 +1027,44 @@ private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wme insertDoubleSketchFlag(wmem, doubleType); insertUpdatableFlag(wmem, updatable); insertK(wmem, sk.getK()); - insertM(wmem, M); + insertM(wmem, sk.getM()); } @SuppressWarnings("null") final String toStringImpl(final boolean withLevels, final boolean withData) { final boolean doubleType = (sketchType == DOUBLES_SKETCH); final int k = getK(); + final int m = getM(); final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); - final String skType = (doubleType) ? 
"Doubles" : "Floats"; - sb.append(Util.LS).append("### KLL ").append(skType).append("Sketch summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); - sb.append(" M : ").append(M).append(Util.LS); - sb.append(" N : ").append(getN()).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); + final String skType = (direct ? "Direct" : "") + (doubleType ? "Doubles" : "Floats"); + sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); + sb.append(" K : ").append(k).append(Util.LS); + sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); + sb.append(" M : ").append(m).append(Util.LS); + sb.append(" N : ").append(getN()).append(Util.LS); + sb.append(" Epsilon : ").append(epsPct).append(Util.LS); + sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); + sb.append(" Empty : ").append(isEmpty()).append(Util.LS); + sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); + sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); + sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); final int cap = (doubleType) ? 
getDoubleItemsArray().length : getFloatItemsArray().length; - sb.append(" Capacity Items : ").append(cap).append(Util.LS); - sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); + sb.append(" Capacity Items : ").append(cap).append(Util.LS); + sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); + if (direct) { + sb.append(" Updatable Storage Bytes: ").append(getCurrentUpdatableSerializedSizeBytes()).append(Util.LS); + } else { + sb.append(" Compact Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); + } + if (doubleType) { - sb.append(" Min Value : ").append(getMinDoubleValue()).append(Util.LS); - sb.append(" Max Value : ").append(getMaxDoubleValue()).append(Util.LS); + sb.append(" Min Value : ").append(getMinDoubleValue()).append(Util.LS); + sb.append(" Max Value : ").append(getMaxDoubleValue()).append(Util.LS); } else { - sb.append(" Min Value : ").append(getMinFloatValue()).append(Util.LS); - sb.append(" Max Value : ").append(getMaxFloatValue()).append(Util.LS); + sb.append(" Min Value : ").append(getMinFloatValue()).append(Util.LS); + sb.append(" Max Value : ").append(getMaxFloatValue()).append(Util.LS); } sb.append("### End sketch summary").append(Util.LS); @@ -1063,7 +1078,7 @@ final String toStringImpl(final boolean withLevels, final boolean withData) { myFloatItemsArr = getFloatItemsArray(); } if (withLevels) { - sb.append(outputLevels(k, myNumLevels, myLevelsArr)); + sb.append(outputLevels(k, m, myNumLevels, myLevelsArr)); } if (withData) { sb.append(outputData(doubleType, myNumLevels, myLevelsArr, myFloatItemsArr, myDoubleItemsArr)); @@ -1071,14 +1086,14 @@ final String toStringImpl(final boolean withLevels, final boolean withData) { return sb.toString(); } - static String outputLevels(final int k, final int numLevels, final int[] levelsArr) { + static String outputLevels(final int 
k, final int m, final int numLevels, final int[] levelsArr) { final StringBuilder sb = new StringBuilder(); sb.append("### KLL levels array:").append(Util.LS) .append(" level, offset: nominal capacity, actual size").append(Util.LS); int level = 0; for ( ; level < numLevels; level++) { sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") - .append(KllHelper.levelCapacity(k, numLevels, level, M)) + .append(KllHelper.levelCapacity(k, numLevels, level, m)) .append(", ").append(KllHelper.currentLevelSize(level, numLevels, levelsArr)).append(Util.LS); } sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") @@ -1256,7 +1271,7 @@ private void addEmptyTopLevelToCompletelyFullSketch() { } assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme - final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, M); + final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, getM()); myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap; // Check if growing the levels arr if required. @@ -1313,7 +1328,7 @@ private void addEmptyTopLevelToCompletelyFullSketch() { // It cannot be used while merging, while reducing k, or anything else. @SuppressWarnings("null") private void compressWhileUpdatingSketch() { - final int level = KllHelper.findLevelToCompact(getK(), M, getNumLevels(), getLevelsArray()); + final int level = KllHelper.findLevelToCompact(getK(), getM(), getNumLevels(), getLevelsArray()); if (level == getNumLevels() - 1) { //The level to compact is the top level, thus we need to add a level. 
//Be aware that this operation grows the items array, diff --git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java index c932a9891..f7a9ce881 100644 --- a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/MemoryValidate.java @@ -23,7 +23,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; @@ -108,9 +107,9 @@ final class MemoryValidate { doublesSketch = extractDoubleSketchFlag(srcMem); updatable = extractUpdatableFlag(srcMem); k = extractK(srcMem); - KllHelper.checkK(k); m = extractM(srcMem); - if (m != DEFAULT_M) { memoryValidateThrow(7, m); } + KllHelper.checkM(m); + KllHelper.checkK(k, m); if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(10, 0); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } @@ -121,10 +120,9 @@ void compactMemoryValidate(final Memory srcMem) { if (empty && singleItem) { memoryValidateThrow(20, 0); } final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); switch (sw) { - case 0: { //Float Compact FULL + case 0: { //FLOAT_FULL_COMPACT if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } - layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -149,7 +147,7 @@ void compactMemoryValidate(final Memory srcMem) { sketchBytes = offset + itemsRetained * Float.BYTES; break; } - case 1: { //Float Compact EMPTY + case 1: { //FLOAT_EMPTY_COMPACT if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.FLOAT_EMPTY_COMPACT; @@ -168,7 +166,7 @@ void compactMemoryValidate(final Memory srcMem) { itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } - case 4: { //Float Compact SINGLE + case 4: { //FLOAT_SINGLE_COMPACT if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } layout = Layout.FLOAT_SINGLE_COMPACT; @@ -190,7 +188,7 @@ void compactMemoryValidate(final Memory srcMem) { itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } - case 8: { //Double Compact FULL + case 8: { //DOUBLE_FULL_COMPACT if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; @@ -217,7 +215,7 @@ void compactMemoryValidate(final Memory srcMem) { sketchBytes = offset + itemsRetained * Double.BYTES; break; } - case 9: { //Double Compact EMPTY + case 9: { //DOUBLE_EMPTY_COMPACT if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } layout = Layout.DOUBLE_EMPTY_COMPACT; @@ -237,7 +235,7 @@ void compactMemoryValidate(final Memory srcMem) { itemsArrStart = DATA_START_ADR_SINGLE_ITEM; 
break; } - case 12: { //Double Compact SINGLE + case 12: { //DOUBLE_SINGLE_COMPACT if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } layout = Layout.DOUBLE_SINGLE_COMPACT; @@ -265,7 +263,7 @@ void compactMemoryValidate(final Memory srcMem) { } void updatableMemoryValidate(final WritableMemory wSrcMem) { - if (doublesSketch) { //Double Updatable FULL + if (doublesSketch) { //DOUBLE_UPDATABLE if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } layout = Layout.DOUBLE_UPDATABLE; n = extractN(wSrcMem); @@ -285,12 +283,10 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Double.BYTES; itemsArrStart = offset; - itemsArrStart = memCapacity - itemsArrBytes; - if (itemsArrStart < offset) { memoryValidateThrow(24, offset - itemsArrStart); } - itemsArrUpdatable = wSrcMem.writableRegion(itemsArrStart, itemsArrBytes); - sketchBytes = itemsArrStart + itemsArrBytes; + itemsArrUpdatable = wSrcMem.writableRegion(offset, itemsArrBytes); + sketchBytes = offset + itemsArrBytes; } - else { //Float Updatable FULL + else { //FLOAT_UPDATABLE if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } layout = Layout.FLOAT_UPDATABLE; n = extractN(wSrcMem); @@ -309,9 +305,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); final int itemsArrBytes = capacityItems * Float.BYTES; itemsArrStart = offset; - itemsArrStart = memCapacity - itemsArrBytes; - if (itemsArrStart < offset) { memoryValidateThrow(24, offset - itemsArrStart); } - itemsArrUpdatable = wSrcMem.writableRegion(itemsArrStart, itemsArrBytes); + itemsArrUpdatable = wSrcMem.writableRegion(offset, itemsArrBytes); sketchBytes = itemsArrStart + itemsArrBytes; } } @@ -326,7 +320,7 @@ 
private static void memoryValidateThrow(final int errNo, final int value) { case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; + //case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; //case 8: msg = "The dynamic MinK must be equal to K, NOT: " + value; break; //case 9: msg = "numLevels must be one, NOT: " + value; break; case 10: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; @@ -334,7 +328,7 @@ private static void memoryValidateThrow(final int errNo, final int value) { //case 21: msg = "N != 0 and empty bit is set. N: " + value; break; //case 22: msg = "N != 1 and single item bit is set. N: " + value; break; //case 23: msg = "Family name is not KLL"; break; - case 24: msg = "Given Memory has insufficient capacity. Need " + value + " bytes."; break; + //case 24: msg = "Given Memory has insufficient capacity. 
Need " + value + " bytes."; break; default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java index bb325a44e..4c7033342 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java @@ -70,7 +70,7 @@ private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); return ddsk; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index a12a50cac..c1b45b0ba 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -21,7 +21,7 @@ //import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -284,7 +284,7 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - final KllDirectDoublesSketch sketch1 = getDDSketch(MIN_K - 1, 0); + final KllDirectDoublesSketch sketch1 
= getDDSketch(DEFAULT_M - 1, 0); } @SuppressWarnings("unused") @@ -295,11 +295,11 @@ public void kTooLarge() { @Test public void minK() { - final KllDirectDoublesSketch sketch = getDDSketch(MIN_K, 0); + final KllDirectDoublesSketch sketch = getDDSketch(DEFAULT_M, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MIN_K); + assertEquals(sketch.getK(), DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @@ -332,7 +332,8 @@ public void serializeDeserializeEmpty() { //compact serialize then heapify using public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectDoublesSketch final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); final byte[] bytes = sketch1.toUpdatableByteArray(); - final KllDirectDoublesSketch sketch2 = new KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectDoublesSketch sketch2 = + KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); @@ -364,7 +365,8 @@ public void serializeDeserializeOneItem2() { //updatable serialize then new (loa final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toUpdatableByteArray(); - final KllDirectDoublesSketch sketch2 = new KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectDoublesSketch sketch2 = + KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); @@ -402,7 +404,8 @@ public void serializeDeserialize2() { //updatable serialize then new (loaded) Kl sketch1.update(i); } final byte[] bytes = 
sketch1.toUpdatableByteArray(); - final KllDirectDoublesSketch sketch2 = new KllDirectDoublesSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectDoublesSketch sketch2 = + KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); @@ -460,8 +463,8 @@ public void checkSimpleMergeDirect() { //used for troubleshooting println(sk2.toString(true, true)); WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); - KllDirectDoublesSketch dsk1 = new KllDirectDoublesSketch(wmem1, new DefaultMemoryRequestServer()); - KllDirectDoublesSketch dsk2 = new KllDirectDoublesSketch(wmem2, new DefaultMemoryRequestServer()); + KllDirectDoublesSketch dsk1 = KllDirectDoublesSketch.writableWrap(wmem1, new DefaultMemoryRequestServer()); + KllDirectDoublesSketch dsk2 = KllDirectDoublesSketch.writableWrap(wmem2, new DefaultMemoryRequestServer()); println("BEFORE MERGE"); println(dsk1.toString(true, true)); dsk1.merge(dsk2); @@ -484,7 +487,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), k + 1); assertEquals(sk.getNumRetained(), 11); @@ -492,7 +495,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertTrue(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 3); 
assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -505,7 +507,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 0); assertEquals(sk.getNumRetained(), 0); @@ -513,7 +515,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertFalse(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); @@ -527,7 +528,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectDoublesSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 1); assertEquals(sk.getNumRetained(), 1); @@ -535,7 +536,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertFalse(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -575,7 +575,7 @@ private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); 
- KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); return ddsk; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java index 60ea42c6a..9b54a7a2a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java @@ -70,7 +70,7 @@ private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); return dfsk; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index f7ce55533..c5f40b546 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -21,7 +21,7 @@ //import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -283,7 +283,7 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - final KllDirectFloatsSketch sketch1 = getDFSketch(MIN_K - 1, 0); + final 
KllDirectFloatsSketch sketch1 = getDFSketch(DEFAULT_M - 1, 0); } @SuppressWarnings("unused") @@ -294,11 +294,11 @@ public void kTooLarge() { @Test public void minK() { - final KllDirectFloatsSketch sketch = getDFSketch(MIN_K, 0); + final KllDirectFloatsSketch sketch = getDFSketch(DEFAULT_M, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MIN_K); + assertEquals(sketch.getK(), DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @@ -331,7 +331,8 @@ public void serializeDeserializeEmpty() { //compact serialize then heapify using public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectDoublesSketch final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final byte[] bytes = sketch1.toUpdatableByteArray(); - final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); @@ -363,7 +364,8 @@ public void serializeDeserializeOneItem2() { //updatable serialize then new (loa final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toUpdatableByteArray(); - final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); @@ -401,7 +403,8 @@ public void serializeDeserialize2() { //updatable serialize then new (loaded) Kl sketch1.update(i); } final byte[] bytes = 
sketch1.toUpdatableByteArray(); - final KllDirectFloatsSketch sketch2 = new KllDirectFloatsSketch(WritableMemory.writableWrap(bytes),memReqSvr); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); @@ -459,8 +462,8 @@ public void checkSimpleMergeDirect() { //used for troubleshooting println(sk2.toString(true, true)); WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); - KllDirectFloatsSketch dsk1 = new KllDirectFloatsSketch(wmem1, new DefaultMemoryRequestServer()); - KllDirectFloatsSketch dsk2 = new KllDirectFloatsSketch(wmem2, new DefaultMemoryRequestServer()); + KllDirectFloatsSketch dsk1 = KllDirectFloatsSketch.writableWrap(wmem1, new DefaultMemoryRequestServer()); + KllDirectFloatsSketch dsk2 = KllDirectFloatsSketch.writableWrap(wmem2, new DefaultMemoryRequestServer()); println("BEFORE MERGE"); println(dsk1.toString(true, true)); dsk1.merge(dsk2); @@ -483,7 +486,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectFloatsSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), k + 1); assertEquals(sk.getNumRetained(), 11); @@ -491,7 +494,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertTrue(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 3); 
assertEquals(sk.getMaxFloatValue(), 21.0); assertEquals(sk.getMinFloatValue(), 1.0); @@ -504,7 +506,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectFloatsSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 0); assertEquals(sk.getNumRetained(), 0); @@ -512,7 +514,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertFalse(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Double.NaN); assertEquals(sk.getMinFloatValue(), Double.NaN); @@ -526,7 +527,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = new KllDirectFloatsSketch(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 1); assertEquals(sk.getNumRetained(), 1); @@ -534,7 +535,6 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertFalse(sk.isEstimationMode()); assertEquals(sk.getDyMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0); @@ -574,7 +574,7 @@ private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - 
KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); return dfsk; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 081903be8..323814c0a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -21,7 +21,7 @@ //import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -280,7 +280,7 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllDoublesSketch(MIN_K - 1); + new KllDoublesSketch(DEFAULT_M - 1); } @SuppressWarnings("unused") @@ -291,11 +291,11 @@ public void kTooLarge() { @Test public void minK() { - final KllDoublesSketch sketch = new KllDoublesSketch(MIN_K); + final KllDoublesSketch sketch = new KllDoublesSketch(DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MIN_K); + assertEquals(sketch.getK(), DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @@ -340,15 +340,15 @@ public void serializeDeserializeOneItem() { assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES); } -// @Test //not implemented from C++ yet -// public void deserializeOneItemV1() throws Exception { -// final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk"); -// final KllFloatsSketch 
sketch = KllFloatsSketch.heapify(Memory.wrap(bytes)); -// assertFalse(sketch.isEmpty()); -// assertFalse(sketch.isEstimationMode()); -// assertEquals(sketch.getN(), 1); -// assertEquals(sketch.getNumRetained(), 1); -// } + //@Test //not implemented from C++ yet + //public void deserializeOneItemV1() throws Exception { + // final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk"); + // final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(bytes)); + // assertFalse(sketch.isEmpty()); + // assertFalse(sketch.isEstimationMode()); + // assertEquals(sketch.getN(), 1); + // assertEquals(sketch.getNumRetained(), 1); + //} @Test public void serializeDeserialize() { diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 94f1c6a5f..ee74063d7 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -20,7 +20,7 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.Util.getResourceBytes; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -280,7 +280,7 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllFloatsSketch(MIN_K - 1); + new KllFloatsSketch(DEFAULT_M - 1); } @SuppressWarnings("unused") @@ -291,11 +291,11 @@ public void kTooLarge() { @Test public void minK() { - final KllFloatsSketch sketch = new KllFloatsSketch(MIN_K); + final KllFloatsSketch sketch = new KllFloatsSketch(DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MIN_K); 
+ assertEquals(sketch.getK(), DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index 887d0f05f..b50a6d0a4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -19,11 +19,14 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.kll.KllHelper.checkM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.testng.Assert.assertEquals; +import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.Memory; import org.testng.annotations.Test; @@ -31,13 +34,25 @@ @SuppressWarnings("unused") public class KllHelperTest { - @Test //convert two false below to true for visual checking - public void testGetAllLevelStats() { - long n = 1L << 30; - int k = 200; - int m = 8; - KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, DOUBLES_SKETCH, false); - assertEquals(gStats.compactBytes, 5708); + /** + * Println Object o + * @param o object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); + } + + @Test + public void checkCheckM() { + try { + checkM(0); + } catch (SketchesArgumentException e) {} + try { + checkM(3); + } catch (SketchesArgumentException e) {} + try { + checkM(10); + } catch (SketchesArgumentException e) {} } @Test @@ -52,13 +67,23 @@ public void checkGetKFromEps() { } @Test - public void getStatsAtNumLevels() { - int k = 200; - int m = 8; - int numLevels = 23; - KllHelper.LevelStats 
lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, numLevels, false); - assertEquals(lvlStats.items, 697); - assertEquals(lvlStats.n, 1257766904); + public void checkIntCapAux() { + int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); + assertEquals(lvlCap, 8); + lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); + assertEquals(lvlCap, 10); + } + + @Test + public void checkSuperLargeKandLevels() { + //This is beyond what the sketch can be configured for. + final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); + assertEquals(size, 1_610_612_846); + } + + @Test + public void checkUbOnNumLevels() { + assertEquals(KllHelper.ubOnNumLevels(0), 1); } @Test @@ -98,80 +123,68 @@ public void checkUpdatableSerDe() { assertEquals(sk2.getNumRetained(), retained); } + @Test - public void getMaxCompactFloatsSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, FLOATS_SKETCH, false); - assertEquals(sizeBytes, 2908); + public void getMaxCompactDoublesSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, DOUBLES_SKETCH, false); + assertEquals(sizeBytes, 5704); } @Test - public void getMaxUpdatableFloatsSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, FLOATS_SKETCH, true); - assertEquals(sizeBytes, 2912); + public void getMaxCompactFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, FLOATS_SKETCH, false); + assertEquals(sizeBytes, 2908); } - @Test - public void getMaxCompactDoublesSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, DOUBLES_SKETCH, false); + public void getMaxUpdatableDoubleSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, DOUBLES_SKETCH, true); assertEquals(sizeBytes, 5708); } @Test - public void 
getMaxUpdatableDoubleSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, 1L << 30, DOUBLES_SKETCH, true); - assertEquals(sizeBytes, 5712); + public void getMaxUpdatableFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, FLOATS_SKETCH, true); + assertEquals(sizeBytes, 2912); } @Test - public void checkUbOnNumLevels() { - assertEquals(KllHelper.ubOnNumLevels(0), 1); + public void getStatsAtNumLevels() { + int k = 200; + int m = 8; + int numLevels = 23; + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, m, numLevels, false); + assertEquals(lvlStats.items, 697); + assertEquals(lvlStats.n, 1257766904); } @Test - public void checkIntCapAux() { - int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); - assertEquals(lvlCap, 8); - lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); - assertEquals(lvlCap, 10); + public void getStatsAtNumLevels2() { + int k = 20; + int m = 8; + int numLevels = 2; + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, DEFAULT_M, numLevels, false); + assertEquals(lvlStats.numLevels, 2); + assertEquals(lvlStats.items, 33); } @Test - public void checkSuperLargeKandLevels() { - //This is beyond what the sketch can be configured for. 
- final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); - assertEquals(size, 1_610_612_846); + public void testGetAllLevelStats() { + long n = 1L << 30; + int k = 200; + int m = 8; + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, m, n, DOUBLES_SKETCH, false); + assertEquals(gStats.compactBytes, 5704); } - - //Experimental - @Test public void testGetAllLevelStats2() { long n = 533; int k = 200; int m = 8; - KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, n, DOUBLES_SKETCH, false); + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, DEFAULT_M, n, DOUBLES_SKETCH, false); assertEquals(gStats.numLevels, 2); assertEquals(gStats.maxItems, 333); } - - @Test - public void getStatsAtNumLevels2() { - int k = 20; - int m = 8; - int numLevels = 2; - KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, numLevels, false); - assertEquals(lvlStats.numLevels, 2); - assertEquals(lvlStats.items, 33); - } - - /** - * Println Object o - * @param o object to print - */ - static void println(Object o) { - //System.out.println(o.toString()); - } } diff --git a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java index 143570d3b..057a8bd8f 100644 --- a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java +++ b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java @@ -21,6 +21,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.*; +import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -28,13 +29,12 @@ @SuppressWarnings("unused") public class MemoryValidateTest { - @Test(expectedExceptions = SketchesArgumentException.class) public void checkInvalidFamily() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); 
WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFamilyID(wmem, 14); + insertFamilyID(wmem, Family.KLL.getID() - 1); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -43,7 +43,7 @@ public void checkInvalidSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, 4); + insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL - 1); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -52,7 +52,7 @@ public void checkInvalidEmptyAndSingle() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 5); + insertFlags(wmem, EMPTY_BIT_MASK | SINGLE_ITEM_BIT_MASK); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -61,8 +61,8 @@ public void checkInvalidUpdatableAndSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 16); - insertSerVer(wmem, 2); + insertFlags(wmem, UPDATABLE_BIT_MASK); + insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -71,8 +71,8 @@ public void checkInvalidPreIntsAndSingle() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 16); - insertSerVer(wmem, 2); + insertFlags(wmem, UPDATABLE_BIT_MASK); + insertSerVer(wmem, SERIAL_VERSION_SINGLE); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -81,8 +81,8 @@ public void checkInvalidSerVerAndSingle2() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 4); - insertSerVer(wmem, 1); + insertFlags(wmem, SINGLE_ITEM_BIT_MASK); + insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); MemoryValidate memVal = 
new MemoryValidate(wmem); } @@ -91,8 +91,8 @@ public void checkInvalidPreIntsAndSingle2() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 4); - insertPreInts(wmem, 1); + insertFlags(wmem, SINGLE_ITEM_BIT_MASK); + insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -101,9 +101,9 @@ public void checkInvalidPreIntsAndDouble() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 8); - insertPreInts(wmem, 6); - insertSerVer(wmem, 2); + insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK); + insertPreInts(wmem, PREAMBLE_INTS_DOUBLE); + insertSerVer(wmem, SERIAL_VERSION_SINGLE); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -112,9 +112,9 @@ public void checkInvalidDoubleCompactAndSingle() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 12); //double & single - insertPreInts(wmem, 2);//should be 2 - insertSerVer(wmem, 1); //should be 2 + insertFlags(wmem, SINGLE_ITEM_BIT_MASK | DOUBLES_SKETCH_BIT_MASK); + insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -123,9 +123,9 @@ public void checkInvalidDoubleUpdatableAndSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, 3); - insertFlags(wmem, 24); //double & updatable - insertPreInts(wmem, 5);//should be 6 + insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); + insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); + insertPreInts(wmem, PREAMBLE_INTS_DOUBLE - 1); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -135,8 
+135,8 @@ public void checkInvalidFloatFullAndPreInts() { byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, 0); //float full - insertSerVer(wmem, 2); //should be 1 - insertPreInts(wmem, 5);//should be 5 + insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 1 + insertPreInts(wmem, PREAMBLE_INTS_FLOAT); MemoryValidate memVal = new MemoryValidate(wmem); } @@ -145,9 +145,9 @@ public void checkInvalidFloatUpdatableFullAndPreInts() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 16); //float updatable full - insertSerVer(wmem, 3); //should be 3 - insertPreInts(wmem, 6);//should be 5 + insertFlags(wmem, UPDATABLE_BIT_MASK); //float updatable full + insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); + insertPreInts(wmem, 0);//should be 5 MemoryValidate memVal = new MemoryValidate(wmem); } @@ -156,9 +156,9 @@ public void checkInvalidDoubleCompactSingleAndPreInts() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 12); //double & single + insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | SINGLE_ITEM_BIT_MASK); insertPreInts(wmem, 5);//should be 2 - insertSerVer(wmem, 2); //should be 2 + insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 2 MemoryValidate memVal = new MemoryValidate(wmem); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java index 6c7f3d1b9..d312056ca 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -121,7 +121,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); 
assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -139,7 +138,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); @@ -158,7 +156,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "DOUBLE_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -190,7 +187,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -212,7 +208,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); @@ -235,7 +230,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); 
assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -267,7 +261,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -289,7 +282,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); @@ -312,7 +304,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMinDoubleValue(), 1.0); @@ -338,7 +329,7 @@ public void checkMemoryToStringDoubleUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & analyze memory"); println(s); - sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -353,7 +344,7 @@ public void checkMemoryToStringDoubleUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & 
analyze memory"); println(s); - sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -369,7 +360,7 @@ public void checkMemoryToStringDoubleUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & analyze memory"); println(s); - sk2 = new KllDirectDoublesSketch(wmem, memReqSvr); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -412,12 +403,21 @@ public void checkSizes() { assertEquals(size2, byteArr2.length); } + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + println(sk.toString(true, true)); + } + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { KllDoublesSketch sk = new KllDoublesSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectDoublesSketch ddsk = new KllDirectDoublesSketch(wmem, memReqSvr); + KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); return ddsk; } @@ -430,7 +430,7 @@ public void printlnTest() { * @param s value to print */ static void println(final String s) { - //System.out.println(s); //disable here + System.out.println(s); //disable here } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java index 22bbb953b..4dfa54448 100644 --- 
a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -121,7 +121,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinFloatValue(), 1.0F); @@ -139,7 +138,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); @@ -158,7 +156,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "FLOAT_UPDATABLE"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinFloatValue(), 1.0F); @@ -190,7 +187,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinFloatValue(), 1.0f); @@ -212,7 +208,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 
2); assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); @@ -235,7 +230,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinFloatValue(), 1.0F); @@ -267,7 +261,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinFloatValue(), 1.0F); @@ -289,7 +282,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); @@ -312,7 +304,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinFloatValue(), 1.0F); @@ -338,7 +329,7 @@ public void checkMemoryToStringFloatUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & analyze memory"); println(s); - sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); upBytes2 = 
sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -353,7 +344,7 @@ public void checkMemoryToStringFloatUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & analyze memory"); println(s); - sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -369,7 +360,7 @@ public void checkMemoryToStringFloatUpdatable() { s = KllPreambleUtil.memoryToString(wmem); println("step 1: sketch to byte[]/memory & analyze memory"); println(s); - sk2 = new KllDirectFloatsSketch(wmem, memReqSvr); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); upBytes2 = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes2); s = KllPreambleUtil.memoryToString(wmem); @@ -412,12 +403,21 @@ public void checkSizes() { assertEquals(size2, byteArr2.length); } + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(3000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + println(sk.toString(true, true)); + } + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { KllFloatsSketch sk = new KllFloatsSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectFloatsSketch dfsk = new KllDirectFloatsSketch(wmem, memReqSvr); + KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); return dfsk; } @@ -430,7 +430,7 @@ public void printlnTest() { * @param s value to print */ static void println(final String s) { - //System.out.println(s); //disable here + System.out.println(s); //disable here } } 
diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index af5234c18..1c14720e3 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -57,7 +57,7 @@ public void checkBounds() { public void checkHeapifyExceptions1() { KllDoublesSketch sk = new KllDoublesSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt with different M + wmem.putByte(6, (byte)3); //corrupt with odd M KllDoublesSketch.heapify(wmem); } @@ -176,7 +176,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -196,7 +195,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -217,7 +215,6 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -251,7 +248,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); 
assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -275,7 +271,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -300,7 +295,6 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -334,7 +328,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -358,7 +351,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -383,7 +375,6 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); 
assertEquals(sk.getDoubleItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 6dc4160f3..5e66aea89 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -57,7 +57,7 @@ public void checkBounds() { public void checkHeapifyExceptions1() { KllFloatsSketch sk = new KllFloatsSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt with different M + wmem.putByte(6, (byte)3); //corrupt with odd M KllFloatsSketch.heapify(wmem); } @@ -176,7 +176,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -196,7 +195,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -217,7 +215,6 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 
1.0F); @@ -251,7 +248,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -275,7 +271,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -300,7 +295,6 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -334,7 +328,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 33); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -358,7 +351,6 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -383,7 +375,6 @@ 
public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getDyMinK(), k); assertTrue(Objects.isNull(sk.getDoubleItemsArray())); assertEquals(sk.getFloatItemsArray().length, 20); - assertEquals(sk.getLayout(), "HEAP"); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -512,10 +503,11 @@ public void checkMemoryToStringFloatUpdatable() { @Test public void checkSimpleMerge() { int k = 20; + int m = 4; int n1 = 21; int n2 = 43; - KllFloatsSketch sk1 = new KllFloatsSketch(k); - KllFloatsSketch sk2 = new KllFloatsSketch(k); + KllFloatsSketch sk1 = new KllFloatsSketch(k, m); + KllFloatsSketch sk2 = new KllFloatsSketch(k, m); for (int i = 1; i <= n1; i++) { sk1.update(i); } @@ -528,6 +520,11 @@ public void checkSimpleMerge() { println(sk1.toString(true, true)); } + @Test + public void checkOtherM() { + + } + @Test public void printlnTest() { println("PRINTING: " + this.getClass().getName()); From 995ed8238be1f17e28ed8b884d614463f05226d0 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 29 Mar 2022 21:05:46 -0700 Subject: [PATCH 19/31] Fix issues reported in PR Review, branch "Direct kll double" #360 --- .../kll/KllDirectDoublesSketch.java | 20 ++--- .../kll/KllDirectFloatsSketch.java | 20 ++--- .../datasketches/kll/KllDirectSketch.java | 31 ++++---- .../datasketches/kll/KllDoublesSketch.java | 33 +++++---- .../datasketches/kll/KllFloatsSketch.java | 33 +++++---- .../datasketches/kll/KllHeapSketch.java | 2 +- .../apache/datasketches/kll/KllHelper.java | 12 ++- ...ryValidate.java => KllMemoryValidate.java} | 74 ++++++++++--------- .../datasketches/kll/KllPreambleUtil.java | 2 +- .../apache/datasketches/kll/KllSketch.java | 63 ++++++++++------ .../apache/datasketches/kll/package-info.java | 2 +- .../kll/KllDirectDoublesSketchTest.java | 18 ++--- .../kll/KllDirectFloatsSketchTest.java | 18 ++--- .../datasketches/kll/MemoryValidateTest.java | 26 +++---- 
.../kll/MiscDirectDoublesTest.java | 41 +++++----- .../kll/MiscDirectFloatsTest.java | 68 +++++++++-------- .../datasketches/kll/MiscDoublesTest.java | 45 +++-------- .../datasketches/kll/MiscFloatsTest.java | 45 +++-------- 18 files changed, 272 insertions(+), 281 deletions(-) rename src/main/java/org/apache/datasketches/kll/{MemoryValidate.java => KllMemoryValidate.java} (86%) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 547aff0bb..be377a005 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -36,6 +36,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR32; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR33; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -58,7 +60,7 @@ public final class KllDirectDoublesSketch extends KllDirectSketch { * @param memVal the MemoryValadate object */ private KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, - final MemoryValidate memVal) { + final KllMemoryValidate memVal) { super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr, memVal); } @@ -70,12 +72,12 @@ private KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestSer * @return instance of this sketch */ public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { - final MemoryValidate memVal = new MemoryValidate(srcMem); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal); } /** - 
* Create a new instance of this sketch using default M. + * Create a new instance of this sketch using the default m of 8. * @param k parameter that controls size of the sketch and accuracy of estimates * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory @@ -94,7 +96,7 @@ public static KllDirectDoublesSketch newInstance(final int k, final WritableMemo * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new instance of this sketch */ - public static KllDirectDoublesSketch newInstance(final int k, final int m, final WritableMemory dstMem, + static KllDirectDoublesSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { insertPreInts(dstMem, PREAMBLE_INTS_DOUBLE); insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); @@ -111,7 +113,7 @@ public static KllDirectDoublesSketch newInstance(final int k, final int m, final dstMem.putDoubleArray(offset, new double[] {Double.NaN, Double.NaN}, 0, 2); offset += 2 * Double.BYTES; dstMem.putDoubleArray(offset, new double[k], 0, k); - final MemoryValidate memVal = new MemoryValidate(dstMem); + final KllMemoryValidate memVal = new KllMemoryValidate(dstMem); return new KllDirectDoublesSketch(dstMem, memReqSvr, memVal); } @@ -216,7 +218,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -268,7 +270,7 @@ public double[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -299,8 +301,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(32); } - if (!other.isDoublesSketch()) { kllSketchThrow(33); } + if (!other.isDirect()) { kllSketchThrow(ERR32); } + if (!other.isDoublesSketch()) { kllSketchThrow(ERR33); } mergeDoubleImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 97c214a01..92447f62e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -35,6 +35,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR32; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR34; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -58,7 +60,7 @@ public final class KllDirectFloatsSketch extends KllDirectSketch { * @param memVal the MemoryValadate object */ private KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, - final MemoryValidate memVal) { + final KllMemoryValidate memVal) { super(SketchType.FLOATS_SKETCH, wmem, memReqSvr, memVal); } @@ -70,12 +72,12 @@ private KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServ * @return instance of this sketch */ public static KllDirectFloatsSketch 
writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { - final MemoryValidate memVal = new MemoryValidate(srcMem); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal); } /** - * Create a new instance of this sketch using default M. + * Create a new instance of this sketch using the default m of 8. * @param k parameter that controls size of the sketch and accuracy of estimates * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory @@ -94,7 +96,7 @@ public static KllDirectFloatsSketch newInstance(final int k, final WritableMemor * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new instance of this sketch */ - public static KllDirectFloatsSketch newInstance(final int k, final int m, final WritableMemory dstMem, + static KllDirectFloatsSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { insertPreInts(dstMem, PREAMBLE_INTS_FLOAT); insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); @@ -111,7 +113,7 @@ public static KllDirectFloatsSketch newInstance(final int k, final int m, final dstMem.putFloatArray(offset, new float[] {Float.NaN, Float.NaN}, 0, 2); offset += 2 * Float.BYTES; dstMem.putFloatArray(offset, new float[k], 0, k); - final MemoryValidate memVal = new MemoryValidate(dstMem); + final KllMemoryValidate memVal = new KllMemoryValidate(dstMem); return new KllDirectFloatsSketch(dstMem, memReqSvr, memVal); } @@ -216,7 +218,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -268,7 +270,7 @@ public float[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -299,8 +301,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(32); } - if (!other.isFloatsSketch()) { kllSketchThrow(34); } + if (!other.isDirect()) { kllSketchThrow(ERR32); } + if (!other.isFloatsSketch()) { kllSketchThrow(ERR34); } mergeFloatImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index ef1009819..8cdde3eea 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -31,6 +31,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR30; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -55,7 +56,7 @@ abstract class KllDirectSketch extends KllSketch { * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory */ 
KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr, - final MemoryValidate memVal) { + final KllMemoryValidate memVal) { super(sketchType, wmem, memReqSvr); levelsArrUpdatable = memVal.levelsArrUpdatable; minMaxArrUpdatable = memVal.minMaxArrUpdatable; @@ -101,7 +102,7 @@ public byte[] toUpdatableByteArray() { } @Override - int getDyMinK() { + int getDynamicMinK() { return extractDyMinK(wmem); } @@ -164,14 +165,14 @@ int getNumLevels() { @Override void incN() { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } long n = extractN(wmem); insertN(wmem, ++n); } @Override void incNumLevels() { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } int numLevels = extractNumLevels(wmem); insertNumLevels(wmem, ++numLevels); } @@ -183,7 +184,7 @@ boolean isLevelZeroSorted() { @Override void setDoubleItemsArray(final double[] doubleItems) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); } @@ -194,13 +195,13 @@ void setDoubleItemsArrayAt(final int index, final double value) { @Override void setDyMinK(final int dyMinK) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } insertDyMinK(wmem, dyMinK); } @Override void setFloatItemsArray(final float[] floatItems) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); } @@ -216,7 +217,7 @@ void setItemsArrayUpdatable(final WritableMemory itemsMem) { @Override void setLevelsArray(final int[] levelsArr) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); } @@ -246,31 +247,31 @@ void setLevelsArrayUpdatable(final WritableMemory levelsMem) { @Override void 
setLevelZeroSorted(final boolean sorted) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } insertLevelZeroSortedFlag(wmem, sorted); } @Override void setMaxDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } minMaxArrUpdatable.putDouble(Double.BYTES, value); } @Override void setMaxFloatValue(final float value) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } minMaxArrUpdatable.putFloat(Float.BYTES, value); } @Override void setMinDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } minMaxArrUpdatable.putDouble(0, value); } @Override void setMinFloatValue(final float value) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } minMaxArrUpdatable.putFloat(0, value); } @@ -281,13 +282,13 @@ void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { @Override void setN(final long n) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } insertN(wmem, n); } @Override void setNumLevels(final int numLevels) { - if (!updatable) { kllSketchThrow(30); } + if (!updatable) { kllSketchThrow(ERR30); } insertNumLevels(wmem, numLevels); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 952d258ed..b9e97577f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -23,6 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR33; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR35; +import static 
org.apache.datasketches.kll.KllSketch.ERRNO.ERR50; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -84,7 +87,7 @@ public KllDoublesSketch(final int k, final int m) { * @param mem Memory object that contains data serialized by this sketch. * @param memVal the MemoryCheck object */ - private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { + private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { super(memVal.k, memVal.m, SketchType.DOUBLES_SKETCH); buildHeapKllSketchFromMemory(memVal); } @@ -99,7 +102,7 @@ private KllDoublesSketch(final Memory mem, final MemoryValidate memVal) { //To simplify the code, the MemoryValidate class does nearly all the validity checking. //The validated Memory is then passed to the actual private heapify constructor. public static KllDoublesSketch heapify(final Memory mem) { - final MemoryValidate memChk = new MemoryValidate(mem); + final KllMemoryValidate memChk = new KllMemoryValidate(mem); if (!memChk.doublesSketch) { throw new SketchesArgumentException("Memory object is not a KllDoublesSketch."); } @@ -203,7 +206,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -255,7 +258,7 @@ public double[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -286,8 +289,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (other.isDirect()) { kllSketchThrow(35); } - if (!other.isDoublesSketch()) { kllSketchThrow(33); } + if (other.isDirect()) { kllSketchThrow(ERR35); } + if (!other.isDoublesSketch()) { kllSketchThrow(ERR33); } mergeDoubleImpl(other); } @@ -296,7 +299,7 @@ public void merge(final KllSketch other) { * * @param value an item from a stream of items. NaNs are ignored. */ - public void update(final double value) { //possibly move proxy + public void update(final double value) { updateDouble(value); } @@ -307,22 +310,22 @@ public void update(final double value) { //possibly move proxy double getDoubleItemsArrayAt(final int index) { return doubleItems_[index]; } @Override //Dummy - float[] getFloatItemsArray() { return null; } + float[] getFloatItemsArray() { kllSketchThrow(ERR50); return null; } @Override //Dummy - float getFloatItemsArrayAt(final int index) { return Float.NaN; } + float getFloatItemsArrayAt(final int index) { kllSketchThrow(ERR50); return Float.NaN; } @Override //Used internally double getMaxDoubleValue() { return maxDoubleValue_; } @Override //Dummy - float getMaxFloatValue() { return (float) maxDoubleValue_; } + float getMaxFloatValue() { kllSketchThrow(ERR50); return (float) maxDoubleValue_; } @Override //Used internally double getMinDoubleValue() { return minDoubleValue_; } @Override //Dummy - float getMinFloatValue() { return (float) minDoubleValue_; } + float getMinFloatValue() { kllSketchThrow(ERR50); return (float) minDoubleValue_; } @Override //Used internally void setDoubleItemsArray(final double[] doubleItems) { 
doubleItems_ = doubleItems; } @@ -331,21 +334,21 @@ public void update(final double value) { //possibly move proxy void setDoubleItemsArrayAt(final int index, final double value) { doubleItems_[index] = value; } @Override //Dummy - void setFloatItemsArray(final float[] floatItems) { } + void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(ERR50); } @Override //Dummy - void setFloatItemsArrayAt(final int index, final float value) { } + void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(ERR50); } @Override //Used internally void setMaxDoubleValue(final double value) { maxDoubleValue_ = value; } @Override //Dummy - void setMaxFloatValue(final float value) { } + void setMaxFloatValue(final float value) { kllSketchThrow(ERR50); } @Override //Used internally void setMinDoubleValue(final double value) { minDoubleValue_ = value; } @Override //Dummy - void setMinFloatValue(final float value) { } + void setMinFloatValue(final float value) { kllSketchThrow(ERR50); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 6faec6acf..5921f556a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -23,6 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR34; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR35; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR50; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -84,7 +87,7 @@ public KllFloatsSketch(final int k, final int m) { * @param mem Memory object that contains data serialized by this sketch. 
* @param memVal the MemoryCheck object */ - private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { + private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { super(memVal.k, memVal.m, SketchType.FLOATS_SKETCH); buildHeapKllSketchFromMemory(memVal); } @@ -99,7 +102,7 @@ private KllFloatsSketch(final Memory mem, final MemoryValidate memVal) { //To simplify the code, the MemoryValidate class does nearly all the validity checking. //The validated Memory is then passed to the actual private heapify constructor. public static KllFloatsSketch heapify(final Memory mem) { - final MemoryValidate memVal = new MemoryValidate(mem); + final KllMemoryValidate memVal = new KllMemoryValidate(mem); if (memVal.doublesSketch) { throw new SketchesArgumentException("Memory object is not a KllFloatsSketch."); } @@ -203,7 +206,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -255,7 +258,7 @@ public float[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDyMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); } /** @@ -286,8 +289,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - if (other.isDirect()) { kllSketchThrow(35); } - if (!other.isFloatsSketch()) { kllSketchThrow(34); } + if (other.isDirect()) { kllSketchThrow(ERR35); } + if (!other.isFloatsSketch()) { kllSketchThrow(ERR34); } mergeFloatImpl(other); } @@ -301,10 +304,10 @@ public void update(final float value) { } @Override //Dummy - double[] getDoubleItemsArray() { return null; } + double[] getDoubleItemsArray() { kllSketchThrow(ERR50); return null; } @Override //Dummy - double getDoubleItemsArrayAt(final int index) { return Double.NaN; } + double getDoubleItemsArrayAt(final int index) { kllSketchThrow(ERR50); return Double.NaN; } @Override //Used internally float[] getFloatItemsArray() { return floatItems_; } @@ -313,37 +316,37 @@ public void update(final float value) { float getFloatItemsArrayAt(final int index) { return floatItems_[index]; } @Override //Dummy - double getMaxDoubleValue() { return maxFloatValue_; } + double getMaxDoubleValue() { kllSketchThrow(ERR50); return maxFloatValue_; } @Override //Used internally float getMaxFloatValue() { return maxFloatValue_; } @Override //Dummy - double getMinDoubleValue() { return minFloatValue_; } + double getMinDoubleValue() { kllSketchThrow(ERR50); return minFloatValue_; } @Override //Used internally float getMinFloatValue() { return minFloatValue_; } @Override //Dummy - void setDoubleItemsArray(final double[] doubleItems) { } + void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(ERR50); } @Override //Dummy - void setDoubleItemsArrayAt(final int index, final double value) { } + void 
setDoubleItemsArrayAt(final int index, final double value) { kllSketchThrow(ERR50); } @Override //Used internally void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } - @Override + @Override //Used internally void setFloatItemsArrayAt(final int index, final float value) { floatItems_[index] = value; } @Override //Dummy - void setMaxDoubleValue(final double value) { } + void setMaxDoubleValue(final double value) { kllSketchThrow(ERR50); } @Override //Used internally void setMaxFloatValue(final float value) { maxFloatValue_ = value; } @Override //Dummy - void setMinDoubleValue(final double value) { } + void setMinDoubleValue(final double value) { kllSketchThrow(ERR50); } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 94c295af0..81c56aa71 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -71,7 +71,7 @@ public long getN() { } @Override - int getDyMinK() { + int getDynamicMinK() { return dyMinK_; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index ba06282e8..09d8fde47 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -24,6 +24,12 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllSketch.CDF_COEF; +import static org.apache.datasketches.kll.KllSketch.CDF_EXP; +import static org.apache.datasketches.kll.KllSketch.MAX_M; +import static org.apache.datasketches.kll.KllSketch.MIN_M; 
+import static org.apache.datasketches.kll.KllSketch.PMF_COEF; +import static org.apache.datasketches.kll.KllSketch.PMF_EXP; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import org.apache.datasketches.SketchesArgumentException; @@ -195,7 +201,7 @@ static void checkK(final int k, final int m) { } static void checkM(final int m) { - if (m < 2 || m > 8 || ((m & 1) == 1)) { + if (m < MIN_M || m > MAX_M || ((m & 1) == 1)) { throw new SketchesArgumentException( "M must be >= 2, <= 8 and even: " + m); } @@ -257,8 +263,8 @@ static int findLevelToCompact(final int k, final int m, final int numLevels, fin // thousands of trials static double getNormalizedRankError(final int k, final boolean pmf) { return pmf - ? 2.446 / pow(k, 0.9433) - : 2.296 / pow(k, 0.9723); + ? PMF_COEF / pow(k, PMF_EXP) + : CDF_COEF / pow(k, CDF_EXP); } static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) { diff --git a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java similarity index 86% rename from src/main/java/org/apache/datasketches/kll/MemoryValidate.java rename to src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index f7a9ce881..9015081ba 100644 --- a/src/main/java/org/apache/datasketches/kll/MemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -20,6 +20,14 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.Family.idToFamily; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR0; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR1; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR10; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR2; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR20; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR4; 
+import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR5; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR6; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; @@ -57,7 +65,7 @@ * @author lrhodes * */ -final class MemoryValidate { +final class KllMemoryValidate { // first 8 bytes final int preInts; // = extractPreInts(srcMem); final int serVer; @@ -92,13 +100,13 @@ final class MemoryValidate { WritableMemory minMaxArrUpdatable; WritableMemory itemsArrUpdatable; - MemoryValidate(final Memory srcMem) { + KllMemoryValidate(final Memory srcMem) { memCapacity = (int) srcMem.getCapacity(); preInts = extractPreInts(srcMem); serVer = extractSerVer(srcMem); familyID = extractFamilyID(srcMem); - if (familyID != Family.KLL.getID()) { memoryValidateThrow(0, familyID); } + if (familyID != Family.KLL.getID()) { memoryValidateThrow(MERR0, familyID); } famName = idToFamily(familyID).toString(); flags = extractFlags(srcMem); empty = extractEmptyFlag(srcMem); @@ -110,19 +118,19 @@ final class MemoryValidate { m = extractM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(10, 0); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(MERR10, 0); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } void compactMemoryValidate(final Memory srcMem) { - if (empty && singleItem) { memoryValidateThrow(20, 0); } + if (empty && singleItem) { memoryValidateThrow(MERR20, 0); } final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); switch (sw) { case 0: { //FLOAT_FULL_COMPACT - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(MERR6, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -148,8 +156,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 1: { //FLOAT_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } layout = Layout.FLOAT_EMPTY_COMPACT; n = 0; //assumed dyMinK = k; //assumed @@ -167,8 +175,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 4: { //FLOAT_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(MERR4, serVer); } layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -189,8 +197,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 8: { //DOUBLE_FULL_COMPACT - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(MERR5, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -216,8 
+224,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 9: { //DOUBLE_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; dyMinK = k; @@ -236,8 +244,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 12: { //DOUBLE_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(MERR4, serVer); } layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -264,7 +272,7 @@ void compactMemoryValidate(final Memory srcMem) { void updatableMemoryValidate(final WritableMemory wSrcMem) { if (doublesSketch) { //DOUBLE_UPDATABLE - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(5, preInts); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(MERR5, preInts); } layout = Layout.DOUBLE_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -287,7 +295,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { sketchBytes = offset + itemsArrBytes; } else { //FLOAT_UPDATABLE - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(6, preInts); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(MERR6, preInts); } layout = Layout.FLOAT_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -310,25 +318,19 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { } } - private static void memoryValidateThrow(final int errNo, final 
int value) { + enum MERRNO { MERR0, MERR1, MERR2, MERR4, MERR5, MERR6, MERR10, MERR20 } + + private static void memoryValidateThrow(final MERRNO errNo, final int value) { String msg = ""; switch (errNo) { - case 0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case 1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - //case 3: msg = "Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case 4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case 5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case 6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - //case 7: msg = "The M field must be set to " + DEFAULT_M + ", NOT: " + value; break; - //case 8: msg = "The dynamic MinK must be equal to K, NOT: " + value; break; - //case 9: msg = "numLevels must be one, NOT: " + value; break; - case 10: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; - case 20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; - //case 21: msg = "N != 0 and empty bit is set. N: " + value; break; - //case 22: msg = "N != 1 and single item bit is set. N: " + value; break; - //case 23: msg = "Family name is not KLL"; break; - //case 24: msg = "Given Memory has insufficient capacity. 
Need " + value + " bytes."; break; + case MERR0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; + case MERR1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case MERR2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; + case MERR4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; + case MERR5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; + case MERR6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; + case MERR10: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; + case MERR20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. Flags: " + value; break; default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 61d1f37fd..3fb4a4242 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -199,7 +199,7 @@ static String toString(final Memory mem) { } static String memoryToString(final Memory mem) { - final MemoryValidate memChk = new MemoryValidate(mem); + final KllMemoryValidate memChk = new KllMemoryValidate(mem); final int flags = memChk.flags & 0XFF; final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 97c98fd3d..d50cb2f12 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -27,10 +27,10 @@ import static java.lang.Math.min; import 
static java.lang.Math.round; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; @@ -96,9 +96,16 @@ * @author Lee Rhodes, Kevin Lang */ public abstract class KllSketch { + static final int MIN_M = 2; + static final int MAX_M = 8; + static final double EPS_DELTA_THRESHOLD = 1E-6; + static final double MIN_EPS = 4.7634E-5; + static final double PMF_COEF = 2.446; + static final double PMF_EXP = 0.9433; + static final double CDF_COEF = 2.296; + static final double CDF_EXP = 0.9723; static final Random random = new Random(); static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch - //final int M = DEFAULT_M; // configured minimum buffer "width", default is 8. SketchType sketchType; WritableMemory wmem; MemoryRequestServer memReqSvr; @@ -142,14 +149,14 @@ public enum SketchType { FLOATS_SKETCH, DOUBLES_SKETCH } // thousands of trials public static int getKFromEpsilon(final double epsilon, final boolean pmf) { //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. - final double eps = max(epsilon, 4.7634E-5); + final double eps = max(epsilon, MIN_EPS); final double kdbl = pmf - ? exp(log(2.446 / eps) / 0.9433) - : exp(log(2.296 / eps) / 0.9723); + ? 
exp(log(PMF_COEF / eps) / PMF_EXP) + : exp(log(CDF_COEF / eps) / CDF_EXP); final double krnd = round(kdbl); final double del = abs(krnd - kdbl); - final int k = (int) (del < 1E-6 ? krnd : ceil(kdbl)); - return max(2, min(MAX_K, k)); + final int k = (int) (del < EPS_DELTA_THRESHOLD ? krnd : ceil(kdbl)); + return max(MIN_M, min(MAX_K, k)); } /** @@ -230,15 +237,18 @@ final static boolean isCompatible() { return compatible; } - final static void kllSketchThrow(final int errNo) { + enum ERRNO { ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR50 } + + final static void kllSketchThrow(final ERRNO errNo) { String msg = ""; switch (errNo) { - case 30: msg = "Given sketch Memory is immutable, cannot write."; break; - case 31: msg = "Given sketch Memory is immutable and incompatible."; break; - case 32: msg = "Given sketch must be of type Direct."; break; - case 33: msg = "Given sketch must be of type Double."; break; - case 34: msg = "Given sketch must be of type Float."; break; - case 35: msg = "Given sketch must not be of type Direct."; break; + case ERR30: msg = "Given sketch Memory is immutable, cannot write."; break; + case ERR31: msg = "Given sketch Memory is immutable and incompatible."; break; + case ERR32: msg = "Given sketch must be of type Direct."; break; + case ERR33: msg = "Given sketch must be of type Double."; break; + case ERR34: msg = "Given sketch must be of type Float."; break; + case ERR35: msg = "Given sketch must not be of type Direct."; break; + case ERR50: msg = "This is an artifact of inheritance and should never be called."; break; default: msg = "Unknown error: errNo: " + errNo; break; } throw new SketchesArgumentException(msg); @@ -294,7 +304,7 @@ public final int getCurrentUpdatableSerializedSizeBytes() { * {@link org.apache.datasketches.kll}

      */ public final double getNormalizedRankError(final boolean pmf) { - return getNormalizedRankError(getDyMinK(), pmf); + return getNormalizedRankError(getDynamicMinK(), pmf); } /** @@ -367,7 +377,7 @@ public byte[] toUpdatableByteArray() { //package-private non-static methods - final void buildHeapKllSketchFromMemory(final MemoryValidate memVal) { + final void buildHeapKllSketchFromMemory(final KllMemoryValidate memVal) { final boolean doubleType = (sketchType == DOUBLES_SKETCH); final boolean updatable = memVal.updatable; setLevelZeroSorted(memVal.level0Sorted); @@ -511,7 +521,12 @@ final double[] getDoublesQuantiles(final double[] fractions) { return quantiles; } - abstract int getDyMinK(); + /** + * Dynamic MinK is the value of K that results from a merge with a sketch configured with a value of K lower than + * the k of this sketch. This value is then used in computing the estimated upper and lower bounds of error. + * @return The dynamic minimum K as a result of merging with lower values of k. + */ + abstract int getDynamicMinK(); /** * @return full size of internal items array including garbage; for a doubles sketch this will be null. @@ -711,7 +726,7 @@ final void mergeDoubleImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final double myMin = getMinDoubleValue(); final double myMax = getMaxDoubleValue(); - final int myDyMinK = getDyMinK(); + final int myDyMinK = getDynamicMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -779,7 +794,7 @@ final void mergeDoubleImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
- setDyMinK(min(myDyMinK, other.getDyMinK())); + setDyMinK(min(myDyMinK, other.getDynamicMinK())); } //Update min, max values @@ -821,7 +836,7 @@ final void mergeFloatImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final float myMin = getMinFloatValue(); final float myMax = getMaxFloatValue(); - final int myDyMinK = getDyMinK(); + final int myDyMinK = getDynamicMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -889,7 +904,7 @@ final void mergeFloatImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. - setDyMinK(min(myDyMinK, other.getDyMinK())); + setDyMinK(min(myDyMinK, other.getDynamicMinK())); } //Update min, max values @@ -976,7 +991,7 @@ final byte[] toCompactByteArrayImpl() { } else { // n > 1 //remainder of preamble after first 8 bytes insertN(wmem, getN()); - insertDyMinK(wmem, getDyMinK()); + insertDyMinK(wmem, getDynamicMinK()); insertNumLevels(wmem, getNumLevels()); offset = (doubleType) ? DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; @@ -1041,7 +1056,7 @@ final String toStringImpl(final boolean withLevels, final boolean withData) { final String skType = (direct ? "Direct" : "") + (doubleType ? 
"Doubles" : "Floats"); sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getDyMinK()).append(Util.LS); + sb.append(" Dynamic min K : ").append(getDynamicMinK()).append(Util.LS); sb.append(" M : ").append(m).append(Util.LS); sb.append(" N : ").append(getN()).append(Util.LS); sb.append(" Epsilon : ").append(epsPct).append(Util.LS); @@ -1162,7 +1177,7 @@ final byte[] toUpdatableByteArrayImpl() { loadFirst8Bytes(this, wmem, true); //remainder of preamble after first 8 bytes insertN(wmem, getN()); - insertDyMinK(wmem, getDyMinK()); + insertDyMinK(wmem, getDynamicMinK()); insertNumLevels(wmem, getNumLevels()); //load data diff --git a/src/main/java/org/apache/datasketches/kll/package-info.java b/src/main/java/org/apache/datasketches/kll/package-info.java index 6dcc6c20d..3071c9766 100644 --- a/src/main/java/org/apache/datasketches/kll/package-info.java +++ b/src/main/java/org/apache/datasketches/kll/package-info.java @@ -35,7 +35,7 @@ * *

      The normalized rank (rank) of any specific value is defined as its * absolute rank divided by N. - * Thus, the normalized rank is a value in the interval [0.0, 1.0), exclusive. + * Thus, the normalized rank is a value in the interval [0.0, 1.0). * In the documentation and Javadocs for this sketch absolute rank is never used so any * reference to just rank should be interpreted to mean normalized rank. * diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index c1b45b0ba..77c1e7814 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -314,7 +314,7 @@ public void maxK() { } @Test - public void serializeDeserializeEmpty() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeEmptyViaCompactHeapify() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); @@ -329,7 +329,7 @@ public void serializeDeserializeEmpty() { //compact serialize then heapify using } @Test - public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeEmptyViaUpdatableWritableWrap() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); final byte[] bytes = sketch1.toUpdatableByteArray(); final KllDirectDoublesSketch sketch2 = @@ -345,7 +345,7 @@ public void serializeDeserializeEmpty2() { //updatable serialize then new (loade } @Test - public void serializeDeserializeOneItem() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeOneItemViaCompactHeapify() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); @@ 
-361,7 +361,7 @@ public void serializeDeserializeOneItem() { //compact serialize then heapify usi } @Test - public void serializeDeserializeOneItem2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeOneItemViaUpdatableWritableWrap() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toUpdatableByteArray(); @@ -378,7 +378,7 @@ public void serializeDeserializeOneItem2() { //updatable serialize then new (loa } @Test - public void serializeDeserialize() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeFullViaCompactHeapify() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); final int n = 1000; for (int i = 0; i < n; i++) { @@ -397,7 +397,7 @@ public void serializeDeserialize() { //compact serialize then heapify using KllD } @Test - public void serializeDeserialize2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeFullViaUpdatableWritableWrap() { final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0); final int n = 1000; for (int i = 0; i < n; i++) { @@ -493,7 +493,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -513,7 +513,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -534,7 +534,7 
@@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index c5f40b546..d23979d67 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -313,7 +313,7 @@ public void maxK() { } @Test - public void serializeDeserializeEmpty() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeEmptyViaCompactHeapify() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); @@ -328,7 +328,7 @@ public void serializeDeserializeEmpty() { //compact serialize then heapify using } @Test - public void serializeDeserializeEmpty2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeEmptyViaUpdatableWritableWrap() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final byte[] bytes = sketch1.toUpdatableByteArray(); final KllDirectFloatsSketch sketch2 = @@ -344,7 +344,7 @@ public void serializeDeserializeEmpty2() { //updatable serialize then new (loade } @Test - public void serializeDeserializeOneItem() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeOneItemViaCompactHeapify() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); @@ -360,7 +360,7 @@ public void 
serializeDeserializeOneItem() { //compact serialize then heapify usi } @Test - public void serializeDeserializeOneItem2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeOneItemViaUpdatableWritableWrap() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); sketch1.update(1); final byte[] bytes = sketch1.toUpdatableByteArray(); @@ -377,7 +377,7 @@ public void serializeDeserializeOneItem2() { //updatable serialize then new (loa } @Test - public void serializeDeserialize() { //compact serialize then heapify using KllDoublesSketch + public void serializeDeserializeFullViaCompactHeapify() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final int n = 1000; for (int i = 0; i < n; i++) { @@ -396,7 +396,7 @@ public void serializeDeserialize() { //compact serialize then heapify using KllD } @Test - public void serializeDeserialize2() { //updatable serialize then new (loaded) KllDirectDoublesSketch + public void serializeDeserializeFullViaUpdatableWritableWrap() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final int n = 1000; for (int i = 0; i < n; i++) { @@ -492,7 +492,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0); @@ -512,7 +512,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Double.NaN); @@ -533,7 +533,7 @@ public void 
checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0); diff --git a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java index 057a8bd8f..9ce967ea4 100644 --- a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java +++ b/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java @@ -35,7 +35,7 @@ public void checkInvalidFamily() { byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFamilyID(wmem, Family.KLL.getID() - 1); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -44,7 +44,7 @@ public void checkInvalidSerVer() { byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL - 1); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -53,7 +53,7 @@ public void checkInvalidEmptyAndSingle() { byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, EMPTY_BIT_MASK | SINGLE_ITEM_BIT_MASK); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -63,7 +63,7 @@ public void checkInvalidUpdatableAndSerVer() { WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, UPDATABLE_BIT_MASK); insertSerVer(wmem, 
SERIAL_VERSION_EMPTY_FULL); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -73,7 +73,7 @@ public void checkInvalidPreIntsAndSingle() { WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, UPDATABLE_BIT_MASK); insertSerVer(wmem, SERIAL_VERSION_SINGLE); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -83,7 +83,7 @@ public void checkInvalidSerVerAndSingle2() { WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, SINGLE_ITEM_BIT_MASK); insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -93,7 +93,7 @@ public void checkInvalidPreIntsAndSingle2() { WritableMemory wmem = WritableMemory.writableWrap(byteArr); insertFlags(wmem, SINGLE_ITEM_BIT_MASK); insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -104,7 +104,7 @@ public void checkInvalidPreIntsAndDouble() { insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK); insertPreInts(wmem, PREAMBLE_INTS_DOUBLE); insertSerVer(wmem, SERIAL_VERSION_SINGLE); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -115,7 +115,7 @@ public void checkInvalidDoubleCompactAndSingle() { insertFlags(wmem, SINGLE_ITEM_BIT_MASK | DOUBLES_SKETCH_BIT_MASK); insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); - MemoryValidate memVal = new MemoryValidate(wmem); 
+ KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -126,7 +126,7 @@ public void checkInvalidDoubleUpdatableAndSerVer() { insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); insertPreInts(wmem, PREAMBLE_INTS_DOUBLE - 1); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -137,7 +137,7 @@ public void checkInvalidFloatFullAndPreInts() { insertFlags(wmem, 0); //float full insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 1 insertPreInts(wmem, PREAMBLE_INTS_FLOAT); - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -148,7 +148,7 @@ public void checkInvalidFloatUpdatableFullAndPreInts() { insertFlags(wmem, UPDATABLE_BIT_MASK); //float updatable full insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); insertPreInts(wmem, 0);//should be 5 - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -159,7 +159,7 @@ public void checkInvalidDoubleCompactSingleAndPreInts() { insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | SINGLE_ITEM_BIT_MASK); insertPreInts(wmem, 5);//should be 2 insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 2 - MemoryValidate memVal = new MemoryValidate(wmem); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java index d312056ca..c407b4e37 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -118,7 +118,7 
@@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); @@ -135,7 +135,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); @@ -153,7 +153,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); + assertEquals(sk.getDynamicMinK(), k); assertTrue(Objects.isNull(sk.getFloatItemsArray())); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); @@ -184,8 +184,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -205,8 +204,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); 
assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -227,8 +225,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -258,8 +255,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -279,8 +275,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -301,8 +296,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -409,7 +403,20 @@ public void checkNewInstance() { WritableMemory dstMem = 
WritableMemory.allocate(6000); KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(k, dstMem, memReqSvr); for (int i = 1; i <= 10_000; i++) {sk.update(i); } - println(sk.toString(true, true)); + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 10000.0); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 200.0); } private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { @@ -430,7 +437,7 @@ public void printlnTest() { * @param s value to print */ static void println(final String s) { - System.out.println(s); //disable here + //System.out.println(s); //disable here } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java index 4dfa54448..3bd1ce1d4 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -109,7 +109,7 @@ public void checkSketchInitializeFloatHeap() { int k = 20; //don't change this KllDirectFloatsSketch sk; - //println("#### CASE: DOUBLE FULL HEAP"); + //println("#### CASE: FLOAT FULL HEAP"); sk = getDFSketch(k, 0); for (int i = 1; i <= k + 1; i++) { sk.update(i); } //println(sk.toString(true, true)); @@ -118,8 +118,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); 
assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -127,7 +126,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - //println("#### CASE: DOUBLE HEAP EMPTY"); + //println("#### CASE: FLOAT HEAP EMPTY"); sk = getDFSketch(k, 0); //println(sk.toString(true, true)); assertEquals(sk.getK(), k); @@ -135,8 +134,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -144,7 +142,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - //println("#### CASE: DOUBLE HEAP SINGLE"); + //println("#### CASE: FLOAT HEAP SINGLE"); sk = getDFSketch(k, 0); sk.update(1); //println(sk.toString(true, true)); @@ -153,8 +151,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -171,7 +168,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { byte[] compBytes; WritableMemory wmem; - //println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + //println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); sk2 = getDFSketch(k, 0); for (int i = 1; i <= k + 1; i++) { sk2.update(i); } //println(sk.toString(true, true)); @@ -184,8 +181,7 @@ public void 
checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -193,7 +189,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - //println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + //println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); sk2 = getDFSketch(k, 0); //println(sk.toString(true, true)); compBytes = sk2.toByteArray(); @@ -205,8 +201,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -214,7 +209,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + //println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT"); sk2 = getDFSketch(k, 0); sk2.update(1); //println(sk2.toString(true, true)); @@ -227,8 +222,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); 
assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -245,7 +239,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { byte[] compBytes; WritableMemory wmem; - //println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + //println("#### CASE: FLOAT FULL HEAPIFIED FROM UPDATABLE"); sk2 = getDFSketch(k, 0); for (int i = 1; i <= k + 1; i++) { sk2.update(i); } //println(sk2.toString(true, true)); @@ -258,8 +252,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -267,7 +260,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - // println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + // println("#### CASE: FLOAT EMPTY HEAPIFIED FROM UPDATABLE"); sk2 = getDFSketch(k, 0); //println(sk.toString(true, true)); compBytes = sk2.toUpdatableByteArray(); @@ -279,8 +272,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -288,7 +280,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + //println("#### CASE: FLOAT 
SINGLE HEAPIFIED FROM UPDATABLE"); sk2 = getDFSketch(k, 0); sk2.update(1); //println(sk.toString(true, true)); @@ -301,8 +293,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -321,7 +312,7 @@ public void checkMemoryToStringFloatUpdatable() { WritableMemory wmem; String s; - println("#### CASE: DOUBLE FULL UPDATABLE"); + println("#### CASE: FLOAT FULL UPDATABLE"); sk = getDFSketch(k, 0); for (int i = 1; i <= k + 1; i++) { sk.update(i); } upBytes = sk.toUpdatableByteArray(); @@ -337,7 +328,7 @@ public void checkMemoryToStringFloatUpdatable() { println(s); assertEquals(upBytes, upBytes2); - println("#### CASE: DOUBLE EMPTY UPDATABLE"); + println("#### CASE: FLOAT EMPTY UPDATABLE"); sk = getDFSketch(k, 0); upBytes = sk.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(upBytes); @@ -352,7 +343,7 @@ public void checkMemoryToStringFloatUpdatable() { println(s); assertEquals(upBytes, upBytes2); - println("#### CASE: DOUBLE SINGLE UPDATABL"); + println("#### CASE: FLOAT SINGLE UPDATABL"); sk = getDFSketch(k, 0); sk.update(1); upBytes = sk.toUpdatableByteArray(); @@ -409,7 +400,20 @@ public void checkNewInstance() { WritableMemory dstMem = WritableMemory.allocate(3000); KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(k, dstMem, memReqSvr); for (int i = 1; i <= 10_000; i++) {sk.update(i); } - println(sk.toString(true, true)); + assertEquals(sk.getMinValue(), 1.0F); + assertEquals(sk.getMaxValue(), 10000.0F); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = 
WritableMemory.allocate(1000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 200.0); } private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { @@ -430,7 +434,7 @@ public void printlnTest() { * @param s value to print */ static void println(final String s) { - System.out.println(s); //disable here + //System.out.println(s); //disable here } } diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 1c14720e3..ba28c6034 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -173,14 +173,11 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -192,14 +189,11 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - assertEquals(sk.getMaxFloatValue(), Float.NaN); 
assertEquals(sk.getMinDoubleValue(), Double.NaN); - assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -212,14 +206,11 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } @@ -245,14 +236,11 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -268,14 +256,11 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - assertEquals(sk.getMaxFloatValue(), Float.NaN); 
assertEquals(sk.getMinDoubleValue(), Double.NaN); - assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -292,14 +277,11 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } @@ -325,14 +307,11 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); - assertEquals(sk.getMaxFloatValue(), 21.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -348,14 +327,11 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); - assertEquals(sk.getMaxFloatValue(), 
Float.NaN); assertEquals(sk.getMinDoubleValue(), Double.NaN); - assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -372,14 +348,11 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); - assertEquals(sk.getMaxFloatValue(), 1.0F); assertEquals(sk.getMinDoubleValue(), 1.0); - assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); } diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index 5e66aea89..c42e58557 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -173,13 +173,10 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); - assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -192,13 +189,10 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - 
assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); - assertEquals(sk.getMinDoubleValue(), Double.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -212,13 +206,10 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -245,13 +236,10 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); - assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -268,13 +256,10 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - 
assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); - assertEquals(sk.getMinDoubleValue(), Double.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -292,13 +277,10 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -325,13 +307,10 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); - assertEquals(sk.getMaxDoubleValue(), 21.0); assertEquals(sk.getMaxFloatValue(), 21.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); @@ -348,13 +327,10 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); 
assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), Double.NaN); assertEquals(sk.getMaxFloatValue(), Float.NaN); - assertEquals(sk.getMinDoubleValue(), Double.NaN); assertEquals(sk.getMinFloatValue(), Float.NaN); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); @@ -372,13 +348,10 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDyMinK(), k); - assertTrue(Objects.isNull(sk.getDoubleItemsArray())); + assertEquals(sk.getDynamicMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); - assertEquals(sk.getMaxDoubleValue(), 1.0); assertEquals(sk.getMaxFloatValue(), 1.0F); - assertEquals(sk.getMinDoubleValue(), 1.0); assertEquals(sk.getMinFloatValue(), 1.0F); assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); From 71af0d94f36e370e4a0eff8ced3ae89121070743 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 30 Mar 2022 11:10:59 -0700 Subject: [PATCH 20/31] Fixed issues from latest comments. 
--- .../kll/KllDirectDoublesSketch.java | 8 +- .../kll/KllDirectFloatsSketch.java | 8 +- .../datasketches/kll/KllDirectSketch.java | 28 +++---- .../datasketches/kll/KllDoublesSketch.java | 26 +++--- .../datasketches/kll/KllFloatsSketch.java | 26 +++--- .../datasketches/kll/KllMemoryValidate.java | 83 ++++++++++--------- .../apache/datasketches/kll/KllSketch.java | 22 ++--- 7 files changed, 105 insertions(+), 96 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index be377a005..50cbae3ff 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -36,8 +36,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR32; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR33; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DIRECT; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DOUBLE; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -301,8 +301,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(ERR32); } - if (!other.isDoublesSketch()) { kllSketchThrow(ERR33); } + if (!other.isDirect()) { kllSketchThrow(ERR_SRC_IS_NOT_DIRECT); } + if (!other.isDoublesSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_DOUBLE); } mergeDoubleImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 
92447f62e..c802a2495 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -35,8 +35,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR32; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR34; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DIRECT; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_FLOAT; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -301,8 +301,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(ERR32); } - if (!other.isFloatsSketch()) { kllSketchThrow(ERR34); } + if (!other.isDirect()) { kllSketchThrow(ERR_SRC_IS_NOT_DIRECT); } + if (!other.isFloatsSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_FLOAT); } mergeFloatImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 8cdde3eea..6a559aee3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -31,7 +31,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR30; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_TGT_IS_IMMUTABLE; import 
org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -165,14 +165,14 @@ int getNumLevels() { @Override void incN() { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } long n = extractN(wmem); insertN(wmem, ++n); } @Override void incNumLevels() { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } int numLevels = extractNumLevels(wmem); insertNumLevels(wmem, ++numLevels); } @@ -184,7 +184,7 @@ boolean isLevelZeroSorted() { @Override void setDoubleItemsArray(final double[] doubleItems) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); } @@ -195,13 +195,13 @@ void setDoubleItemsArrayAt(final int index, final double value) { @Override void setDyMinK(final int dyMinK) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } insertDyMinK(wmem, dyMinK); } @Override void setFloatItemsArray(final float[] floatItems) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); } @@ -217,7 +217,7 @@ void setItemsArrayUpdatable(final WritableMemory itemsMem) { @Override void setLevelsArray(final int[] levelsArr) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); } @@ -247,31 +247,31 @@ void setLevelsArrayUpdatable(final WritableMemory levelsMem) { @Override void setLevelZeroSorted(final boolean sorted) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } insertLevelZeroSortedFlag(wmem, sorted); } @Override void setMaxDoubleValue(final double value) { - 
if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(Double.BYTES, value); } @Override void setMaxFloatValue(final float value) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(Float.BYTES, value); } @Override void setMinDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(0, value); } @Override void setMinFloatValue(final float value) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(0, value); } @@ -282,13 +282,13 @@ void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { @Override void setN(final long n) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } insertN(wmem, n); } @Override void setNumLevels(final int numLevels) { - if (!updatable) { kllSketchThrow(ERR30); } + if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } insertNumLevels(wmem, numLevels); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index b9e97577f..eb05d4688 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -23,9 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR33; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR35; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR50; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DOUBLE; +import 
static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_CANNOT_BE_DIRECT; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_MUST_NOT_CALL; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -289,8 +289,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (other.isDirect()) { kllSketchThrow(ERR35); } - if (!other.isDoublesSketch()) { kllSketchThrow(ERR33); } + if (other.isDirect()) { kllSketchThrow(ERR_SRC_CANNOT_BE_DIRECT); } + if (!other.isDoublesSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_DOUBLE); } mergeDoubleImpl(other); } @@ -310,22 +310,22 @@ public void update(final double value) { double getDoubleItemsArrayAt(final int index) { return doubleItems_[index]; } @Override //Dummy - float[] getFloatItemsArray() { kllSketchThrow(ERR50); return null; } + float[] getFloatItemsArray() { kllSketchThrow(ERR_MUST_NOT_CALL); return null; } @Override //Dummy - float getFloatItemsArrayAt(final int index) { kllSketchThrow(ERR50); return Float.NaN; } + float getFloatItemsArrayAt(final int index) { kllSketchThrow(ERR_MUST_NOT_CALL); return Float.NaN; } @Override //Used internally double getMaxDoubleValue() { return maxDoubleValue_; } @Override //Dummy - float getMaxFloatValue() { kllSketchThrow(ERR50); return (float) maxDoubleValue_; } + float getMaxFloatValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return (float) maxDoubleValue_; } @Override //Used internally double getMinDoubleValue() { return minDoubleValue_; } @Override //Dummy - float getMinFloatValue() { kllSketchThrow(ERR50); return (float) minDoubleValue_; } + float getMinFloatValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return (float) minDoubleValue_; } @Override //Used internally void setDoubleItemsArray(final double[] doubleItems) { doubleItems_ = doubleItems; } @@ -334,21 +334,21 @@ public void update(final double value) { void setDoubleItemsArrayAt(final 
int index, final double value) { doubleItems_[index] = value; } @Override //Dummy - void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(ERR50); } + void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Dummy - void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(ERR50); } + void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Used internally void setMaxDoubleValue(final double value) { maxDoubleValue_ = value; } @Override //Dummy - void setMaxFloatValue(final float value) { kllSketchThrow(ERR50); } + void setMaxFloatValue(final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Used internally void setMinDoubleValue(final double value) { minDoubleValue_ = value; } @Override //Dummy - void setMinFloatValue(final float value) { kllSketchThrow(ERR50); } + void setMinFloatValue(final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 5921f556a..2408b865f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -23,9 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR34; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR35; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR50; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_FLOAT; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_CANNOT_BE_DIRECT; +import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_MUST_NOT_CALL; import 
org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -289,8 +289,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - if (other.isDirect()) { kllSketchThrow(ERR35); } - if (!other.isFloatsSketch()) { kllSketchThrow(ERR34); } + if (other.isDirect()) { kllSketchThrow(ERR_SRC_CANNOT_BE_DIRECT); } + if (!other.isFloatsSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_FLOAT); } mergeFloatImpl(other); } @@ -304,10 +304,10 @@ public void update(final float value) { } @Override //Dummy - double[] getDoubleItemsArray() { kllSketchThrow(ERR50); return null; } + double[] getDoubleItemsArray() { kllSketchThrow(ERR_MUST_NOT_CALL); return null; } @Override //Dummy - double getDoubleItemsArrayAt(final int index) { kllSketchThrow(ERR50); return Double.NaN; } + double getDoubleItemsArrayAt(final int index) { kllSketchThrow(ERR_MUST_NOT_CALL); return Double.NaN; } @Override //Used internally float[] getFloatItemsArray() { return floatItems_; } @@ -316,22 +316,22 @@ public void update(final float value) { float getFloatItemsArrayAt(final int index) { return floatItems_[index]; } @Override //Dummy - double getMaxDoubleValue() { kllSketchThrow(ERR50); return maxFloatValue_; } + double getMaxDoubleValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return maxFloatValue_; } @Override //Used internally float getMaxFloatValue() { return maxFloatValue_; } @Override //Dummy - double getMinDoubleValue() { kllSketchThrow(ERR50); return minFloatValue_; } + double getMinDoubleValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return minFloatValue_; } @Override //Used internally float getMinFloatValue() { return minFloatValue_; } @Override //Dummy - void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(ERR50); } + void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Dummy - void setDoubleItemsArrayAt(final int 
index, final double value) { kllSketchThrow(ERR50); } + void setDoubleItemsArrayAt(final int index, final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Used internally void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } @@ -340,13 +340,13 @@ public void update(final float value) { void setFloatItemsArrayAt(final int index, final float value) { floatItems_[index] = value; } @Override //Dummy - void setMaxDoubleValue(final double value) { kllSketchThrow(ERR50); } + void setMaxDoubleValue(final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Used internally void setMaxFloatValue(final float value) { maxFloatValue_ = value; } @Override //Dummy - void setMinDoubleValue(final double value) { kllSketchThrow(ERR50); } + void setMinDoubleValue(final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 9015081ba..1b9470ee3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -20,14 +20,14 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.Family.idToFamily; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR0; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR1; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR10; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR2; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR20; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR4; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR5; -import static 
org.apache.datasketches.kll.KllMemoryValidate.MERRNO.MERR6; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_SRC_NOT_KLL; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_UPDATABLEBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_SINGLEBIT; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_SINGLEBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_DOUBLEBIT_AND_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_FLOATBIT_AND_PREINTS; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; @@ -106,7 +106,7 @@ final class KllMemoryValidate { serVer = extractSerVer(srcMem); familyID = extractFamilyID(srcMem); - if (familyID != Family.KLL.getID()) { memoryValidateThrow(MERR0, familyID); } + if (familyID != Family.KLL.getID()) { memoryValidateThrow(ERR_SRC_NOT_KLL, familyID); } famName = idToFamily(familyID).toString(); flags = extractFlags(srcMem); empty = extractEmptyFlag(srcMem); @@ -118,19 +118,19 @@ final class KllMemoryValidate { m = extractM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(MERR10, 0); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(ERR_UPDATABLEBIT_AND_SER_VER, 0); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } void compactMemoryValidate(final Memory srcMem) { - if (empty && singleItem) { memoryValidateThrow(MERR20, 
0); } + if (empty && singleItem) { memoryValidateThrow(ERR_EMPTYBIT_AND_SINGLEBIT, 0); } final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0); switch (sw) { case 0: { //FLOAT_FULL_COMPACT - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(MERR6, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(ERR_FLOATBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -156,8 +156,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 1: { //FLOAT_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_EMPTY_COMPACT; n = 0; //assumed dyMinK = k; //assumed @@ -175,8 +175,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 4: { //FLOAT_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(MERR4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(ERR_SINGLEBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -197,8 +197,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 8: { //DOUBLE_FULL_COMPACT - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(MERR5, preInts); } - if (serVer != 
SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(ERR_DOUBLEBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -224,8 +224,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 9: { //DOUBLE_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(MERR2, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; dyMinK = k; @@ -244,8 +244,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 12: { //DOUBLE_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(MERR1, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(MERR4, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(ERR_SINGLEBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -272,7 +272,7 @@ void compactMemoryValidate(final Memory srcMem) { void updatableMemoryValidate(final WritableMemory wSrcMem) { if (doublesSketch) { //DOUBLE_UPDATABLE - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(MERR5, preInts); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(ERR_DOUBLEBIT_AND_PREINTS, preInts); } layout = Layout.DOUBLE_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -295,7 +295,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { 
sketchBytes = offset + itemsArrBytes; } else { //FLOAT_UPDATABLE - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(MERR6, preInts); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(ERR_FLOATBIT_AND_PREINTS, preInts); } layout = Layout.FLOAT_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -318,20 +318,29 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { } } - enum MERRNO { MERR0, MERR1, MERR2, MERR4, MERR5, MERR6, MERR10, MERR20 } + enum MERRNO { ERR_SRC_NOT_KLL, ERR_EMPTYBIT_AND_PREINTS, ERR_EMPTYBIT_AND_SER_VER, + ERR_SINGLEBIT_AND_SER_VER, ERR_DOUBLEBIT_AND_PREINTS, ERR_FLOATBIT_AND_PREINTS, ERR_UPDATABLEBIT_AND_SER_VER, + ERR_EMPTYBIT_AND_SINGLEBIT } - private static void memoryValidateThrow(final MERRNO errNo, final int value) { + private static void memoryValidateThrow(final MERRNO errType, final int value) { String msg = ""; - switch (errNo) { - case MERR0: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case MERR1: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case MERR2: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case MERR4: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case MERR5: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case MERR6: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case MERR10: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; - case MERR20: msg = "Empty flag bit and SingleItem flag bit cannot both be set. 
Flags: " + value; break; - default: msg = "Unknown error: errNo: " + errNo; break; + switch (errType) { + case ERR_SRC_NOT_KLL: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; + case ERR_EMPTYBIT_AND_PREINTS: msg = + "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; + case ERR_EMPTYBIT_AND_SER_VER: msg = + "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; + case ERR_SINGLEBIT_AND_SER_VER: msg = + "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; + case ERR_DOUBLEBIT_AND_PREINTS: msg = + "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; + case ERR_FLOATBIT_AND_PREINTS: msg = + "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; + case ERR_UPDATABLEBIT_AND_SER_VER: msg = + "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; + case ERR_EMPTYBIT_AND_SINGLEBIT: msg = + "Empty flag bit and SingleItem flag bit cannot both be set. 
Flags: " + value; break; + default: msg = "Unknown error"; break; } throw new SketchesArgumentException(msg); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index d50cb2f12..b15faba3a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -237,19 +237,19 @@ final static boolean isCompatible() { return compatible; } - enum ERRNO { ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR50 } + enum ERRNO { ERR_TGT_IS_IMMUTABLE, ERR_SRC_IS_NOT_DIRECT, ERR_SRC_IS_NOT_DOUBLE, + ERR_SRC_IS_NOT_FLOAT, ERR_SRC_CANNOT_BE_DIRECT, ERR_MUST_NOT_CALL } - final static void kllSketchThrow(final ERRNO errNo) { + final static void kllSketchThrow(final ERRNO errType) { String msg = ""; - switch (errNo) { - case ERR30: msg = "Given sketch Memory is immutable, cannot write."; break; - case ERR31: msg = "Given sketch Memory is immutable and incompatible."; break; - case ERR32: msg = "Given sketch must be of type Direct."; break; - case ERR33: msg = "Given sketch must be of type Double."; break; - case ERR34: msg = "Given sketch must be of type Float."; break; - case ERR35: msg = "Given sketch must not be of type Direct."; break; - case ERR50: msg = "This is an artifact of inheritance and should never be called."; break; - default: msg = "Unknown error: errNo: " + errNo; break; + switch (errType) { + case ERR_TGT_IS_IMMUTABLE: msg = "Given sketch Memory is immutable, cannot write."; break; + case ERR_SRC_IS_NOT_DIRECT: msg = "Given sketch must be of type Direct."; break; + case ERR_SRC_IS_NOT_DOUBLE: msg = "Given sketch must be of type Double."; break; + case ERR_SRC_IS_NOT_FLOAT: msg = "Given sketch must be of type Float."; break; + case ERR_SRC_CANNOT_BE_DIRECT: msg = "Given sketch must not be of type Direct."; break; + case ERR_MUST_NOT_CALL: msg = "This is an artifact of inheritance and should never be called."; break; + default: 
msg = "Unknown error."; break; } throw new SketchesArgumentException(msg); } From 532a4e8a447ddb1ae175245ddd811caf16dcf9ea Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 30 Mar 2022 11:56:36 -0700 Subject: [PATCH 21/31] Found a few more miscellaneous items. --- .../datasketches/kll/KllDoublesSketch.java | 28 ++++++++----------- .../datasketches/kll/KllFloatsSketch.java | 28 ++++++++----------- .../apache/datasketches/kll/KllHelper.java | 4 +-- .../datasketches/kll/KllPreambleUtil.java | 6 ++-- .../apache/datasketches/kll/KllSketch.java | 3 +- 5 files changed, 31 insertions(+), 38 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index eb05d4688..93bfc2f44 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -39,12 +39,20 @@ * @author Lee Rhodes, Kevin Lang */ public final class KllDoublesSketch extends KllHeapSketch { - - // Specific to the doubles sketch - private double[] doubleItems_; // the continuous array of double items + private double[] doubleItems_; private double minDoubleValue_; private double maxDoubleValue_; + /** + * Private heapify constructor. + * @param mem Memory object that contains data serialized by this sketch. + * @param memVal the MemoryCheck object + */ + private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { + super(memVal.k, memVal.m, SketchType.DOUBLES_SKETCH); + buildHeapKllSketchFromMemory(memVal); + } + /** * Heap constructor with the default k = 200, and DEFAULT_M of 8. * This will have a rank error of about 1.65%. @@ -75,23 +83,13 @@ public KllDoublesSketch(final int k) { * @param k parameter that controls size of the sketch and accuracy of estimates * @param m parameter that controls the minimum level width. 
*/ - public KllDoublesSketch(final int k, final int m) { + KllDoublesSketch(final int k, final int m) { super(k, m, SketchType.DOUBLES_SKETCH); doubleItems_ = new double[k]; minDoubleValue_ = Double.NaN; maxDoubleValue_ = Double.NaN; } - /** - * Private heapify constructor. - * @param mem Memory object that contains data serialized by this sketch. - * @param memVal the MemoryCheck object - */ - private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { - super(memVal.k, memVal.m, SketchType.DOUBLES_SKETCH); - buildHeapKllSketchFromMemory(memVal); - } - /** * Factory heapify takes the sketch image in Memory and instantiates an on-heap sketch. * The resulting sketch will not retain any link to the source Memory. @@ -99,8 +97,6 @@ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the MemoryValidate class does nearly all the validity checking. - //The validated Memory is then passed to the actual private heapify constructor. public static KllDoublesSketch heapify(final Memory mem) { final KllMemoryValidate memChk = new KllMemoryValidate(mem); if (!memChk.doublesSketch) { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 2408b865f..57627e76f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -39,12 +39,20 @@ * @author Lee Rhodes, Kevin Lang */ public final class KllFloatsSketch extends KllHeapSketch { - - // Specific to the floats sketch - private float[] floatItems_; // the continuous array of float items + private float[] floatItems_; private float minFloatValue_; private float maxFloatValue_; + /** + * Private heapify constructor. + * @param mem Memory object that contains data serialized by this sketch. 
+ * @param memVal the MemoryCheck object + */ + private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { + super(memVal.k, memVal.m, SketchType.FLOATS_SKETCH); + buildHeapKllSketchFromMemory(memVal); + } + /** * Heap constructor with the default k = 200, and DEFAULT_M of 8. * This will have a rank error of about 1.65%. @@ -75,23 +83,13 @@ public KllFloatsSketch(final int k) { * @param k parameter that controls size of the sketch and accuracy of estimates * @param m parameter that controls the minimum level width. */ - public KllFloatsSketch(final int k, final int m) { + KllFloatsSketch(final int k, final int m) { super(k, m, SketchType.FLOATS_SKETCH); floatItems_ = new float[k]; minFloatValue_ = Float.NaN; maxFloatValue_ = Float.NaN; } - /** - * Private heapify constructor. - * @param mem Memory object that contains data serialized by this sketch. - * @param memVal the MemoryCheck object - */ - private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { - super(memVal.k, memVal.m, SketchType.FLOATS_SKETCH); - buildHeapKllSketchFromMemory(memVal); - } - /** * Factory heapify takes the sketch image in Memory and instantiates an on-heap sketch. * The resulting sketch will not retain any link to the source Memory. @@ -99,8 +97,6 @@ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { * See Memory * @return a heap-based sketch based on the given Memory. */ - //To simplify the code, the MemoryValidate class does nearly all the validity checking. - //The validated Memory is then passed to the actual private heapify constructor. 
public static KllFloatsSketch heapify(final Memory mem) { final KllMemoryValidate memVal = new KllMemoryValidate(mem); if (memVal.doublesSketch) { diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 09d8fde47..83e2bf45c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -24,10 +24,10 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MAX_M; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M; import static org.apache.datasketches.kll.KllSketch.CDF_COEF; import static org.apache.datasketches.kll.KllSketch.CDF_EXP; -import static org.apache.datasketches.kll.KllSketch.MAX_M; -import static org.apache.datasketches.kll.KllSketch.MIN_M; import static org.apache.datasketches.kll.KllSketch.PMF_COEF; import static org.apache.datasketches.kll.KllSketch.PMF_EXP; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 3fb4a4242..2d006d48f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -131,7 +131,9 @@ private KllPreambleUtil() {} */ public static final int DEFAULT_K = 200; public static final int DEFAULT_M = 8; - static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short + public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short + public static final int MAX_M = 8; + public static final int MIN_M = 2; // Preamble byte addresses static final int 
PREAMBLE_INTS_BYTE_ADR = 0; @@ -153,7 +155,7 @@ private KllPreambleUtil() {} static final int DATA_START_ADR_FLOAT = 20; // float sketch, not single item // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch - static final int DATA_START_ADR_DOUBLE = 20; // double sketch, not single item //TODO?? + static final int DATA_START_ADR_DOUBLE = 20; // double sketch, not single item // Other static values static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index b15faba3a..b93691a3c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -32,6 +32,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; +import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; @@ -96,8 +97,6 @@ * @author Lee Rhodes, Kevin Lang */ public abstract class KllSketch { - static final int MIN_M = 2; - static final int MAX_M = 8; static final double EPS_DELTA_THRESHOLD = 1E-6; static final double MIN_EPS = 4.7634E-5; static final double PMF_COEF = 2.446; From d446ae5c91a228a7ec35fa59c7cd6b0617e6ab27 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 30 Mar 2022 13:35:11 -0700 Subject: [PATCH 22/31] Fixing feedback from Review. 
--- .../kll/KllDirectDoublesSketch.java | 8 +- .../kll/KllDirectFloatsSketch.java | 8 +- .../datasketches/kll/KllDirectSketch.java | 28 +++---- .../datasketches/kll/KllDoublesSketch.java | 26 +++---- .../datasketches/kll/KllFloatsSketch.java | 26 +++---- .../datasketches/kll/KllMemoryValidate.java | 74 +++++++++---------- .../apache/datasketches/kll/KllSketch.java | 18 ++--- 7 files changed, 94 insertions(+), 94 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 50cbae3ff..74ce437ea 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -36,8 +36,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DIRECT; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -301,8 +301,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(ERR_SRC_IS_NOT_DIRECT); } - if (!other.isDoublesSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_DOUBLE); } + if (!other.isDirect()) { kllSketchThrow(SRC_IS_NOT_DIRECT); } + if (!other.isDoublesSketch()) { kllSketchThrow(SRC_IS_NOT_DOUBLE); } mergeDoubleImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java 
b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index c802a2495..4198abbc8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -35,8 +35,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DIRECT; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -301,8 +301,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(ERR_SRC_IS_NOT_DIRECT); } - if (!other.isFloatsSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_FLOAT); } + if (!other.isDirect()) { kllSketchThrow(SRC_IS_NOT_DIRECT); } + if (!other.isFloatsSketch()) { kllSketchThrow(SRC_IS_NOT_FLOAT); } mergeFloatImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 6a559aee3..691106e63 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -31,7 +31,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import static 
org.apache.datasketches.kll.KllSketch.ERRNO.ERR_TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -165,14 +165,14 @@ int getNumLevels() { @Override void incN() { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } long n = extractN(wmem); insertN(wmem, ++n); } @Override void incNumLevels() { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } int numLevels = extractNumLevels(wmem); insertNumLevels(wmem, ++numLevels); } @@ -184,7 +184,7 @@ boolean isLevelZeroSorted() { @Override void setDoubleItemsArray(final double[] doubleItems) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); } @@ -195,13 +195,13 @@ void setDoubleItemsArrayAt(final int index, final double value) { @Override void setDyMinK(final int dyMinK) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } insertDyMinK(wmem, dyMinK); } @Override void setFloatItemsArray(final float[] floatItems) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); } @@ -217,7 +217,7 @@ void setItemsArrayUpdatable(final WritableMemory itemsMem) { @Override void setLevelsArray(final int[] levelsArr) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); } @@ -247,31 +247,31 @@ void setLevelsArrayUpdatable(final WritableMemory levelsMem) { @Override void setLevelZeroSorted(final 
boolean sorted) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } insertLevelZeroSortedFlag(wmem, sorted); } @Override void setMaxDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(Double.BYTES, value); } @Override void setMaxFloatValue(final float value) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(Float.BYTES, value); } @Override void setMinDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(0, value); } @Override void setMinFloatValue(final float value) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(0, value); } @@ -282,13 +282,13 @@ void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { @Override void setN(final long n) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } insertN(wmem, n); } @Override void setNumLevels(final int numLevels) { - if (!updatable) { kllSketchThrow(ERR_TGT_IS_IMMUTABLE); } + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } insertNumLevels(wmem, numLevels); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 93bfc2f44..8d249f908 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -23,9 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static 
org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_DOUBLE; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_CANNOT_BE_DIRECT; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -285,8 +285,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (other.isDirect()) { kllSketchThrow(ERR_SRC_CANNOT_BE_DIRECT); } - if (!other.isDoublesSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_DOUBLE); } + if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } + if (!other.isDoublesSketch()) { kllSketchThrow(SRC_IS_NOT_DOUBLE); } mergeDoubleImpl(other); } @@ -306,22 +306,22 @@ public void update(final double value) { double getDoubleItemsArrayAt(final int index) { return doubleItems_[index]; } @Override //Dummy - float[] getFloatItemsArray() { kllSketchThrow(ERR_MUST_NOT_CALL); return null; } + float[] getFloatItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } @Override //Dummy - float getFloatItemsArrayAt(final int index) { kllSketchThrow(ERR_MUST_NOT_CALL); return Float.NaN; } + float getFloatItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } @Override //Used internally double getMaxDoubleValue() { return maxDoubleValue_; } @Override //Dummy - float getMaxFloatValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return (float) maxDoubleValue_; } + float getMaxFloatValue() { kllSketchThrow(MUST_NOT_CALL); return (float) maxDoubleValue_; } @Override //Used internally double getMinDoubleValue() { return 
minDoubleValue_; } @Override //Dummy - float getMinFloatValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return (float) minDoubleValue_; } + float getMinFloatValue() { kllSketchThrow(MUST_NOT_CALL); return (float) minDoubleValue_; } @Override //Used internally void setDoubleItemsArray(final double[] doubleItems) { doubleItems_ = doubleItems; } @@ -330,21 +330,21 @@ public void update(final double value) { void setDoubleItemsArrayAt(final int index, final double value) { doubleItems_[index] = value; } @Override //Dummy - void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(MUST_NOT_CALL); } @Override //Dummy - void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(MUST_NOT_CALL); } @Override //Used internally void setMaxDoubleValue(final double value) { maxDoubleValue_ = value; } @Override //Dummy - void setMaxFloatValue(final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setMaxFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } @Override //Used internally void setMinDoubleValue(final double value) { minDoubleValue_ = value; } @Override //Dummy - void setMinFloatValue(final float value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setMinFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 57627e76f..4ae3aa81c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -23,9 +23,9 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static 
org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_IS_NOT_FLOAT; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_SRC_CANNOT_BE_DIRECT; -import static org.apache.datasketches.kll.KllSketch.ERRNO.ERR_MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -285,8 +285,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - if (other.isDirect()) { kllSketchThrow(ERR_SRC_CANNOT_BE_DIRECT); } - if (!other.isFloatsSketch()) { kllSketchThrow(ERR_SRC_IS_NOT_FLOAT); } + if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } + if (!other.isFloatsSketch()) { kllSketchThrow(SRC_IS_NOT_FLOAT); } mergeFloatImpl(other); } @@ -300,10 +300,10 @@ public void update(final float value) { } @Override //Dummy - double[] getDoubleItemsArray() { kllSketchThrow(ERR_MUST_NOT_CALL); return null; } + double[] getDoubleItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } @Override //Dummy - double getDoubleItemsArrayAt(final int index) { kllSketchThrow(ERR_MUST_NOT_CALL); return Double.NaN; } + double getDoubleItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } @Override //Used internally float[] getFloatItemsArray() { return floatItems_; } @@ -312,22 +312,22 @@ public void update(final float value) { float getFloatItemsArrayAt(final int index) { return floatItems_[index]; } @Override //Dummy - double getMaxDoubleValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return maxFloatValue_; } + double getMaxDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return maxFloatValue_; } @Override //Used internally float getMaxFloatValue() { return maxFloatValue_; } 
@Override //Dummy - double getMinDoubleValue() { kllSketchThrow(ERR_MUST_NOT_CALL); return minFloatValue_; } + double getMinDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return minFloatValue_; } @Override //Used internally float getMinFloatValue() { return minFloatValue_; } @Override //Dummy - void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(MUST_NOT_CALL); } @Override //Dummy - void setDoubleItemsArrayAt(final int index, final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setDoubleItemsArrayAt(final int index, final double value) { kllSketchThrow(MUST_NOT_CALL); } @Override //Used internally void setFloatItemsArray(final float[] floatItems) { floatItems_ = floatItems; } @@ -336,13 +336,13 @@ public void update(final float value) { void setFloatItemsArrayAt(final int index, final float value) { floatItems_[index] = value; } @Override //Dummy - void setMaxDoubleValue(final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setMaxDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } @Override //Used internally void setMaxFloatValue(final float value) { maxFloatValue_ = value; } @Override //Dummy - void setMinDoubleValue(final double value) { kllSketchThrow(ERR_MUST_NOT_CALL); } + void setMinDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } @Override //Used internally void setMinFloatValue(final float value) { minFloatValue_ = value; } diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 1b9470ee3..4a469be94 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -20,14 +20,14 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.Family.idToFamily; -import static 
org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_SRC_NOT_KLL; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_PREINTS; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_UPDATABLEBIT_AND_SER_VER; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_SER_VER; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_EMPTYBIT_AND_SINGLEBIT; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_SINGLEBIT_AND_SER_VER; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_DOUBLEBIT_AND_PREINTS; -import static org.apache.datasketches.kll.KllMemoryValidate.MERRNO.ERR_FLOATBIT_AND_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SRC_NOT_KLL; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.UPDATABLEBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SINGLEBIT; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_SER_VER; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.DOUBLEBIT_AND_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.FLOATBIT_AND_PREINTS; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; @@ -106,7 +106,7 @@ final class KllMemoryValidate { serVer = extractSerVer(srcMem); familyID = extractFamilyID(srcMem); - if (familyID != Family.KLL.getID()) { memoryValidateThrow(ERR_SRC_NOT_KLL, familyID); } + if 
(familyID != Family.KLL.getID()) { memoryValidateThrow(SRC_NOT_KLL, familyID); } famName = idToFamily(familyID).toString(); flags = extractFlags(srcMem); empty = extractEmptyFlag(srcMem); @@ -118,19 +118,19 @@ final class KllMemoryValidate { m = extractM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(ERR_UPDATABLEBIT_AND_SER_VER, 0); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 0); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } void compactMemoryValidate(final Memory srcMem) { - if (empty && singleItem) { memoryValidateThrow(ERR_EMPTYBIT_AND_SINGLEBIT, 0); } + if (empty && singleItem) { memoryValidateThrow(EMPTYBIT_AND_SINGLEBIT, 0); } final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 8 : 0); switch (sw) { case 0: { //FLOAT_FULL_COMPACT - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(ERR_FLOATBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(FLOATBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -156,8 +156,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 1: { //FLOAT_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = 
Layout.FLOAT_EMPTY_COMPACT; n = 0; //assumed dyMinK = k; //assumed @@ -175,8 +175,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 4: { //FLOAT_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(ERR_SINGLEBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(SINGLEBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -197,8 +197,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 8: { //DOUBLE_FULL_COMPACT - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(ERR_DOUBLEBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(DOUBLEBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); dyMinK = extractDyMinK(srcMem); @@ -224,8 +224,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 9: { //DOUBLE_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(ERR_EMPTYBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_EMPTY_COMPACT; n = 0; dyMinK = k; @@ -244,8 +244,8 @@ void compactMemoryValidate(final Memory srcMem) { break; } case 12: { //DOUBLE_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { 
memoryValidateThrow(ERR_EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(ERR_SINGLEBIT_AND_SER_VER, serVer); } + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } + if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(SINGLEBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_SINGLE_COMPACT; n = 1; dyMinK = k; @@ -272,7 +272,7 @@ void compactMemoryValidate(final Memory srcMem) { void updatableMemoryValidate(final WritableMemory wSrcMem) { if (doublesSketch) { //DOUBLE_UPDATABLE - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(ERR_DOUBLEBIT_AND_PREINTS, preInts); } + if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(DOUBLEBIT_AND_PREINTS, preInts); } layout = Layout.DOUBLE_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -295,7 +295,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { sketchBytes = offset + itemsArrBytes; } else { //FLOAT_UPDATABLE - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(ERR_FLOATBIT_AND_PREINTS, preInts); } + if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(FLOATBIT_AND_PREINTS, preInts); } layout = Layout.FLOAT_UPDATABLE; n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience @@ -318,27 +318,27 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { } } - enum MERRNO { ERR_SRC_NOT_KLL, ERR_EMPTYBIT_AND_PREINTS, ERR_EMPTYBIT_AND_SER_VER, - ERR_SINGLEBIT_AND_SER_VER, ERR_DOUBLEBIT_AND_PREINTS, ERR_FLOATBIT_AND_PREINTS, ERR_UPDATABLEBIT_AND_SER_VER, - ERR_EMPTYBIT_AND_SINGLEBIT } + enum MemoryInputError { SRC_NOT_KLL, EMPTYBIT_AND_PREINTS, EMPTYBIT_AND_SER_VER, + SINGLEBIT_AND_SER_VER, DOUBLEBIT_AND_PREINTS, FLOATBIT_AND_PREINTS, UPDATABLEBIT_AND_SER_VER, + EMPTYBIT_AND_SINGLEBIT } - private static void memoryValidateThrow(final MERRNO errType, final int value) { + private static void 
memoryValidateThrow(final MemoryInputError errType, final int value) { String msg = ""; switch (errType) { - case ERR_SRC_NOT_KLL: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case ERR_EMPTYBIT_AND_PREINTS: msg = + case SRC_NOT_KLL: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; + case EMPTYBIT_AND_PREINTS: msg = "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case ERR_EMPTYBIT_AND_SER_VER: msg = + case EMPTYBIT_AND_SER_VER: msg = "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case ERR_SINGLEBIT_AND_SER_VER: msg = + case SINGLEBIT_AND_SER_VER: msg = "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case ERR_DOUBLEBIT_AND_PREINTS: msg = + case DOUBLEBIT_AND_PREINTS: msg = "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case ERR_FLOATBIT_AND_PREINTS: msg = + case FLOATBIT_AND_PREINTS: msg = "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case ERR_UPDATABLEBIT_AND_SER_VER: msg = + case UPDATABLEBIT_AND_SER_VER: msg = "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; - case ERR_EMPTYBIT_AND_SINGLEBIT: msg = + case EMPTYBIT_AND_SINGLEBIT: msg = "Empty flag bit and SingleItem flag bit cannot both be set. 
Flags: " + value; break; default: msg = "Unknown error"; break; } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index b93691a3c..800d986f3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -236,18 +236,18 @@ final static boolean isCompatible() { return compatible; } - enum ERRNO { ERR_TGT_IS_IMMUTABLE, ERR_SRC_IS_NOT_DIRECT, ERR_SRC_IS_NOT_DOUBLE, - ERR_SRC_IS_NOT_FLOAT, ERR_SRC_CANNOT_BE_DIRECT, ERR_MUST_NOT_CALL } + enum Error { TGT_IS_IMMUTABLE, SRC_IS_NOT_DIRECT, SRC_IS_NOT_DOUBLE, + SRC_IS_NOT_FLOAT, SRC_CANNOT_BE_DIRECT, MUST_NOT_CALL } - final static void kllSketchThrow(final ERRNO errType) { + final static void kllSketchThrow(final Error errType) { String msg = ""; switch (errType) { - case ERR_TGT_IS_IMMUTABLE: msg = "Given sketch Memory is immutable, cannot write."; break; - case ERR_SRC_IS_NOT_DIRECT: msg = "Given sketch must be of type Direct."; break; - case ERR_SRC_IS_NOT_DOUBLE: msg = "Given sketch must be of type Double."; break; - case ERR_SRC_IS_NOT_FLOAT: msg = "Given sketch must be of type Float."; break; - case ERR_SRC_CANNOT_BE_DIRECT: msg = "Given sketch must not be of type Direct."; break; - case ERR_MUST_NOT_CALL: msg = "This is an artifact of inheritance and should never be called."; break; + case TGT_IS_IMMUTABLE: msg = "Given sketch Memory is immutable, cannot write."; break; + case SRC_IS_NOT_DIRECT: msg = "Given sketch must be of type Direct."; break; + case SRC_IS_NOT_DOUBLE: msg = "Given sketch must be of type Double."; break; + case SRC_IS_NOT_FLOAT: msg = "Given sketch must be of type Float."; break; + case SRC_CANNOT_BE_DIRECT: msg = "Given sketch must not be of type Direct."; break; + case MUST_NOT_CALL: msg = "This is an artifact of inheritance and should never be called."; break; default: msg = "Unknown error."; break; } throw new SketchesArgumentException(msg); 
From 0843950a7bebd62460c6e96398d12ef2a782b13e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 30 Mar 2022 20:28:25 -0700 Subject: [PATCH 23/31] Fixed issues raised during review. --- .../kll/KllDirectDoublesSketch.java | 80 ++++++++++++- .../kll/KllDirectFloatsSketch.java | 82 ++++++++++++- .../datasketches/kll/KllDirectSketch.java | 112 +----------------- .../datasketches/kll/KllDoublesSketch.java | 4 +- .../datasketches/kll/KllFloatsSketch.java | 4 +- .../datasketches/kll/KllHeapSketch.java | 12 +- .../datasketches/kll/KllMemoryValidate.java | 10 +- .../datasketches/kll/KllPreambleUtil.java | 14 +-- .../apache/datasketches/kll/KllSketch.java | 28 ++--- .../kll/KllDirectDoublesSketchTest.java | 9 +- .../kll/KllDirectFloatsSketchTest.java | 8 +- .../kll/KllDoublesSketchTest.java | 1 - .../kll/MiscDirectDoublesTest.java | 21 ++-- .../kll/MiscDirectFloatsTest.java | 18 +-- .../datasketches/kll/MiscDoublesTest.java | 18 +-- .../datasketches/kll/MiscFloatsTest.java | 18 +-- 16 files changed, 237 insertions(+), 202 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 74ce437ea..93fcbb147 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -27,7 +27,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; import static 
org.apache.datasketches.kll.KllPreambleUtil.insertK; @@ -36,8 +36,10 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -105,7 +107,7 @@ static KllDirectDoublesSketch newInstance(final int k, final int m, final Writab insertK(dstMem, k); insertM(dstMem, m); insertN(dstMem, 0); - insertDyMinK(dstMem, k); + insertMinK(dstMem, k); insertNumLevels(dstMem, 1); int offset = DATA_START_ADR_DOUBLE; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); @@ -218,7 +220,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -270,7 +272,7 @@ public double[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -325,4 +327,74 @@ public void update(final double value) { updateDouble(value); } + @Override + double[] getDoubleItemsArray() { + final int items = getItemsArrLengthItems(); + final double[] itemsArr = new double[items]; + itemsArrUpdatable.getDoubleArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + double getDoubleItemsArrayAt(final int index) { + return itemsArrUpdatable.getDouble((long)index * Double.BYTES); + } + + @Override + float[] getFloatItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } + + @Override + float getFloatItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + double getMaxDoubleValue() { + return minMaxArrUpdatable.getDouble(Double.BYTES); + } + + @Override + float getMaxFloatValue() { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + double getMinDoubleValue() { + return minMaxArrUpdatable.getDouble(0); + } + + @Override + float getMinFloatValue() { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + void setDoubleItemsArray(final double[] doubleItems) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); + } + + @Override + void setDoubleItemsArrayAt(final int index, final double value) { + itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); + } + + @Override + void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMaxDoubleValue(final double value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + 
minMaxArrUpdatable.putDouble(Double.BYTES, value); + } + + @Override + void setMaxFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMinDoubleValue(final double value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putDouble(0, value); + } + + @Override + void setMinFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 4198abbc8..36173e773 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -26,7 +26,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; import static org.apache.datasketches.kll.KllPreambleUtil.insertK; @@ -35,14 +35,16 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; 
import org.apache.datasketches.memory.WritableMemory; -//Intentional +//Intentional extra blank line so the code lines up with KllDirectDoublesSketch /** * This class implements an off-heap floats KllSketch via a WritableMemory instance of the sketch. * @@ -105,7 +107,7 @@ static KllDirectFloatsSketch newInstance(final int k, final int m, final Writabl insertK(dstMem, k); insertM(dstMem, m); insertN(dstMem, 0); - insertDyMinK(dstMem, k); + insertMinK(dstMem, k); insertNumLevels(dstMem, 1); int offset = DATA_START_ADR_FLOAT; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); @@ -218,7 +220,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -270,7 +272,7 @@ public float[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -325,4 +327,74 @@ public void update(final float value) { updateFloat(value); } + @Override + double[] getDoubleItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } + + @Override + double getDoubleItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float[] getFloatItemsArray() { + final int items = getItemsArrLengthItems(); + final float[] itemsArr = new float[items]; + itemsArrUpdatable.getFloatArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + float getFloatItemsArrayAt(final int index) { + return itemsArrUpdatable.getFloat((long)index * Float.BYTES); + } + + @Override + double getMaxDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float getMaxFloatValue() { + return minMaxArrUpdatable.getFloat(Float.BYTES); + } + + @Override + double getMinDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float getMinFloatValue() { + return minMaxArrUpdatable.getFloat(0); + } + + @Override + void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setDoubleItemsArrayAt(final int index, final double value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setFloatItemsArray(final float[] floatItems) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); + } + + @Override + void setFloatItemsArrayAt(final int index, final float value) { + itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); + } + + @Override + void setMaxDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMaxFloatValue(final float 
value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putFloat(Float.BYTES, value); + } + + @Override + void setMinDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMinFloatValue(final float value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putFloat(0, value); + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 691106e63..400caef4b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -19,18 +19,16 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractMinK; import static org.apache.datasketches.kll.KllPreambleUtil.extractK; import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.extractM; import static org.apache.datasketches.kll.KllPreambleUtil.extractN; import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.insertN; import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; -import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; -import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import org.apache.datasketches.memory.MemoryRequestServer; @@ -41,8 +39,6 @@ * of the sketch type (float or double). 
*/ abstract class KllDirectSketch extends KllSketch { - //All these members are constant for the life of this object. If the WritableMemory changes, - // it may require rebuilding this class final boolean updatable = true; WritableMemory levelsArrUpdatable; WritableMemory minMaxArrUpdatable; @@ -87,38 +83,8 @@ public byte[] toUpdatableByteArray() { } @Override - double[] getDoubleItemsArray() { - if (sketchType == FLOATS_SKETCH) { return null; } - final int items = getItemsArrLengthItems(); - final double[] itemsArr = new double[items]; - itemsArrUpdatable.getDoubleArray(0, itemsArr, 0, items); - return itemsArr; - } - - @Override - double getDoubleItemsArrayAt(final int index) { - if (sketchType == FLOATS_SKETCH) { return Double.NaN; } - return itemsArrUpdatable.getDouble((long)index * Double.BYTES); - } - - @Override - int getDynamicMinK() { - return extractDyMinK(wmem); - } - - @Override - float[] getFloatItemsArray() { - if (sketchType == DOUBLES_SKETCH) { return null; } - final int items = getItemsArrLengthItems(); - final float[] itemsArr = new float[items]; - itemsArrUpdatable.getFloatArray(0, itemsArr, 0, items); - return itemsArr; - } - - @Override - float getFloatItemsArrayAt(final int index) { - if (sketchType == DOUBLES_SKETCH) { return Float.NaN; } - return itemsArrUpdatable.getFloat((long)index * Float.BYTES); + int getMinK() { + return extractMinK(wmem); } int getItemsArrLengthItems() { @@ -138,26 +104,6 @@ int getLevelsArrayAt(final int index) { return levelsArrUpdatable.getInt((long)index * Integer.BYTES); } - @Override - double getMaxDoubleValue() { - return minMaxArrUpdatable.getDouble(Double.BYTES); - } - - @Override - float getMaxFloatValue() { - return minMaxArrUpdatable.getFloat(Float.BYTES); - } - - @Override - double getMinDoubleValue() { - return minMaxArrUpdatable.getDouble(0); - } - - @Override - float getMinFloatValue() { - return minMaxArrUpdatable.getFloat(0); - } - @Override int getNumLevels() { return extractNumLevels(wmem); @@ 
-183,31 +129,9 @@ boolean isLevelZeroSorted() { } @Override - void setDoubleItemsArray(final double[] doubleItems) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); - } - - @Override - void setDoubleItemsArrayAt(final int index, final double value) { - itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); - } - - @Override - void setDyMinK(final int dyMinK) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - insertDyMinK(wmem, dyMinK); - } - - @Override - void setFloatItemsArray(final float[] floatItems) { + void setMinK(final int minK) { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); - } - - @Override - void setFloatItemsArrayAt(final int index, final float value) { - itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); + insertMinK(wmem, minK); } @Override @@ -251,30 +175,6 @@ void setLevelZeroSorted(final boolean sorted) { insertLevelZeroSortedFlag(wmem, sorted); } - @Override - void setMaxDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - minMaxArrUpdatable.putDouble(Double.BYTES, value); - } - - @Override - void setMaxFloatValue(final float value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - minMaxArrUpdatable.putFloat(Float.BYTES, value); - } - - @Override - void setMinDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - minMaxArrUpdatable.putDouble(0, value); - } - - @Override - void setMinFloatValue(final float value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - minMaxArrUpdatable.putFloat(0, value); - } - @Override void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { minMaxArrUpdatable = minMaxMem; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 
8d249f908..3db062ce0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -202,7 +202,7 @@ public double getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -254,7 +254,7 @@ public double[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public double getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 4ae3aa81c..c8dd4f7a8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -202,7 +202,7 @@ public float getQuantile(final double fraction) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. */ public float getQuantileLowerBound(final double fraction) { - return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); } /** @@ -254,7 +254,7 @@ public float[] getQuantiles(final int numEvenlySpaced) { * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. 
*/ public float getQuantileUpperBound(final double fraction) { - return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getDynamicMinK(), false))); + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index 81c56aa71..ab050f66c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -31,7 +31,7 @@ abstract class KllHeapSketch extends KllSketch { private final int k; // configured value of K. private final int m; // configured value of M. private long n_; // number of items input into this sketch. - private int dyMinK_; // dynamic minK for error estimation after merging with different k. + private int minK_; // dynamic minK for error estimation after merging with different k. private int numLevels_; // one-based number of current levels. private int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. 
private boolean isLevelZeroSorted_; @@ -49,7 +49,7 @@ abstract class KllHeapSketch extends KllSketch { this.k = k; this.m = m; n_ = 0; - dyMinK_ = k; + minK_ = k; numLevels_ = 1; levels_ = new int[] {k, k}; isLevelZeroSorted_ = false; @@ -71,8 +71,8 @@ public long getN() { } @Override - int getDynamicMinK() { - return dyMinK_; + int getMinK() { + return minK_; } @Override @@ -104,8 +104,8 @@ boolean isLevelZeroSorted() { } @Override - void setDyMinK(final int dyMinK) { - dyMinK_ = dyMinK; + void setMinK(final int minK) { + minK_ = minK; } @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 4a469be94..3b8dbc3a0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -38,7 +38,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.extractDoubleSketchFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.extractMinK; import static org.apache.datasketches.kll.KllPreambleUtil.extractEmptyFlag; import static org.apache.datasketches.kll.KllPreambleUtil.extractFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.extractFlags; @@ -133,7 +133,7 @@ void compactMemoryValidate(final Memory srcMem) { if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); - dyMinK = extractDyMinK(srcMem); + dyMinK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); int offset = DATA_START_ADR_FLOAT; // LEVELS MEM @@ -201,7 +201,7 @@ void compactMemoryValidate(final Memory srcMem) { if (serVer != SERIAL_VERSION_EMPTY_FULL) 
{ memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = Layout.DOUBLE_FULL_COMPACT; n = extractN(srcMem); - dyMinK = extractDyMinK(srcMem); + dyMinK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); int offset = DATA_START_ADR_DOUBLE; // LEVELS MEM @@ -277,7 +277,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience singleItem = n == 1; // there is no error checking on these bits - dyMinK = extractDyMinK(wSrcMem); + dyMinK = extractMinK(wSrcMem); numLevels = extractNumLevels(wSrcMem); int offset = DATA_START_ADR_DOUBLE; @@ -300,7 +300,7 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { n = extractN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience singleItem = n == 1; // there is no error checking on these bits - dyMinK = extractDyMinK(wSrcMem); + dyMinK = extractMinK(wSrcMem); numLevels = extractNumLevels(wSrcMem); int offset = DATA_START_ADR_FLOAT; //LEVELS diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 2d006d48f..eafb80c81 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -52,7 +52,7 @@ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| * || | | | 20 | 19 | 18 | 17 | 16 | - * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|--Dynamic-Min K--------| + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|------Min K------------| * || | | | | | | | | * ? 
||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| * || | | | | | | | | @@ -76,7 +76,7 @@ * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | * 1 ||---------------------------------N_LONG---------------------------------------| * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|--Dynamic-Min K--------| + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|------Min K------------| * || | | | | | | | | * ? ||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| * || | | | | | | | | @@ -148,7 +148,7 @@ private KllPreambleUtil() {} // MULTI-ITEM static final int N_LONG_ADR = 8; // to 15 - static final int DY_MIN_K_SHORT_ADR = 16; // to 17 + static final int MIN_K_SHORT_ADR = 16; // to 17 static final int NUM_LEVELS_BYTE_ADR = 18; // FLOAT SKETCH 19 is reserved for future use in float sketch @@ -301,8 +301,8 @@ static long extractN(final Memory mem) { return mem.getLong(N_LONG_ADR); } - static int extractDyMinK(final Memory mem) { - return mem.getShort(DY_MIN_K_SHORT_ADR) & 0XFFFF; + static int extractMinK(final Memory mem) { + return mem.getShort(MIN_K_SHORT_ADR) & 0XFFFF; } static int extractNumLevels(final Memory mem) { @@ -362,8 +362,8 @@ static void insertN(final WritableMemory wmem, final long value) { wmem.putLong(N_LONG_ADR, value); } - static void insertDyMinK(final WritableMemory wmem, final int value) { - wmem.putShort(DY_MIN_K_SHORT_ADR, (short) value); + static void insertMinK(final WritableMemory wmem, final int value) { + wmem.putShort(MIN_K_SHORT_ADR, (short) value); } static void insertNumLevels(final WritableMemory wmem, final int value) { diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 800d986f3..583883d68 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -41,7 +41,7 @@ 
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.insertDoubleSketchFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertDyMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertEmptyFlag; import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.insertK; @@ -303,7 +303,7 @@ public final int getCurrentUpdatableSerializedSizeBytes() { * {@link org.apache.datasketches.kll}

      */ public final double getNormalizedRankError(final boolean pmf) { - return getNormalizedRankError(getDynamicMinK(), pmf); + return getNormalizedRankError(getMinK(), pmf); } /** @@ -381,7 +381,7 @@ final void buildHeapKllSketchFromMemory(final KllMemoryValidate memVal) { final boolean updatable = memVal.updatable; setLevelZeroSorted(memVal.level0Sorted); setN(memVal.n); - setDyMinK(memVal.dyMinK); + setMinK(memVal.dyMinK); setNumLevels(memVal.numLevels); final int[] myLevelsArr = new int[getNumLevels() + 1]; @@ -521,11 +521,11 @@ final double[] getDoublesQuantiles(final double[] fractions) { } /** - * Dynamic MinK is the value of K that results from a merge with a sketch configured with a value of K lower than + * MinK is the value of K that results from a merge with a sketch configured with a value of K lower than * the k of this sketch. This value is then used in computing the estimated upper and lower bounds of error. - * @return The dynamic minimum K as a result of merging with lower values of k. + * @return The minimum K as a result of merging with lower values of k. */ - abstract int getDynamicMinK(); + abstract int getMinK(); /** * @return full size of internal items array including garbage; for a doubles sketch this will be null. @@ -725,7 +725,7 @@ final void mergeDoubleImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final double myMin = getMinDoubleValue(); final double myMax = getMaxDoubleValue(); - final int myDyMinK = getDynamicMinK(); + final int myDyMinK = getMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -793,7 +793,7 @@ final void mergeDoubleImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
- setDyMinK(min(myDyMinK, other.getDynamicMinK())); + setMinK(min(myDyMinK, other.getMinK())); } //Update min, max values @@ -835,7 +835,7 @@ final void mergeFloatImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final float myMin = getMinFloatValue(); final float myMax = getMaxFloatValue(); - final int myDyMinK = getDynamicMinK(); + final int myDyMinK = getMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -903,7 +903,7 @@ final void mergeFloatImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. - setDyMinK(min(myDyMinK, other.getDynamicMinK())); + setMinK(min(myDyMinK, other.getMinK())); } //Update min, max values @@ -937,7 +937,7 @@ private static float resolveFloatMaxValue(final float myMax, final float otherMa abstract void setDoubleItemsArrayAt(int index, double value); - abstract void setDyMinK(int dyMinK); + abstract void setMinK(int minK); abstract void setFloatItemsArray(float[] floatItems); @@ -990,7 +990,7 @@ final byte[] toCompactByteArrayImpl() { } else { // n > 1 //remainder of preamble after first 8 bytes insertN(wmem, getN()); - insertDyMinK(wmem, getDynamicMinK()); + insertMinK(wmem, getMinK()); insertNumLevels(wmem, getNumLevels()); offset = (doubleType) ? DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; @@ -1055,7 +1055,7 @@ final String toStringImpl(final boolean withLevels, final boolean withData) { final String skType = (direct ? "Direct" : "") + (doubleType ? 
"Doubles" : "Floats"); sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getDynamicMinK()).append(Util.LS); + sb.append(" Dynamic min K : ").append(getMinK()).append(Util.LS); sb.append(" M : ").append(m).append(Util.LS); sb.append(" N : ").append(getN()).append(Util.LS); sb.append(" Epsilon : ").append(epsPct).append(Util.LS); @@ -1176,7 +1176,7 @@ final byte[] toUpdatableByteArrayImpl() { loadFirst8Bytes(this, wmem, true); //remainder of preamble after first 8 bytes insertN(wmem, getN()); - insertDyMinK(wmem, getDynamicMinK()); + insertMinK(wmem, getMinK()); insertNumLevels(wmem, getNumLevels()); //load data diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 77c1e7814..f3ae36e05 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; @@ -34,7 +33,6 @@ import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; - @SuppressWarnings("javadoc") public class KllDirectDoublesSketchTest { @@ -493,7 +491,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); 
assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -513,7 +511,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -534,7 +532,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -592,4 +590,3 @@ static void println(final String s) { } } - diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index d23979d67..b7f88b72b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; @@ -492,7 +491,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0); @@ -512,7 
+511,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Double.NaN); @@ -533,7 +532,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0); @@ -591,4 +590,3 @@ static void println(final String s) { } } - diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 323814c0a..0172068f4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -//import static org.apache.datasketches.Util.getResourceBytes; //don't have matching numbers from C++ import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java index c407b4e37..eedf5e44e 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -118,8 +118,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); 
assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -135,8 +134,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -153,8 +151,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); - assertTrue(Objects.isNull(sk.getFloatItemsArray())); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -184,7 +181,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -204,7 +201,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -225,7 +222,7 @@ 
public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -255,7 +252,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -275,7 +272,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -296,7 +293,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java index 3bd1ce1d4..597ebe5dc 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -118,7 +118,7 @@ public void checkSketchInitializeFloatHeap() { 
assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -134,7 +134,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -151,7 +151,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -181,7 +181,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -201,7 +201,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -222,7 +222,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); 
assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -252,7 +252,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -272,7 +272,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -293,7 +293,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index ba28c6034..276f52776 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -173,7 +173,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + 
assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -189,7 +189,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -206,7 +206,7 @@ public void checkSketchInitializeDoubleHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -236,7 +236,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -256,7 +256,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -277,7 +277,7 @@ public void checkSketchInitializeDoubleHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); 
assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); @@ -307,7 +307,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxDoubleValue(), 21.0); @@ -327,7 +327,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), Double.NaN); @@ -348,7 +348,7 @@ public void checkSketchInitializeDoubleHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getDoubleItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxDoubleValue(), 1.0); diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index c42e58557..a53199a20 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -173,7 +173,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); 
assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -189,7 +189,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -206,7 +206,7 @@ public void checkSketchInitializeFloatHeap() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -236,7 +236,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -256,7 +256,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -277,7 +277,7 @@ public void checkSketchInitializeFloatHeapifyCompactMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); @@ -307,7 
+307,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 11); assertFalse(sk.isEmpty()); assertTrue(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 33); assertEquals(sk.getLevelsArray().length, 3); assertEquals(sk.getMaxFloatValue(), 21.0F); @@ -327,7 +327,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 0); assertTrue(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), Float.NaN); @@ -348,7 +348,7 @@ public void checkSketchInitializeFloatHeapifyUpdatableMem() { assertEquals(sk.getNumRetained(), 1); assertFalse(sk.isEmpty()); assertFalse(sk.isEstimationMode()); - assertEquals(sk.getDynamicMinK(), k); + assertEquals(sk.getMinK(), k); assertEquals(sk.getFloatItemsArray().length, 20); assertEquals(sk.getLevelsArray().length, 2); assertEquals(sk.getMaxFloatValue(), 1.0F); From 06af8baad7954b65010c4926efd834162da150eb Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 31 Mar 2022 11:44:09 -0700 Subject: [PATCH 24/31] Removed redundant "exclusive" --- .../apache/datasketches/QuantilesHelper.java | 23 +++++++++++++++---- .../kll/KllDirectDoublesSketch.java | 4 ++-- .../kll/KllDirectFloatsSketch.java | 4 ++-- .../kll/KllDoublesQuantileCalculator.java | 4 ++-- .../datasketches/kll/KllDoublesSketch.java | 4 ++-- .../kll/KllFloatsQuantileCalculator.java | 4 ++-- .../datasketches/kll/KllFloatsSketch.java | 4 ++-- .../quantiles/DoublesAuxiliary.java | 8 +++---- .../quantiles/ItemsAuxiliary.java | 8 +++---- .../HeapUpdateDoublesSketchTest.java | 4 ++-- 10 files changed, 40 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/apache/datasketches/QuantilesHelper.java 
b/src/main/java/org/apache/datasketches/QuantilesHelper.java index c7546569c..f128a0d2c 100644 --- a/src/main/java/org/apache/datasketches/QuantilesHelper.java +++ b/src/main/java/org/apache/datasketches/QuantilesHelper.java @@ -29,7 +29,7 @@ public class QuantilesHelper { * An array of {1,1,1,0} becomes {0,1,2,3} * @param array of weights where first element is zero * @return total weight - */ //also used by KLL + */ //used by classic Quantiles and KLL public static long convertToPrecedingCummulative(final long[] array) { long subtotal = 0; for (int i = 0; i < array.length; i++) { @@ -43,15 +43,28 @@ public static long convertToPrecedingCummulative(final long[] array) { /** * Returns the linear zero-based index (position) of a value in the hypothetical sorted stream of * values of size n. - * @param phi the fractional position where: 0 ≤ φ ≤ 1.0. + * @param rank the fractional position where: 0 ≤ φ ≤ 1.0. * @param n the size of the stream * @return the index, a value between 0 and n-1. - */ //also used by KLL - public static long posOfPhi(final double phi, final long n) { - final long pos = (long) Math.floor(phi * n); + */ //used by classic Quantiles and KLL + public static long posOfRank(final double rank, final long n) { + final long pos = (long) Math.floor(rank * n); return pos == n ? n - 1 : pos; //avoids ArrayIndexOutOfBoundException } + /** + * Returns the linear zero-based index (position) of a value in the hypothetical sorted stream of + * values of size n. + * @param rank the fractional position where: 0 ≤ φ ≤ 1.0. + * @param n the size of the stream + * @return the index, a value between 0 and n-1. + * @deprecated use {@link #posOfRank(double, long)} instead. Version 3.2.0. + */ //used by classic Quantiles and KLL + @Deprecated + public static long posOfPhi(final double rank, final long n) { + return posOfRank(rank, n); + } + /** * This is written in terms of a plain array to facilitate testing. 
* @param wtArr the cumulative weights array consisting of chunks diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 93fcbb147..5dc0b347f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -135,7 +135,7 @@ static KllDirectDoublesSketch newInstance(final int k, final int m, final Writab * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 double values on the interval [0.0, 1.0), * which are a consecutive approximation to the CDF of the input stream given the splitPoints. * The value at array position j of the returned CDF array is the sum of the returned values * in positions 0 through j of the returned PMF array. @@ -180,7 +180,7 @@ public double getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 doubles on the interval [0.0, 1.0), * each of which is an approximation to the fraction of the total input stream values * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 36173e773..15c4de1d7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -135,7 +135,7 @@ static KllDirectFloatsSketch newInstance(final int k, final int m, final Writabl * the maximum value. 
* It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 double values on the interval [0.0, 1.0), * which are a consecutive approximation to the CDF of the input stream given the splitPoints. * The value at array position j of the returned CDF array is the sum of the returned values * in positions 0 through j of the returned PMF array. @@ -180,7 +180,7 @@ public float getMinValue() { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 doubles on the interval [0.0, 1.0), * each of which is an approximation to the fraction of the total input stream values * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java index 91453549a..7870002f1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java @@ -132,8 +132,8 @@ private static void tandemMerge( } } - double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); + double getQuantile(final double rank) { + final long pos = QuantilesHelper.posOfRank(rank, n_); return approximatelyAnswerPositonalQuery(pos); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 3db062ce0..39982b809 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -121,7 +121,7 @@ public static KllDoublesSketch heapify(final Memory mem) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 double values on the interval [0.0, 1.0), * which are a consecutive approximation to the CDF of the input stream given the splitPoints. * The value at array position j of the returned CDF array is the sum of the returned values * in positions 0 through j of the returned PMF array. @@ -162,7 +162,7 @@ public double[] getCDF(final double[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 doubles on the interval [0.0, 1.0), * each of which is an approximation to the fraction of the total input stream values * (the mass) that fall into one of those intervals. 
* The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java index 0fee4046e..87539fc0c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java @@ -132,8 +132,8 @@ private static void tandemMerge( } } - float getQuantile(final double phi) { //phi is normalized rank [0,1]. - final long pos = QuantilesHelper.posOfPhi(phi, n_); + float getQuantile(final double rank) { + final long pos = QuantilesHelper.posOfRank(rank, n_); return approximatelyAnswerPositonalQuery(pos); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index c8dd4f7a8..7993ccced 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -121,7 +121,7 @@ public static KllFloatsSketch heapify(final Memory mem) { * the maximum value. * It is not necessary to include either the min or max values in these split points. * - * @return an array of m+1 double values on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 double values on the interval [0.0, 1.0), * which are a consecutive approximation to the CDF of the input stream given the splitPoints. * The value at array position j of the returned CDF array is the sum of the returned values * in positions 0 through j of the returned PMF array. @@ -162,7 +162,7 @@ public double[] getCDF(final float[] splitPoints) { * the maximum value. * It is not necessary to include either the min or max values in these split points. 
* - * @return an array of m+1 doubles on the interval [0.0, 1.0) exclusive, + * @return an array of m+1 doubles on the interval [0.0, 1.0), * each of which is an approximation to the fraction of the total input stream values * (the mass) that fall into one of those intervals. * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java index 40d4d3501..307917d12 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java @@ -70,12 +70,12 @@ final class DoublesAuxiliary { /** * Get the estimated quantile given a fractional rank. - * @param fRank the fractional rank where: 0 ≤ fRank ≤ 1.0. + * @param rank the normalized rank where: 0 ≤ rank ≤ 1.0. * @return the estimated quantile */ - double getQuantile(final double fRank) { - checkFractionalRankBounds(fRank); - final long pos = QuantilesHelper.posOfPhi(fRank, auxN_); + double getQuantile(final double rank) { + checkFractionalRankBounds(rank); + final long pos = QuantilesHelper.posOfRank(rank, auxN_); return approximatelyAnswerPositionalQuery(pos); } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java index 9a617d431..4905fc221 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java @@ -79,13 +79,13 @@ final class ItemsAuxiliary { /** * Get the estimated quantile given a fractional rank. - * @param fRank the fractional rank where: 0 ≤ fRank ≤ 1.0. + * @param rank the normalized rank where: 0 ≤ rank ≤ 1.0. 
* @return the estimated quantile */ - T getQuantile(final double fRank) { - checkFractionalRankBounds(fRank); + T getQuantile(final double rank) { + checkFractionalRankBounds(rank); if (auxN_ <= 0) { return null; } - final long pos = QuantilesHelper.posOfPhi(fRank, auxN_); + final long pos = QuantilesHelper.posOfRank(rank, auxN_); return approximatelyAnswerPositionalQuery(pos); } diff --git a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java index 7c017edfb..5513f7d6a 100644 --- a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java @@ -855,9 +855,9 @@ public void checkPutMemoryTooSmall() { } @Test - public void checkAuxPosOfPhi() throws Exception { + public void checkAuxPosOfRank() throws Exception { long n = 10; - long returnValue = QuantilesHelper.posOfPhi(1.0, 10); + long returnValue = QuantilesHelper.posOfRank(1.0, 10); //println("" + returnValue); assertEquals(returnValue, n-1); } From 0e5dde14f336f3970ec38a356f48461b4435a2e8 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 31 Mar 2022 16:29:12 -0700 Subject: [PATCH 25/31] Moved public statics from KllPreambleUtil to KllSketch, added reset() method. 
--- .../kll/KllDirectDoublesSketch.java | 13 +---- .../kll/KllDirectFloatsSketch.java | 13 +---- .../datasketches/kll/KllDirectSketch.java | 21 ++++++++ .../datasketches/kll/KllDoublesSketch.java | 19 +++++-- .../datasketches/kll/KllFloatsSketch.java | 19 +++++-- .../apache/datasketches/kll/KllHelper.java | 9 ++-- .../datasketches/kll/KllPreambleUtil.java | 9 ---- .../apache/datasketches/kll/KllSketch.java | 52 +++++++++++++++---- .../kll/KllDirectDoublesSketchTest.java | 41 ++++++++++----- .../kll/KllDirectFloatsSketchTest.java | 41 ++++++++++----- .../kll/KllDoublesSketchTest.java | 31 ++++++++--- .../datasketches/kll/KllFloatsSketchTest.java | 31 ++++++++--- .../datasketches/kll/KllHelperTest.java | 16 +++--- 13 files changed, 206 insertions(+), 109 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 5dc0b347f..c6720a172 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -22,7 +22,6 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; @@ -87,7 +86,7 @@ public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, f */ public static KllDirectDoublesSketch newInstance(final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - return newInstance(k, DEFAULT_M, dstMem, memReqSvr); + return newInstance(k, KllSketch.DEFAULT_M, dstMem, memReqSvr); } /** @@ -308,16 +307,6 @@ public void 
merge(final KllSketch other) { mergeDoubleImpl(other); } - @Override - public byte[] toByteArray() { - return toCompactByteArrayImpl(); - } - - @Override - public String toString(final boolean withLevels, final boolean withData) { - return toStringImpl(withLevels, withData); - } - /** * Updates this sketch with the given data item. * diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 15c4de1d7..11a5298d5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -22,7 +22,6 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; @@ -87,7 +86,7 @@ public static KllDirectFloatsSketch writableWrap(final WritableMemory srcMem, fi */ public static KllDirectFloatsSketch newInstance(final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - return newInstance(k, DEFAULT_M, dstMem, memReqSvr); + return newInstance(k, KllSketch.DEFAULT_M, dstMem, memReqSvr); } /** @@ -308,16 +307,6 @@ public void merge(final KllSketch other) { mergeFloatImpl(other); } - @Override - public byte[] toByteArray() { - return toCompactByteArrayImpl(); - } - - @Override - public String toString(final boolean withLevels, final boolean withData) { - return toStringImpl(withLevels, withData); - } - /** * Updates this sketch with the given data item. 
* diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 400caef4b..13725a518 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -74,6 +74,27 @@ public long getN() { return extractN(wmem); } + @Override + public void reset() { + final int k = getK(); + setN(0); + setMinK(k); + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + final int newLevelsArrLen = 2 * Integer.BYTES; + final int newItemsArrLen = k; + KllSketch.memorySpaceMgmt(this, newLevelsArrLen, newItemsArrLen); + levelsArrUpdatable.putIntArray(0L, new int[] {k, k}, 0, 2); + if (sketchType == SketchType.DOUBLES_SKETCH) { + minMaxArrUpdatable.putDoubleArray(0L, new double[] {Double.NaN, Double.NaN}, 0, 2); + itemsArrUpdatable.putDoubleArray(0L, new double[k], 0, k); + } else { + minMaxArrUpdatable.putFloatArray(0L, new float[] {Float.NaN, Float.NaN}, 0, 2); + itemsArrUpdatable.putFloatArray(0L, new float[k], 0, k); + } + } + @Override public byte[] toUpdatableByteArray() { final int bytes = (int) wmem.getCapacity(); diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 39982b809..08f7c5706 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -21,8 +21,6 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; @@ -58,7 +56,7 
@@ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { * This will have a rank error of about 1.65%. */ public KllDoublesSketch() { - this(DEFAULT_K); + this(KllSketch.DEFAULT_K); } /** @@ -69,7 +67,7 @@ public KllDoublesSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllDoublesSketch(final int k) { - this(k, DEFAULT_M); + this(k, KllSketch.DEFAULT_M); } /** @@ -290,6 +288,19 @@ public void merge(final KllSketch other) { mergeDoubleImpl(other); } + @Override + public void reset() { + final int k = getK(); + setN(0); + setMinK(k); + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + doubleItems_ = new double[k]; + minDoubleValue_ = Double.NaN; + maxDoubleValue_ = Double.NaN; + } + /** * Updates this sketch with the given data item. * diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 7993ccced..4c53675f8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,8 +21,6 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; @@ -58,7 +56,7 @@ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { * This will have a rank error of about 1.65%. 
*/ public KllFloatsSketch() { - this(DEFAULT_K); + this(KllSketch.DEFAULT_K); } /** @@ -69,7 +67,7 @@ public KllFloatsSketch() { * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllFloatsSketch(final int k) { - this(k, DEFAULT_M); + this(k, KllSketch.DEFAULT_M); } /** @@ -290,6 +288,19 @@ public void merge(final KllFloatsSketch other) { mergeFloatImpl(other); } + @Override + public void reset() { + final int k = getK(); + setN(0); + setMinK(k); + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + floatItems_ = new float[k]; + minFloatValue_ = Float.NaN; + maxFloatValue_ = Float.NaN; + } + /** * Updates this sketch with the given data item. * diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 83e2bf45c..f089f8da3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -23,9 +23,6 @@ import static org.apache.datasketches.Util.floorPowerOf2; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_M; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M; import static org.apache.datasketches.kll.KllSketch.CDF_COEF; import static org.apache.datasketches.kll.KllSketch.CDF_EXP; import static org.apache.datasketches.kll.KllSketch.PMF_COEF; @@ -194,14 +191,14 @@ public static LevelStats getLevelCapacityItems( * @param k must be greater than 7 and less than 65536. 
*/ static void checkK(final int k, final int m) { - if (k < m || k > MAX_K) { + if (k < m || k > KllSketch.MAX_K) { throw new SketchesArgumentException( - "K must be >= " + m + " and <= " + MAX_K + ": " + k); + "K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k); } } static void checkM(final int m) { - if (m < MIN_M || m > MAX_M || ((m & 1) == 1)) { + if (m < KllSketch.MIN_M || m > KllSketch.MAX_M || ((m & 1) == 1)) { throw new SketchesArgumentException( "M must be >= 2, <= 8 and even: " + m); } diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index eafb80c81..45b6e48de 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -126,15 +126,6 @@ private KllPreambleUtil() {} static final String LS = System.getProperty("line.separator"); - /** - * The default value of K - */ - public static final int DEFAULT_K = 200; - public static final int DEFAULT_M = 8; - public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short - public static final int MAX_M = 8; - public static final int MIN_M = 2; - // Preamble byte addresses static final int PREAMBLE_INTS_BYTE_ADR = 0; static final int SER_VER_BYTE_ADR = 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 583883d68..64bb19606 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -30,9 +30,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; -import static 
org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; @@ -110,6 +107,31 @@ public abstract class KllSketch { MemoryRequestServer memReqSvr; boolean direct; + /** + * The default value of K + */ + public static final int DEFAULT_K = 200; + + /** + * The default value of M + */ + static final int DEFAULT_M = 8; + + /** + * The maximum value of K + */ + public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short + + /** + * The maximum value of M + */ + static final int MAX_M = 8; + + /** + * The minimum value of M + */ + static final int MIN_M = 2; + /** * * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH @@ -155,7 +177,7 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { final double krnd = round(kdbl); final double del = abs(krnd - kdbl); final int k = (int) (del < EPS_DELTA_THRESHOLD ? krnd : ceil(kdbl)); - return max(MIN_M, min(MAX_K, k)); + return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k)); } /** @@ -166,27 +188,28 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @return upper bound on the compact serialized size - * @deprecated use {@link #getMaxSerializedSizeBytes(int, int, long, SketchType, boolean)} instead. + * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. 
*/ @Deprecated public static int getMaxSerializedSizeBytes(final int k, final long n) { - final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, DEFAULT_M, n, FLOATS_SKETCH, false); + final KllHelper.GrowthStats gStats = + KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, FLOATS_SKETCH, false); return gStats.compactBytes; } /** * Returns upper bound on the serialized size of a KllSketch given the following parameters. + * It assumes the default value of m, which is 8. * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the smallest value of k, and the smallest level width. - * If in doubt, use the default value of 8. * @param n stream length * @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH * @param updatable true if updatable form, otherwise the standard compact form. * @return upper bound on the serialized size of a KllSketch. */ - public static int getMaxSerializedSizeBytes(final int k, final int m, final long n, + public static int getMaxSerializedSizeBytes(final int k, final long n, final SketchType sketchType, final boolean updatable) { - final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, m, n, sketchType, false); + final KllHelper.GrowthStats gStats = + KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, sketchType, false); return updatable ? gStats.updatableBytes : gStats.compactBytes; } @@ -343,6 +366,13 @@ public final boolean isEstimationMode() { return getNumLevels() > 1; } + /** + * This resets the current sketch back to zero entries. + * It retains key parameters such as k, m, and + * SketchType (double or float). + */ + public abstract void reset(); + /** * Returns serialized sketch in a compact byte array form. * @return serialized sketch in a compact byte array form. 
@@ -693,7 +723,7 @@ static WritableMemory memorySpaceMgmt( newWmem = sketch.memReqSvr.request(oldWmem, requiredSketchBytes); oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble } - else { //Expand in current memory + else { //Expand or contract in current memory newWmem = oldWmem; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index f3ae36e05..4fcaa1bf7 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -282,32 +280,32 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - final KllDirectDoublesSketch sketch1 = getDDSketch(DEFAULT_M - 1, 0); + final KllDirectDoublesSketch sketch1 = getDDSketch(KllSketch.DEFAULT_M - 1, 0); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - final KllDirectDoublesSketch sketch1 = getDDSketch(MAX_K + 1, 0); + final KllDirectDoublesSketch sketch1 = getDDSketch(KllSketch.MAX_K + 1, 0); } @Test public void minK() { - final KllDirectDoublesSketch sketch = getDDSketch(DEFAULT_M, 0); + final KllDirectDoublesSketch sketch = getDDSketch(KllSketch.DEFAULT_M, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), DEFAULT_M); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final 
KllDirectDoublesSketch sketch = getDDSketch(MAX_K, 0); + final KllDirectDoublesSketch sketch = getDDSketch(KllSketch.MAX_K, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -461,8 +459,8 @@ public void checkSimpleMergeDirect() { //used for troubleshooting println(sk2.toString(true, true)); WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); - KllDirectDoublesSketch dsk1 = KllDirectDoublesSketch.writableWrap(wmem1, new DefaultMemoryRequestServer()); - KllDirectDoublesSketch dsk2 = KllDirectDoublesSketch.writableWrap(wmem2, new DefaultMemoryRequestServer()); + KllDirectDoublesSketch dsk1 = KllDirectDoublesSketch.writableWrap(wmem1, memReqSvr); + KllDirectDoublesSketch dsk2 = KllDirectDoublesSketch.writableWrap(wmem2, memReqSvr); println("BEFORE MERGE"); println(dsk1.toString(true, true)); dsk1.merge(dsk2); @@ -485,7 +483,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), k + 1); assertEquals(sk.getNumRetained(), 11); @@ -505,7 +503,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 0); assertEquals(sk.getNumRetained(), 0); @@ 
-526,7 +524,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectDoublesSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 1); assertEquals(sk.getNumRetained(), 1); @@ -566,6 +564,23 @@ public void checkGetWritableMemory() { assertTrue(KllSketch.isCompatible()); } + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + double min1 = sk.getMinValue(); + double max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + double min2 = sk.getMinValue(); + double max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { KllDoublesSketch sk = new KllDoublesSketch(k); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index b7f88b72b..4ff004254 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -282,32 +280,32 @@ public void mergeMinAndMaxFromOther() { 
@SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - final KllDirectFloatsSketch sketch1 = getDFSketch(DEFAULT_M - 1, 0); + final KllDirectFloatsSketch sketch1 = getDFSketch(KllSketch.DEFAULT_M - 1, 0); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - final KllDirectFloatsSketch sketch1 = getDFSketch(MAX_K + 1, 0); + final KllDirectFloatsSketch sketch1 = getDFSketch(KllSketch.MAX_K + 1, 0); } @Test public void minK() { - final KllDirectFloatsSketch sketch = getDFSketch(DEFAULT_M, 0); + final KllDirectFloatsSketch sketch = getDFSketch(KllSketch.DEFAULT_M, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), DEFAULT_M); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllDirectFloatsSketch sketch = getDFSketch(MAX_K, 0); + final KllDirectFloatsSketch sketch = getDFSketch(KllSketch.MAX_K, 0); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -461,8 +459,8 @@ public void checkSimpleMergeDirect() { //used for troubleshooting println(sk2.toString(true, true)); WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); - KllDirectFloatsSketch dsk1 = KllDirectFloatsSketch.writableWrap(wmem1, new DefaultMemoryRequestServer()); - KllDirectFloatsSketch dsk2 = KllDirectFloatsSketch.writableWrap(wmem2, new DefaultMemoryRequestServer()); + KllDirectFloatsSketch dsk1 = KllDirectFloatsSketch.writableWrap(wmem1, memReqSvr); + KllDirectFloatsSketch dsk2 = KllDirectFloatsSketch.writableWrap(wmem2, memReqSvr); println("BEFORE MERGE"); 
println(dsk1.toString(true, true)); dsk1.merge(dsk2); @@ -485,7 +483,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), k + 1); assertEquals(sk.getNumRetained(), 11); @@ -505,7 +503,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 0); assertEquals(sk.getNumRetained(), 0); @@ -526,7 +524,7 @@ public void checkSketchInitializeDirectDoubleUpdatableMem() { compBytes = sk2.toUpdatableByteArray(); wmem = WritableMemory.writableWrap(compBytes); println(KllPreambleUtil.toString(wmem)); - sk = KllDirectFloatsSketch.writableWrap(wmem, new DefaultMemoryRequestServer()); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); assertEquals(sk.getK(), k); assertEquals(sk.getN(), 1); assertEquals(sk.getNumRetained(), 1); @@ -566,6 +564,23 @@ public void checkGetWritableMemory() { assertTrue(KllSketch.isCompatible()); } + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(3000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + float min1 = sk.getMinValue(); + float max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + float min2 = sk.getMinValue(); + float max2 = sk.getMaxValue(); + assertEquals(n2, n1); 
+ assertEquals(min2, min1); + assertEquals(max2, max1); + } private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { KllFloatsSketch sk = new KllFloatsSketch(k); diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 0172068f4..b149d3c1c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -279,32 +277,32 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllDoublesSketch(DEFAULT_M - 1); + new KllDoublesSketch(KllSketch.DEFAULT_M - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllDoublesSketch(MAX_K + 1); + new KllDoublesSketch(KllSketch.MAX_K + 1); } @Test public void minK() { - final KllDoublesSketch sketch = new KllDoublesSketch(DEFAULT_M); + final KllDoublesSketch sketch = new KllDoublesSketch(KllSketch.DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), DEFAULT_M); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllDoublesSketch sketch = new KllDoublesSketch(MAX_K); + final KllDoublesSketch sketch = new KllDoublesSketch(KllSketch.MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); 
assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -396,4 +394,21 @@ public void getQuantiles() { assertEquals(quantiles1[2], 3.0); } + @Test + public void checkReset() { + KllDoublesSketch sk = new KllDoublesSketch(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + double min1 = sk.getMinValue(); + double max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + double min2 = sk.getMinValue(); + double max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index ee74063d7..466bdf2a3 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -19,8 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.Util.getResourceBytes; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -280,32 +278,32 @@ public void mergeMinAndMaxFromOther() { @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllFloatsSketch(DEFAULT_M - 1); + new KllFloatsSketch(KllSketch.DEFAULT_M - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllFloatsSketch(MAX_K + 1); + new KllFloatsSketch(KllSketch.MAX_K + 1); } @Test public void minK() { - final KllFloatsSketch sketch = new KllFloatsSketch(DEFAULT_M); + final KllFloatsSketch sketch = new KllFloatsSketch(KllSketch.DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - 
assertEquals(sketch.getK(), DEFAULT_M); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllFloatsSketch sketch = new KllFloatsSketch(MAX_K); + final KllFloatsSketch sketch = new KllFloatsSketch(KllSketch.MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -412,4 +410,21 @@ public void checkDeprecatedMethods() { assertEquals(bytes, 832); } + @Test + public void checkReset() { + KllFloatsSketch sk = new KllFloatsSketch(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + float min1 = sk.getMinValue(); + float max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + float min2 = sk.getMinValue(); + float max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + } diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java index b50a6d0a4..791bdd5c5 100644 --- a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -20,8 +20,6 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllHelper.checkM; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K; -import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.testng.Assert.assertEquals; @@ -57,7 +55,7 @@ public void checkCheckM() { @Test public void checkGetKFromEps() { - final int k = DEFAULT_K; + final int k = KllSketch.DEFAULT_K; 
final double eps = KllHelper.getNormalizedRankError(k, false); final double epsPmf = KllHelper.getNormalizedRankError(k, true); final int kEps = KllSketch.getKFromEpsilon(eps, false); @@ -126,25 +124,25 @@ public void checkUpdatableSerDe() { @Test public void getMaxCompactDoublesSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, DOUBLES_SKETCH, false); + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, DOUBLES_SKETCH, false); assertEquals(sizeBytes, 5704); } @Test public void getMaxCompactFloatsSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, FLOATS_SKETCH, false); + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, FLOATS_SKETCH, false); assertEquals(sizeBytes, 2908); } @Test public void getMaxUpdatableDoubleSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, DOUBLES_SKETCH, true); + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, DOUBLES_SKETCH, true); assertEquals(sizeBytes, 5708); } @Test public void getMaxUpdatableFloatsSerializedSizeBytes() { - final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(DEFAULT_K, DEFAULT_M, 1L << 30, FLOATS_SKETCH, true); + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, FLOATS_SKETCH, true); assertEquals(sizeBytes, 2912); } @@ -163,7 +161,7 @@ public void getStatsAtNumLevels2() { int k = 20; int m = 8; int numLevels = 2; - KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, DEFAULT_M, numLevels, false); + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, KllSketch.DEFAULT_M, numLevels, false); assertEquals(lvlStats.numLevels, 2); assertEquals(lvlStats.items, 33); } @@ -182,7 +180,7 @@ public void testGetAllLevelStats2() { long 
n = 533; int k = 200; int m = 8; - KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, DEFAULT_M, n, DOUBLES_SKETCH, false); + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, DOUBLES_SKETCH, false); assertEquals(gStats.numLevels, 2); assertEquals(gStats.maxItems, 333); From d29627dfc3774e31addaa0d04ca2eb78493ff7bb Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 31 Mar 2022 17:31:39 -0700 Subject: [PATCH 26/31] Removed mentions of m. --- .../org/apache/datasketches/kll/KllDirectSketch.java | 2 +- .../apache/datasketches/kll/KllDoublesSketch.java | 3 +-- .../org/apache/datasketches/kll/KllFloatsSketch.java | 3 +-- .../org/apache/datasketches/kll/KllHeapSketch.java | 2 +- .../java/org/apache/datasketches/kll/KllSketch.java | 12 +++++------- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 13725a518..e4c000b10 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -65,7 +65,7 @@ public int getK() { } @Override - public int getM() { + int getM() { return extractM(wmem); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 08f7c5706..067a570f5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -52,7 +52,7 @@ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { } /** - * Heap constructor with the default k = 200, and DEFAULT_M of 8. + * Heap constructor with the default k = 200. * This will have a rank error of about 1.65%. */ public KllDoublesSketch() { @@ -63,7 +63,6 @@ public KllDoublesSketch() { * Heap constructor with a given parameter k. 
k can be any value between DEFAULT_M and * 65535, inclusive. The default k = 200 results in a normalized rank error of about * 1.65%. Higher values of K will have smaller error but the sketch will be larger (and slower). - * This constructor assumes the DEFAULT_M, which is 8. * @param k parameter that controls size of the sketch and accuracy of estimates */ public KllDoublesSketch(final int k) { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 4c53675f8..427fea83e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -52,7 +52,7 @@ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { } /** - * Heap constructor with the default k = 200, and DEFAULT_M of 8. + * Heap constructor with the default k = 200. * This will have a rank error of about 1.65%. */ public KllFloatsSketch() { @@ -63,7 +63,6 @@ public KllFloatsSketch() { * Heap constructor with a given parameter k. k can be any value between DEFAULT_M and * 65535, inclusive. The default k = 200 results in a normalized rank error of about * 1.65%. Higher values of K will have smaller error but the sketch will be larger (and slower). - * This constructor assumes the DEFAULT_M, which is 8. 
* @param k parameter that controls size of the sketch and accuracy of estimates */ public KllFloatsSketch(final int k) { diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index ab050f66c..fcfcda642 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -61,7 +61,7 @@ public int getK() { } @Override - public int getM() { + int getM() { return m; } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 64bb19606..df4b85db6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -183,8 +183,7 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { /** * Returns upper bound on the compact serialized size of a FloatsSketch given a parameter * k and stream length. This method can be used if allocation of storage - * is necessary beforehand. This assumes the DEFAULT_M = 8 used in older sketches, it will not - * work with other values of m. + * is necessary beforehand. * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @return upper bound on the compact serialized size @@ -199,7 +198,6 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { /** * Returns upper bound on the serialized size of a KllSketch given the following parameters. - * It assumes the default value of m, which is 8. 
* @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH @@ -302,10 +300,10 @@ public final int getCurrentUpdatableSerializedSizeBytes() { public abstract int getK(); /** - * Returns the user configured parameter m - * @return the user configured parameter m + * Returns the configured parameter m + * @return the configured parameter m */ - public abstract int getM(); + abstract int getM(); /** * Returns the length of the input stream in items. @@ -368,7 +366,7 @@ public final boolean isEstimationMode() { /** * This resets the current sketch back to zero entries. - * It retains key parameters such as k, m, and + * It retains key parameters such as k and * SketchType (double or float). */ public abstract void reset(); From 091635be25188068eced3ccbaf030c254c447710 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 4 Apr 2022 18:04:28 -0700 Subject: [PATCH 27/31] This post fixes all the issues in the last set of reviews except two: 1) I haven't renamed the memory dedicated "insert*" and "extract*" methods, and 2) I haven't done the hierarchy refactoring along the lines we discussed. I want to check this in so that we can at least resolve all the other conversations.
--- .../kll/KllDirectDoublesSketch.java | 14 +- .../kll/KllDirectFloatsSketch.java | 12 +- .../datasketches/kll/KllDirectSketch.java | 1 + .../datasketches/kll/KllDoublesSketch.java | 3 +- .../datasketches/kll/KllFloatsSketch.java | 3 +- .../apache/datasketches/kll/KllHelper.java | 13 +- .../datasketches/kll/KllMemoryValidate.java | 289 +++++++----------- .../datasketches/kll/KllPreambleUtil.java | 16 +- .../apache/datasketches/kll/KllSketch.java | 133 ++++---- .../kll/KllDirectDoublesSketchTest.java | 3 +- .../kll/KllDirectFloatsSketchTest.java | 5 +- .../datasketches/kll/KllFloatsSketchTest.java | 2 +- ...teTest.java => KllMemoryValidateTest.java} | 66 ++-- 13 files changed, 225 insertions(+), 335 deletions(-) rename src/test/java/org/apache/datasketches/kll/{MemoryValidateTest.java => KllMemoryValidateTest.java} (72%) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index c6720a172..82ac35344 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -21,16 +21,16 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; import static 
org.apache.datasketches.kll.KllPreambleUtil.insertFlags; import static org.apache.datasketches.kll.KllPreambleUtil.insertK; import static org.apache.datasketches.kll.KllPreambleUtil.insertM; +import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; import static org.apache.datasketches.kll.KllPreambleUtil.insertN; import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; @@ -39,6 +39,7 @@ import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -92,14 +93,14 @@ public static KllDirectDoublesSketch newInstance(final int k, final WritableMemo /** * Create a new instance of this sketch. * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width. + * @param m parameter that controls the minimum level width in items. 
* @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new instance of this sketch */ static KllDirectDoublesSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - insertPreInts(dstMem, PREAMBLE_INTS_DOUBLE); + insertPreInts(dstMem, PREAMBLE_INTS_FULL); insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); insertFamilyID(dstMem, Family.KLL.getID()); insertFlags(dstMem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); @@ -108,7 +109,7 @@ static KllDirectDoublesSketch newInstance(final int k, final int m, final Writab insertN(dstMem, 0); insertMinK(dstMem, k); insertNumLevels(dstMem, 1); - int offset = DATA_START_ADR_DOUBLE; + int offset = DATA_START_ADR; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); offset += 2 * Integer.BYTES; dstMem.putDoubleArray(offset, new double[] {Double.NaN, Double.NaN}, 0, 2); @@ -359,6 +360,7 @@ void setDoubleItemsArray(final double[] doubleItems) { @Override void setDoubleItemsArrayAt(final int index, final double value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 11a5298d5..bd3091452 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -21,8 +21,8 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static 
org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; @@ -38,6 +38,7 @@ import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.Family; import org.apache.datasketches.memory.MemoryRequestServer; @@ -92,14 +93,14 @@ public static KllDirectFloatsSketch newInstance(final int k, final WritableMemor /** * Create a new instance of this sketch. * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width. + * @param m parameter that controls the minimum level width in items. 
* @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new instance of this sketch */ static KllDirectFloatsSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - insertPreInts(dstMem, PREAMBLE_INTS_FLOAT); + insertPreInts(dstMem, PREAMBLE_INTS_FULL); insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); insertFamilyID(dstMem, Family.KLL.getID()); insertFlags(dstMem, UPDATABLE_BIT_MASK); @@ -108,7 +109,7 @@ static KllDirectFloatsSketch newInstance(final int k, final int m, final Writabl insertN(dstMem, 0); insertMinK(dstMem, k); insertNumLevels(dstMem, 1); - int offset = DATA_START_ADR_FLOAT; + int offset = DATA_START_ADR; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); offset += 2 * Integer.BYTES; dstMem.putFloatArray(offset, new float[] {Float.NaN, Float.NaN}, 0, 2); @@ -365,6 +366,7 @@ void setFloatItemsArray(final float[] floatItems) { @Override void setFloatItemsArrayAt(final int index, final float value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index e4c000b10..5a4b6e8e7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -30,6 +30,7 @@ import static org.apache.datasketches.kll.KllPreambleUtil.insertN; import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; diff --git 
a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 067a570f5..dc7ad92b5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -24,6 +24,7 @@ import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -78,7 +79,7 @@ public KllDoublesSketch(final int k) { * Other values of m should be considered experimental as they have not been * as well characterized. * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width. + * @param m parameter that controls the minimum level width in items. 
*/ KllDoublesSketch(final int k, final int m) { super(k, m, SketchType.DOUBLES_SKETCH); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 427fea83e..0fbabb909 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -24,6 +24,7 @@ import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -78,7 +79,7 @@ public KllFloatsSketch(final int k) { * Other values of m should be considered experimental as they have not been * as well characterized. * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width. + * @param m parameter that controls the minimum level width in items. 
*/ KllFloatsSketch(final int k, final int m) { super(k, m, SketchType.FLOATS_SKETCH); diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index f089f8da3..0b24fffc6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -21,8 +21,7 @@ import static java.lang.Math.pow; import static org.apache.datasketches.Util.floorPowerOf2; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllSketch.CDF_COEF; import static org.apache.datasketches.kll.KllSketch.CDF_EXP; import static org.apache.datasketches.kll.KllSketch.PMF_COEF; @@ -144,18 +143,14 @@ public static GrowthStats getGrowthSchemeForGivenN( } int compactBytes; int updatableBytes; + final int typeBytes = (sketchType == DOUBLES_SKETCH) ? 
Double.BYTES : Float.BYTES; do { numLevels++; lvlStats = getFinalSketchStatsAtNumLevels(k, m, numLevels, false); final int maxItems = lvlStats.items; final long maxN = lvlStats.n; - if (sketchType == DOUBLES_SKETCH) { - compactBytes = maxItems * Double.BYTES + numLevels * Integer.BYTES + 2 * Double.BYTES + DATA_START_ADR_DOUBLE; - updatableBytes = compactBytes + Integer.BYTES; - } else { - compactBytes = maxItems * Float.BYTES + numLevels * Integer.BYTES + 2 * Float.BYTES + DATA_START_ADR_FLOAT; - updatableBytes = compactBytes + Integer.BYTES; - } + compactBytes = maxItems * typeBytes + numLevels * Integer.BYTES + 2 * typeBytes + DATA_START_ADR; + updatableBytes = compactBytes + Integer.BYTES; if (printGrowthScheme) { printf("%10d %,10d %,20d %,13d %,15d\n", numLevels, maxItems, maxN, compactBytes, updatableBytes); } diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 3b8dbc3a0..214bbc675 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -26,14 +26,13 @@ import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SER_VER; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SINGLEBIT; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_SER_VER; -import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.DOUBLEBIT_AND_PREINTS; -import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.FLOATBIT_AND_PREINTS; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_PREINTS; +import static 
org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.INVALID_PREINTS; +import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.memoryValidateThrow; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; @@ -86,7 +85,7 @@ final class KllMemoryValidate { // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1, etc. long n; // next 4 bytes - int dyMinK; + int minK; int numLevels; // derived int capacityItems; //capacity of Items array for exporting and for Updatable form @@ -118,232 +117,156 @@ final class KllMemoryValidate { m = extractM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 0); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); } if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } void compactMemoryValidate(final Memory srcMem) { - if (empty && singleItem) { memoryValidateThrow(EMPTYBIT_AND_SINGLEBIT, 0); } - final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0) | (doublesSketch ? 
8 : 0); + if (empty && singleItem) { memoryValidateThrow(EMPTYBIT_AND_SINGLEBIT, flags); } + final int typeBytes = doublesSketch ? Double.BYTES : Float.BYTES; + final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0); switch (sw) { - case 0: { //FLOAT_FULL_COMPACT - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(FLOATBIT_AND_PREINTS, preInts); } + case 0: { //FULL_COMPACT + if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } - layout = Layout.FLOAT_FULL_COMPACT; + layout = doublesSketch ? Layout.DOUBLE_FULL_COMPACT : Layout.FLOAT_FULL_COMPACT; n = extractN(srcMem); - dyMinK = extractMinK(srcMem); + minK = extractMinK(srcMem); numLevels = extractNumLevels(srcMem); - int offset = DATA_START_ADR_FLOAT; + int offset = DATA_START_ADR; + // LEVELS MEM final int[] myLevelsArr = new int[numLevels + 1]; srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //copies all except the last one myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem, offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; // but one larger than srcMem - // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2L * Float.BYTES); + + minMaxArrCompact = srcMem.region(offset, 2L * typeBytes); // MIN/MAX MEM offset += (int)minMaxArrCompact.getCapacity(); + // ITEMS MEM itemsArrStart = offset; capacityItems = myLevelsArr[numLevels]; itemsRetained = capacityItems - myLevelsArr[0]; - final float[] myItemsArr = new float[capacityItems]; - srcMem.getFloatArray(offset, myItemsArr, myLevelsArr[0], itemsRetained); - itemsArrCompact = Memory.wrap(myItemsArr); - sketchBytes = offset + itemsRetained * Float.BYTES; + if (doublesSketch) { + final double[] myItemsArr = new double[capacityItems]; + srcMem.getDoubleArray(offset, myItemsArr, myLevelsArr[0], 
itemsRetained); + itemsArrCompact = Memory.wrap(myItemsArr); + } else { + final float[] myItemsArr = new float[capacityItems]; + srcMem.getFloatArray(offset, myItemsArr, myLevelsArr[0], itemsRetained); + itemsArrCompact = Memory.wrap(myItemsArr); + } + sketchBytes = offset + itemsRetained * typeBytes; break; } - case 1: { //FLOAT_EMPTY_COMPACT + case 1: { //EMPTY_COMPACT if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } - layout = Layout.FLOAT_EMPTY_COMPACT; + layout = doublesSketch ? Layout.DOUBLE_EMPTY_COMPACT : Layout.FLOAT_EMPTY_COMPACT; n = 0; //assumed - dyMinK = k; //assumed + minK = k; //assumed numLevels = 1; //assumed - // LEVELS MEM - levelsArrCompact = Memory.wrap(new int[] {k, k}); - // MIN/MAX MEM - minMaxArrCompact = Memory.wrap(new float[] {Float.NaN, Float.NaN}); - // ITEMS MEM capacityItems = k; itemsRetained = 0; - itemsArrCompact = Memory.wrap(new float[k]); - sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty - itemsArrStart = DATA_START_ADR_SINGLE_ITEM; - break; - } - case 4: { //FLOAT_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(SINGLEBIT_AND_SER_VER, serVer); } - layout = Layout.FLOAT_SINGLE_COMPACT; - n = 1; - dyMinK = k; - numLevels = 1; - // LEVELS MEM - levelsArrCompact = Memory.wrap(new int[] {k - 1, k}); - final float minMax = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM); - // MIN/MAX MEM - minMaxArrCompact = Memory.wrap(new float[] {minMax, minMax}); - // ITEMS MEM - capacityItems = k; - itemsRetained = 1; - final float[] myFloatItems = new float[k]; - myFloatItems[k - 1] = minMax; - itemsArrCompact = Memory.wrap(myFloatItems); - sketchBytes = DATA_START_ADR_SINGLE_ITEM + Float.BYTES; - itemsArrStart = DATA_START_ADR_SINGLE_ITEM; - break; - } - case 
8: { //DOUBLE_FULL_COMPACT - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(DOUBLEBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } - layout = Layout.DOUBLE_FULL_COMPACT; - n = extractN(srcMem); - dyMinK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); - int offset = DATA_START_ADR_DOUBLE; - // LEVELS MEM - final int[] myLevelsArr = new int[numLevels + 1]; - srcMem.getIntArray(offset, myLevelsArr, 0, numLevels); //all except the last one - myLevelsArr[numLevels] = KllHelper.computeTotalItemCapacity(k, m, numLevels); //load the last one - levelsArrCompact = Memory.wrap(myLevelsArr); //separate from srcMem - offset += (int)levelsArrCompact.getCapacity() - Integer.BYTES; - // MIN/MAX MEM - minMaxArrCompact = srcMem.region(offset, 2L * Double.BYTES); - offset += (int)minMaxArrCompact.getCapacity(); - // ITEMS MEM - itemsArrStart = offset; - capacityItems = myLevelsArr[numLevels]; - itemsRetained = capacityItems - myLevelsArr[0]; - final double[] myItemsArr = new double[capacityItems]; - srcMem.getDoubleArray(offset, myItemsArr, myLevelsArr[0], itemsRetained); - itemsArrCompact = Memory.wrap(myItemsArr); - sketchBytes = offset + itemsRetained * Double.BYTES; - break; - } - case 9: { //DOUBLE_EMPTY_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } - if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } - layout = Layout.DOUBLE_EMPTY_COMPACT; - n = 0; - dyMinK = k; - numLevels = 1; - // LEVELS MEM - levelsArrCompact = Memory.wrap(new int[] {k, k}); - // MIN/MAX MEM - minMaxArrCompact = Memory.wrap(new double[] {Double.NaN, Double.NaN}); - // ITEMS MEM - capacityItems = k; - itemsRetained = 0; - itemsArrCompact = Memory.wrap(new double[k]); - sketchBytes = DATA_START_ADR_SINGLE_ITEM; //also used for empty + levelsArrCompact = Memory.wrap(new int[] {k, k}); // LEVELS MEM + 
if (doublesSketch) { + minMaxArrCompact = Memory.wrap(new double[] {Double.NaN, Double.NaN}); // MIN/MAX MEM + itemsArrCompact = Memory.wrap(new double[k]); // ITEMS MEM + } else { //Floats Sketch + minMaxArrCompact = Memory.wrap(new float[] {Float.NaN, Float.NaN}); // MIN/MAX MEM + itemsArrCompact = Memory.wrap(new float[k]); // ITEMS MEM + } + sketchBytes = DATA_START_ADR_SINGLE_ITEM; //used for empty and single item itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } - case 12: { //DOUBLE_SINGLE_COMPACT - if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(EMPTYBIT_AND_PREINTS, preInts); } + case 4: { //SINGLE_COMPACT + if (preInts != PREAMBLE_INTS_EMPTY_SINGLE) { memoryValidateThrow(SINGLEBIT_AND_PREINTS, preInts); } if (serVer != SERIAL_VERSION_SINGLE) { memoryValidateThrow(SINGLEBIT_AND_SER_VER, serVer); } - layout = Layout.DOUBLE_SINGLE_COMPACT; + layout = doublesSketch ? Layout.DOUBLE_SINGLE_COMPACT : Layout.FLOAT_SINGLE_COMPACT; n = 1; - dyMinK = k; + minK = k; numLevels = 1; - - // LEVELS MEM - levelsArrCompact = Memory.wrap(new int[] {k - 1, k}); - final double minMax = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM); - // MIN/MAX MEM - minMaxArrCompact = Memory.wrap(new double[] {minMax, minMax}); - // ITEMS MEM capacityItems = k; itemsRetained = 1; - final double[] myDoubleItems = new double[k]; - myDoubleItems[k - 1] = minMax; - itemsArrCompact = Memory.wrap(myDoubleItems); - sketchBytes = DATA_START_ADR_SINGLE_ITEM + Double.BYTES; + + levelsArrCompact = Memory.wrap(new int[] {k - 1, k}); // LEVELS MEM + if (doublesSketch) { + final double minMax = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM); + minMaxArrCompact = Memory.wrap(new double[] {minMax, minMax}); // MIN/MAX MEM + final double[] myDoubleItems = new double[k]; // ITEMS MEM + myDoubleItems[k - 1] = minMax; + itemsArrCompact = Memory.wrap(myDoubleItems); + } else { + final float minMax = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM); + minMaxArrCompact = Memory.wrap(new float[] 
{minMax, minMax}); // MIN/MAX MEM + final float[] myFloatItems = new float[k]; // ITEMS MEM + myFloatItems[k - 1] = minMax; + itemsArrCompact = Memory.wrap(myFloatItems); + } + sketchBytes = DATA_START_ADR_SINGLE_ITEM + typeBytes; itemsArrStart = DATA_START_ADR_SINGLE_ITEM; break; } - default: break; //can't happen + default: //can not happen } } void updatableMemoryValidate(final WritableMemory wSrcMem) { - if (doublesSketch) { //DOUBLE_UPDATABLE - if (preInts != PREAMBLE_INTS_DOUBLE) { memoryValidateThrow(DOUBLEBIT_AND_PREINTS, preInts); } - layout = Layout.DOUBLE_UPDATABLE; - n = extractN(wSrcMem); - empty = n == 0; //empty & singleItem are set for convenience - singleItem = n == 1; // there is no error checking on these bits - dyMinK = extractMinK(wSrcMem); - numLevels = extractNumLevels(wSrcMem); + final int typeBytes = doublesSketch ? Double.BYTES : Float.BYTES; + if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); } + layout = doublesSketch ? Layout.DOUBLE_UPDATABLE : Layout.FLOAT_UPDATABLE; - int offset = DATA_START_ADR_DOUBLE; - //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * Integer.BYTES); - offset += (int)levelsArrUpdatable.getCapacity(); - //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Double.BYTES); - offset += (int)minMaxArrUpdatable.getCapacity(); - //ITEMS - capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); - final int itemsArrBytes = capacityItems * Double.BYTES; - itemsArrStart = offset; - itemsArrUpdatable = wSrcMem.writableRegion(offset, itemsArrBytes); - sketchBytes = offset + itemsArrBytes; - } - else { //FLOAT_UPDATABLE - if (preInts != PREAMBLE_INTS_FLOAT) { memoryValidateThrow(FLOATBIT_AND_PREINTS, preInts); } - layout = Layout.FLOAT_UPDATABLE; - n = extractN(wSrcMem); - empty = n == 0; //empty & singleItem are set for convenience - singleItem = n == 1; // there is no error checking on these bits - dyMinK = 
extractMinK(wSrcMem); - numLevels = extractNumLevels(wSrcMem); - int offset = DATA_START_ADR_FLOAT; - //LEVELS - levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * Integer.BYTES); - offset += (int)levelsArrUpdatable.getCapacity(); - //MIN/MAX - minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * Float.BYTES); - offset += (int)minMaxArrUpdatable.getCapacity(); - //ITEMS - capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); - final int itemsArrBytes = capacityItems * Float.BYTES; - itemsArrStart = offset; - itemsArrUpdatable = wSrcMem.writableRegion(offset, itemsArrBytes); - sketchBytes = itemsArrStart + itemsArrBytes; - } + n = extractN(wSrcMem); + empty = n == 0; //empty & singleItem are set for convenience + singleItem = n == 1; // there is no error checking on these bits + minK = extractMinK(wSrcMem); + numLevels = extractNumLevels(wSrcMem); + + int offset = DATA_START_ADR; + + levelsArrUpdatable = wSrcMem.writableRegion(offset, (numLevels + 1L) * Integer.BYTES); //LEVELS + offset += (int)levelsArrUpdatable.getCapacity(); + + minMaxArrUpdatable = wSrcMem.writableRegion(offset, 2L * typeBytes); //MIN/MAX + offset += (int)minMaxArrUpdatable.getCapacity(); + + capacityItems = levelsArrUpdatable.getInt((long)numLevels * Integer.BYTES); //ITEMS + final int itemsArrBytes = capacityItems * typeBytes; + itemsArrStart = offset; + itemsArrUpdatable = wSrcMem.writableRegion(offset, itemsArrBytes); + sketchBytes = offset + itemsArrBytes; } - enum MemoryInputError { SRC_NOT_KLL, EMPTYBIT_AND_PREINTS, EMPTYBIT_AND_SER_VER, - SINGLEBIT_AND_SER_VER, DOUBLEBIT_AND_PREINTS, FLOATBIT_AND_PREINTS, UPDATABLEBIT_AND_SER_VER, - EMPTYBIT_AND_SINGLEBIT } + enum MemoryInputError { + SRC_NOT_KLL("FamilyID Field must be: " + Family.KLL.getID() + ", NOT: "), + EMPTYBIT_AND_PREINTS("Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: "), + EMPTYBIT_AND_SER_VER("Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: "), + 
SINGLEBIT_AND_SER_VER("Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: "), + SINGLEBIT_AND_PREINTS("Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: "), + INVALID_PREINTS("PreInts Must Be: " + PREAMBLE_INTS_FULL + ", NOT: "), + UPDATABLEBIT_AND_SER_VER("((SerVer == 3) ^ (Updatable Bit)) must = 0, NOT: "), + EMPTYBIT_AND_SINGLEBIT("Empty flag bit and SingleItem flag bit cannot both be set. Flags: "); - private static void memoryValidateThrow(final MemoryInputError errType, final int value) { - String msg = ""; - switch (errType) { - case SRC_NOT_KLL: msg = "FamilyID Field must be: " + Family.KLL.getID() + ", NOT: " + value; break; - case EMPTYBIT_AND_PREINTS: msg = - "Empty Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: " + value; break; - case EMPTYBIT_AND_SER_VER: msg = - "Empty Bit: 1 -> SerVer: " + SERIAL_VERSION_EMPTY_FULL + ", NOT: " + value; break; - case SINGLEBIT_AND_SER_VER: msg = - "Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: " + value; break; - case DOUBLEBIT_AND_PREINTS: msg = - "Double Sketch Bit: 1 -> PreInts: " + PREAMBLE_INTS_DOUBLE + ", NOT: " + value; break; - case FLOATBIT_AND_PREINTS: msg = - "Double Sketch Bit: 0 -> PreInts: " + PREAMBLE_INTS_FLOAT + ", NOT: " + value; break; - case UPDATABLEBIT_AND_SER_VER: msg = - "((SerVer == 3) ^ (Updatable Bit)) must = 0."; break; - case EMPTYBIT_AND_SINGLEBIT: msg = - "Empty flag bit and SingleItem flag bit cannot both be set. 
Flags: " + value; break; - default: msg = "Unknown error"; break; + private String msg; + + private MemoryInputError(final String msg) { + this.msg = msg; + } + + private String getMessage() { + return msg; } - throw new SketchesArgumentException(msg); + + final static void memoryValidateThrow(final MemoryInputError errType, final int value) { + throw new SketchesArgumentException(errType.getMessage() + value); + } + } } - diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 45b6e48de..bd64b5ca7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -139,22 +139,18 @@ private KllPreambleUtil() {} // MULTI-ITEM static final int N_LONG_ADR = 8; // to 15 - static final int MIN_K_SHORT_ADR = 16; // to 17 + static final int MIN_K_SHORT_ADR = 16; // to 17 static final int NUM_LEVELS_BYTE_ADR = 18; - // FLOAT SKETCH 19 is reserved for future use in float sketch - static final int DATA_START_ADR_FLOAT = 20; // float sketch, not single item - - // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch - static final int DATA_START_ADR_DOUBLE = 20; // double sketch, not single item + // 19 is reserved for future use + static final int DATA_START_ADR = 20; // Full Sketch, not single item // Other static values static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format static final byte SERIAL_VERSION_UPDATABLE = 3; // static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item - static final int PREAMBLE_INTS_FLOAT = 5; // not empty nor single item, full preamble float - static final int PREAMBLE_INTS_DOUBLE = 5; // not empty nor single item, full preamble double + static final int PREAMBLE_INTS_FULL = 5; // Full preamble, not empty nor single item // 
Flag bit masks static final int EMPTY_BIT_MASK = 1; @@ -221,7 +217,7 @@ static String memoryToString(final Memory mem) { case DOUBLE_UPDATABLE: { sb.append("Bytes 8-15: N : ").append(memChk.n).append(LS); - sb.append("Bytes 16-17: DyMinK : ").append(memChk.dyMinK).append(LS); + sb.append("Bytes 16-17: DyMinK : ").append(memChk.minK).append(LS); sb.append("Byte 18 : NumLevels : ").append(memChk.numLevels).append(LS); break; } @@ -231,7 +227,7 @@ static String memoryToString(final Memory mem) { case DOUBLE_SINGLE_COMPACT: { sb.append("Assumed : N : ").append(memChk.n).append(LS); - sb.append("Assumed : DyMinK : ").append(memChk.dyMinK).append(LS); + sb.append("Assumed : DyMinK : ").append(memChk.minK).append(LS); sb.append("Assumed : NumLevels : ").append(memChk.numLevels).append(LS); break; } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index df4b85db6..b98c902e4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -27,13 +27,11 @@ import static java.lang.Math.min; import static java.lang.Math.round; import static org.apache.datasketches.Util.isOdd; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE; -import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static 
org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; @@ -73,7 +71,7 @@ * overwritten by subsequent updates. * * Invariants: - * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. + * 1) After a compaction, or an update, or a merge, every level is sorted except for level zero. * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, * so there is room for least 1 more item in level zero. * 3) There are no gaps except at the bottom, so if levels_[0] = 0, @@ -101,7 +99,6 @@ public abstract class KllSketch { static final double CDF_COEF = 2.296; static final double CDF_EXP = 0.9723; static final Random random = new Random(); - static final boolean compatible = true; //rank 0.0 and 1.0. compatible with classic Quantiles Sketch SketchType sketchType; WritableMemory wmem; MemoryRequestServer memReqSvr; @@ -113,7 +110,10 @@ public abstract class KllSketch { public static final int DEFAULT_K = 200; /** - * The default value of M + * The default value of M. The parameter m is the minimum level size in number of items. + * Currently, the public default is 8, but this can be overridden using Package Private methods to + * 2, 4, 6 or 8, and the sketch works just fine. The value 8 was chosen as a compromise between speed and size. + * Choosing smaller values of m less than 8 will make the sketch much slower. */ static final int DEFAULT_M = 8; @@ -123,12 +123,12 @@ public abstract class KllSketch { public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short /** - * The maximum value of M + * The maximum value of M. See the Javadoc on DEFAULT_M. */ static final int MAX_M = 8; /** - * The minimum value of M + * The minimum value of M. See the Javadoc on DEFAULT_M. 
*/ static final int MIN_M = 2; @@ -188,6 +188,7 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) { * @param n stream length * @return upper bound on the compact serialized size * @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead. + * Version 3.2.0 */ @Deprecated public static int getMaxSerializedSizeBytes(final int k, final long n) { @@ -234,44 +235,42 @@ public int getSerializedSizeBytes() { : getCurrentCompactSerializedSizeBytes(); } - static int getSerializedSizeBytes(final int numLevels, final int numItems, + //numItems can be either numRetained, or current max capacity at given K and numLevels. + static int getCurrentSerializedSizeBytes(final int numLevels, final int numItems, final SketchType sketchType, final boolean updatable) { + final int typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; int levelsBytes = 0; - if (!updatable) { - if (numItems == 0) { return N_LONG_ADR; } - if (numItems == 1) { - return DATA_START_ADR_SINGLE_ITEM + (sketchType == DOUBLES_SKETCH ? 
Double.BYTES : Float.BYTES); - } - levelsBytes = numLevels * Integer.BYTES; - } else { + if (updatable) { levelsBytes = (numLevels + 1) * Integer.BYTES; - } - if (sketchType == DOUBLES_SKETCH) { - return DATA_START_ADR_DOUBLE + levelsBytes + (numItems + 2) * Double.BYTES; //+2 is for min & max } else { - return DATA_START_ADR_FLOAT + levelsBytes + (numItems + 2) * Float.BYTES; + if (numItems == 0) { return N_LONG_ADR; } + if (numItems == 1) { return DATA_START_ADR_SINGLE_ITEM + typeBytes; } + levelsBytes = numLevels * Integer.BYTES; } + return DATA_START_ADR + levelsBytes + (numItems + 2) * typeBytes; //+2 is for min & max } - final static boolean isCompatible() { - return compatible; - } + enum Error { + TGT_IS_IMMUTABLE("Given sketch Memory is immutable, cannot write."), + SRC_IS_NOT_DIRECT("Given sketch must be of type Direct."), + SRC_IS_NOT_DOUBLE("Given sketch must be of type Double."), + SRC_IS_NOT_FLOAT("Given sketch must be of type Float."), + SRC_CANNOT_BE_DIRECT("Given sketch must not be of type Direct."), + MUST_NOT_CALL("This is an artifact of inheritance and should never be called."); + + private String msg; - enum Error { TGT_IS_IMMUTABLE, SRC_IS_NOT_DIRECT, SRC_IS_NOT_DOUBLE, - SRC_IS_NOT_FLOAT, SRC_CANNOT_BE_DIRECT, MUST_NOT_CALL } - - final static void kllSketchThrow(final Error errType) { - String msg = ""; - switch (errType) { - case TGT_IS_IMMUTABLE: msg = "Given sketch Memory is immutable, cannot write."; break; - case SRC_IS_NOT_DIRECT: msg = "Given sketch must be of type Direct."; break; - case SRC_IS_NOT_DOUBLE: msg = "Given sketch must be of type Double."; break; - case SRC_IS_NOT_FLOAT: msg = "Given sketch must be of type Float."; break; - case SRC_CANNOT_BE_DIRECT: msg = "Given sketch must not be of type Direct."; break; - case MUST_NOT_CALL: msg = "This is an artifact of inheritance and should never be called."; break; - default: msg = "Unknown error."; break; + private Error(final String msg) { + this.msg = msg; + } + + private String 
getMessage() { + return msg; + } + + final static void kllSketchThrow(final Error errType) { + throw new SketchesArgumentException(errType.getMessage()); } - throw new SketchesArgumentException(msg); } //Public Non-static methods @@ -281,7 +280,7 @@ final static void kllSketchThrow(final Error errType) { * @return the current compact number of bytes this sketch would require to store. */ public final int getCurrentCompactSerializedSizeBytes() { - return KllSketch.getSerializedSizeBytes(getNumLevels(), getNumRetained(), sketchType, false); + return KllSketch.getCurrentSerializedSizeBytes(getNumLevels(), getNumRetained(), sketchType, false); } /** @@ -290,7 +289,7 @@ public final int getCurrentCompactSerializedSizeBytes() { */ public final int getCurrentUpdatableSerializedSizeBytes() { final int itemCap = KllHelper.computeTotalItemCapacity(getK(), getM(), getNumLevels()); - return KllSketch.getSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); + return KllSketch.getCurrentSerializedSizeBytes(getNumLevels(), itemCap, sketchType, true); } /** @@ -300,7 +299,10 @@ public final int getCurrentUpdatableSerializedSizeBytes() { public abstract int getK(); /** - * Returns the configured parameter m + * Returns the configured parameter m, which is the minimum level size in number of items. + * Currently, the public default is 8, but this can be overridden using Package Private methods to + * 2, 4, 6 or 8, and the sketch works just fine. The value 8 was chosen as a compromise between speed and size. + * Choosing smaller values of m will make the sketch much slower. 
* @return the configured parameter m */ abstract int getM(); @@ -409,7 +411,7 @@ final void buildHeapKllSketchFromMemory(final KllMemoryValidate memVal) { final boolean updatable = memVal.updatable; setLevelZeroSorted(memVal.level0Sorted); setN(memVal.n); - setMinK(memVal.dyMinK); + setMinK(memVal.minK); setNumLevels(memVal.numLevels); final int[] myLevelsArr = new int[getNumLevels() + 1]; @@ -519,10 +521,9 @@ final double getDoublesQuantile(final double fraction) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (isCompatible()) { - if (fraction == 0.0) { return getMinDoubleValue(); } - if (fraction == 1.0) { return getMaxDoubleValue(); } - } + //These two assumptions make KLL compatible with the previous classic Quantiles Sketch + if (fraction == 0.0) { return getMinDoubleValue(); } + if (fraction == 1.0) { return getMaxDoubleValue(); } final KllDoublesQuantileCalculator quant = getDoublesQuantileCalculator(); return quant.getQuantile(fraction); } @@ -536,8 +537,8 @@ final double[] getDoublesQuantiles(final double[] fractions) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (fraction == 0.0 && isCompatible()) { quantiles[i] = getMinDoubleValue(); } - else if (fraction == 1.0 && isCompatible()) { quantiles[i] = getMaxDoubleValue(); } + if (fraction == 0.0) { quantiles[i] = getMinDoubleValue(); } + else if (fraction == 1.0) { quantiles[i] = getMaxDoubleValue(); } else { if (quant == null) { quant = getDoublesQuantileCalculator(); @@ -622,10 +623,10 @@ final float getFloatsQuantile(final double fraction) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (isCompatible()) { - if (fraction == 0.0) { return getMinFloatValue(); } - if (fraction == 1.0) { return getMaxFloatValue(); } - } + 
//These two assumptions make KLL compatible with the previous classic Quantiles Sketch + if (fraction == 0.0) { return getMinFloatValue(); } + if (fraction == 1.0) { return getMaxFloatValue(); } + final KllFloatsQuantileCalculator quant = getFloatsQuantileCalculator(); return quant.getQuantile(fraction); } @@ -639,8 +640,8 @@ final float[] getFloatsQuantiles(final double[] fractions) { if (fraction < 0.0 || fraction > 1.0) { throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); } - if (fraction == 0.0 && isCompatible()) { quantiles[i] = getMinFloatValue(); } - else if (fraction == 1.0 && isCompatible()) { quantiles[i] = getMaxFloatValue(); } + if (fraction == 0.0) { quantiles[i] = getMinFloatValue(); } + else if (fraction == 1.0) { quantiles[i] = getMaxFloatValue(); } else { if (quant == null) { quant = getFloatsQuantileCalculator(); @@ -701,16 +702,9 @@ static WritableMemory memorySpaceMgmt( final int newItemsArrLen) { final SketchType sketchType = sketch.sketchType; final WritableMemory oldWmem = sketch.wmem; + final int startAdr = DATA_START_ADR; + final int typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; - final int typeBytes; - final int startAdr; - if (sketchType == DOUBLES_SKETCH) { - typeBytes = Double.BYTES; - startAdr = DATA_START_ADR_DOUBLE; - } else { - typeBytes = Float.BYTES; - startAdr = DATA_START_ADR_FLOAT; - } int requiredSketchBytes = startAdr; requiredSketchBytes += newLevelsArrLen * Integer.BYTES; requiredSketchBytes += 2 * typeBytes; @@ -1020,7 +1014,7 @@ final byte[] toCompactByteArrayImpl() { insertN(wmem, getN()); insertMinK(wmem, getMinK()); insertNumLevels(wmem, getNumLevels()); - offset = (doubleType) ? 
DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; + offset = DATA_START_ADR; //LOAD LEVELS ARR the last integer in levels_ is NOT serialized final int len = myLevelsArr.length - 1; @@ -1051,12 +1045,9 @@ private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wme final boolean lvlZeroSorted = sk.isLevelZeroSorted(); final boolean singleItem = sk.getN() == 1; final boolean doubleType = (sk.sketchType == DOUBLES_SKETCH); - final int preInts = - updatable - ? (doubleType ? PREAMBLE_INTS_DOUBLE : PREAMBLE_INTS_FLOAT) - : ((empty || singleItem) - ? PREAMBLE_INTS_EMPTY_SINGLE - : (doubleType) ? PREAMBLE_INTS_DOUBLE : PREAMBLE_INTS_FLOAT); + final int preInts = updatable + ? PREAMBLE_INTS_FULL + : (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL; //load the preamble insertPreInts(wmem, preInts); final int server = updatable ? SERIAL_VERSION_UPDATABLE @@ -1209,7 +1200,7 @@ final byte[] toUpdatableByteArrayImpl() { //load data final boolean doubleType = (sketchType == DOUBLES_SKETCH); - int offset = (doubleType) ? 
DATA_START_ADR_DOUBLE : DATA_START_ADR_FLOAT; + int offset = DATA_START_ADR; //LOAD LEVELS ARRAY the last integer in levels_ IS serialized final int[] myLevelsArr = getLevelsArray(); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 4fcaa1bf7..7af0269f3 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -102,7 +102,7 @@ public void manyItemsEstimationMode() { } // test getPMF - final double[] pmf = sketch.getPMF(new double[] {n / 2}); // split at median + final double[] pmf = sketch.getPMF(new double[] {n / 2.0}); // split at median assertEquals(pmf.length, 2); assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); @@ -561,7 +561,6 @@ public void checkGetWritableMemory() { assertTrue(sk.isDoublesSketch()); assertFalse(sk.isLevelZeroSorted()); assertFalse(sk.isFloatsSketch()); - assertTrue(KllSketch.isCompatible()); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index 4ff004254..4f6520173 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -377,9 +377,7 @@ public void serializeDeserializeOneItemViaUpdatableWritableWrap() { public void serializeDeserializeFullViaCompactHeapify() { final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); final int n = 1000; - for (int i = 0; i < n; i++) { - sketch1.update(i); - } + for (int i = 0; i < n; i++) { sketch1.update(i); } final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); @@ 
-561,7 +559,6 @@ public void checkGetWritableMemory() { assertTrue(sk.isFloatsSketch()); assertFalse(sk.isLevelZeroSorted()); assertFalse(sk.isDoublesSketch()); - assertTrue(KllSketch.isCompatible()); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 466bdf2a3..e1a35f584 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -100,7 +100,7 @@ public void manyItemsEstimationMode() { } // test getPMF - final double[] pmf = sketch.getPMF(new float[] {n / 2}); // split at median + final double[] pmf = sketch.getPMF(new float[] {n / 2.0F}); // split at median assertEquals(pmf.length, 2); assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); diff --git a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java similarity index 72% rename from src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java rename to src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java index 9ce967ea4..c60e65d9c 100644 --- a/src/test/java/org/apache/datasketches/kll/MemoryValidateTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java @@ -27,7 +27,7 @@ import org.testng.annotations.Test; @SuppressWarnings("unused") -public class MemoryValidateTest { +public class KllMemoryValidateTest { @Test(expectedExceptions = SketchesArgumentException.class) public void checkInvalidFamily() { @@ -67,98 +67,80 @@ public void checkInvalidUpdatableAndSerVer() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidPreIntsAndSingle() { + public void checkInvalidSingleAndPreInts() { KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = 
WritableMemory.writableWrap(byteArr); - insertFlags(wmem, UPDATABLE_BIT_MASK); - insertSerVer(wmem, SERIAL_VERSION_SINGLE); + insertPreInts(wmem, PREAMBLE_INTS_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidSerVerAndSingle2() { + public void checkInvalidSingleAndSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, SINGLE_ITEM_BIT_MASK); insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidPreIntsAndSingle2() { - KllFloatsSketch sk = new KllFloatsSketch(); + public void checkInvalidEmptyDoublesAndPreIntsFull() { + KllDoublesSketch sk = new KllDoublesSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, SINGLE_ITEM_BIT_MASK); - insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + insertPreInts(wmem, PREAMBLE_INTS_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidPreIntsAndDouble() { - KllFloatsSketch sk = new KllFloatsSketch(); + public void checkInvalidSingleDoubleCompactAndSerVer() { + KllDoublesSketch sk = new KllDoublesSketch(); + sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK); - insertPreInts(wmem, PREAMBLE_INTS_DOUBLE); - insertSerVer(wmem, SERIAL_VERSION_SINGLE); - KllMemoryValidate memVal = new KllMemoryValidate(wmem); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidDoubleCompactAndSingle() { - KllFloatsSketch sk = new KllFloatsSketch(); - byte[] byteArr = sk.toByteArray(); 
- WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, SINGLE_ITEM_BIT_MASK | DOUBLES_SKETCH_BIT_MASK); - insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInvalidDoubleUpdatableAndSerVer() { - KllFloatsSketch sk = new KllFloatsSketch(); - byte[] byteArr = sk.toByteArray(); + public void checkInvalidDoubleUpdatableAndPreInts() { + KllDoublesSketch sk = new KllDoublesSketch(); + byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); - insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); - insertPreInts(wmem, PREAMBLE_INTS_DOUBLE - 1); + insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkInvalidFloatFullAndPreInts() { KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); sk.update(2); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, 0); //float full - insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 1 - insertPreInts(wmem, PREAMBLE_INTS_FLOAT); + insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkInvalidFloatUpdatableFullAndPreInts() { KllFloatsSketch sk = new KllFloatsSketch(); - byte[] byteArr = sk.toByteArray(); + sk.update(1); sk.update(2); + byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, UPDATABLE_BIT_MASK); //float updatable full - insertSerVer(wmem, SERIAL_VERSION_UPDATABLE); - insertPreInts(wmem, 0);//should be 5 + 
insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkInvalidDoubleCompactSingleAndPreInts() { - KllFloatsSketch sk = new KllFloatsSketch(); + KllDoublesSketch sk = new KllDoublesSketch(); + sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, DOUBLES_SKETCH_BIT_MASK | SINGLE_ITEM_BIT_MASK); - insertPreInts(wmem, 5);//should be 2 - insertSerVer(wmem, SERIAL_VERSION_SINGLE); //should be 2 + insertPreInts(wmem, PREAMBLE_INTS_FULL);//should be 2, single KllMemoryValidate memVal = new KllMemoryValidate(wmem); } From 2a65b3169036960134dbd60edcbef096a5acb0ef Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 4 Apr 2022 18:16:47 -0700 Subject: [PATCH 28/31] This push fixes David's method naming issue. --- .../kll/KllDirectDoublesSketch.java | 36 ++++---- .../kll/KllDirectFloatsSketch.java | 36 ++++---- .../datasketches/kll/KllDirectSketch.java | 48 +++++------ .../datasketches/kll/KllMemoryValidate.java | 62 ++++++------- .../datasketches/kll/KllPreambleUtil.java | 86 +++++++++---------- .../apache/datasketches/kll/KllSketch.java | 58 ++++++------- .../kll/KllMemoryValidateTest.java | 26 +++--- 7 files changed, 176 insertions(+), 176 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 82ac35344..834760696 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -26,15 +26,15 @@ import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static 
org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; -import static org.apache.datasketches.kll.KllPreambleUtil.insertK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertM; -import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertN; -import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; -import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; @@ -100,15 +100,15 @@ public static KllDirectDoublesSketch newInstance(final int k, final WritableMemo */ static KllDirectDoublesSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - insertPreInts(dstMem, PREAMBLE_INTS_FULL); - insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); - insertFamilyID(dstMem, Family.KLL.getID()); - insertFlags(dstMem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); - insertK(dstMem, 
k); - insertM(dstMem, m); - insertN(dstMem, 0); - insertMinK(dstMem, k); - insertNumLevels(dstMem, 1); + setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL); + setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE); + setMemoryFamilyID(dstMem, Family.KLL.getID()); + setMemoryFlags(dstMem, DOUBLES_SKETCH_BIT_MASK | UPDATABLE_BIT_MASK); + setMemoryK(dstMem, k); + setMemoryM(dstMem, m); + setMemoryN(dstMem, 0); + setMemoryMinK(dstMem, k); + setMemoryNumLevels(dstMem, 1); int offset = DATA_START_ADR; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); offset += 2 * Integer.BYTES; diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index bd3091452..6e7d89192 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -25,15 +25,15 @@ import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.insertFlags; -import static org.apache.datasketches.kll.KllPreambleUtil.insertK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertM; -import static org.apache.datasketches.kll.KllPreambleUtil.insertN; -import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; -import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static 
org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; @@ -100,15 +100,15 @@ public static KllDirectFloatsSketch newInstance(final int k, final WritableMemor */ static KllDirectFloatsSketch newInstance(final int k, final int m, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - insertPreInts(dstMem, PREAMBLE_INTS_FULL); - insertSerVer(dstMem, SERIAL_VERSION_UPDATABLE); - insertFamilyID(dstMem, Family.KLL.getID()); - insertFlags(dstMem, UPDATABLE_BIT_MASK); - insertK(dstMem, k); - insertM(dstMem, m); - insertN(dstMem, 0); - insertMinK(dstMem, k); - insertNumLevels(dstMem, 1); + setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL); + setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE); + setMemoryFamilyID(dstMem, Family.KLL.getID()); + setMemoryFlags(dstMem, UPDATABLE_BIT_MASK); + setMemoryK(dstMem, k); + setMemoryM(dstMem, m); + setMemoryN(dstMem, 0); + setMemoryMinK(dstMem, k); + setMemoryNumLevels(dstMem, 1); int offset = DATA_START_ADR; dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); offset += 2 * Integer.BYTES; diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 5a4b6e8e7..ac0a14458 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ 
b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -19,16 +19,16 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.kll.KllPreambleUtil.extractMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.extractK; -import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractM; -import static org.apache.datasketches.kll.KllPreambleUtil.extractN; -import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertN; -import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; @@ -62,17 +62,17 @@ abstract class KllDirectSketch extends KllSketch { @Override public int getK() { - return extractK(wmem); + return getMemoryK(wmem); } @Override int getM() { - return extractM(wmem); + return 
getMemoryM(wmem); } @Override public long getN() { - return extractN(wmem); + return getMemoryN(wmem); } @Override @@ -106,7 +106,7 @@ public byte[] toUpdatableByteArray() { @Override int getMinK() { - return extractMinK(wmem); + return getMemoryMinK(wmem); } int getItemsArrLengthItems() { @@ -128,32 +128,32 @@ int getLevelsArrayAt(final int index) { @Override int getNumLevels() { - return extractNumLevels(wmem); + return getMemoryNumLevels(wmem); } @Override void incN() { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - long n = extractN(wmem); - insertN(wmem, ++n); + long n = getMemoryN(wmem); + setMemoryN(wmem, ++n); } @Override void incNumLevels() { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - int numLevels = extractNumLevels(wmem); - insertNumLevels(wmem, ++numLevels); + int numLevels = getMemoryNumLevels(wmem); + setMemoryNumLevels(wmem, ++numLevels); } @Override boolean isLevelZeroSorted() { - return extractLevelZeroSortedFlag(wmem); + return getMemoryLevelZeroSortedFlag(wmem); } @Override void setMinK(final int minK) { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - insertMinK(wmem, minK); + setMemoryMinK(wmem, minK); } @Override @@ -194,7 +194,7 @@ void setLevelsArrayUpdatable(final WritableMemory levelsMem) { @Override void setLevelZeroSorted(final boolean sorted) { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - insertLevelZeroSortedFlag(wmem, sorted); + setMemoryLevelZeroSortedFlag(wmem, sorted); } @Override @@ -205,13 +205,13 @@ void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { @Override void setN(final long n) { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - insertN(wmem, n); + setMemoryN(wmem, n); } @Override void setNumLevels(final int numLevels) { if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - insertNumLevels(wmem, numLevels); + setMemoryNumLevels(wmem, numLevels); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java 
b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 214bbc675..187866dfe 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -36,20 +36,20 @@ import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.extractDoubleSketchFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.extractEmptyFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.extractFlags; -import static org.apache.datasketches.kll.KllPreambleUtil.extractK; -import static org.apache.datasketches.kll.KllPreambleUtil.extractLevelZeroSortedFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractM; -import static org.apache.datasketches.kll.KllPreambleUtil.extractN; -import static org.apache.datasketches.kll.KllPreambleUtil.extractNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.extractPreInts; -import static org.apache.datasketches.kll.KllPreambleUtil.extractSerVer; -import static org.apache.datasketches.kll.KllPreambleUtil.extractSingleItemFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.extractUpdatableFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryDoubleSketchFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryEmptyFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryFlags; +import static 
org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySingleItemFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFlag; import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; @@ -101,20 +101,20 @@ final class KllMemoryValidate { KllMemoryValidate(final Memory srcMem) { memCapacity = (int) srcMem.getCapacity(); - preInts = extractPreInts(srcMem); - serVer = extractSerVer(srcMem); + preInts = getMemoryPreInts(srcMem); + serVer = getMemorySerVer(srcMem); - familyID = extractFamilyID(srcMem); + familyID = getMemoryFamilyID(srcMem); if (familyID != Family.KLL.getID()) { memoryValidateThrow(SRC_NOT_KLL, familyID); } famName = idToFamily(familyID).toString(); - flags = extractFlags(srcMem); - empty = extractEmptyFlag(srcMem); - level0Sorted = extractLevelZeroSortedFlag(srcMem); - singleItem = extractSingleItemFlag(srcMem); - doublesSketch = extractDoubleSketchFlag(srcMem); - updatable = extractUpdatableFlag(srcMem); - k = extractK(srcMem); - m = extractM(srcMem); + flags = getMemoryFlags(srcMem); + empty = getMemoryEmptyFlag(srcMem); + level0Sorted = getMemoryLevelZeroSortedFlag(srcMem); + singleItem = getMemorySingleItemFlag(srcMem); + doublesSketch = getMemoryDoubleSketchFlag(srcMem); + updatable = getMemoryUpdatableFlag(srcMem); + k = getMemoryK(srcMem); + m = getMemoryM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); if ((serVer == SERIAL_VERSION_UPDATABLE) ^ 
updatable) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); } @@ -132,9 +132,9 @@ void compactMemoryValidate(final Memory srcMem) { if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); } if (serVer != SERIAL_VERSION_EMPTY_FULL) { memoryValidateThrow(EMPTYBIT_AND_SER_VER, serVer); } layout = doublesSketch ? Layout.DOUBLE_FULL_COMPACT : Layout.FLOAT_FULL_COMPACT; - n = extractN(srcMem); - minK = extractMinK(srcMem); - numLevels = extractNumLevels(srcMem); + n = getMemoryN(srcMem); + minK = getMemoryMinK(srcMem); + numLevels = getMemoryNumLevels(srcMem); int offset = DATA_START_ADR; // LEVELS MEM @@ -222,11 +222,11 @@ void updatableMemoryValidate(final WritableMemory wSrcMem) { if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); } layout = doublesSketch ? Layout.DOUBLE_UPDATABLE : Layout.FLOAT_UPDATABLE; - n = extractN(wSrcMem); + n = getMemoryN(wSrcMem); empty = n == 0; //empty & singleItem are set for convenience singleItem = n == 1; // there is no error checking on these bits - minK = extractMinK(wSrcMem); - numLevels = extractNumLevels(wSrcMem); + minK = getMemoryMinK(wSrcMem); + numLevels = getMemoryNumLevels(wSrcMem); int offset = DATA_START_ADR; diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index bd64b5ca7..381af3134 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -240,120 +240,120 @@ static String memoryToString(final Memory mem) { return sb.toString(); } - static int extractPreInts(final Memory mem) { + static int getMemoryPreInts(final Memory mem) { return mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0XFF; } - static int extractSerVer(final Memory mem) { + static int getMemorySerVer(final Memory mem) { return mem.getByte(SER_VER_BYTE_ADR) & 0XFF; } - static int extractFamilyID(final Memory mem) { + static int 
getMemoryFamilyID(final Memory mem) { return mem.getByte(FAMILY_BYTE_ADR) & 0XFF; } - static int extractFlags(final Memory mem) { + static int getMemoryFlags(final Memory mem) { return mem.getByte(FLAGS_BYTE_ADR) & 0XFF; } - static boolean extractEmptyFlag(final Memory mem) { - return (extractFlags(mem) & EMPTY_BIT_MASK) != 0; + static boolean getMemoryEmptyFlag(final Memory mem) { + return (getMemoryFlags(mem) & EMPTY_BIT_MASK) != 0; } - static boolean extractLevelZeroSortedFlag(final Memory mem) { - return (extractFlags(mem) & LEVEL_ZERO_SORTED_BIT_MASK) != 0; + static boolean getMemoryLevelZeroSortedFlag(final Memory mem) { + return (getMemoryFlags(mem) & LEVEL_ZERO_SORTED_BIT_MASK) != 0; } - static boolean extractSingleItemFlag(final Memory mem) { - return (extractFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; + static boolean getMemorySingleItemFlag(final Memory mem) { + return (getMemoryFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; } - static boolean extractDoubleSketchFlag(final Memory mem) { - return (extractFlags(mem) & DOUBLES_SKETCH_BIT_MASK) != 0; + static boolean getMemoryDoubleSketchFlag(final Memory mem) { + return (getMemoryFlags(mem) & DOUBLES_SKETCH_BIT_MASK) != 0; } - static boolean extractUpdatableFlag(final Memory mem) { - return (extractFlags(mem) & UPDATABLE_BIT_MASK) != 0; + static boolean getMemoryUpdatableFlag(final Memory mem) { + return (getMemoryFlags(mem) & UPDATABLE_BIT_MASK) != 0; } - static int extractK(final Memory mem) { + static int getMemoryK(final Memory mem) { return mem.getShort(K_SHORT_ADR) & 0XFFFF; } - static int extractM(final Memory mem) { + static int getMemoryM(final Memory mem) { return mem.getByte(M_BYTE_ADR) & 0XFF; } - static long extractN(final Memory mem) { + static long getMemoryN(final Memory mem) { return mem.getLong(N_LONG_ADR); } - static int extractMinK(final Memory mem) { + static int getMemoryMinK(final Memory mem) { return mem.getShort(MIN_K_SHORT_ADR) & 0XFFFF; } - static int extractNumLevels(final Memory mem) { + 
static int getMemoryNumLevels(final Memory mem) { return mem.getByte(NUM_LEVELS_BYTE_ADR) & 0XFF; } - static void insertPreInts(final WritableMemory wmem, final int value) { + static void setMemoryPreInts(final WritableMemory wmem, final int value) { wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) value); } - static void insertSerVer(final WritableMemory wmem, final int value) { + static void setMemorySerVer(final WritableMemory wmem, final int value) { wmem.putByte(SER_VER_BYTE_ADR, (byte) value); } - static void insertFamilyID(final WritableMemory wmem, final int value) { + static void setMemoryFamilyID(final WritableMemory wmem, final int value) { wmem.putByte(FAMILY_BYTE_ADR, (byte) value); } - static void insertFlags(final WritableMemory wmem, final int value) { + static void setMemoryFlags(final WritableMemory wmem, final int value) { wmem.putByte(FLAGS_BYTE_ADR, (byte) value); } - static void insertEmptyFlag(final WritableMemory wmem, final boolean empty) { - final int flags = extractFlags(wmem); - insertFlags(wmem, empty ? flags | EMPTY_BIT_MASK : flags & ~EMPTY_BIT_MASK); + static void setMemoryEmptyFlag(final WritableMemory wmem, final boolean empty) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, empty ? flags | EMPTY_BIT_MASK : flags & ~EMPTY_BIT_MASK); } - static void insertLevelZeroSortedFlag(final WritableMemory wmem, final boolean levelZeroSorted) { - final int flags = extractFlags(wmem); - insertFlags(wmem, levelZeroSorted ? flags | LEVEL_ZERO_SORTED_BIT_MASK : flags & ~LEVEL_ZERO_SORTED_BIT_MASK); + static void setMemoryLevelZeroSortedFlag(final WritableMemory wmem, final boolean levelZeroSorted) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, levelZeroSorted ? flags | LEVEL_ZERO_SORTED_BIT_MASK : flags & ~LEVEL_ZERO_SORTED_BIT_MASK); } - static void insertSingleItemFlag(final WritableMemory wmem, final boolean singleItem) { - final int flags = extractFlags(wmem); - insertFlags(wmem, singleItem ? 
flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); + static void setMemorySingleItemFlag(final WritableMemory wmem, final boolean singleItem) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); } - static void insertDoubleSketchFlag(final WritableMemory wmem, final boolean doubleSketch) { - final int flags = extractFlags(wmem); - insertFlags(wmem, doubleSketch ? flags | DOUBLES_SKETCH_BIT_MASK : flags & ~DOUBLES_SKETCH_BIT_MASK); + static void setMemoryDoubleSketchFlag(final WritableMemory wmem, final boolean doubleSketch) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, doubleSketch ? flags | DOUBLES_SKETCH_BIT_MASK : flags & ~DOUBLES_SKETCH_BIT_MASK); } - static void insertUpdatableFlag(final WritableMemory wmem, final boolean updatable) { - final int flags = extractFlags(wmem); - insertFlags(wmem, updatable ? flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK); + static void setMemoryUpdatableFlag(final WritableMemory wmem, final boolean updatable) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, updatable ? 
flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK); } - static void insertK(final WritableMemory wmem, final int value) { + static void setMemoryK(final WritableMemory wmem, final int value) { wmem.putShort(K_SHORT_ADR, (short) value); } - static void insertM(final WritableMemory wmem, final int value) { + static void setMemoryM(final WritableMemory wmem, final int value) { wmem.putByte(M_BYTE_ADR, (byte) value); } - static void insertN(final WritableMemory wmem, final long value) { + static void setMemoryN(final WritableMemory wmem, final long value) { wmem.putLong(N_LONG_ADR, value); } - static void insertMinK(final WritableMemory wmem, final int value) { + static void setMemoryMinK(final WritableMemory wmem, final int value) { wmem.putShort(MIN_K_SHORT_ADR, (short) value); } - static void insertNumLevels(final WritableMemory wmem, final int value) { + static void setMemoryNumLevels(final WritableMemory wmem, final int value) { wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index b98c902e4..680da9380 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -35,19 +35,19 @@ import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.insertDoubleSketchFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.insertEmptyFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.insertK; -import static 
org.apache.datasketches.kll.KllPreambleUtil.insertLevelZeroSortedFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertM; -import static org.apache.datasketches.kll.KllPreambleUtil.insertN; -import static org.apache.datasketches.kll.KllPreambleUtil.insertNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.insertPreInts; -import static org.apache.datasketches.kll.KllPreambleUtil.insertSerVer; -import static org.apache.datasketches.kll.KllPreambleUtil.insertSingleItemFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.insertUpdatableFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryDoubleSketchFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryEmptyFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySingleItemFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryUpdatableFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; @@ -1011,9 +1011,9 @@ final byte[] toCompactByteArrayImpl() { } } else { // n > 1 //remainder of preamble after first 8 bytes - insertN(wmem, getN()); - insertMinK(wmem, getMinK()); - insertNumLevels(wmem, getNumLevels()); + 
setMemoryN(wmem, getN()); + setMemoryMinK(wmem, getMinK()); + setMemoryNumLevels(wmem, getNumLevels()); offset = DATA_START_ADR; //LOAD LEVELS ARR the last integer in levels_ is NOT serialized @@ -1049,18 +1049,18 @@ private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wme ? PREAMBLE_INTS_FULL : (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL; //load the preamble - insertPreInts(wmem, preInts); + setMemoryPreInts(wmem, preInts); final int server = updatable ? SERIAL_VERSION_UPDATABLE : (singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); - insertSerVer(wmem, server); - insertFamilyID(wmem, Family.KLL.getID()); - insertEmptyFlag(wmem, empty); - insertLevelZeroSortedFlag(wmem, lvlZeroSorted); - insertSingleItemFlag(wmem, singleItem); - insertDoubleSketchFlag(wmem, doubleType); - insertUpdatableFlag(wmem, updatable); - insertK(wmem, sk.getK()); - insertM(wmem, sk.getM()); + setMemorySerVer(wmem, server); + setMemoryFamilyID(wmem, Family.KLL.getID()); + setMemoryEmptyFlag(wmem, empty); + setMemoryLevelZeroSortedFlag(wmem, lvlZeroSorted); + setMemorySingleItemFlag(wmem, singleItem); + setMemoryDoubleSketchFlag(wmem, doubleType); + setMemoryUpdatableFlag(wmem, updatable); + setMemoryK(wmem, sk.getK()); + setMemoryM(wmem, sk.getM()); } @SuppressWarnings("null") @@ -1194,9 +1194,9 @@ final byte[] toUpdatableByteArrayImpl() { final WritableMemory wmem = WritableMemory.writableWrap(byteArr); loadFirst8Bytes(this, wmem, true); //remainder of preamble after first 8 bytes - insertN(wmem, getN()); - insertMinK(wmem, getMinK()); - insertNumLevels(wmem, getNumLevels()); + setMemoryN(wmem, getN()); + setMemoryMinK(wmem, getMinK()); + setMemoryNumLevels(wmem, getNumLevels()); //load data final boolean doubleType = (sketchType == DOUBLES_SKETCH); diff --git a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java index c60e65d9c..324954156 
100644 --- a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java @@ -34,7 +34,7 @@ public void checkInvalidFamily() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFamilyID(wmem, Family.KLL.getID() - 1); + setMemoryFamilyID(wmem, Family.KLL.getID() - 1); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -43,7 +43,7 @@ public void checkInvalidSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL - 1); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL - 1); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -52,7 +52,7 @@ public void checkInvalidEmptyAndSingle() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, EMPTY_BIT_MASK | SINGLE_ITEM_BIT_MASK); + setMemoryFlags(wmem, EMPTY_BIT_MASK | SINGLE_ITEM_BIT_MASK); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -61,8 +61,8 @@ public void checkInvalidUpdatableAndSerVer() { KllFloatsSketch sk = new KllFloatsSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertFlags(wmem, UPDATABLE_BIT_MASK); - insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + setMemoryFlags(wmem, UPDATABLE_BIT_MASK); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -72,7 +72,7 @@ public void checkInvalidSingleAndPreInts() { sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_FULL); + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL); KllMemoryValidate memVal = new 
KllMemoryValidate(wmem); } @@ -82,7 +82,7 @@ public void checkInvalidSingleAndSerVer() { sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -91,7 +91,7 @@ public void checkInvalidEmptyDoublesAndPreIntsFull() { KllDoublesSketch sk = new KllDoublesSketch(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_FULL); + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -101,7 +101,7 @@ public void checkInvalidSingleDoubleCompactAndSerVer() { sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertSerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -110,7 +110,7 @@ public void checkInvalidDoubleUpdatableAndPreInts() { KllDoublesSketch sk = new KllDoublesSketch(); byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -120,7 +120,7 @@ public void checkInvalidFloatFullAndPreInts() { sk.update(1); sk.update(2); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -130,7 +130,7 @@ public void checkInvalidFloatUpdatableFullAndPreInts() { sk.update(1); sk.update(2); byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = 
WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem); } @@ -140,7 +140,7 @@ public void checkInvalidDoubleCompactSingleAndPreInts() { sk.update(1); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - insertPreInts(wmem, PREAMBLE_INTS_FULL);//should be 2, single + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL);//should be 2, single KllMemoryValidate memVal = new KllMemoryValidate(wmem); } From e93e8cdcf245b638c4a28aafb98effd5cbd8ded8 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 4 Apr 2022 20:34:33 -0700 Subject: [PATCH 29/31] Interim 12 --- .../kll/KllDirectDoublesSketch.java | 16 +++----- .../kll/KllDirectFloatsSketch.java | 16 +++----- .../datasketches/kll/KllDirectSketch.java | 7 ++++ .../datasketches/kll/KllDoublesSketch.java | 4 +- .../datasketches/kll/KllFloatsSketch.java | 4 +- .../datasketches/kll/KllPreambleUtil.java | 4 +- .../apache/datasketches/kll/KllSketch.java | 38 ++++++++++--------- 7 files changed, 45 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 834760696..9560c8799 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -36,8 +36,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DIRECT; +import static 
org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; @@ -150,9 +150,7 @@ public double[] getCDF(final double[] splitPoints) { * * @return the max value of the stream */ - public double getMaxValue() { - return getMaxDoubleValue(); - } + public double getMaxValue() { return getMaxDoubleValue(); } /** * Returns the min value of the stream. @@ -160,9 +158,7 @@ public double getMaxValue() { * * @return the min value of the stream */ - public double getMinValue() { - return getMinDoubleValue(); - } + public double getMinValue() { return getMinDoubleValue(); } /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream @@ -303,8 +299,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(SRC_IS_NOT_DIRECT); } - if (!other.isDoublesSketch()) { kllSketchThrow(SRC_IS_NOT_DOUBLE); } + if (!other.isDirect()) { kllSketchThrow(SRC_MUST_BE_DIRECT); } + if (!other.isDoublesSketch()) { kllSketchThrow(SRC_MUST_BE_DOUBLE); } mergeDoubleImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 6e7d89192..3ebb9c7e6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -35,8 +35,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DIRECT; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; 
+import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DIRECT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; @@ -150,9 +150,7 @@ public double[] getCDF(final float[] splitPoints) { * * @return the max value of the stream */ - public float getMaxValue() { - return getMaxFloatValue(); - } + public float getMaxValue() { return getMaxFloatValue(); } /** * Returns the min value of the stream. @@ -160,9 +158,7 @@ public float getMaxValue() { * * @return the min value of the stream */ - public float getMinValue() { - return getMinFloatValue(); - } + public float getMinValue() { return getMinFloatValue(); } /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream @@ -303,8 +299,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(SRC_IS_NOT_DIRECT); } - if (!other.isFloatsSketch()) { kllSketchThrow(SRC_IS_NOT_FLOAT); } + if (!other.isDirect()) { kllSketchThrow(SRC_MUST_BE_DIRECT); } + if (!other.isFloatsSketch()) { kllSketchThrow(SRC_MUST_BE_FLOAT); } mergeFloatImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index ac0a14458..556b8dbf9 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -77,6 +77,7 @@ public long getN() { @Override public void reset() { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int k = getK(); setN(0); setMinK(k); @@ -158,6 +159,7 @@ void setMinK(final int minK) { @Override void setItemsArrayUpdatable(final WritableMemory itemsMem) { + if (!updatable) { 
kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable = itemsMem; } @@ -169,11 +171,13 @@ void setLevelsArray(final int[] levelsArr) { @Override void setLevelsArrayAt(final int index, final int value) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable.putInt((long)index * Integer.BYTES, value); } @Override void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int offset = index * Integer.BYTES; final int curV = levelsArrUpdatable.getInt(offset); levelsArrUpdatable.putInt(offset, curV - minusEq); @@ -181,6 +185,7 @@ void setLevelsArrayAtMinusEq(final int index, final int minusEq) { @Override void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int offset = index * Integer.BYTES; final int curV = levelsArrUpdatable.getInt(offset); levelsArrUpdatable.putInt(offset, curV + plusEq); @@ -188,6 +193,7 @@ void setLevelsArrayAtPlusEq(final int index, final int plusEq) { @Override void setLevelsArrayUpdatable(final WritableMemory levelsMem) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable = levelsMem; } @@ -199,6 +205,7 @@ void setLevelZeroSorted(final boolean sorted) { @Override void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { + if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable = minMaxMem; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index dc7ad92b5..63a333afa 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -21,7 +21,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE; +import static 
org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; @@ -284,7 +284,7 @@ public KllDoublesSketchIterator iterator() { */ public void merge(final KllSketch other) { if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } - if (!other.isDoublesSketch()) { kllSketchThrow(SRC_IS_NOT_DOUBLE); } + if (!other.isDoublesSketch()) { kllSketchThrow(SRC_MUST_BE_DOUBLE); } mergeDoubleImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 0fbabb909..b6c1956c7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,7 +21,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; @@ -284,7 +284,7 @@ public KllFloatsSketchIterator iterator() { */ public void merge(final KllFloatsSketch other) { if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } - if (!other.isFloatsSketch()) { kllSketchThrow(SRC_IS_NOT_FLOAT); } + if (!other.isFloatsSketch()) { kllSketchThrow(SRC_MUST_BE_FLOAT); } mergeFloatImpl(other); } diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 381af3134..548bd045e 100644 --- 
a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -217,7 +217,7 @@ static String memoryToString(final Memory mem) { case DOUBLE_UPDATABLE: { sb.append("Bytes 8-15: N : ").append(memChk.n).append(LS); - sb.append("Bytes 16-17: DyMinK : ").append(memChk.minK).append(LS); + sb.append("Bytes 16-17: MinK : ").append(memChk.minK).append(LS); sb.append("Byte 18 : NumLevels : ").append(memChk.numLevels).append(LS); break; } @@ -227,7 +227,7 @@ static String memoryToString(final Memory mem) { case DOUBLE_SINGLE_COMPACT: { sb.append("Assumed : N : ").append(memChk.n).append(LS); - sb.append("Assumed : DyMinK : ").append(memChk.minK).append(LS); + sb.append("Assumed : MinK : ").append(memChk.minK).append(LS); sb.append("Assumed : NumLevels : ").append(memChk.numLevels).append(LS); break; } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 680da9380..b366e38b1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -252,10 +252,10 @@ static int getCurrentSerializedSizeBytes(final int numLevels, final int numItems enum Error { TGT_IS_IMMUTABLE("Given sketch Memory is immutable, cannot write."), - SRC_IS_NOT_DIRECT("Given sketch must be of type Direct."), - SRC_IS_NOT_DOUBLE("Given sketch must be of type Double."), - SRC_IS_NOT_FLOAT("Given sketch must be of type Float."), - SRC_CANNOT_BE_DIRECT("Given sketch must not be of type Direct."), + SRC_MUST_BE_DIRECT("Given sketch must be of type Direct."), + SRC_MUST_BE_DOUBLE("Given sketch must be of type Double."), + SRC_MUST_BE_FLOAT("Given sketch must be of type Float."), + SRC_CANNOT_BE_DIRECT("Given sketch cannot be of type Direct."), MUST_NOT_CALL("This is an artifact of inheritance and should never be called."); private String msg; @@ -740,6 +740,7 @@ final void 
mergeDoubleImpl(final KllSketch other) { final long finalN = getN() + other.getN(); //update this sketch with level0 items from the other sketch final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + final int otherNumLevels = other.getNumLevels(); final int[] otherLevelsArr = other.getLevelsArray(); for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { updateDouble(otherDoubleItemsArr[i]); @@ -747,7 +748,7 @@ final void mergeDoubleImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final double myMin = getMinDoubleValue(); final double myMax = getMaxDoubleValue(); - final int myDyMinK = getMinK(); + final int myMinK = getMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -757,21 +758,21 @@ final void mergeDoubleImpl(final KllSketch other) { final int[] myNewLevelsArr; final double[] myNewDoubleItemsArr; - if (other.getNumLevels() > 1) { //now merge other levels if they exist + if (otherNumLevels > 1) { //now merge other levels if they exist final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), otherLevelsArr); + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); final double[] workbuf = new double[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] worklevels = new int[ub + 2]; // ub+1 does not work final int[] outlevels = new int[ub + 2]; - final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output - final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), getM(), provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int[] result = 
KllDoublesHelper.generalDoublesCompress(getK(), getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, isLevelZeroSorted(), random); final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels final int curItemCount = result[2]; //was finalPop @@ -815,7 +816,7 @@ final void mergeDoubleImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. - setMinK(min(myDyMinK, other.getMinK())); + setMinK(min(myMinK, other.getMinK())); } //Update min, max values @@ -850,6 +851,7 @@ final void mergeFloatImpl(final KllSketch other) { final long finalN = getN() + other.getN(); //update this sketch with level0 items from the other sketch final float[] otherFloatItemsArr = other.getFloatItemsArray(); + final int otherNumLevels = other.getNumLevels(); final int[] otherLevelsArr = other.getLevelsArray(); for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { updateFloat(otherFloatItemsArr[i]); @@ -857,7 +859,7 @@ final void mergeFloatImpl(final KllSketch other) { // after the level 0 update, we capture the key mutable variables final float myMin = getMinFloatValue(); final float myMax = getMaxFloatValue(); - final int myDyMinK = getMinK(); + final int myMinK = getMinK(); final int myCurNumLevels = getNumLevels(); final int[] myCurLevelsArr = getLevelsArray(); @@ -867,21 +869,21 @@ final void mergeFloatImpl(final KllSketch other) { final int[] myNewLevelsArr; final float[] myNewFloatItemsArr; - if (other.getNumLevels() > 1) { //now merge other levels if they exist + if (otherNumLevels > 1) { //now merge higher levels if they exist final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(other.getNumLevels(), otherLevelsArr); + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); final float[] workbuf = new float[tmpSpaceNeeded]; final int ub = KllHelper.ubOnNumLevels(finalN); final int[] 
worklevels = new int[ub + 2]; // ub+1 does not work final int[] outlevels = new int[ub + 2]; - final int provisionalNumLevels = max(myCurNumLevels, other.getNumLevels()); + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); // notice that workbuf is being used as both the input and output - final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), getM(), provisionalNumLevels, workbuf, - worklevels, workbuf, outlevels, isLevelZeroSorted(), random); + final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, isLevelZeroSorted(), random); final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels final int curItemCount = result[2]; //was finalPop @@ -925,7 +927,7 @@ final void mergeFloatImpl(final KllSketch other) { //Update Preamble: setN(finalN); if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
- setMinK(min(myDyMinK, other.getMinK())); + setMinK(min(myMinK, other.getMinK())); } //Update min, max values From 5a67bc7214690228754f15c5d35117799c876525 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 5 Apr 2022 18:39:52 -0700 Subject: [PATCH 30/31] Interim 13 --- .../kll/KllDirectDoublesSketch.java | 50 +- .../kll/KllDirectFloatsSketch.java | 50 +- .../datasketches/kll/KllDirectSketch.java | 67 +- .../datasketches/kll/KllDoublesHelper.java | 410 ++++- .../datasketches/kll/KllDoublesSketch.java | 41 +- .../datasketches/kll/KllFloatsHelper.java | 408 ++++- .../datasketches/kll/KllFloatsSketch.java | 43 +- .../datasketches/kll/KllHeapSketch.java | 30 +- .../apache/datasketches/kll/KllHelper.java | 789 ++++++++- .../datasketches/kll/KllMemoryValidate.java | 8 +- .../datasketches/kll/KllPreambleUtil.java | 2 +- .../apache/datasketches/kll/KllSketch.java | 1413 ++--------------- .../kll/KllDirectDoublesSketchTest.java | 43 +- .../kll/KllDirectFloatsSketchTest.java | 14 +- 14 files changed, 1728 insertions(+), 1640 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 9560c8799..0c89ff3a6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -36,12 +36,12 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.Family; +import 
org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -67,15 +67,12 @@ private KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestSer } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. - * @param srcMem a WritableMemory that contains data. - * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory - * @return instance of this sketch + * Heapifies the given Memory object and returns a KllDoublesSketch + * @param mem the given Memory object. + * @return a KllDoublesSketch */ - public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { - final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); - return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal); + public static KllDoublesSketch heapify(final Memory mem) { + return KllDoublesSketch.heapify(mem); } /** @@ -119,6 +116,18 @@ static KllDirectDoublesSketch newInstance(final int k, final int m, final Writab return new KllDirectDoublesSketch(dstMem, memReqSvr, memVal); } + /** + * Wrap a sketch around the given source Memory containing sketch data that originated from + * this sketch. + * @param srcMem a WritableMemory that contains data. + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return instance of this sketch + */ + public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); + return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal); + } + /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). 
@@ -141,7 +150,7 @@ static KllDirectDoublesSketch newInstance(final int k, final int m, final Writab * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { - return getDoublesPmfOrCdf(splitPoints, true); + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, true); } /** @@ -183,7 +192,7 @@ public double[] getCDF(final double[] splitPoints) { * splitPoint, with the exception that the last interval will include maximum value. */ public double[] getPMF(final double[] splitPoints) { - return getDoublesPmfOrCdf(splitPoints, false); + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, false); } /** @@ -205,7 +214,7 @@ public double[] getPMF(final double[] splitPoints) { * @return the approximation to the value at the given fraction */ public double getQuantile(final double fraction) { - return getDoublesQuantile(fraction); + return KllDoublesHelper.getDoublesQuantile(this, fraction); } /** @@ -238,7 +247,7 @@ public double getQuantileLowerBound(final double fraction) { * array. */ public double[] getQuantiles(final double[] fractions) { - return getDoublesQuantiles(fractions); + return KllDoublesHelper.getDoublesQuantiles(this, fractions); } /** @@ -284,7 +293,7 @@ public double getQuantileUpperBound(final double fraction) { * @return an approximate rank of the given value */ public double getRank(final double value) { - return getDoubleRank(value); + return KllDoublesHelper.getDoubleRank(this, value); } /** @@ -299,9 +308,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(SRC_MUST_BE_DIRECT); } if (!other.isDoublesSketch()) { kllSketchThrow(SRC_MUST_BE_DOUBLE); } - mergeDoubleImpl(other); + KllDoublesHelper.mergeDoubleImpl(this, other); } /** @@ -310,7 +318,7 @@ public void merge(final KllSketch other) { * @param value an item from a stream of items. 
NaNs are ignored. */ public void update(final double value) { - updateDouble(value); + KllDoublesHelper.updateDouble(this, value); } @Override @@ -350,13 +358,13 @@ public void update(final double value) { @Override void setDoubleItemsArray(final double[] doubleItems) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); } @Override void setDoubleItemsArrayAt(final int index, final double value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); } @@ -368,7 +376,7 @@ void setDoubleItemsArrayAt(final int index, final double value) { @Override void setMaxDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(Double.BYTES, value); } @@ -377,7 +385,7 @@ void setMaxDoubleValue(final double value) { @Override void setMinDoubleValue(final double value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putDouble(0, value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 3ebb9c7e6..54abe54dd 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -35,12 +35,12 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DIRECT; import static 
org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import org.apache.datasketches.Family; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -67,15 +67,12 @@ private KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServ } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. - * @param srcMem a WritableMemory that contains data. - * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory - * @return instance of this sketch + * Heapifies the given Memory object and returns a KllFloatsSketch + * @param mem the given Memory object. + * @return a KllFloatsSketch */ - public static KllDirectFloatsSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { - final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); - return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal); + public static KllFloatsSketch heapify(final Memory mem) { + return KllFloatsSketch.heapify(mem); } /** @@ -119,6 +116,18 @@ static KllDirectFloatsSketch newInstance(final int k, final int m, final Writabl return new KllDirectFloatsSketch(dstMem, memReqSvr, memVal); } + /** + * Wrap a sketch around the given source Memory containing sketch data that originated from + * this sketch. + * @param srcMem a WritableMemory that contains data. 
+ * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return instance of this sketch + */ + public static KllDirectFloatsSketch writableWrap(final WritableMemory srcMem, final MemoryRequestServer memReqSvr) { + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem); + return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal); + } + /** * Returns an approximation to the Cumulative Distribution Function (CDF), which is the * cumulative analog of the PMF, of the input stream given a set of splitPoint (values). @@ -141,7 +150,7 @@ static KllDirectFloatsSketch newInstance(final int k, final int m, final Writabl * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final float[] splitPoints) { - return getFloatsPmfOrCdf(splitPoints, true); + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, true); } /** @@ -183,7 +192,7 @@ public double[] getCDF(final float[] splitPoints) { * splitPoint, with the exception that the last interval will include maximum value. */ public double[] getPMF(final float[] splitPoints) { - return getFloatsPmfOrCdf(splitPoints, false); + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, false); } /** @@ -205,7 +214,7 @@ public double[] getPMF(final float[] splitPoints) { * @return the approximation to the value at the given fraction */ public float getQuantile(final double fraction) { - return getFloatsQuantile(fraction); + return KllFloatsHelper.getFloatsQuantile(this, fraction); } /** @@ -238,7 +247,7 @@ public float getQuantileLowerBound(final double fraction) { * array. 
*/ public float[] getQuantiles(final double[] fractions) { - return getFloatsQuantiles(fractions); + return KllFloatsHelper.getFloatsQuantiles(this, fractions); } /** @@ -284,7 +293,7 @@ public float getQuantileUpperBound(final double fraction) { * @return an approximate rank of the given value */ public double getRank(final float value) { - return getFloatRank(value); + return KllFloatsHelper.getFloatRank(this, value); } /** @@ -299,9 +308,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (!other.isDirect()) { kllSketchThrow(SRC_MUST_BE_DIRECT); } if (!other.isFloatsSketch()) { kllSketchThrow(SRC_MUST_BE_FLOAT); } - mergeFloatImpl(other); + KllFloatsHelper.mergeFloatImpl(this, other); } /** @@ -310,7 +318,7 @@ public void merge(final KllSketch other) { * @param value an item from a stream of items. NaNs are ignored. */ public void update(final float value) { - updateFloat(value); + KllFloatsHelper.updateFloat(this, value); } @Override @@ -356,13 +364,13 @@ float getMinFloatValue() { @Override void setFloatItemsArray(final float[] floatItems) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); } @Override void setFloatItemsArrayAt(final int index, final float value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); } @@ -371,7 +379,7 @@ void setFloatItemsArrayAt(final int index, final float value) { @Override void setMaxFloatValue(final float value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(Float.BYTES, value); } @@ -380,7 +388,7 @@ void setMaxFloatValue(final float value) { @Override void 
setMinFloatValue(final float value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable.putFloat(0, value); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java index 556b8dbf9..959d4bd3d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -40,14 +40,14 @@ * of the sketch type (float or double). */ abstract class KllDirectSketch extends KllSketch { - final boolean updatable = true; + final boolean updatableMemory; WritableMemory levelsArrUpdatable; WritableMemory minMaxArrUpdatable; WritableMemory itemsArrUpdatable; /** - * For the direct sketches it is important that the methods implemented here are designed to work dynamically - * as the sketch grows off-heap. + * For the direct sketches it is important that the methods implemented here are designed to + * work dynamically as the sketch grows off-heap. 
* @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory @@ -55,6 +55,7 @@ abstract class KllDirectSketch extends KllSketch { KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) { super(sketchType, wmem, memReqSvr); + updatableMemory = memVal.updatableMemory && memReqSvr != null; levelsArrUpdatable = memVal.levelsArrUpdatable; minMaxArrUpdatable = memVal.minMaxArrUpdatable; itemsArrUpdatable = memVal.itemsArrUpdatable; @@ -65,11 +66,6 @@ public int getK() { return getMemoryK(wmem); } - @Override - int getM() { - return getMemoryM(wmem); - } - @Override public long getN() { return getMemoryN(wmem); @@ -77,7 +73,7 @@ public long getN() { @Override public void reset() { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int k = getK(); setN(0); setMinK(k); @@ -86,7 +82,7 @@ public void reset() { setLevelZeroSorted(false); final int newLevelsArrLen = 2 * Integer.BYTES; final int newItemsArrLen = k; - KllSketch.memorySpaceMgmt(this, newLevelsArrLen, newItemsArrLen); + KllHelper.memorySpaceMgmt(this, newLevelsArrLen, newItemsArrLen); levelsArrUpdatable.putIntArray(0L, new int[] {k, k}, 0, 2); if (sketchType == SketchType.DOUBLES_SKETCH) { minMaxArrUpdatable.putDoubleArray(0L, new double[] {Double.NaN, Double.NaN}, 0, 2); @@ -105,11 +101,6 @@ public byte[] toUpdatableByteArray() { return byteArr; } - @Override - int getMinK() { - return getMemoryMinK(wmem); - } - int getItemsArrLengthItems() { return getLevelsArray()[getNumLevels()]; } @@ -127,6 +118,16 @@ int getLevelsArrayAt(final int index) { return levelsArrUpdatable.getInt((long)index * Integer.BYTES); } + @Override + int getM() { + return getMemoryM(wmem); + } + + @Override + int getMinK() { + return getMemoryMinK(wmem); + } + 
@Override int getNumLevels() { return getMemoryNumLevels(wmem); @@ -134,14 +135,14 @@ int getNumLevels() { @Override void incN() { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } long n = getMemoryN(wmem); setMemoryN(wmem, ++n); } @Override void incNumLevels() { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } int numLevels = getMemoryNumLevels(wmem); setMemoryNumLevels(wmem, ++numLevels); } @@ -151,33 +152,27 @@ boolean isLevelZeroSorted() { return getMemoryLevelZeroSortedFlag(wmem); } - @Override - void setMinK(final int minK) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } - setMemoryMinK(wmem, minK); - } - @Override void setItemsArrayUpdatable(final WritableMemory itemsMem) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } itemsArrUpdatable = itemsMem; } @Override void setLevelsArray(final int[] levelsArr) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); } @Override void setLevelsArrayAt(final int index, final int value) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable.putInt((long)index * Integer.BYTES, value); } @Override void setLevelsArrayAtMinusEq(final int index, final int minusEq) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int offset = index * Integer.BYTES; final int curV = levelsArrUpdatable.getInt(offset); levelsArrUpdatable.putInt(offset, curV - minusEq); @@ -185,7 +180,7 @@ void setLevelsArrayAtMinusEq(final int index, final int minusEq) { @Override void setLevelsArrayAtPlusEq(final int index, final int plusEq) { - if 
(!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } final int offset = index * Integer.BYTES; final int curV = levelsArrUpdatable.getInt(offset); levelsArrUpdatable.putInt(offset, curV + plusEq); @@ -193,31 +188,37 @@ void setLevelsArrayAtPlusEq(final int index, final int plusEq) { @Override void setLevelsArrayUpdatable(final WritableMemory levelsMem) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } levelsArrUpdatable = levelsMem; } @Override void setLevelZeroSorted(final boolean sorted) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } setMemoryLevelZeroSortedFlag(wmem, sorted); } + @Override + void setMinK(final int minK) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + setMemoryMinK(wmem, minK); + } + @Override void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } minMaxArrUpdatable = minMaxMem; } @Override void setN(final long n) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } setMemoryN(wmem, n); } @Override void setNumLevels(final int numLevels) { - if (!updatable) { kllSketchThrow(TGT_IS_IMMUTABLE); } + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } setMemoryNumLevels(wmem, numLevels); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 9049775a1..25a71f699 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -19,6 +19,8 @@ package org.apache.datasketches.kll; +import static java.lang.Math.max; +import static java.lang.Math.min; import static 
org.apache.datasketches.Util.isEven; import static org.apache.datasketches.Util.isOdd; @@ -32,7 +34,281 @@ * @author Kevin Lang * @author Alexander Saydakov */ -class KllDoublesHelper { +final class KllDoublesHelper { + + static double getDoubleRank(final KllSketch mine, final double value) { + if (mine.isEmpty()) { return Double.NaN; } + int level = 0; + int weight = 1; + long total = 0; + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + final int[] myLevelsArr = mine.getLevelsArray(); + while (level < mine.getNumLevels()) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + for (int i = fromIndex; i < toIndex; i++) { + if (myDoubleItemsArr[i] < value) { + total += weight; + } else if (level > 0 || mine.isLevelZeroSorted()) { + break; // levels above 0 are sorted, no point comparing further + } + } + level++; + weight *= 2; + } + return (double) total / mine.getN(); + } + + static double[] getDoublesPmfOrCdf(final KllSketch mine, final double[] splitPoints, final boolean isCdf) { + if (mine.isEmpty()) { return null; } + validateDoubleValues(splitPoints); + final double[] buckets = new double[splitPoints.length + 1]; + final int myNumLevels = mine.getNumLevels(); + final int[] myLevelsArr = mine.getLevelsArray(); + int level = 0; + int weight = 1; + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (level == 0 && !mine.isLevelZeroSorted()) { + KllDoublesHelper.incrementDoublesBucketsUnsortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); + } else { + KllDoublesHelper.incrementDoublesBucketsSortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); + } + level++; + weight *= 2; + } + // normalize and, if CDF, convert to cumulative + if (isCdf) { + double subtotal = 0; + for (int i = 0; i < buckets.length; i++) { + subtotal += buckets[i]; + buckets[i] = subtotal / mine.getN(); + } + } 
else { + for (int i = 0; i < buckets.length; i++) { + buckets[i] /= mine.getN(); + } + } + return buckets; + } + + static double getDoublesQuantile(final KllSketch mine, final double fraction) { + if (mine.isEmpty()) { return Double.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + //These two assumptions make KLL compatible with the previous classic Quantiles Sketch + if (fraction == 0.0) { return mine.getMinDoubleValue(); } + if (fraction == 1.0) { return mine.getMaxDoubleValue(); } + final KllDoublesQuantileCalculator quant = KllDoublesHelper.getDoublesQuantileCalculator(mine); + return quant.getQuantile(fraction); + } + + static double[] getDoublesQuantiles(final KllSketch mine, final double[] fractions) { + if (mine.isEmpty()) { return null; } + KllDoublesQuantileCalculator quant = null; + final double[] quantiles = new double[fractions.length]; + for (int i = 0; i < fractions.length; i++) { + final double fraction = fractions[i]; + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (fraction == 0.0) { quantiles[i] = mine.getMinDoubleValue(); } + else if (fraction == 1.0) { quantiles[i] = mine.getMaxDoubleValue(); } + else { + if (quant == null) { + quant = KllDoublesHelper.getDoublesQuantileCalculator(mine); + } + quantiles[i] = quant.getQuantile(fraction); + } + } + return quantiles; + } + + static void mergeDoubleImpl(final KllSketch mine, final KllSketch other) { + if (other.isEmpty()) { return; } + final long finalN = mine.getN() + other.getN(); + //update this sketch with level0 items from the other sketch + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + final int otherNumLevels = other.getNumLevels(); + final int[] otherLevelsArr = other.getLevelsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + 
KllDoublesHelper.updateDouble(mine, otherDoubleItemsArr[i]); + } + // after the level 0 update, we capture the key mutable variables + final double myMin = mine.getMinDoubleValue(); + final double myMax = mine.getMaxDoubleValue(); + final int myMinK = mine.getMinK(); + + final int myCurNumLevels = mine.getNumLevels(); + final int[] myCurLevelsArr = mine.getLevelsArray(); + final double[] myCurDoubleItemsArr = mine.getDoubleItemsArray(); + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final double[] myNewDoubleItemsArr; + + if (otherNumLevels > 1) { //now merge other levels if they exist + final int tmpSpaceNeeded = mine.getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); + final double[] workbuf = new double[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); + + populateDoubleWorkArrays(mine, other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output + final int[] result = generalDoublesCompress(mine.getK(), mine.getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, mine.isLevelZeroSorted(), KllSketch.random); + final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for the "self" sketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; //was finalNumLevels + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY (was newbuf) + myNewDoubleItemsArr = (targetItemCount == myCurDoubleItemsArr.length) + ? 
myCurDoubleItemsArr + : new double[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + //shift the new items array + System.arraycopy(workbuf, outlevels[0], myNewDoubleItemsArr, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (mine.updatablMemory) { + mine.wmem = KllHelper.memorySpaceMgmt(mine, myNewLevelsArr.length, myNewDoubleItemsArr.length); + } + + } else { + myNewNumLevels = myCurNumLevels; + myNewLevelsArr = myCurLevelsArr; + myNewDoubleItemsArr = myCurDoubleItemsArr; + } + + //Update Preamble: + mine.setN(finalN); + if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
+ mine.setMinK(min(myMinK, other.getMinK())); + } + + //Update min, max values + final double otherMin = other.getMinDoubleValue(); + final double otherMax = other.getMaxDoubleValue(); + mine.setMinDoubleValue(resolveDoubleMinValue(myMin, otherMin)); + mine.setMaxDoubleValue(resolveDoubleMaxValue(myMax, otherMax)); + + //Update numLevels, levelsArray, items + mine.setNumLevels(myNewNumLevels); + mine.setLevelsArray(myNewLevelsArr); + mine.setDoubleItemsArray(myNewDoubleItemsArr); + assert KllHelper.sumTheSampleWeights(mine.getNumLevels(), mine.getLevelsArray()) == mine.getN(); + } + + static void mergeSortedDoubleArrays( + final double[] bufA, final int startA, final int lenA, + final double[] bufB, final int startB, final int lenB, + final double[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + + /** + * Validation Method. This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = start + offset; + for (int i = start; i < (start + half_length); i++) { + buf[i] = buf[j]; + j += 2; + } + } + + /** + * Validation Method. 
This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveUpDoubles(final double[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = (start + length) - 1 - offset; + for (int i = (start + length) - 1; i >= (start + half_length); i--) { + buf[i] = buf[j]; + j -= 2; + } + } + + static void updateDouble(final KllSketch mine, final double value) { + if (Double.isNaN(value)) { return; } + if (mine.isEmpty()) { + mine.setMinDoubleValue(value); + mine.setMaxDoubleValue(value); + } else { + if (value < mine.getMinDoubleValue()) { mine.setMinDoubleValue(value); } + if (value > mine.getMaxDoubleValue()) { mine.setMaxDoubleValue(value); } + } + if (mine.getLevelsArrayAt(0) == 0) { KllHelper.compressWhileUpdatingSketch(mine); } + mine.incN(); + mine.setLevelZeroSorted(false); + final int nextPos = mine.getLevelsArrayAt(0) - 1; + assert mine.getLevelsArrayAt(0) >= 0; + mine.setLevelsArrayAt(0, nextPos); + mine.setDoubleItemsArrayAt(nextPos, value); + } /** * Compression algorithm used to merge higher levels. 
@@ -67,7 +343,7 @@ class KllDoublesHelper { * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ - static int[] generalDoublesCompress( + private static int[] generalDoublesCompress( final int k, final int m, final int numLevelsIn, @@ -155,69 +431,103 @@ static int[] generalDoublesCompress( return new int[] {numLevels, targetItemCount, currentItemCount}; } - static void mergeSortedDoubleArrays( - final double[] bufA, final int startA, final int lenA, - final double[] bufB, final int startB, final int lenB, - final double[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; + private static KllDoublesQuantileCalculator getDoublesQuantileCalculator(final KllSketch mine) { + final int[] myLevelsArr = mine.getLevelsArray(); + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + if (!mine.isLevelZeroSorted()) { + Arrays.sort(mine.getDoubleItemsArray(), myLevelsArr[0], myLevelsArr[1]); + mine.setLevelZeroSorted(true); + } + return new KllDoublesQuantileCalculator(myDoubleItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN()); + } - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; + private static void incrementDoublesBucketsSortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket 
} else { - bufC[c] = bufB[b]; - b++; + j++; // no more samples for this bucket } } - assert a == limA; - assert b == limB; + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } } - //This must be modified for validation - static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = start + offset; - for (int i = start; i < (start + half_length); i++) { - buf[i] = buf[j]; - j += 2; + private static void incrementDoublesBucketsUnsortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; } } - //This must be modified for validation - static void randomlyHalveUpDoubles(final double[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = (start + length) - 1 - offset; - for (int i = (start + length) - 1; i >= (start + half_length); i--) { - buf[i] = buf[j]; - j -= 2; + private static void populateDoubleWorkArrays(final KllSketch mine, final KllSketch other, final double[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + 
worklevels[0] = 0; + final int[] myLevelsArr = mine.getLevelsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = KllHelper.currentLevelSize(0, mine.getNumLevels(),myLevelsArr); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, mine.getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if (selfPop > 0 && otherPop == 0) { + System.arraycopy(myDoubleItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + mergeSortedDoubleArrays(myDoubleItemsArr, myLevelsArr[lvl], selfPop, otherDoubleItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } } } + private static double resolveDoubleMaxValue(final double myMax, final double otherMax) { + if (Double.isNaN(myMax) && Double.isNaN(otherMax)) { return Double.NaN; } + if (Double.isNaN(myMax)) { return otherMax; } + if (Double.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + private static double resolveDoubleMinValue(final double myMin, final double otherMin) { + if (Double.isNaN(myMin) && Double.isNaN(otherMin)) { return Double.NaN; } + if (Double.isNaN(myMin)) { return otherMin; } + if (Double.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + /** + * Validation Method. 
* Checks the sequential validity of the given array of double values. * They must be unique, monotonically increasing and not NaN. * @param values the given array of values */ - static void validateDoubleValues(final double[] values) { + private static void validateDoubleValues(final double[] values) { for (int i = 0; i < values.length; i++) { if (!Double.isFinite(values[i])) { throw new SketchesArgumentException("Values must be finite"); @@ -230,11 +540,11 @@ static void validateDoubleValues(final double[] values) { } /* + * Validation Method. * The following must be enabled for use with the KllDoublesValidationTest, - * which is only enabled for manual testing. In addition, two methods + * which is only enabled for manual testing. In addition, two Validation Methods * above need to be modified as commented. */ - // static int nextOffset = 0; // // private static int deterministicOffset() { diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 63a333afa..c5aadebf0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -22,11 +22,9 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; /** @@ -49,7 +47,7 @@ public final class KllDoublesSketch extends KllHeapSketch { */ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { super(memVal.k, memVal.m, SketchType.DOUBLES_SKETCH); - buildHeapKllSketchFromMemory(memVal); + 
KllHelper.buildHeapKllSketchFromMemory(this, memVal); } /** @@ -57,7 +55,7 @@ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) { * This will have a rank error of about 1.65%. */ public KllDoublesSketch() { - this(KllSketch.DEFAULT_K); + this(KllSketch.DEFAULT_K, KllSketch.DEFAULT_M); } /** @@ -71,15 +69,15 @@ public KllDoublesSketch(final int k) { } /** - * Heap constructor with a given parameter k and m. - * k can be any value between DEFAULT_M and 65535, inclusive. + * Heap constructor with a given parameters k and m. + * + * @param k parameter that controls size of the sketch and accuracy of estimates. + * k can be any value between m and 65535, inclusive. * The default k = 200 results in a normalized rank error of about 1.65%. - * Higher values of K will have smaller error but the sketch will be larger (and slower). - * The DEFAULT_M, which is 8 is recommended for the given parameter m. - * Other values of m should be considered experimental as they have not been - * as well characterized. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width in items. + * Higher values of k will have smaller error but the sketch will be larger (and slower). + * @param m parameter controls the minimum level width in items. It can be 2, 4, 6 or 8. + * The DEFAULT_M, which is 8 is recommended. Other values of m should be considered + * experimental as they have not been as well characterized. 
*/ KllDoublesSketch(final int k, final int m) { super(k, m, SketchType.DOUBLES_SKETCH); @@ -97,9 +95,7 @@ public KllDoublesSketch(final int k) { */ public static KllDoublesSketch heapify(final Memory mem) { final KllMemoryValidate memChk = new KllMemoryValidate(mem); - if (!memChk.doublesSketch) { - throw new SketchesArgumentException("Memory object is not a KllDoublesSketch."); - } + if (!memChk.doublesSketch) { Error.kllSketchThrow(SRC_MUST_BE_DOUBLE); } return new KllDoublesSketch(mem, memChk); } @@ -125,7 +121,7 @@ public static KllDoublesSketch heapify(final Memory mem) { * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final double[] splitPoints) { - return getDoublesPmfOrCdf(splitPoints, true); + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, true); } /** @@ -167,7 +163,7 @@ public double[] getCDF(final double[] splitPoints) { * splitPoint, with the exception that the last interval will include maximum value. */ public double[] getPMF(final double[] splitPoints) { - return getDoublesPmfOrCdf(splitPoints, false); + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, false); } /** @@ -189,7 +185,7 @@ public double[] getPMF(final double[] splitPoints) { * @return the approximation to the value at the given fraction */ public double getQuantile(final double fraction) { - return getDoublesQuantile(fraction); + return KllDoublesHelper.getDoublesQuantile(this, fraction); } /** @@ -222,7 +218,7 @@ public double getQuantileLowerBound(final double fraction) { * array. 
*/ public double[] getQuantiles(final double[] fractions) { - return getDoublesQuantiles(fractions); + return KllDoublesHelper.getDoublesQuantiles(this, fractions); } /** @@ -268,7 +264,7 @@ public double getQuantileUpperBound(final double fraction) { * @return an approximate rank of the given value */ public double getRank(final double value) { - return getDoubleRank(value); + return KllDoublesHelper.getDoubleRank(this, value); } /** @@ -283,9 +279,8 @@ public KllDoublesSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllSketch other) { - if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } if (!other.isDoublesSketch()) { kllSketchThrow(SRC_MUST_BE_DOUBLE); } - mergeDoubleImpl(other); + KllDoublesHelper.mergeDoubleImpl(this, other); } @Override @@ -307,7 +302,7 @@ public void reset() { * @param value an item from a stream of items. NaNs are ignored. */ public void update(final double value) { - updateDouble(value); + KllDoublesHelper.updateDouble(this, value); } @Override //Used internally diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 6f15baf3c..85742f9bb 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -19,6 +19,8 @@ package org.apache.datasketches.kll; +import static java.lang.Math.max; +import static java.lang.Math.min; import static org.apache.datasketches.Util.isEven; import static org.apache.datasketches.Util.isOdd; @@ -32,7 +34,281 @@ * @author Kevin Lang * @author Alexander Saydakov */ -class KllFloatsHelper { +final class KllFloatsHelper { + + static double getFloatRank(final KllSketch mine, final float value) { + if (mine.isEmpty()) { return Double.NaN; } + int level = 0; + int weight = 1; + long total = 0; + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + final int[] myLevelsArr = 
mine.getLevelsArray(); + while (level < mine.getNumLevels()) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + for (int i = fromIndex; i < toIndex; i++) { + if (myFloatItemsArr[i] < value) { + total += weight; + } else if (level > 0 || mine.isLevelZeroSorted()) { + break; // levels above 0 are sorted, no point comparing further + } + } + level++; + weight *= 2; + } + return (double) total / mine.getN(); + } + + static double[] getFloatsPmfOrCdf(final KllSketch mine, final float[] splitPoints, final boolean isCdf) { + if (mine.isEmpty()) { return null; } + validateFloatValues(splitPoints); + final double[] buckets = new double[splitPoints.length + 1]; + final int myNumLevels = mine.getNumLevels(); + final int[] myLevelsArr = mine.getLevelsArray(); + int level = 0; + int weight = 1; + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (level == 0 && !mine.isLevelZeroSorted()) { + KllFloatsHelper.incrementFloatBucketsUnsortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); + } else { + KllFloatsHelper.incrementFloatBucketsSortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); + } + level++; + weight *= 2; + } + // normalize and, if CDF, convert to cumulative + if (isCdf) { + double subtotal = 0; + for (int i = 0; i < buckets.length; i++) { + subtotal += buckets[i]; + buckets[i] = subtotal / mine.getN(); + } + } else { + for (int i = 0; i < buckets.length; i++) { + buckets[i] /= mine.getN(); + } + } + return buckets; + } + + static float getFloatsQuantile(final KllSketch mine, final double fraction) { + if (mine.isEmpty()) { return Float.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + //These two assumptions make KLL compatible with the previous classic Quantiles Sketch + if (fraction == 0.0) { 
return mine.getMinFloatValue(); } + if (fraction == 1.0) { return mine.getMaxFloatValue(); } + final KllFloatsQuantileCalculator quant = KllFloatsHelper.getFloatsQuantileCalculator(mine); + return quant.getQuantile(fraction); + } + + static float[] getFloatsQuantiles(final KllSketch mine, final double[] fractions) { + if (mine.isEmpty()) { return null; } + KllFloatsQuantileCalculator quant = null; + final float[] quantiles = new float[fractions.length]; + for (int i = 0; i < fractions.length; i++) { + final double fraction = fractions[i]; + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (fraction == 0.0) { quantiles[i] = mine.getMinFloatValue(); } + else if (fraction == 1.0) { quantiles[i] = mine.getMaxFloatValue(); } + else { + if (quant == null) { + quant = KllFloatsHelper.getFloatsQuantileCalculator(mine); + } + quantiles[i] = quant.getQuantile(fraction); + } + } + return quantiles; + } + + static void mergeFloatImpl(final KllSketch mine, final KllSketch other) { + if (other.isEmpty()) { return; } + final long finalN = mine.getN() + other.getN(); + //update this sketch with level0 items from the other sketch + final float[] otherFloatItemsArr = other.getFloatItemsArray(); + final int otherNumLevels = other.getNumLevels(); + final int[] otherLevelsArr = other.getLevelsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + KllFloatsHelper.updateFloat(mine, otherFloatItemsArr[i]); + } + // after the level 0 update, we capture the key mutable variables + final float myMin = mine.getMinFloatValue(); + final float myMax = mine.getMaxFloatValue(); + final int myMinK = mine.getMinK(); + + final int myCurNumLevels = mine.getNumLevels(); + final int[] myCurLevelsArr = mine.getLevelsArray(); + final float[] myCurFloatItemsArr = mine.getFloatItemsArray(); + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final float[] myNewFloatItemsArr; + + if 
(otherNumLevels > 1) { //now merge higher levels if they exist + final int tmpSpaceNeeded = mine.getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); + final float[] workbuf = new float[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); + + populateFloatWorkArrays(mine, other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output + final int[] result = generalFloatsCompress(mine.getK(), mine.getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, mine.isLevelZeroSorted(), KllSketch.random); + final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for the "self" sketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; //was finalNumLevels + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY (was newbuf) + myNewFloatItemsArr = (targetItemCount == myCurFloatItemsArr.length) + ? 
myCurFloatItemsArr + : new float[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + //shift the new items array + System.arraycopy(workbuf, outlevels[0], myNewFloatItemsArr, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (mine.updatablMemory) { + mine.wmem = KllHelper.memorySpaceMgmt(mine, myNewLevelsArr.length, myNewFloatItemsArr.length); + } + + } else { + myNewNumLevels = myCurNumLevels; + myNewLevelsArr = myCurLevelsArr; + myNewFloatItemsArr = myCurFloatItemsArr; + } + + //Update Preamble: + mine.setN(finalN); + if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
+ mine.setMinK(min(myMinK, other.getMinK())); + } + + //Update min, max values + final float otherMin = other.getMinFloatValue(); + final float otherMax = other.getMaxFloatValue(); + mine.setMinFloatValue(resolveFloatMinValue(myMin, otherMin)); + mine.setMaxFloatValue(resolveFloatMaxValue(myMax, otherMax)); + + //Update numLevels, levelsArray, items + mine.setNumLevels(myNewNumLevels); + mine.setLevelsArray(myNewLevelsArr); + mine.setFloatItemsArray(myNewFloatItemsArr); + assert KllHelper.sumTheSampleWeights(mine.getNumLevels(), mine.getLevelsArray()) == mine.getN(); + } + + static void mergeSortedFloatArrays( + final float[] bufA, final int startA, final int lenA, + final float[] bufB, final int startB, final int lenB, + final float[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + + /** + * Validation Method. This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveDownFloats(final float[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = start + offset; + for (int i = start; i < (start + half_length); i++) { + buf[i] = buf[j]; + j += 2; + } + } + + /** + * Validation Method. 
This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveUpFloats(final float[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = (start + length) - 1 - offset; + for (int i = (start + length) - 1; i >= (start + half_length); i--) { + buf[i] = buf[j]; + j -= 2; + } + } + + static void updateFloat(final KllSketch mine, final float value) { + if (Float.isNaN(value)) { return; } + if (mine.isEmpty()) { + mine.setMinFloatValue(value); + mine.setMaxFloatValue(value); + } else { + if (value < mine.getMinFloatValue()) { mine.setMinFloatValue(value); } + if (value > mine.getMaxFloatValue()) { mine.setMaxFloatValue(value); } + } + if (mine.getLevelsArrayAt(0) == 0) { KllHelper.compressWhileUpdatingSketch(mine); } + mine.incN(); + mine.setLevelZeroSorted(false); + final int nextPos = mine.getLevelsArrayAt(0) - 1; + assert mine.getLevelsArrayAt(0) >= 0; + mine.setLevelsArrayAt(0, nextPos); + mine.setFloatItemsArrayAt(nextPos, value); + } /** * Compression algorithm used to merge higher levels. 
@@ -67,7 +343,7 @@ class KllFloatsHelper { * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ - static int[] generalFloatsCompress( + private static int[] generalFloatsCompress( final int k, final int m, final int numLevelsIn, @@ -155,69 +431,103 @@ static int[] generalFloatsCompress( return new int[] {numLevels, targetItemCount, currentItemCount}; } - static void mergeSortedFloatArrays( - final float[] bufA, final int startA, final int lenA, - final float[] bufB, final int startB, final int lenB, - final float[] bufC, final int startC) { - final int lenC = lenA + lenB; - final int limA = startA + lenA; - final int limB = startB + lenB; - final int limC = startC + lenC; - - int a = startA; - int b = startB; + private static KllFloatsQuantileCalculator getFloatsQuantileCalculator(final KllSketch mine) { + final int[] myLevelsArr = mine.getLevelsArray(); + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + if (!mine.isLevelZeroSorted()) { + Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]); + mine.setLevelZeroSorted(true); + } + return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN()); + } - for (int c = startC; c < limC; c++) { - if (a == limA) { - bufC[c] = bufB[b]; - b++; - } else if (b == limB) { - bufC[c] = bufA[a]; - a++; - } else if (bufA[a] < bufB[b]) { - bufC[c] = bufA[a]; - a++; + private static void incrementFloatBucketsSortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myFloatItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket } else { - bufC[c] = bufB[b]; - 
b++; + j++; // no more samples for this bucket } } - assert a == limA; - assert b == limB; + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } } - //This must be modified for validation - static void randomlyHalveDownFloats(final float[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = start + offset; - for (int i = start; i < (start + half_length); i++) { - buf[i] = buf[j]; - j += 2; + private static void incrementFloatBucketsUnsortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myFloatItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; } } - //This must be modified for validation - static void randomlyHalveUpFloats(final float[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = (start + length) - 1 - offset; - for (int i = (start + length) - 1; i >= (start + half_length); i--) { - buf[i] = buf[j]; - j -= 2; + private static void populateFloatWorkArrays(final KllSketch mine, final KllSketch other, final float[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + worklevels[0] = 0; + final int[] myLevelsArr = 
mine.getLevelsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + final float[] otherFloatItemsArr = other.getFloatItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = KllHelper.currentLevelSize(0, mine.getNumLevels(), myLevelsArr); + System.arraycopy( myFloatItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, mine.getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if (selfPop > 0 && otherPop == 0) { + System.arraycopy( myFloatItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + mergeSortedFloatArrays( myFloatItemsArr, myLevelsArr[lvl], selfPop, otherFloatItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } } } + private static float resolveFloatMaxValue(final float myMax, final float otherMax) { + if (Float.isNaN(myMax) && Float.isNaN(otherMax)) { return Float.NaN; } + if (Float.isNaN(myMax)) { return otherMax; } + if (Float.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + private static float resolveFloatMinValue(final float myMin, final float otherMin) { + if (Float.isNaN(myMin) && Float.isNaN(otherMin)) { return Float.NaN; } + if (Float.isNaN(myMin)) { return otherMin; } + if (Float.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + /** + * Validation Method. * Checks the sequential validity of the given array of float values. 
* They must be unique, monotonically increasing and not NaN. * @param values the given array of values */ - static void validateFloatValues(final float[] values) { + private static void validateFloatValues(final float[] values) { for (int i = 0; i < values.length; i++) { if (!Float.isFinite(values[i])) { throw new SketchesArgumentException("Values must be finite"); @@ -230,11 +540,11 @@ static void validateFloatValues(final float[] values) { } /* + * Validation Method. * The following must be enabled for use with the KllFloatsValidationTest, * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ - // static int nextOffset = 0; // // private static int deterministicOffset() { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index b6c1956c7..2ef1c3de4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -22,11 +22,9 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; -import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT; import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; /** @@ -49,7 +47,7 @@ public final class KllFloatsSketch extends KllHeapSketch { */ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { super(memVal.k, memVal.m, SketchType.FLOATS_SKETCH); - buildHeapKllSketchFromMemory(memVal); + KllHelper.buildHeapKllSketchFromMemory(this, memVal); } /** @@ -57,11 +55,11 @@ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) { * This will have a rank error of 
about 1.65%. */ public KllFloatsSketch() { - this(KllSketch.DEFAULT_K); + this(KllSketch.DEFAULT_K, KllSketch.DEFAULT_M); } /** - * Heap constructor with a given parameter k. k can be any value between DEFAULT_M and + * Heap constructor with a given parameter k. k can be any value between 8 and * 65535, inclusive. The default k = 200 results in a normalized rank error of about * 1.65%. Higher values of K will have smaller error but the sketch will be larger (and slower). * @param k parameter that controls size of the sketch and accuracy of estimates @@ -71,15 +69,15 @@ public KllFloatsSketch(final int k) { } /** - * Heap constructor with a given parameter k and m. - * k can be any value between DEFAULT_M and 65535, inclusive. + * Heap constructor with a given parameters k and m. + * + * @param k parameter that controls size of the sketch and accuracy of estimates. + * k can be any value between m and 65535, inclusive. * The default k = 200 results in a normalized rank error of about 1.65%. - * Higher values of K will have smaller error but the sketch will be larger (and slower). - * The DEFAULT_M, which is 8 is recommended for the given parameter m. - * Other values of m should be considered experimental as they have not been - * as well characterized. - * @param k parameter that controls size of the sketch and accuracy of estimates - * @param m parameter that controls the minimum level width in items. + * Higher values of k will have smaller error but the sketch will be larger (and slower). + * @param m parameter that controls the minimum level width in items. It can be 2, 4, 6 or 8. + * The DEFAULT_M, which is 8 is recommended. Other values of m should be considered + * experimental as they have not been as well characterized. 
*/ KllFloatsSketch(final int k, final int m) { super(k, m, SketchType.FLOATS_SKETCH); @@ -97,9 +95,7 @@ public KllFloatsSketch(final int k) { */ public static KllFloatsSketch heapify(final Memory mem) { final KllMemoryValidate memVal = new KllMemoryValidate(mem); - if (memVal.doublesSketch) { - throw new SketchesArgumentException("Memory object is not a KllFloatsSketch."); - } + if (memVal.doublesSketch) { Error.kllSketchThrow(SRC_MUST_BE_FLOAT); } return new KllFloatsSketch(mem, memVal); } @@ -125,7 +121,7 @@ public static KllFloatsSketch heapify(final Memory mem) { * in positions 0 through j of the returned PMF array. */ public double[] getCDF(final float[] splitPoints) { - return getFloatsPmfOrCdf(splitPoints, true); + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, true); } /** @@ -167,7 +163,7 @@ public double[] getCDF(final float[] splitPoints) { * splitPoint, with the exception that the last interval will include maximum value. */ public double[] getPMF(final float[] splitPoints) { - return getFloatsPmfOrCdf(splitPoints, false); + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, false); } /** @@ -189,7 +185,7 @@ public double[] getPMF(final float[] splitPoints) { * @return the approximation to the value at the given fraction */ public float getQuantile(final double fraction) { - return getFloatsQuantile(fraction); + return KllFloatsHelper.getFloatsQuantile(this, fraction); } /** @@ -222,7 +218,7 @@ public float getQuantileLowerBound(final double fraction) { * array. 
*/ public float[] getQuantiles(final double[] fractions) { - return getFloatsQuantiles(fractions); + return KllFloatsHelper.getFloatsQuantiles(this, fractions); } /** @@ -268,7 +264,7 @@ public float getQuantileUpperBound(final double fraction) { * @return an approximate rank of the given value */ public double getRank(final float value) { - return getFloatRank(value); + return KllFloatsHelper.getFloatRank(this, value); } /** @@ -283,9 +279,8 @@ public KllFloatsSketchIterator iterator() { * @param other sketch to merge into this one */ public void merge(final KllFloatsSketch other) { - if (other.isDirect()) { kllSketchThrow(SRC_CANNOT_BE_DIRECT); } if (!other.isFloatsSketch()) { kllSketchThrow(SRC_MUST_BE_FLOAT); } - mergeFloatImpl(other); + KllFloatsHelper.mergeFloatImpl(this, other); } @Override @@ -307,7 +302,7 @@ public void reset() { * @param value an item from a stream of items. NaNs are ignored. */ public void update(final float value) { - updateFloat(value); + KllFloatsHelper.updateFloat(this, value); } @Override //Dummy diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java index fcfcda642..f50cc2132 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapSketch.java @@ -60,21 +60,11 @@ public int getK() { return k; } - @Override - int getM() { - return m; - } - @Override public long getN() { return n_; } - @Override - int getMinK() { - return minK_; - } - @Override int[] getLevelsArray() { return levels_; @@ -83,6 +73,16 @@ int[] getLevelsArray() { @Override int getLevelsArrayAt(final int index) { return levels_[index]; } + @Override + int getM() { + return m; + } + + @Override + int getMinK() { + return minK_; + } + @Override int getNumLevels() { return numLevels_; @@ -103,11 +103,6 @@ boolean isLevelZeroSorted() { return isLevelZeroSorted_; } - @Override - void setMinK(final int minK) { - minK_ = 
minK; - } - @Override void setItemsArrayUpdatable(final WritableMemory itemsMem) { } //dummy @@ -137,6 +132,11 @@ void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted_ = sorted; } + @Override + void setMinK(final int minK) { + minK_ = minK; + } + @Override void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { } //dummy diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 0b24fffc6..93ad2ce82 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -19,29 +19,55 @@ package org.apache.datasketches.kll; +import static java.lang.Math.abs; +import static java.lang.Math.ceil; +import static java.lang.Math.exp; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; import static java.lang.Math.pow; +import static java.lang.Math.round; import static org.apache.datasketches.Util.floorPowerOf2; +import static org.apache.datasketches.Util.isOdd; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; -import static org.apache.datasketches.kll.KllSketch.CDF_COEF; -import static org.apache.datasketches.kll.KllSketch.CDF_EXP; -import static org.apache.datasketches.kll.KllSketch.PMF_COEF; -import static org.apache.datasketches.kll.KllSketch.PMF_EXP; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static 
org.apache.datasketches.kll.KllPreambleUtil.setMemoryDoubleSketchFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryEmptyFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySingleItemFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryUpdatableFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import java.util.Arrays; + +import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.Util; import org.apache.datasketches.kll.KllSketch.SketchType; +import org.apache.datasketches.memory.WritableMemory; /** - * This class provides some useful sketch analysis tools that are used internally and also can be used by - * interested users to understand the internal structure of the sketch as well as the growth properties of the - * sketch given a stream length. + * This class provides some useful sketch analysis tools that are used internally. 
* * @author lrhodes * */ -public class KllHelper { +final class KllHelper { - public static class GrowthStats { + static class GrowthStats { SketchType sketchType; int k; int m; @@ -53,7 +79,7 @@ public static class GrowthStats { int updatableBytes; } - public static class LevelStats { + static class LevelStats { long n; int numLevels; int items; @@ -65,16 +91,274 @@ public static class LevelStats { } } + static final double EPS_DELTA_THRESHOLD = 1E-6; + static final double MIN_EPS = 4.7634E-5; + static final double PMF_COEF = 2.446; + static final double PMF_EXP = 0.9433; + static final double CDF_COEF = 2.296; + static final double CDF_EXP = 0.9723; + /** * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index */ - private static final long[] powersOfThree = + private static long[] powersOfThree = new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441, 1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467, 3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L, 847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L, 205891132094649L}; + static void buildHeapKllSketchFromMemory(final KllSketch mine, final KllMemoryValidate memVal) { + final boolean doubleType = (mine.sketchType == DOUBLES_SKETCH); + mine.setLevelZeroSorted(memVal.level0Sorted); + mine.setN(memVal.n); + mine.setMinK(memVal.minK); + mine.setNumLevels(memVal.numLevels); + final int[] myLevelsArr = new int[mine.getNumLevels() + 1]; + + if (memVal.updatableMemory) { + memVal.levelsArrUpdatable.getIntArray(0, myLevelsArr, 0, mine.getNumLevels() + 1); + mine.setLevelsArray(myLevelsArr); + if (doubleType) { + mine.setMinDoubleValue(memVal.minMaxArrUpdatable.getDouble(0)); + mine.setMaxDoubleValue(memVal.minMaxArrUpdatable.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Double.BYTES; + final double[] myItemsArr = new double[itemsCap]; + 
memVal.itemsArrUpdatable.getDoubleArray(0, myItemsArr, 0, itemsCap); + mine.setDoubleItemsArray(myItemsArr); + } else { //float + mine.setMinFloatValue(memVal.minMaxArrUpdatable.getFloat(0)); + mine.setMaxFloatValue(memVal.minMaxArrUpdatable.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Float.BYTES; + final float[] myItemsArr = new float[itemsCap]; + memVal.itemsArrUpdatable.getFloatArray(0, myItemsArr, 0, itemsCap); + mine.setFloatItemsArray(myItemsArr); + } + } else { //compact + memVal.levelsArrCompact.getIntArray(0, myLevelsArr, 0, mine.getNumLevels() + 1); + mine.setLevelsArray(myLevelsArr); + if (doubleType) { + mine.setMinDoubleValue(memVal.minMaxArrCompact.getDouble(0)); + mine.setMaxDoubleValue(memVal.minMaxArrCompact.getDouble(Double.BYTES)); + final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Double.BYTES; + final double[] myItemsArr = new double[itemsCap]; + memVal.itemsArrCompact.getDoubleArray(0, myItemsArr, 0, itemsCap); + mine.setDoubleItemsArray(myItemsArr); + } else { //float + mine.setMinFloatValue(memVal.minMaxArrCompact.getFloat(0)); + mine.setMaxFloatValue(memVal.minMaxArrCompact.getFloat(Float.BYTES)); + final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Float.BYTES; + final float[] myItemsArr = new float[itemsCap]; + memVal.itemsArrCompact.getFloatArray(0, myItemsArr, 0, itemsCap); + mine.setFloatItemsArray(myItemsArr); + } + } + } + + /** + * Checks the validity of the given value k + * @param k must be greater than 7 and less than 65536. 
+ */ + static void checkK(final int k, final int m) { + if (k < m || k > KllSketch.MAX_K) { + throw new SketchesArgumentException( + "K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k); + } + } + + static void checkM(final int m) { + if (m < KllSketch.MIN_M || m > KllSketch.MAX_M || ((m & 1) == 1)) { + throw new SketchesArgumentException( + "M must be >= 2, <= 8 and even: " + m); + } + } + + /** + * The following code is only valid in the special case of exactly reaching capacity while updating. + * It cannot be used while merging, while reducing k, or anything else. + * @param mine the current sketch + */ + static void compressWhileUpdatingSketch(final KllSketch mine) { + final int level = + findLevelToCompact(mine.getK(), mine.getM(), mine.getNumLevels(), mine.getLevelsArray()); + if (level == mine.getNumLevels() - 1) { + //The level to compact is the top level, thus we need to add a level. + //Be aware that this operation grows the items array, + //shifts the items data and the level boundaries of the data, + //and grows the levels array and increments numLevels_. + KllHelper.addEmptyTopLevelToCompletelyFullSketch(mine); + } + + final int[] myLevelsArr = mine.getLevelsArray(); + final int rawBeg = myLevelsArr[level]; + final int rawEnd = myLevelsArr[level + 1]; + // +2 is OK because we already added a new top level if necessary + final int popAbove = myLevelsArr[level + 2] - rawEnd; + final int rawPop = rawEnd - rawBeg; + final boolean oddPop = isOdd(rawPop); + final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; + final int adjPop = oddPop ? 
rawPop - 1 : rawPop; + final int halfAdjPop = adjPop / 2; + + // level zero might not be sorted, so we must sort it if we wish to compact it + float[] myFloatItemsArr; + double[] myDoubleItemsArr; + + if (mine.sketchType == DOUBLES_SKETCH) { + myFloatItemsArr = null; + myDoubleItemsArr = mine.getDoubleItemsArray(); + if (level == 0) { + if (mine.updatablMemory) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + Arrays.sort(myDoubleItemsArr, adjBeg, adjBeg + adjPop); + mine.setDoubleItemsArray(myDoubleItemsArr); + } else { + Arrays.sort(mine.getDoubleItemsArray(), adjBeg, adjBeg + adjPop); + } + } + if (popAbove == 0) { + if (mine.updatablMemory) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + KllDoublesHelper.randomlyHalveUpDoubles(myDoubleItemsArr, adjBeg, adjPop, KllSketch.random); + mine.setDoubleItemsArray(myDoubleItemsArr); + } else { + KllDoublesHelper.randomlyHalveUpDoubles(mine.getDoubleItemsArray(), adjBeg, adjPop, KllSketch.random); + } + } else { + if (mine.updatablMemory) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + KllDoublesHelper.randomlyHalveDownDoubles(myDoubleItemsArr, adjBeg, adjPop, KllSketch.random); + mine.setDoubleItemsArray(myDoubleItemsArr); + } else { + KllDoublesHelper.randomlyHalveDownDoubles(mine.getDoubleItemsArray(), adjBeg, adjPop, KllSketch.random); + } + if (mine.updatablMemory ) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + KllDoublesHelper.mergeSortedDoubleArrays( + myDoubleItemsArr, adjBeg, halfAdjPop, + myDoubleItemsArr, rawEnd, popAbove, + myDoubleItemsArr, adjBeg + halfAdjPop); + mine.setDoubleItemsArray(myDoubleItemsArr); + } else { + myDoubleItemsArr = mine.getDoubleItemsArray(); + KllDoublesHelper.mergeSortedDoubleArrays( + myDoubleItemsArr, adjBeg, halfAdjPop, + myDoubleItemsArr, rawEnd, popAbove, + myDoubleItemsArr, adjBeg + halfAdjPop); + } + } + } else { //Float sketch + myFloatItemsArr = mine.getFloatItemsArray(); + myDoubleItemsArr = null; + if (level == 0) { + if (mine.updatablMemory) { + 
myFloatItemsArr = mine.getFloatItemsArray(); + Arrays.sort(myFloatItemsArr, adjBeg, adjBeg + adjPop); + mine.setFloatItemsArray(myFloatItemsArr); + } else { + Arrays.sort(mine.getFloatItemsArray(), adjBeg, adjBeg + adjPop); + } + } + if (popAbove == 0) { + if (mine.updatablMemory) { + myFloatItemsArr = mine.getFloatItemsArray(); + KllFloatsHelper.randomlyHalveUpFloats(myFloatItemsArr, adjBeg, adjPop, KllSketch.random); + mine.setFloatItemsArray(myFloatItemsArr); + } else { + KllFloatsHelper.randomlyHalveUpFloats(mine.getFloatItemsArray(), adjBeg, adjPop, KllSketch.random); + } + } else { + if (mine.updatablMemory) { + myFloatItemsArr = mine.getFloatItemsArray(); + KllFloatsHelper.randomlyHalveDownFloats(myFloatItemsArr, adjBeg, adjPop, KllSketch.random); + mine.setFloatItemsArray(myFloatItemsArr); + } else { + KllFloatsHelper.randomlyHalveDownFloats(mine.getFloatItemsArray(), adjBeg, adjPop, KllSketch.random); + } + if (mine.updatablMemory ) { + myFloatItemsArr = mine.getFloatItemsArray(); + KllFloatsHelper.mergeSortedFloatArrays( + myFloatItemsArr, adjBeg, halfAdjPop, + myFloatItemsArr, rawEnd, popAbove, + myFloatItemsArr, adjBeg + halfAdjPop); + mine.setFloatItemsArray(myFloatItemsArr); + } else { + myFloatItemsArr = mine.getFloatItemsArray(); + KllFloatsHelper.mergeSortedFloatArrays( + myFloatItemsArr, adjBeg, halfAdjPop, + myFloatItemsArr, rawEnd, popAbove, + myFloatItemsArr, adjBeg + halfAdjPop); + } + } + } + mine.setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above + + if (oddPop) { + mine.setLevelsArrayAt(level, mine.getLevelsArrayAt(level + 1) - 1); // the current level now contains one item + if (mine.sketchType == DOUBLES_SKETCH) { + mine.setDoubleItemsArrayAt( + mine.getLevelsArrayAt(level), mine.getDoubleItemsArrayAt(rawBeg)); // namely this leftover guy + } else { + mine.setFloatItemsArrayAt( + mine.getLevelsArrayAt(level), mine.getFloatItemsArrayAt(rawBeg)); // namely this leftover guy + } + + } else { + 
mine.setLevelsArrayAt(level, mine.getLevelsArrayAt(level + 1)); // the current level is now empty + } + + // verify that we freed up halfAdjPop array slots just below the current level + assert mine.getLevelsArrayAt(level) == rawBeg + halfAdjPop; + + // finally, we need to shift up the data in the levels below + // so that the freed-up space can be used by level zero + if (level > 0) { + final int amount = rawBeg - mine.getLevelsArrayAt(0); + if (mine.sketchType == DOUBLES_SKETCH) { + if (mine.updatablMemory) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); + mine.setDoubleItemsArray(myDoubleItemsArr); + } else { + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); + } + } else { + if (mine.updatablMemory) { + myFloatItemsArr = mine.getFloatItemsArray(); + System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); + mine.setFloatItemsArray(myFloatItemsArr); + } else { + System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); + } + } + for (int lvl = 0; lvl < level; lvl++) { + mine.setLevelsArrayAtPlusEq(lvl, halfAdjPop); + } + } + } + + /** + * Returns the maximum number of items that this sketch can handle + * @param k The sizing / accuracy parameter of the sketch in items. + * Note: this method actually works for k values up to k = 2^29 and 61 levels, + * however only k values up to (2^16 - 1) are currently used by the sketch. + * @param m the size of the smallest level in items. Default is 8. + * @param numLevels the upper bound number of levels based on n items. + * @return the total item capacity of the sketch. 
+ */ + static int computeTotalItemCapacity(final int k, final int m, final int numLevels) { + long total = 0; + for (int level = 0; level < numLevels; level++) { + total += levelCapacity(k, numLevels, level, m); + } + return (int) total; + } + + static int currentLevelSize(final int level, final int numLevels, final int[] levels) { + if (level >= numLevels) { return 0; } + return levels[level + 1] - levels[level]; + } + /** * Given k, m, and numLevels, this computes and optionally prints the structure of the sketch when the given * number of levels are completely filled. @@ -85,7 +369,7 @@ public static class LevelStats { * @return LevelStats with the final summary of the sketch's cumulative N, * and cumulative items at the given numLevels. */ - public static LevelStats getFinalSketchStatsAtNumLevels( + static LevelStats getFinalSketchStatsAtNumLevels( final int k, final int m, final int numLevels, @@ -120,7 +404,7 @@ public static LevelStats getFinalSketchStatsAtNumLevels( * @param printGrowthScheme if true the entire growth scheme of the sketch will be printed. * @return GrowthStats with the final values of the growth scheme */ - public static GrowthStats getGrowthSchemeForGivenN( + static GrowthStats getGrowthSchemeForGivenN( final int k, final int m, final long n, @@ -163,6 +447,20 @@ public static GrowthStats getGrowthSchemeForGivenN( return gStats; } + // constants were derived as the best fit to 99 percentile empirically measured max error in + // thousands of trials + static int getKFromEpsilon(final double epsilon, final boolean pmf) { + //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. + final double eps = max(epsilon, MIN_EPS); + final double kdbl = pmf + ? exp(log(PMF_COEF / eps) / PMF_EXP) + : exp(log(CDF_COEF / eps) / CDF_EXP); + final double krnd = round(kdbl); + final double del = abs(krnd - kdbl); + final int k = (int) (del < EPS_DELTA_THRESHOLD ? 
krnd : ceil(kdbl)); + return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k)); + } + /** * Given k, m, numLevels, this computes the item capacity of a single level. * @param k the given user sketch configuration parameter @@ -171,7 +469,7 @@ public static GrowthStats getGrowthSchemeForGivenN( * @param level the specific level to compute its item capacity * @return LevelStats with the computed N and items for the given level. */ - public static LevelStats getLevelCapacityItems( + static LevelStats getLevelCapacityItems( final int k, final int m, final int numLevels, @@ -181,66 +479,6 @@ public static LevelStats getLevelCapacityItems( return new LevelStats(n, numLevels, items); } - /** - * Checks the validity of the given value k - * @param k must be greater than 7 and less than 65536. - */ - static void checkK(final int k, final int m) { - if (k < m || k > KllSketch.MAX_K) { - throw new SketchesArgumentException( - "K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k); - } - } - - static void checkM(final int m) { - if (m < KllSketch.MIN_M || m > KllSketch.MAX_M || ((m & 1) == 1)) { - throw new SketchesArgumentException( - "M must be >= 2, <= 8 and even: " + m); - } - } - - /** - * Returns the maximum number of items that this sketch can handle - * @param k The sizing / accuracy parameter of the sketch in items. - * Note: this method actually works for k values up to k = 2^29 and 61 levels, - * however only k values up to (2^16 - 1) are currently used by the sketch. - * @param m the size of the smallest level in items. Default is 8. - * @param numLevels the upper bound number of levels based on n items. - * @return the total item capacity of the sketch. 
- */ - static int computeTotalItemCapacity(final int k, final int m, final int numLevels) { - long total = 0; - for (int level = 0; level < numLevels; level++) { - total += levelCapacity(k, numLevels, level, m); - } - return (int) total; - } - - static int currentLevelSize(final int level, final int numLevels, final int[] levels) { - if (level >= numLevels) { return 0; } - return levels[level + 1] - levels[level]; - } - - /** - * Finds the first level starting with level 0 that exceeds its nominal capacity - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - * @param numLevels one-based number of current levels - * @return level to compact - */ - static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { - int level = 0; - while (true) { - assert level < numLevels; - final int pop = levels[level + 1] - levels[level]; - final int cap = KllHelper.levelCapacity(k, numLevels, level, m); - if (pop >= cap) { - return level; - } - level++; - } - } - /** * Gets the normalized rank error given k and pmf. * Static method version of the getNormalizedRankError(boolean). @@ -281,6 +519,131 @@ static int levelCapacity(final int k, final int numLevels, final int level, fina return (int) Math.max(m, intCapAux(k, depth)); } + /** + * This method is for direct Double and Float sketches only and does the following: + *
        <ul>
        + * <li>Determines if the required sketch bytes will fit in the current Memory. + * If so, it will stretch the positioning of the arrays to fit. Otherwise:</li>
        + * <li>Allocates a new WritableMemory of the required size</li>
        + * <li>Copies over the preamble as is (20 bytes)</li>
        + * <li>Creates new memory regions for Levels Array, Min/Max Array, Items Array, but + * does not fill them. They may contain garbage.</li>
        + * </ul>
      + * The caller is responsible for filling these regions and updating the preamble. + * @param sketch The current sketch that needs to be expanded. + * @param newLevelsArrLen the element length of the new Levels array. + * @param newItemsArrLen the element length of the new Items array. + * @return the new expanded memory with preamble. + */ + static WritableMemory memorySpaceMgmt( + final KllSketch sketch, + final int newLevelsArrLen, + final int newItemsArrLen) { + final KllSketch.SketchType sketchType = sketch.sketchType; + final WritableMemory oldWmem = sketch.wmem; + final int startAdr = DATA_START_ADR; + final int typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; + + int requiredSketchBytes = startAdr; + requiredSketchBytes += newLevelsArrLen * Integer.BYTES; + requiredSketchBytes += 2 * typeBytes; + requiredSketchBytes += newItemsArrLen * typeBytes; + final WritableMemory newWmem; + + if (requiredSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory + newWmem = sketch.memReqSvr.request(oldWmem, requiredSketchBytes); + oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble + } + else { //Expand or contract in current memory + newWmem = oldWmem; + } + + int offset = startAdr; + //LEVELS ARR + int lengthBytes = newLevelsArrLen * Integer.BYTES; + sketch.setLevelsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); // + offset += lengthBytes; + //MIN MAX ARR + lengthBytes = 2 * typeBytes; + sketch.setMinMaxArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); + offset += lengthBytes; + //ITEMS ARR + lengthBytes = newItemsArrLen * typeBytes; + sketch.setItemsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); + assert requiredSketchBytes <= newWmem.getCapacity(); + return newWmem; + } + + static String outputData(final boolean doubleType, final int numLevels, final int[] levelsArr, + final float[] floatItemsArr, final double[] doubleItemsArr) { + final StringBuilder sb = new StringBuilder(); + 
sb.append("### KLL items data {index, item}:").append(Util.LS); + if (levelsArr[0] > 0) { + sb.append(" Garbage:" + Util.LS); + if (doubleType) { + for (int i = 0; i < levelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); + } + } else { + for (int i = 0; i < levelsArr[0]; i++) { + sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); + } + } + } + int level = 0; + if (doubleType) { + while (level < numLevels) { + final int fromIndex = levelsArr[level]; + final int toIndex = levelsArr[level + 1]; // exclusive + if (fromIndex < toIndex) { + sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); + } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); + } + level++; + } + } + else { + while (level < numLevels) { + final int fromIndex = levelsArr[level]; + final int toIndex = levelsArr[level + 1]; // exclusive + if (fromIndex <= toIndex) { + sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); + sb.append(Util.LS); + } + + for (int i = fromIndex; i < toIndex; i++) { + sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); + } + level++; + } + } + sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); + sb.append(Util.LS); + sb.append("### End items data").append(Util.LS); + + return sb.toString(); + } + + static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) { + final StringBuilder sb = new StringBuilder(); + sb.append("### KLL levels array:").append(Util.LS) + .append(" level, offset: nominal capacity, actual size").append(Util.LS); + int level = 0; + for ( ; level < numLevels; level++) { + sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") + .append(KllHelper.levelCapacity(k, numLevels, level, m)) + 
.append(", ").append(KllHelper.currentLevelSize(level, numLevels, levelsArr)).append(Util.LS); + } + sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") + .append(Util.LS); + sb.append("### End levels array").append(Util.LS); + return sb.toString(); + } + static long sumTheSampleWeights(final int num_levels, final int[] levels) { long total = 0; long weight = 1; @@ -291,6 +654,146 @@ static long sumTheSampleWeights(final int num_levels, final int[] levels) { return total; } + static byte[] toCompactByteArrayImpl(final KllSketch mine) { + final byte[] byteArr = new byte[mine.getCurrentCompactSerializedSizeBytes()]; + final WritableMemory wmem = WritableMemory.writableWrap(byteArr); + loadFirst8Bytes(mine, wmem, false); + if (mine.getN() == 0) { return byteArr; } //empty + final boolean doubleType = (mine.sketchType == DOUBLES_SKETCH); + + //load data + int offset = DATA_START_ADR_SINGLE_ITEM; + final int[] myLevelsArr = mine.getLevelsArray(); + if (mine.getN() == 1) { //single item + if (doubleType) { + wmem.putDouble(offset, mine.getDoubleItemsArray()[myLevelsArr[0]]); + } else { + wmem.putFloat(offset, mine.getFloatItemsArray()[myLevelsArr[0]]); + } + } else { // n > 1 + //remainder of preamble after first 8 bytes + setMemoryN(wmem, mine.getN()); + setMemoryMinK(wmem, mine.getMinK()); + setMemoryNumLevels(wmem, mine.getNumLevels()); + offset = DATA_START_ADR; + + //LOAD LEVELS ARR the last integer in levels_ is NOT serialized + final int len = myLevelsArr.length - 1; + wmem.putIntArray(offset, myLevelsArr, 0, len); + offset += len * Integer.BYTES; + + //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY + if (doubleType) { + wmem.putDouble(offset,mine. 
getMinDoubleValue()); + offset += Double.BYTES; + wmem.putDouble(offset, mine.getMaxDoubleValue()); + offset += Double.BYTES; + wmem.putDoubleArray(offset, mine.getDoubleItemsArray(), myLevelsArr[0], mine.getNumRetained()); + } else { + wmem.putFloat(offset, mine.getMinFloatValue()); + offset += Float.BYTES; + wmem.putFloat(offset, mine.getMaxFloatValue()); + offset += Float.BYTES; + wmem.putFloatArray(offset, mine.getFloatItemsArray(), myLevelsArr[0], mine.getNumRetained()); + } + } + return byteArr; + } + + @SuppressWarnings("null") + static String toStringImpl(final KllSketch mine, final boolean withLevels, final boolean withData) { + final boolean doubleType = (mine.sketchType == DOUBLES_SKETCH); + final int k = mine.getK(); + final int m = mine.getM(); + final String epsPct = String.format("%.3f%%", mine.getNormalizedRankError(false) * 100); + final String epsPMFPct = String.format("%.3f%%", mine.getNormalizedRankError(true) * 100); + final StringBuilder sb = new StringBuilder(); + final String skType = (mine.updatablMemory ? "Direct" : "") + (doubleType ? "Doubles" : "Floats"); + sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); + sb.append(" K : ").append(k).append(Util.LS); + sb.append(" Dynamic min K : ").append(mine.getMinK()).append(Util.LS); + sb.append(" M : ").append(m).append(Util.LS); + sb.append(" N : ").append(mine.getN()).append(Util.LS); + sb.append(" Epsilon : ").append(epsPct).append(Util.LS); + sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); + sb.append(" Empty : ").append(mine.isEmpty()).append(Util.LS); + sb.append(" Estimation Mode : ").append(mine.isEstimationMode()).append(Util.LS); + sb.append(" Levels : ").append(mine.getNumLevels()).append(Util.LS); + sb.append(" Level 0 Sorted : ").append(mine.isLevelZeroSorted()).append(Util.LS); + final int cap = (doubleType) ? 
mine.getDoubleItemsArray().length : mine.getFloatItemsArray().length; + sb.append(" Capacity Items : ").append(cap).append(Util.LS); + sb.append(" Retained Items : ").append(mine.getNumRetained()).append(Util.LS); + if (mine.updatablMemory) { + sb.append(" Updatable Storage Bytes: ").append(mine.getCurrentUpdatableSerializedSizeBytes()).append(Util.LS); + } else { + sb.append(" Compact Storage Bytes : ").append(mine.getCurrentCompactSerializedSizeBytes()).append(Util.LS); + } + + if (doubleType) { + sb.append(" Min Value : ").append(mine.getMinDoubleValue()).append(Util.LS); + sb.append(" Max Value : ").append(mine.getMaxDoubleValue()).append(Util.LS); + } else { + sb.append(" Min Value : ").append(mine.getMinFloatValue()).append(Util.LS); + sb.append(" Max Value : ").append(mine.getMaxFloatValue()).append(Util.LS); + } + sb.append("### End sketch summary").append(Util.LS); + + final int myNumLevels = mine.getNumLevels(); + final int[] myLevelsArr = mine.getLevelsArray(); + double[] myDoubleItemsArr = null; + float[] myFloatItemsArr = null; + if (doubleType) { + myDoubleItemsArr = mine.getDoubleItemsArray(); + } else { + myFloatItemsArr = mine.getFloatItemsArray(); + } + if (withLevels) { + sb.append(outputLevels(k, m, myNumLevels, myLevelsArr)); + } + if (withData) { + sb.append(outputData(doubleType, myNumLevels, myLevelsArr, myFloatItemsArr, myDoubleItemsArr)); + } + return sb.toString(); + } + + static byte[] toUpdatableByteArrayImpl(final KllSketch mine) { + final byte[] byteArr = new byte[mine.getCurrentUpdatableSerializedSizeBytes()]; + final WritableMemory wmem = WritableMemory.writableWrap(byteArr); + loadFirst8Bytes(mine, wmem, true); + //remainder of preamble after first 8 bytes + setMemoryN(wmem, mine.getN()); + setMemoryMinK(wmem, mine.getMinK()); + setMemoryNumLevels(wmem, mine.getNumLevels()); + + //load data + final boolean doubleType = (mine.sketchType == DOUBLES_SKETCH); + int offset = DATA_START_ADR; + + //LOAD LEVELS ARRAY the last integer in 
levels_ IS serialized + final int[] myLevelsArr = mine.getLevelsArray(); + final int len = myLevelsArr.length; + wmem.putIntArray(offset, myLevelsArr, 0, len); + offset += len * Integer.BYTES; + + //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY + if (doubleType) { + wmem.putDouble(offset, mine.getMinDoubleValue()); + offset += Double.BYTES; + wmem.putDouble(offset, mine.getMaxDoubleValue()); + offset += Double.BYTES; + final double[] doubleItemsArr = mine.getDoubleItemsArray(); + wmem.putDoubleArray(offset, doubleItemsArr, 0, doubleItemsArr.length); + } else { + wmem.putFloat(offset, mine.getMinFloatValue()); + offset += Float.BYTES; + wmem.putFloat(offset,mine.getMaxFloatValue()); + offset += Float.BYTES; + final float[] floatItemsArr = mine.getFloatItemsArray(); + wmem.putFloatArray(offset, floatItemsArr, 0, floatItemsArr.length); + } + return byteArr; + } + /** * Returns very conservative upper bound of the number of levels based on n. * @param n the length of the stream @@ -300,6 +803,118 @@ static int ubOnNumLevels(final long n) { return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); } + /** + * This grows the levels arr by 1 (if needed) and increases the capacity of the items array + * at the bottom. Only numLevels, the levels array and the items array are affected. 
+ * @param mine the current sketch + */ + @SuppressWarnings("null") + private static void addEmptyTopLevelToCompletelyFullSketch(final KllSketch mine) { + final int[] myCurLevelsArr = mine.getLevelsArray(); + final int myCurNumLevels = mine.getNumLevels(); + final int myCurTotalItemsCapacity = myCurLevelsArr[myCurNumLevels]; + double minDouble = Double.NaN; + double maxDouble = Double.NaN; + float minFloat = Float.NaN; + float maxFloat = Float.NaN; + + double[] myCurDoubleItemsArr = null; + float[] myCurFloatItemsArr = null; + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final int myNewTotalItemsCapacity; + + float[] myNewFloatItemsArr = null; + double[] myNewDoubleItemsArr = null; + + if (mine.sketchType == DOUBLES_SKETCH) { + minDouble = mine.getMinDoubleValue(); + maxDouble = mine.getMaxDoubleValue(); + myCurDoubleItemsArr = mine.getDoubleItemsArray(); + //assert we are following a certain growth scheme + assert myCurDoubleItemsArr.length == myCurTotalItemsCapacity; + } else { //FLOATS_SKETCH + minFloat = mine.getMinFloatValue(); + maxFloat = mine.getMaxFloatValue(); + myCurFloatItemsArr = mine.getFloatItemsArray(); + assert myCurFloatItemsArr.length == myCurTotalItemsCapacity; + } + assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme + + final int deltaItemsCap = levelCapacity(mine.getK(), myCurNumLevels + 1, 0, mine.getM()); + myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap; + + // Check if growing the levels arr if required. + // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it + final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; + + // GROW LEVELS ARRAY + if (growLevelsArr) { + //grow levels arr by one and copy the old data to the new array, extra space at the top. 
+ myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); + assert myNewLevelsArr.length == myCurLevelsArr.length + 1; + myNewNumLevels = myCurNumLevels + 1; + mine.incNumLevels(); //increment the class member + } else { + myNewLevelsArr = myCurLevelsArr; + myNewNumLevels = myCurNumLevels; + } + // This loop updates all level indices EXCLUDING the "extra" index at the top + for (int level = 0; level <= myNewNumLevels - 1; level++) { + myNewLevelsArr[level] += deltaItemsCap; + } + myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCapacity; // initialize the new "extra" index at the top + + // GROW ITEMS ARRAY + if (mine.sketchType == DOUBLES_SKETCH) { + myNewDoubleItemsArr = new double[myNewTotalItemsCapacity]; + // copy and shift the current data into the new array + System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCapacity); + } else { + myNewFloatItemsArr = new float[myNewTotalItemsCapacity]; + // copy and shift the current items data into the new array + System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCapacity); + } + + //MEMORY SPACE MANAGEMENT + if (mine.updatablMemory) { + mine.wmem = memorySpaceMgmt(mine, myNewLevelsArr.length, myNewTotalItemsCapacity); + } + //update our sketch with new expanded spaces + mine.setNumLevels(myNewNumLevels); + mine.setLevelsArray(myNewLevelsArr); + if (mine.sketchType == DOUBLES_SKETCH) { + mine.setMinDoubleValue(minDouble); + mine.setMaxDoubleValue(maxDouble); + mine.setDoubleItemsArray(myNewDoubleItemsArr); + } else { //Float sketch + mine.setMinFloatValue(minFloat); + mine.setMaxFloatValue(maxFloat); + mine.setFloatItemsArray(myNewFloatItemsArr); + } + } + + /** + * Finds the first level starting with level 0 that exceeds its nominal capacity + * @param k configured size of sketch. Range [m, 2^16] + * @param m minimum level size. Default is 8. 
+ * @param numLevels one-based number of current levels + * @return level to compact + */ + private static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { + int level = 0; + while (true) { + assert level < numLevels; + final int pop = levels[level + 1] - levels[level]; + final int cap = KllHelper.levelCapacity(k, numLevels, level, m); + if (pop >= cap) { + return level; + } + level++; + } + } + /** * Computes the actual item capacity of a given level given its depth index. * If the depth of levels exceeds 30, this uses a folding technique to accurately compute the @@ -331,6 +946,30 @@ private static long intCapAuxAux(final long k, final int depth) { return result; } + private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem, + final boolean updatable) { + final boolean empty = sk.getN() == 0; + final boolean lvlZeroSorted = sk.isLevelZeroSorted(); + final boolean singleItem = sk.getN() == 1; + final boolean doubleType = (sk.sketchType == DOUBLES_SKETCH); + final int preInts = updatable + ? PREAMBLE_INTS_FULL + : (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL; + //load the preamble + setMemoryPreInts(wmem, preInts); + final int server = updatable ? SERIAL_VERSION_UPDATABLE + : (singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); + setMemorySerVer(wmem, server); + setMemoryFamilyID(wmem, Family.KLL.getID()); + setMemoryEmptyFlag(wmem, empty); + setMemoryLevelZeroSortedFlag(wmem, lvlZeroSorted); + setMemorySingleItemFlag(wmem, singleItem); + setMemoryDoubleSketchFlag(wmem, doubleType); + setMemoryUpdatableFlag(wmem, updatable); + setMemoryK(wmem, sk.getK()); + setMemoryM(wmem, sk.getM()); + } + /** * @param fmt format * @param args arguments @@ -346,5 +985,5 @@ private static void printf(final String fmt, final Object ... 
args) { private static void println(final Object o) { System.out.println(o.toString()); } -} +} diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index 187866dfe..e64a4a1c0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -75,7 +75,7 @@ final class KllMemoryValidate { boolean singleItem; final boolean level0Sorted; final boolean doublesSketch; - final boolean updatable; + final boolean updatableMemory; final int k; final int m; final int memCapacity; @@ -112,14 +112,14 @@ final class KllMemoryValidate { level0Sorted = getMemoryLevelZeroSortedFlag(srcMem); singleItem = getMemorySingleItemFlag(srcMem); doublesSketch = getMemoryDoubleSketchFlag(srcMem); - updatable = getMemoryUpdatableFlag(srcMem); + updatableMemory = getMemoryUpdatableFlag(srcMem); k = getMemoryK(srcMem); m = getMemoryM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatable) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); } + if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatableMemory) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); } - if (updatable) { updatableMemoryValidate((WritableMemory) srcMem); } + if (updatableMemory) { updatableMemoryValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 548bd045e..0c5f7d273 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -205,7 +205,7 @@ static String memoryToString(final Memory mem) { sb.append(" 1 LEVEL_ZERO_SORTED : ").append(memChk.level0Sorted).append(LS); sb.append(" 2 SINGLE_ITEM COMPACT: ").append(memChk.singleItem).append(LS); 
sb.append(" 3 DOUBLES_SKETCH : ").append(memChk.doublesSketch).append(LS); - sb.append(" 4 UPDATABLE : ").append(memChk.updatable).append(LS); + sb.append(" 4 UPDATABLE : ").append(memChk.updatableMemory).append(LS); sb.append("Bytes 4-5 : K : ").append(memChk.k).append(LS); sb.append("Byte 6 : Min Level Cap, M : ").append(memChk.m).append(LS); sb.append("Byte 7 : (Reserved) : ").append(LS); diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index b366e38b1..804148d47 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -19,44 +19,15 @@ package org.apache.datasketches.kll; -import static java.lang.Math.abs; -import static java.lang.Math.ceil; -import static java.lang.Math.exp; -import static java.lang.Math.log; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.Math.round; -import static org.apache.datasketches.Util.isOdd; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; -import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryDoubleSketchFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryEmptyFlag; -import static 
org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySingleItemFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryUpdatableFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import java.util.Arrays; import java.util.Random; -import org.apache.datasketches.Family; import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -92,48 +63,70 @@ * @author Lee Rhodes, Kevin Lang */ public abstract class KllSketch { - static final double EPS_DELTA_THRESHOLD = 1E-6; - static final double MIN_EPS = 4.7634E-5; - static final double PMF_COEF = 2.446; - static final double PMF_EXP = 0.9433; - static final double CDF_COEF = 2.296; - static final double CDF_EXP = 0.9723; - static final Random random = new Random(); - SketchType sketchType; - WritableMemory wmem; - MemoryRequestServer memReqSvr; - boolean direct; + + public enum SketchType { FLOATS_SKETCH, DOUBLES_SKETCH } + + enum Error { + TGT_IS_IMMUTABLE("Given sketch Memory is immutable, cannot write."), + SRC_MUST_BE_DIRECT("Given sketch must be of type Direct."), + SRC_MUST_BE_DOUBLE("Given sketch must be of type Double."), + 
SRC_MUST_BE_FLOAT("Given sketch must be of type Float."), + SRC_CANNOT_BE_DIRECT("Given sketch cannot be of type Direct."), + MUST_NOT_CALL("This is an artifact of inheritance and should never be called."); + + private String msg; + + private Error(final String msg) { + this.msg = msg; + } + + final static void kllSketchThrow(final Error errType) { + throw new SketchesArgumentException(errType.getMessage()); + } + + private String getMessage() { + return msg; + } + } /** * The default value of K */ public static final int DEFAULT_K = 200; + /** + * The maximum value of K + */ + public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short + /** * The default value of M. The parameter m is the minimum level size in number of items. * Currently, the public default is 8, but this can be overridden using Package Private methods to * 2, 4, 6 or 8, and the sketch works just fine. The value 8 was chosen as a compromise between speed and size. - * Choosing smaller values of m less than 8 will make the sketch much slower. + * Choosing smaller values of m less than 8 will make the sketch slower. */ static final int DEFAULT_M = 8; /** - * The maximum value of K - */ - public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short - - /** - * The maximum value of M. See the Javadoc on DEFAULT_M. + * The maximum value of M. + * @see #DEFAULT_M */ static final int MAX_M = 8; /** - * The minimum value of M. See the Javadoc on DEFAULT_M. + * The minimum value of M. 
+ * @see #DEFAULT_M */ static final int MIN_M = 2; + static final Random random = new Random(); + final SketchType sketchType; + final MemoryRequestServer memReqSvr; + final boolean updatablMemory; + WritableMemory wmem; + /** - * + * Constructor * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH * @param wmem the current WritableMemory or null * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory @@ -142,18 +135,14 @@ public abstract class KllSketch { this.sketchType = sketchType; this.wmem = wmem; if (wmem != null) { - this.direct = true; + this.updatablMemory = memReqSvr != null; this.memReqSvr = memReqSvr; } else { - this.direct = false; + this.updatablMemory = false; this.memReqSvr = null; } } -public enum SketchType { FLOATS_SKETCH, DOUBLES_SKETCH } - - //Static methods - /** * Gets the approximate value of k to use given epsilon, the normalized rank error. * @param epsilon the normalized rank error between zero and one. @@ -166,18 +155,8 @@ public enum SketchType { FLOATS_SKETCH, DOUBLES_SKETCH } * {@link org.apache.datasketches.kll}

      * @return the value of k given a value of epsilon. */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials public static int getKFromEpsilon(final double epsilon, final boolean pmf) { - //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. - final double eps = max(epsilon, MIN_EPS); - final double kdbl = pmf - ? exp(log(PMF_COEF / eps) / PMF_EXP) - : exp(log(CDF_COEF / eps) / CDF_EXP); - final double krnd = round(kdbl); - final double del = abs(krnd - kdbl); - final int k = (int) (del < EPS_DELTA_THRESHOLD ? krnd : ceil(kdbl)); - return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k)); + return KllHelper.getKFromEpsilon(epsilon, pmf); } /** @@ -202,14 +181,14 @@ public static int getMaxSerializedSizeBytes(final int k, final long n) { * @param k parameter that controls size of the sketch and accuracy of estimates * @param n stream length * @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH - * @param updatable true if updatable form, otherwise the standard compact form. + * @param updatableMemory true if updatableMemory form, otherwise the standard compact form. * @return upper bound on the serialized size of a KllSketch. */ public static int getMaxSerializedSizeBytes(final int k, final long n, - final SketchType sketchType, final boolean updatable) { + final SketchType sketchType, final boolean updatableMemory) { final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, sketchType, false); - return updatable ? gStats.updatableBytes : gStats.compactBytes; + return updatableMemory ? gStats.updatableBytes : gStats.compactBytes; } /** @@ -225,22 +204,12 @@ public static double getNormalizedRankError(final int k, final boolean pmf) { return KllHelper.getNormalizedRankError(k, pmf); } - /** - * Returns the current number of bytes this Sketch would require if serialized. 
- * @return the number of bytes this sketch would require if serialized. - */ - public int getSerializedSizeBytes() { - return (direct) - ? getCurrentUpdatableSerializedSizeBytes() - : getCurrentCompactSerializedSizeBytes(); - } - //numItems can be either numRetained, or current max capacity at given K and numLevels. static int getCurrentSerializedSizeBytes(final int numLevels, final int numItems, - final SketchType sketchType, final boolean updatable) { + final SketchType sketchType, final boolean updatableMemory) { final int typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; int levelsBytes = 0; - if (updatable) { + if (updatableMemory) { levelsBytes = (numLevels + 1) * Integer.BYTES; } else { if (numItems == 0) { return N_LONG_ADR; } @@ -250,31 +219,6 @@ static int getCurrentSerializedSizeBytes(final int numLevels, final int numItems return DATA_START_ADR + levelsBytes + (numItems + 2) * typeBytes; //+2 is for min & max } - enum Error { - TGT_IS_IMMUTABLE("Given sketch Memory is immutable, cannot write."), - SRC_MUST_BE_DIRECT("Given sketch must be of type Direct."), - SRC_MUST_BE_DOUBLE("Given sketch must be of type Double."), - SRC_MUST_BE_FLOAT("Given sketch must be of type Float."), - SRC_CANNOT_BE_DIRECT("Given sketch cannot be of type Direct."), - MUST_NOT_CALL("This is an artifact of inheritance and should never be called."); - - private String msg; - - private Error(final String msg) { - this.msg = msg; - } - - private String getMessage() { - return msg; - } - - final static void kllSketchThrow(final Error errType) { - throw new SketchesArgumentException(errType.getMessage()); - } - } - - //Public Non-static methods - /** * Returns the current compact number of bytes this sketch would require to store. * @return the current compact number of bytes this sketch would require to store. 
@@ -284,8 +228,8 @@ public final int getCurrentCompactSerializedSizeBytes() { } /** - * Returns the current updatable number of bytes this sketch would require to store. - * @return the current updatable number of bytes this sketch would require to store. + * Returns the current updatableMemory number of bytes this sketch would require to store. + * @return the current updatableMemory number of bytes this sketch would require to store. */ public final int getCurrentUpdatableSerializedSizeBytes() { final int itemCap = KllHelper.computeTotalItemCapacity(getK(), getM(), getNumLevels()); @@ -298,15 +242,6 @@ public final int getCurrentUpdatableSerializedSizeBytes() { */ public abstract int getK(); - /** - * Returns the configured parameter m, which is the minimum level size in number of items. - * Currently, the public default is 8, but this can be overridden using Package Private methods to - * 2, 4, 6 or 8, and the sketch works just fine. The value 8 was chosen as a compromise between speed and size. - * Choosing smaller values of m will make the sketch much slower. - * @return the configured parameter m - */ - abstract int getM(); - /** * Returns the length of the input stream in items. * @return stream length @@ -337,6 +272,16 @@ public final int getNumRetained() { return getLevelsArray()[getNumLevels()] - getLevelsArray()[0]; } + /** + * Returns the current number of bytes this Sketch would require if serialized. + * @return the number of bytes this sketch would require if serialized. + */ + public int getSerializedSizeBytes() { + return (updatablMemory) + ? getCurrentUpdatableSerializedSizeBytes() + : getCurrentCompactSerializedSizeBytes(); + } + /** * This returns the WritableMemory for Direct type sketches, * otherwise returns null. @@ -346,10 +291,6 @@ public WritableMemory getWritableMemory() { return wmem; } - public final boolean isDirect() { - return direct; - } - /** * Returns true if this sketch is empty. 
* @return empty flag @@ -366,6 +307,10 @@ public final boolean isEstimationMode() { return getNumLevels() > 1; } + public final boolean isUpdatableMemory() { + return updatablMemory; + } + /** * This resets the current sketch back to zero entries. * It retains key parameters such as k and @@ -378,7 +323,7 @@ public final boolean isEstimationMode() { * @return serialized sketch in a compact byte array form. */ public byte[] toByteArray() { - return toCompactByteArrayImpl(); + return KllHelper.toCompactByteArrayImpl(this); } @Override @@ -393,273 +338,44 @@ public final String toString() { * @return string representation of sketch summary */ public String toString(final boolean withLevels, final boolean withData) { - return toStringImpl(withLevels, withData); + return KllHelper.toStringImpl(this, withLevels, withData); } /** - * Returns serialized sketch in an updatable byte array form. - * @return serialized sketch in an updatable byte array form. + * Returns serialized sketch in an updatableMemory byte array form. + * @return serialized sketch in an updatableMemory byte array form. 
*/ public byte[] toUpdatableByteArray() { - return toUpdatableByteArrayImpl(); - } - - //package-private non-static methods - - final void buildHeapKllSketchFromMemory(final KllMemoryValidate memVal) { - final boolean doubleType = (sketchType == DOUBLES_SKETCH); - final boolean updatable = memVal.updatable; - setLevelZeroSorted(memVal.level0Sorted); - setN(memVal.n); - setMinK(memVal.minK); - setNumLevels(memVal.numLevels); - final int[] myLevelsArr = new int[getNumLevels() + 1]; - - if (updatable) { - memVal.levelsArrUpdatable.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); - setLevelsArray(myLevelsArr); - if (doubleType) { - setMinDoubleValue(memVal.minMaxArrUpdatable.getDouble(0)); - setMaxDoubleValue(memVal.minMaxArrUpdatable.getDouble(Double.BYTES)); - final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Double.BYTES; - final double[] myItemsArr = new double[itemsCap]; - memVal.itemsArrUpdatable.getDoubleArray(0, myItemsArr, 0, itemsCap); - setDoubleItemsArray(myItemsArr); - } else { //float - setMinFloatValue(memVal.minMaxArrUpdatable.getFloat(0)); - setMaxFloatValue(memVal.minMaxArrUpdatable.getFloat(Float.BYTES)); - final int itemsCap = (int)memVal.itemsArrUpdatable.getCapacity() / Float.BYTES; - final float[] myItemsArr = new float[itemsCap]; - memVal.itemsArrUpdatable.getFloatArray(0, myItemsArr, 0, itemsCap); - setFloatItemsArray(myItemsArr); - } - } else { //compact - memVal.levelsArrCompact.getIntArray(0, myLevelsArr, 0, getNumLevels() + 1); - setLevelsArray(myLevelsArr); - if (doubleType) { - setMinDoubleValue(memVal.minMaxArrCompact.getDouble(0)); - setMaxDoubleValue(memVal.minMaxArrCompact.getDouble(Double.BYTES)); - final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Double.BYTES; - final double[] myItemsArr = new double[itemsCap]; - memVal.itemsArrCompact.getDoubleArray(0, myItemsArr, 0, itemsCap); - setDoubleItemsArray(myItemsArr); - } else { //float - setMinFloatValue(memVal.minMaxArrCompact.getFloat(0)); - 
setMaxFloatValue(memVal.minMaxArrCompact.getFloat(Float.BYTES)); - final int itemsCap = (int)memVal.itemsArrCompact.getCapacity() / Float.BYTES; - final float[] myItemsArr = new float[itemsCap]; - memVal.itemsArrCompact.getFloatArray(0, myItemsArr, 0, itemsCap); - setFloatItemsArray(myItemsArr); - } - } + return KllHelper.toUpdatableByteArrayImpl(this); } /** - * @return full size of internal items array including garbage; for a floats sketch this will be null. + * @return full size of internal items array including garbage. */ abstract double[] getDoubleItemsArray(); - final double getDoubleRank(final double value) { - if (isEmpty()) { return Double.NaN; } - int level = 0; - int weight = 1; - long total = 0; - final double[] myDoubleItemsArr = getDoubleItemsArray(); - final int[] myLevelsArr = getLevelsArray(); - while (level < getNumLevels()) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - for (int i = fromIndex; i < toIndex; i++) { - if (myDoubleItemsArr[i] < value) { - total += weight; - } else if (level > 0 || isLevelZeroSorted()) { - break; // levels above 0 are sorted, no point comparing further - } - } - level++; - weight *= 2; - } - return (double) total / getN(); - } - - final double[] getDoublesPmfOrCdf(final double[] splitPoints, final boolean isCdf) { - if (isEmpty()) { return null; } - KllDoublesHelper.validateDoubleValues(splitPoints); - final double[] buckets = new double[splitPoints.length + 1]; - final int myNumLevels = getNumLevels(); - final int[] myLevelsArr = getLevelsArray(); - int level = 0; - int weight = 1; - while (level < myNumLevels) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - if (level == 0 && !isLevelZeroSorted()) { - incrementDoublesBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } else { - incrementDoublesBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } - 
level++; - weight *= 2; - } - // normalize and, if CDF, convert to cumulative - if (isCdf) { - double subtotal = 0; - for (int i = 0; i < buckets.length; i++) { - subtotal += buckets[i]; - buckets[i] = subtotal / getN(); - } - } else { - for (int i = 0; i < buckets.length; i++) { - buckets[i] /= getN(); - } - } - return buckets; - } - - final double getDoublesQuantile(final double fraction) { - if (isEmpty()) { return Double.NaN; } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - //These two assumptions make KLL compatible with the previous classic Quantiles Sketch - if (fraction == 0.0) { return getMinDoubleValue(); } - if (fraction == 1.0) { return getMaxDoubleValue(); } - final KllDoublesQuantileCalculator quant = getDoublesQuantileCalculator(); - return quant.getQuantile(fraction); - } - - final double[] getDoublesQuantiles(final double[] fractions) { - if (isEmpty()) { return null; } - KllDoublesQuantileCalculator quant = null; - final double[] quantiles = new double[fractions.length]; - for (int i = 0; i < fractions.length; i++) { - final double fraction = fractions[i]; - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (fraction == 0.0) { quantiles[i] = getMinDoubleValue(); } - else if (fraction == 1.0) { quantiles[i] = getMaxDoubleValue(); } - else { - if (quant == null) { - quant = getDoublesQuantileCalculator(); - } - quantiles[i] = quant.getQuantile(fraction); - } - } - return quantiles; - } - - /** - * MinK is the value of K that results from a merge with a sketch configured with a value of K lower than - * the k of this sketch. This value is then used in computing the estimated upper and lower bounds of error. - * @return The minimum K as a result of merging with lower values of k. 
- */ - abstract int getMinK(); + abstract double getDoubleItemsArrayAt(int index); /** - * @return full size of internal items array including garbage; for a doubles sketch this will be null. + * @return full size of internal items array including garbage. */ abstract float[] getFloatItemsArray(); - final double getFloatRank(final float value) { - if (isEmpty()) { return Double.NaN; } - int level = 0; - int weight = 1; - long total = 0; - final float[] myFloatItemsArr = getFloatItemsArray(); - final int[] myLevelsArr = getLevelsArray(); - while (level < getNumLevels()) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - for (int i = fromIndex; i < toIndex; i++) { - if (myFloatItemsArr[i] < value) { - total += weight; - } else if (level > 0 || isLevelZeroSorted()) { - break; // levels above 0 are sorted, no point comparing further - } - } - level++; - weight *= 2; - } - return (double) total / getN(); - } - - final double[] getFloatsPmfOrCdf(final float[] splitPoints, final boolean isCdf) { - if (isEmpty()) { return null; } - KllFloatsHelper.validateFloatValues(splitPoints); - final double[] buckets = new double[splitPoints.length + 1]; - final int myNumLevels = getNumLevels(); - final int[] myLevelsArr = getLevelsArray(); - int level = 0; - int weight = 1; - while (level < myNumLevels) { - final int fromIndex = myLevelsArr[level]; - final int toIndex = myLevelsArr[level + 1]; // exclusive - if (level == 0 && !isLevelZeroSorted()) { - incrementFloatBucketsUnsortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } else { - incrementFloatBucketsSortedLevel(fromIndex, toIndex, weight, splitPoints, buckets); - } - level++; - weight *= 2; - } - // normalize and, if CDF, convert to cumulative - if (isCdf) { - double subtotal = 0; - for (int i = 0; i < buckets.length; i++) { - subtotal += buckets[i]; - buckets[i] = subtotal / getN(); - } - } else { - for (int i = 0; i < buckets.length; i++) { - buckets[i] /= 
getN(); - } - } - return buckets; - } - - final float getFloatsQuantile(final double fraction) { - if (isEmpty()) { return Float.NaN; } - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - //These two assumptions make KLL compatible with the previous classic Quantiles Sketch - if (fraction == 0.0) { return getMinFloatValue(); } - if (fraction == 1.0) { return getMaxFloatValue(); } - - final KllFloatsQuantileCalculator quant = getFloatsQuantileCalculator(); - return quant.getQuantile(fraction); - } - - final float[] getFloatsQuantiles(final double[] fractions) { - if (isEmpty()) { return null; } - KllFloatsQuantileCalculator quant = null; - final float[] quantiles = new float[fractions.length]; - for (int i = 0; i < fractions.length; i++) { - final double fraction = fractions[i]; - if (fraction < 0.0 || fraction > 1.0) { - throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); - } - if (fraction == 0.0) { quantiles[i] = getMinFloatValue(); } - else if (fraction == 1.0) { quantiles[i] = getMaxFloatValue(); } - else { - if (quant == null) { - quant = getFloatsQuantileCalculator(); - } - quantiles[i] = quant.getQuantile(fraction); - } - } - return quantiles; - } - - abstract double getDoubleItemsArrayAt(int index); - abstract float getFloatItemsArrayAt(int index); abstract int[] getLevelsArray(); abstract int getLevelsArrayAt(int index); + /** + * Returns the configured parameter m, which is the minimum level size in number of items. + * Currently, the public default is 8, but this can be overridden using Package Private methods to + * 2, 4, 6 or 8, and the sketch works just fine. The value 8 was chosen as a compromise between speed and size. + * Choosing smaller values of m will make the sketch much slower. 
+ * @return the configured parameter m + */ + abstract int getM(); + abstract double getMaxDoubleValue(); abstract float getMaxFloatValue(); @@ -668,6 +384,13 @@ final float[] getFloatsQuantiles(final double[] fractions) { abstract float getMinFloatValue(); + /** + * MinK is the value of K that results from a merge with a sketch configured with a value of K lower than + * the k of this sketch. This value is then used in computing the estimated upper and lower bounds of error. + * @return The minimum K as a result of merging with lower values of k. + */ + abstract int getMinK(); + abstract int getNumLevels(); abstract void incN(); @@ -680,289 +403,10 @@ final float[] getFloatsQuantiles(final double[] fractions) { abstract boolean isLevelZeroSorted(); - /** - * This method is for direct Double and Float sketches only and does the following: - *
<ul> - * <li>Determines if the required sketch bytes will fit in the current Memory. - * If so, it will stretch the positioning of the arrays to fit. Otherwise: - * <li>Allocates a new WritableMemory of the required size</li> - * <li>Copies over the preamble as is (20 bytes)</li> - * <li>Creates new memory regions for Levels Array, Min/Max Array, Items Array, but - * does not fill them. They may contain garbage.</li> - * </ul>
      - * The caller is responsible for filling these regions and updating the preamble. - * @param sketch The current sketch that needs to be expanded. - * @param newLevelsArrLen the element length of the new Levels array. - * @param newItemsArrLen the element length of the new Items array. - * @return the new expanded memory with preamble. - */ - static WritableMemory memorySpaceMgmt( - final KllSketch sketch, - final int newLevelsArrLen, - final int newItemsArrLen) { - final SketchType sketchType = sketch.sketchType; - final WritableMemory oldWmem = sketch.wmem; - final int startAdr = DATA_START_ADR; - final int typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; - - int requiredSketchBytes = startAdr; - requiredSketchBytes += newLevelsArrLen * Integer.BYTES; - requiredSketchBytes += 2 * typeBytes; - requiredSketchBytes += newItemsArrLen * typeBytes; - final WritableMemory newWmem; - - if (requiredSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory - newWmem = sketch.memReqSvr.request(oldWmem, requiredSketchBytes); - oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble - } - else { //Expand or contract in current memory - newWmem = oldWmem; - } - - int offset = startAdr; - //LEVELS ARR - int lengthBytes = newLevelsArrLen * Integer.BYTES; - sketch.setLevelsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); // - offset += lengthBytes; - //MIN MAX ARR - lengthBytes = 2 * typeBytes; - sketch.setMinMaxArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); - offset += lengthBytes; - //ITEMS ARR - lengthBytes = newItemsArrLen * typeBytes; - sketch.setItemsArrayUpdatable(newWmem.writableRegion(offset, lengthBytes)); - assert requiredSketchBytes <= newWmem.getCapacity(); - return newWmem; - } - - final void mergeDoubleImpl(final KllSketch other) { - if (other.isEmpty()) { return; } - final long finalN = getN() + other.getN(); - //update this sketch with level0 items from the other sketch - final double[] 
otherDoubleItemsArr = other.getDoubleItemsArray(); - final int otherNumLevels = other.getNumLevels(); - final int[] otherLevelsArr = other.getLevelsArray(); - for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { - updateDouble(otherDoubleItemsArr[i]); - } - // after the level 0 update, we capture the key mutable variables - final double myMin = getMinDoubleValue(); - final double myMax = getMaxDoubleValue(); - final int myMinK = getMinK(); - - final int myCurNumLevels = getNumLevels(); - final int[] myCurLevelsArr = getLevelsArray(); - final double[] myCurDoubleItemsArr = getDoubleItemsArray(); - - final int myNewNumLevels; - final int[] myNewLevelsArr; - final double[] myNewDoubleItemsArr; - - if (otherNumLevels > 1) { //now merge other levels if they exist - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); - final double[] workbuf = new double[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; - - final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); - - populateDoubleWorkArrays(other, workbuf, worklevels, provisionalNumLevels); - - // notice that workbuf is being used as both the input and output - final int[] result = KllDoublesHelper.generalDoublesCompress(getK(), getM(), provisionalNumLevels, - workbuf, worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels - final int curItemCount = result[2]; //was finalPop - - // now we need to finalize the results for the "self" sketch - - //THE NEW NUM LEVELS - myNewNumLevels = result[0]; //was finalNumLevels - assert myNewNumLevels <= ub; // ub may be much bigger - - // THE NEW ITEMS ARRAY (was newbuf) - myNewDoubleItemsArr = (targetItemCount == myCurDoubleItemsArr.length) - ? 
myCurDoubleItemsArr - : new double[targetItemCount]; - final int freeSpaceAtBottom = targetItemCount - curItemCount; - //shift the new items array - System.arraycopy(workbuf, outlevels[0], myNewDoubleItemsArr, freeSpaceAtBottom, curItemCount); - final int theShift = freeSpaceAtBottom - outlevels[0]; - - //calculate the new levels array length - final int finalLevelsArrLen; - if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } - else { finalLevelsArrLen = myCurLevelsArr.length; } - - //THE NEW LEVELS ARRAY - myNewLevelsArr = new int[finalLevelsArrLen]; - for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index - myNewLevelsArr[lvl] = outlevels[lvl] + theShift; - } - - //MEMORY SPACE MANAGEMENT - if (direct) { - wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewDoubleItemsArr.length); - } //End direct - - } else { - myNewNumLevels = myCurNumLevels; - myNewLevelsArr = myCurLevelsArr; - myNewDoubleItemsArr = myCurDoubleItemsArr; - } - - //Update Preamble: - setN(finalN); - if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
- setMinK(min(myMinK, other.getMinK())); - } - - //Update min, max values - final double otherMin = other.getMinDoubleValue(); - final double otherMax = other.getMaxDoubleValue(); - setMinDoubleValue(resolveDoubleMinValue(myMin, otherMin)); - setMaxDoubleValue(resolveDoubleMaxValue(myMax, otherMax)); - - //Update numLevels, levelsArray, items - setNumLevels(myNewNumLevels); - setLevelsArray(myNewLevelsArr); - setDoubleItemsArray(myNewDoubleItemsArr); - assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - } - - private static double resolveDoubleMinValue(final double myMin, final double otherMin) { - if (Double.isNaN(myMin) && Double.isNaN(otherMin)) { return Double.NaN; } - if (Double.isNaN(myMin)) { return otherMin; } - if (Double.isNaN(otherMin)) { return myMin; } - return min(myMin, otherMin); - } - - private static double resolveDoubleMaxValue(final double myMax, final double otherMax) { - if (Double.isNaN(myMax) && Double.isNaN(otherMax)) { return Double.NaN; } - if (Double.isNaN(myMax)) { return otherMax; } - if (Double.isNaN(otherMax)) { return myMax; } - return max(myMax, otherMax); - } - - final void mergeFloatImpl(final KllSketch other) { - if (other.isEmpty()) { return; } - final long finalN = getN() + other.getN(); - //update this sketch with level0 items from the other sketch - final float[] otherFloatItemsArr = other.getFloatItemsArray(); - final int otherNumLevels = other.getNumLevels(); - final int[] otherLevelsArr = other.getLevelsArray(); - for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { - updateFloat(otherFloatItemsArr[i]); - } - // after the level 0 update, we capture the key mutable variables - final float myMin = getMinFloatValue(); - final float myMax = getMaxFloatValue(); - final int myMinK = getMinK(); - - final int myCurNumLevels = getNumLevels(); - final int[] myCurLevelsArr = getLevelsArray(); - final float[] myCurFloatItemsArr = getFloatItemsArray(); - - final int myNewNumLevels; - final 
int[] myNewLevelsArr; - final float[] myNewFloatItemsArr; - - if (otherNumLevels > 1) { //now merge higher levels if they exist - final int tmpSpaceNeeded = getNumRetained() - + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); - final float[] workbuf = new float[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; - - final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); - - populateFloatWorkArrays(other, workbuf, worklevels, provisionalNumLevels); - - // notice that workbuf is being used as both the input and output - final int[] result = KllFloatsHelper.generalFloatsCompress(getK(), getM(), provisionalNumLevels, - workbuf, worklevels, workbuf, outlevels, isLevelZeroSorted(), random); - final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels - final int curItemCount = result[2]; //was finalPop - - // now we need to finalize the results for the "self" sketch - - //THE NEW NUM LEVELS - myNewNumLevels = result[0]; //was finalNumLevels - assert myNewNumLevels <= ub; // ub may be much bigger - - // THE NEW ITEMS ARRAY (was newbuf) - myNewFloatItemsArr = (targetItemCount == myCurFloatItemsArr.length) - ? 
myCurFloatItemsArr - : new float[targetItemCount]; - final int freeSpaceAtBottom = targetItemCount - curItemCount; - //shift the new items array - System.arraycopy(workbuf, outlevels[0], myNewFloatItemsArr, freeSpaceAtBottom, curItemCount); - final int theShift = freeSpaceAtBottom - outlevels[0]; - - //calculate the new levels array length - final int finalLevelsArrLen; - if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } - else { finalLevelsArrLen = myCurLevelsArr.length; } - - //THE NEW LEVELS ARRAY - myNewLevelsArr = new int[finalLevelsArrLen]; - for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index - myNewLevelsArr[lvl] = outlevels[lvl] + theShift; - } - - //MEMORY SPACE MANAGEMENT - if (direct) { - wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewFloatItemsArr.length); - } //End direct - - } else { - myNewNumLevels = myCurNumLevels; - myNewLevelsArr = myCurLevelsArr; - myNewFloatItemsArr = myCurFloatItemsArr; - } - - //Update Preamble: - setN(finalN); - if (other.isEstimationMode()) { //otherwise the merge brings over exact items. 
- setMinK(min(myMinK, other.getMinK())); - } - - //Update min, max values - final float otherMin = other.getMinFloatValue(); - final float otherMax = other.getMaxFloatValue(); - setMinFloatValue(resolveFloatMinValue(myMin, otherMin)); - setMaxFloatValue(resolveFloatMaxValue(myMax, otherMax)); - - //Update numLevels, levelsArray, items - setNumLevels(myNewNumLevels); - setLevelsArray(myNewLevelsArr); - setFloatItemsArray(myNewFloatItemsArr); - assert KllHelper.sumTheSampleWeights(getNumLevels(), getLevelsArray()) == getN(); - } - - private static float resolveFloatMinValue(final float myMin, final float otherMin) { - if (Float.isNaN(myMin) && Float.isNaN(otherMin)) { return Float.NaN; } - if (Float.isNaN(myMin)) { return otherMin; } - if (Float.isNaN(otherMin)) { return myMin; } - return min(myMin, otherMin); - } - - private static float resolveFloatMaxValue(final float myMax, final float otherMax) { - if (Float.isNaN(myMax) && Float.isNaN(otherMax)) { return Float.NaN; } - if (Float.isNaN(myMax)) { return otherMax; } - if (Float.isNaN(otherMax)) { return myMax; } - return max(myMax, otherMax); - } - abstract void setDoubleItemsArray(double[] floatItems); abstract void setDoubleItemsArrayAt(int index, double value); - abstract void setMinK(int minK); - abstract void setFloatItemsArray(float[] floatItems); abstract void setFloatItemsArrayAt(int index, float value); @@ -973,10 +417,10 @@ private static float resolveFloatMaxValue(final float myMax, final float otherMa abstract void setLevelsArrayAt(int index, int value); - abstract void setLevelsArrayAtPlusEq(int index, int plusEq); - abstract void setLevelsArrayAtMinusEq(int index, int minusEq); + abstract void setLevelsArrayAtPlusEq(int index, int plusEq); + abstract void setLevelsArrayUpdatable(WritableMemory levelsMem); abstract void setLevelZeroSorted(boolean sorted); @@ -989,681 +433,12 @@ private static float resolveFloatMaxValue(final float myMax, final float otherMa abstract void setMinFloatValue(float value); 
+ abstract void setMinK(int minK); + abstract void setMinMaxArrayUpdatable(WritableMemory minMaxMem); abstract void setN(long n); abstract void setNumLevels(int numLevels); - final byte[] toCompactByteArrayImpl() { - final byte[] byteArr = new byte[getCurrentCompactSerializedSizeBytes()]; - final WritableMemory wmem = WritableMemory.writableWrap(byteArr); - loadFirst8Bytes(this, wmem, false); - if (getN() == 0) { return byteArr; } //empty - final boolean doubleType = (sketchType == DOUBLES_SKETCH); - - //load data - int offset = DATA_START_ADR_SINGLE_ITEM; - final int[] myLevelsArr = getLevelsArray(); - if (getN() == 1) { //single item - if (doubleType) { - wmem.putDouble(offset, getDoubleItemsArray()[myLevelsArr[0]]); - } else { - wmem.putFloat(offset, getFloatItemsArray()[myLevelsArr[0]]); - } - } else { // n > 1 - //remainder of preamble after first 8 bytes - setMemoryN(wmem, getN()); - setMemoryMinK(wmem, getMinK()); - setMemoryNumLevels(wmem, getNumLevels()); - offset = DATA_START_ADR; - - //LOAD LEVELS ARR the last integer in levels_ is NOT serialized - final int len = myLevelsArr.length - 1; - wmem.putIntArray(offset, myLevelsArr, 0, len); - offset += len * Integer.BYTES; - - //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY - if (doubleType) { - wmem.putDouble(offset, getMinDoubleValue()); - offset += Double.BYTES; - wmem.putDouble(offset, getMaxDoubleValue()); - offset += Double.BYTES; - wmem.putDoubleArray(offset, getDoubleItemsArray(), myLevelsArr[0], getNumRetained()); - } else { - wmem.putFloat(offset, getMinFloatValue()); - offset += Float.BYTES; - wmem.putFloat(offset, getMaxFloatValue()); - offset += Float.BYTES; - wmem.putFloatArray(offset, getFloatItemsArray(), myLevelsArr[0], getNumRetained()); - } - } - return byteArr; - } - - private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem, - final boolean updatable) { - final boolean empty = sk.getN() == 0; - final boolean lvlZeroSorted = sk.isLevelZeroSorted(); - final boolean 
singleItem = sk.getN() == 1; - final boolean doubleType = (sk.sketchType == DOUBLES_SKETCH); - final int preInts = updatable - ? PREAMBLE_INTS_FULL - : (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL; - //load the preamble - setMemoryPreInts(wmem, preInts); - final int server = updatable ? SERIAL_VERSION_UPDATABLE - : (singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); - setMemorySerVer(wmem, server); - setMemoryFamilyID(wmem, Family.KLL.getID()); - setMemoryEmptyFlag(wmem, empty); - setMemoryLevelZeroSortedFlag(wmem, lvlZeroSorted); - setMemorySingleItemFlag(wmem, singleItem); - setMemoryDoubleSketchFlag(wmem, doubleType); - setMemoryUpdatableFlag(wmem, updatable); - setMemoryK(wmem, sk.getK()); - setMemoryM(wmem, sk.getM()); - } - - @SuppressWarnings("null") - final String toStringImpl(final boolean withLevels, final boolean withData) { - final boolean doubleType = (sketchType == DOUBLES_SKETCH); - final int k = getK(); - final int m = getM(); - final String epsPct = String.format("%.3f%%", getNormalizedRankError(false) * 100); - final String epsPMFPct = String.format("%.3f%%", getNormalizedRankError(true) * 100); - final StringBuilder sb = new StringBuilder(); - final String skType = (direct ? "Direct" : "") + (doubleType ? 
"Doubles" : "Floats"); - sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(getMinK()).append(Util.LS); - sb.append(" M : ").append(m).append(Util.LS); - sb.append(" N : ").append(getN()).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsison PMF : ").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(getNumLevels()).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS); - final int cap = (doubleType) ? getDoubleItemsArray().length : getFloatItemsArray().length; - sb.append(" Capacity Items : ").append(cap).append(Util.LS); - sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS); - if (direct) { - sb.append(" Updatable Storage Bytes: ").append(getCurrentUpdatableSerializedSizeBytes()).append(Util.LS); - } else { - sb.append(" Compact Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS); - } - - if (doubleType) { - sb.append(" Min Value : ").append(getMinDoubleValue()).append(Util.LS); - sb.append(" Max Value : ").append(getMaxDoubleValue()).append(Util.LS); - } else { - sb.append(" Min Value : ").append(getMinFloatValue()).append(Util.LS); - sb.append(" Max Value : ").append(getMaxFloatValue()).append(Util.LS); - } - sb.append("### End sketch summary").append(Util.LS); - - final int myNumLevels = getNumLevels(); - final int[] myLevelsArr = getLevelsArray(); - double[] myDoubleItemsArr = null; - float[] myFloatItemsArr = null; - if (doubleType) { - myDoubleItemsArr = getDoubleItemsArray(); - } else { - myFloatItemsArr = getFloatItemsArray(); - } - if (withLevels) { - sb.append(outputLevels(k, m, myNumLevels, myLevelsArr)); - } - if (withData) { - 
sb.append(outputData(doubleType, myNumLevels, myLevelsArr, myFloatItemsArr, myDoubleItemsArr)); - } - return sb.toString(); - } - - static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < numLevels; level++) { - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") - .append(KllHelper.levelCapacity(k, numLevels, level, m)) - .append(", ").append(KllHelper.currentLevelSize(level, numLevels, levelsArr)).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); - return sb.toString(); - } - - static String outputData(final boolean doubleType, final int numLevels, final int[] levelsArr, - final float[] floatItemsArr, final double[] doubleItemsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (levelsArr[0] > 0) { - sb.append(" Garbage:" + Util.LS); - if (doubleType) { - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - } else { - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); - } - } - } - int level = 0; - if (doubleType) { - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - level++; - } - } 
- else { - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex <= toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(floatItemsArr[i]).append(Util.LS); - } - level++; - } - } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - - return sb.toString(); - } - - final byte[] toUpdatableByteArrayImpl() { - final byte[] byteArr = new byte[getCurrentUpdatableSerializedSizeBytes()]; - final WritableMemory wmem = WritableMemory.writableWrap(byteArr); - loadFirst8Bytes(this, wmem, true); - //remainder of preamble after first 8 bytes - setMemoryN(wmem, getN()); - setMemoryMinK(wmem, getMinK()); - setMemoryNumLevels(wmem, getNumLevels()); - - //load data - final boolean doubleType = (sketchType == DOUBLES_SKETCH); - int offset = DATA_START_ADR; - - //LOAD LEVELS ARRAY the last integer in levels_ IS serialized - final int[] myLevelsArr = getLevelsArray(); - final int len = myLevelsArr.length; - wmem.putIntArray(offset, myLevelsArr, 0, len); - offset += len * Integer.BYTES; - - //LOAD MIN, MAX VALUES FOLLOWED BY ITEMS ARRAY - if (doubleType) { - wmem.putDouble(offset, getMinDoubleValue()); - offset += Double.BYTES; - wmem.putDouble(offset, getMaxDoubleValue()); - offset += Double.BYTES; - final double[] doubleItemsArr = getDoubleItemsArray(); - wmem.putDoubleArray(offset, doubleItemsArr, 0, doubleItemsArr.length); - } else { - wmem.putFloat(offset, getMinFloatValue()); - offset += Float.BYTES; - wmem.putFloat(offset, getMaxFloatValue()); - offset += Float.BYTES; - final float[] floatItemsArr = getFloatItemsArray(); - wmem.putFloatArray(offset, floatItemsArr, 0, floatItemsArr.length); - } - return byteArr; - } 
- - final void updateDouble(final double value) { - if (Double.isNaN(value)) { return; } - if (isEmpty()) { - setMinDoubleValue(value); - setMaxDoubleValue(value); - } else { - if (value < getMinDoubleValue()) { setMinDoubleValue(value); } - if (value > getMaxDoubleValue()) { setMaxDoubleValue(value); } - } - if (getLevelsArrayAt(0) == 0) { compressWhileUpdatingSketch(); } - incN(); - setLevelZeroSorted(false); - final int nextPos = getLevelsArrayAt(0) - 1; - assert getLevelsArrayAt(0) >= 0; - setLevelsArrayAt(0, nextPos); - setDoubleItemsArrayAt(nextPos, value); - } - - final void updateFloat(final float value) { - if (Float.isNaN(value)) { return; } - if (isEmpty()) { - setMinFloatValue(value); - setMaxFloatValue(value); - } else { - if (value < getMinFloatValue()) { setMinFloatValue(value); } - if (value > getMaxFloatValue()) { setMaxFloatValue(value); } - } - - if (getLevelsArrayAt(0) == 0) { compressWhileUpdatingSketch(); } - incN(); - setLevelZeroSorted(false); - final int nextPos = getLevelsArrayAt(0) - 1; - assert getLevelsArrayAt(0) >= 0; - setLevelsArrayAt(0, nextPos); - setFloatItemsArrayAt(nextPos, value); - } - - //Private non-static methods - - /** - * This grows the levels arr by 1 (if needed) and increases the capacity of the items array - * at the bottom. Only numLevels, the levels array and the items array are affected. 
- */ - @SuppressWarnings("null") - private void addEmptyTopLevelToCompletelyFullSketch() { - final int[] myCurLevelsArr = getLevelsArray(); - final int myCurNumLevels = getNumLevels(); - final int myCurTotalItemsCapacity = myCurLevelsArr[myCurNumLevels]; - double minDouble = Double.NaN; - double maxDouble = Double.NaN; - float minFloat = Float.NaN; - float maxFloat = Float.NaN; - - double[] myCurDoubleItemsArr = null; - float[] myCurFloatItemsArr = null; - - final int myNewNumLevels; - final int[] myNewLevelsArr; - final int myNewTotalItemsCapacity; - - float[] myNewFloatItemsArr = null; - double[] myNewDoubleItemsArr = null; - - if (sketchType == DOUBLES_SKETCH) { - minDouble = getMinDoubleValue(); - maxDouble = getMaxDoubleValue(); - myCurDoubleItemsArr = getDoubleItemsArray(); - //assert we are following a certain growth scheme - assert myCurDoubleItemsArr.length == myCurTotalItemsCapacity; - } else { //FLOATS_SKETCH - minFloat = getMinFloatValue(); - maxFloat = getMaxFloatValue(); - myCurFloatItemsArr = getFloatItemsArray(); - assert myCurFloatItemsArr.length == myCurTotalItemsCapacity; - } - assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme - - final int deltaItemsCap = KllHelper.levelCapacity(getK(), myCurNumLevels + 1, 0, getM()); - myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap; - - // Check if growing the levels arr if required. - // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it - final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; - - // GROW LEVELS ARRAY - if (growLevelsArr) { - //grow levels arr by one and copy the old data to the new array, extra space at the top. 
- myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); - assert myNewLevelsArr.length == myCurLevelsArr.length + 1; - myNewNumLevels = myCurNumLevels + 1; - incNumLevels(); //increment the class member - } else { - myNewLevelsArr = myCurLevelsArr; - myNewNumLevels = myCurNumLevels; - } - // This loop updates all level indices EXCLUDING the "extra" index at the top - for (int level = 0; level <= myNewNumLevels - 1; level++) { - myNewLevelsArr[level] += deltaItemsCap; - } - myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCapacity; // initialize the new "extra" index at the top - - // GROW ITEMS ARRAY - if (sketchType == DOUBLES_SKETCH) { - myNewDoubleItemsArr = new double[myNewTotalItemsCapacity]; - // copy and shift the current data into the new array - System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCapacity); - } else { - myNewFloatItemsArr = new float[myNewTotalItemsCapacity]; - // copy and shift the current items data into the new array - System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCapacity); - } - - //MEMORY SPACE MANAGEMENT - if (direct) { - wmem = memorySpaceMgmt(this, myNewLevelsArr.length, myNewTotalItemsCapacity); - } - //update our sketch with new expanded spaces - setNumLevels(myNewNumLevels); - setLevelsArray(myNewLevelsArr); - if (sketchType == DOUBLES_SKETCH) { - setMinDoubleValue(minDouble); - setMaxDoubleValue(maxDouble); - setDoubleItemsArray(myNewDoubleItemsArr); - } else { //Float sketch - setMinFloatValue(minFloat); - setMaxFloatValue(maxFloat); - setFloatItemsArray(myNewFloatItemsArr); - } - } - - // The following code is only valid in the special case of exactly reaching capacity while updating. - // It cannot be used while merging, while reducing k, or anything else. 
- @SuppressWarnings("null") - private void compressWhileUpdatingSketch() { - final int level = KllHelper.findLevelToCompact(getK(), getM(), getNumLevels(), getLevelsArray()); - if (level == getNumLevels() - 1) { - //The level to compact is the top level, thus we need to add a level. - //Be aware that this operation grows the items array, - //shifts the items data and the level boundaries of the data, - //and grows the levels array and increments numLevels_. - addEmptyTopLevelToCompletelyFullSketch(); - } - - final int[] myLevelsArr = getLevelsArray(); - final int rawBeg = myLevelsArr[level]; - final int rawEnd = myLevelsArr[level + 1]; - // +2 is OK because we already added a new top level if necessary - final int popAbove = myLevelsArr[level + 2] - rawEnd; - final int rawPop = rawEnd - rawBeg; - final boolean oddPop = isOdd(rawPop); - final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; - final int adjPop = oddPop ? rawPop - 1 : rawPop; - final int halfAdjPop = adjPop / 2; - - // level zero might not be sorted, so we must sort it if we wish to compact it - float[] myFloatItemsArr; - double[] myDoubleItemsArr; - - if (sketchType == DOUBLES_SKETCH) { - myFloatItemsArr = null; - myDoubleItemsArr = getDoubleItemsArray(); - if (level == 0) { - if (direct) { - myDoubleItemsArr = getDoubleItemsArray(); - Arrays.sort(myDoubleItemsArr, adjBeg, adjBeg + adjPop); - setDoubleItemsArray(myDoubleItemsArr); - } else { - Arrays.sort(getDoubleItemsArray(), adjBeg, adjBeg + adjPop); - } - } - if (popAbove == 0) { - if (direct) { - myDoubleItemsArr = getDoubleItemsArray(); - KllDoublesHelper.randomlyHalveUpDoubles(myDoubleItemsArr, adjBeg, adjPop, random); - setDoubleItemsArray(myDoubleItemsArr); - } else { - KllDoublesHelper.randomlyHalveUpDoubles(getDoubleItemsArray(), adjBeg, adjPop, random); - } - } else { - if (direct) { - myDoubleItemsArr = getDoubleItemsArray(); - KllDoublesHelper.randomlyHalveDownDoubles(myDoubleItemsArr, adjBeg, adjPop, random); - 
setDoubleItemsArray(myDoubleItemsArr); - } else { - KllDoublesHelper.randomlyHalveDownDoubles(getDoubleItemsArray(), adjBeg, adjPop, random); - } - if (direct ) { - myDoubleItemsArr = getDoubleItemsArray(); - KllDoublesHelper.mergeSortedDoubleArrays( - myDoubleItemsArr, adjBeg, halfAdjPop, - myDoubleItemsArr, rawEnd, popAbove, - myDoubleItemsArr, adjBeg + halfAdjPop); - setDoubleItemsArray(myDoubleItemsArr); - } else { - myDoubleItemsArr = getDoubleItemsArray(); - KllDoublesHelper.mergeSortedDoubleArrays( - myDoubleItemsArr, adjBeg, halfAdjPop, - myDoubleItemsArr, rawEnd, popAbove, - myDoubleItemsArr, adjBeg + halfAdjPop); - } - } - } else { //Float sketch - myFloatItemsArr = getFloatItemsArray(); - myDoubleItemsArr = null; - if (level == 0) { - if (direct) { - myFloatItemsArr = getFloatItemsArray(); - Arrays.sort(myFloatItemsArr, adjBeg, adjBeg + adjPop); - setFloatItemsArray(myFloatItemsArr); - } else { - Arrays.sort(getFloatItemsArray(), adjBeg, adjBeg + adjPop); - } - } - if (popAbove == 0) { - if (direct) { - myFloatItemsArr = getFloatItemsArray(); - KllFloatsHelper.randomlyHalveUpFloats(myFloatItemsArr, adjBeg, adjPop, random); - setFloatItemsArray(myFloatItemsArr); - } else { - KllFloatsHelper.randomlyHalveUpFloats(getFloatItemsArray(), adjBeg, adjPop, random); - } - } else { - if (direct) { - myFloatItemsArr = getFloatItemsArray(); - KllFloatsHelper.randomlyHalveDownFloats(myFloatItemsArr, adjBeg, adjPop, random); - setFloatItemsArray(myFloatItemsArr); - } else { - KllFloatsHelper.randomlyHalveDownFloats(getFloatItemsArray(), adjBeg, adjPop, random); - } - if (direct ) { - myFloatItemsArr = getFloatItemsArray(); - KllFloatsHelper.mergeSortedFloatArrays( - myFloatItemsArr, adjBeg, halfAdjPop, - myFloatItemsArr, rawEnd, popAbove, - myFloatItemsArr, adjBeg + halfAdjPop); - setFloatItemsArray(myFloatItemsArr); - } else { - myFloatItemsArr = getFloatItemsArray(); - KllFloatsHelper.mergeSortedFloatArrays( - myFloatItemsArr, adjBeg, halfAdjPop, - myFloatItemsArr, 
rawEnd, popAbove, - myFloatItemsArr, adjBeg + halfAdjPop); - } - } - } - setLevelsArrayAtMinusEq(level + 1, halfAdjPop); // adjust boundaries of the level above - - if (oddPop) { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item - if (sketchType == DOUBLES_SKETCH) { - setDoubleItemsArrayAt(getLevelsArrayAt(level), getDoubleItemsArrayAt(rawBeg)); // namely this leftover guy - } else { - setFloatItemsArrayAt(getLevelsArrayAt(level), getFloatItemsArrayAt(rawBeg)); // namely this leftover guy - } - - } else { - setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty - } - - // verify that we freed up halfAdjPop array slots just below the current level - assert getLevelsArrayAt(level) == rawBeg + halfAdjPop; - - // finally, we need to shift up the data in the levels below - // so that the freed-up space can be used by level zero - if (level > 0) { - final int amount = rawBeg - getLevelsArrayAt(0); - if (sketchType == DOUBLES_SKETCH) { - if (direct) { - myDoubleItemsArr = getDoubleItemsArray(); - System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); - setDoubleItemsArray(myDoubleItemsArr); - } else { - System.arraycopy(myDoubleItemsArr, myLevelsArr[0], myDoubleItemsArr, myLevelsArr[0] + halfAdjPop, amount); - } - } else { - if (direct) { - myFloatItemsArr = getFloatItemsArray(); - System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); - setFloatItemsArray(myFloatItemsArr); - } else { - System.arraycopy(myFloatItemsArr, myLevelsArr[0], myFloatItemsArr, myLevelsArr[0] + halfAdjPop, amount); - } - } - for (int lvl = 0; lvl < level; lvl++) { - setLevelsArrayAtPlusEq(lvl, halfAdjPop); - } - } - } - - private KllDoublesQuantileCalculator getDoublesQuantileCalculator() { - final int[] myLevelsArr = getLevelsArray(); - final double[] myDoubleItemsArr = getDoubleItemsArray(); - if 
(!isLevelZeroSorted()) { - Arrays.sort(getDoubleItemsArray(), myLevelsArr[0], myLevelsArr[1]); - setLevelZeroSorted(true); - } - return new KllDoublesQuantileCalculator(myDoubleItemsArr, myLevelsArr, getNumLevels(), getN()); - } - - private KllFloatsQuantileCalculator getFloatsQuantileCalculator() { - final int[] myLevelsArr = getLevelsArray(); - final float[] myFloatItemsArr = getFloatItemsArray(); - if (!isLevelZeroSorted()) { - Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]); - setLevelZeroSorted(true); - } - return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, getNumLevels(), getN()); - } - - private void incrementDoublesBucketsSortedLevel(final int fromIndex, final int toIndex, - final int weight, final double[] splitPoints, final double[] buckets) { - final double[] myDoubleItemsArr = getDoubleItemsArray(); - int i = fromIndex; - int j = 0; - while (i < toIndex && j < splitPoints.length) { - if (myDoubleItemsArr[i] < splitPoints[j]) { - buckets[j] += weight; // this sample goes into this bucket - i++; // move on to next sample and see whether it also goes into this bucket - } else { - j++; // no more samples for this bucket - } - } - // now either i == toIndex (we are out of samples), or - // j == numSplitPoints (we are out of buckets, but there are more samples remaining) - // we only need to do something in the latter case - if (j == splitPoints.length) { - buckets[j] += weight * (toIndex - i); - } - } - - private void incrementDoublesBucketsUnsortedLevel(final int fromIndex, final int toIndex, - final int weight, final double[] splitPoints, final double[] buckets) { - final double[] myDoubleItemsArr = getDoubleItemsArray(); - for (int i = fromIndex; i < toIndex; i++) { - int j; - for (j = 0; j < splitPoints.length; j++) { - if (myDoubleItemsArr[i] < splitPoints[j]) { - break; - } - } - buckets[j] += weight; - } - } - - private void incrementFloatBucketsSortedLevel(final int fromIndex, final int toIndex, - final int weight, 
final float[] splitPoints, final double[] buckets) { - final float[] myFloatItemsArr = getFloatItemsArray(); - int i = fromIndex; - int j = 0; - while (i < toIndex && j < splitPoints.length) { - if (myFloatItemsArr[i] < splitPoints[j]) { - buckets[j] += weight; // this sample goes into this bucket - i++; // move on to next sample and see whether it also goes into this bucket - } else { - j++; // no more samples for this bucket - } - } - // now either i == toIndex (we are out of samples), or - // j == numSplitPoints (we are out of buckets, but there are more samples remaining) - // we only need to do something in the latter case - if (j == splitPoints.length) { - buckets[j] += weight * (toIndex - i); - } - } - - private void incrementFloatBucketsUnsortedLevel(final int fromIndex, final int toIndex, - final int weight, final float[] splitPoints, final double[] buckets) { - final float[] myFloatItemsArr = getFloatItemsArray(); - for (int i = fromIndex; i < toIndex; i++) { - int j; - for (j = 0; j < splitPoints.length; j++) { - if (myFloatItemsArr[i] < splitPoints[j]) { - break; - } - } - buckets[j] += weight; - } - } - - private void populateDoubleWorkArrays(final KllSketch other, final double[] workbuf, - final int[] worklevels, final int provisionalNumLevels) { - worklevels[0] = 0; - final int[] myLevelsArr = getLevelsArray(); - final int[] otherLevelsArr = other.getLevelsArray(); - final double[] myDoubleItemsArr = getDoubleItemsArray(); - final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); - - // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(),myLevelsArr); - System.arraycopy(myDoubleItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; - - for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), myLevelsArr); - final int otherPop = 
KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; - - if (selfPop > 0 && otherPop == 0) { - System.arraycopy(myDoubleItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); - } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); - } else if (selfPop > 0 && otherPop > 0) { - KllDoublesHelper.mergeSortedDoubleArrays(myDoubleItemsArr, myLevelsArr[lvl], selfPop, otherDoubleItemsArr, - otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); - } - } - } - - private void populateFloatWorkArrays(final KllSketch other, final float[] workbuf, - final int[] worklevels, final int provisionalNumLevels) { - worklevels[0] = 0; - final int[] myLevelsArr = getLevelsArray(); - final int[] otherLevelsArr = other.getLevelsArray(); - final float[] myFloatItemsArr = getFloatItemsArray(); - final float[] otherFloatItemsArr = other.getFloatItemsArray(); - - // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, getNumLevels(), myLevelsArr); - System.arraycopy( myFloatItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; - - for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { - final int selfPop = KllHelper.currentLevelSize(lvl, getNumLevels(), myLevelsArr); - final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; - - if (selfPop > 0 && otherPop == 0) { - System.arraycopy( myFloatItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); - } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); - } else if (selfPop > 0 && otherPop > 0) { - KllFloatsHelper.mergeSortedFloatArrays( myFloatItemsArr, 
myLevelsArr[lvl], selfPop, otherFloatItemsArr, - otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); - } - } - } - } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 7af0269f3..e07d7d3c7 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -545,7 +545,7 @@ public void checkGetWritableMemory() { assertEquals(sketch.getK(), 200); assertEquals(sketch.getN(), 200); assertFalse(sketch.isEmpty()); - assertTrue(sketch.isDirect()); + assertTrue(sketch.isUpdatableMemory()); assertFalse(sketch.isEstimationMode()); assertTrue(sketch.isDoublesSketch()); assertFalse(sketch.isLevelZeroSorted()); @@ -556,7 +556,7 @@ public void checkGetWritableMemory() { assertEquals(sk.getK(), 200); assertEquals(sk.getN(), 200); assertFalse(sk.isEmpty()); - assertFalse(sk.isDirect()); + assertFalse(sk.isUpdatableMemory()); assertFalse(sk.isEstimationMode()); assertTrue(sk.isDoublesSketch()); assertFalse(sk.isLevelZeroSorted()); @@ -581,12 +581,49 @@ public void checkReset() { assertEquals(max2, max1); } + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllDoublesSketch sk2 = KllDirectDoublesSketch.heapify(dstMem); + assertEquals(sk2.getMinValue(), 1.0); + assertEquals(sk2.getMaxValue(), 100.0); + } + + @Test + public void checkMergeKllDoublesSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllDoublesSketch sk2 = new KllDoublesSketch(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk.merge(sk2); + } 
+ + @Test + public void checkReverseMergeKllDoubleSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllDoublesSketch sk2 = new KllDoublesSketch(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk2.merge(sk); + } + +// @Test +// public void checkWrapKllDoubleSketch() { +// KllDoublesSketch sk = new KllDoublesSketch(20); +// for (int i = 1; i <= 21; i++ ) { sk.update(i); } +// Memory srcMem = Memory.wrap(sk.toByteArray()); +// KllDirectDoublesSketch sk2 = KllDirectDoublesSketch.writableWrap(srcMem, memReqSvr); +// } + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { KllDoublesSketch sk = new KllDoublesSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = sk.toUpdatableByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); return ddsk; } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index 4f6520173..025004380 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -543,7 +543,7 @@ public void checkGetWritableMemory() { assertEquals(sketch.getK(), 200); assertEquals(sketch.getN(), 200); assertFalse(sketch.isEmpty()); - assertTrue(sketch.isDirect()); + assertTrue(sketch.isUpdatableMemory()); assertFalse(sketch.isEstimationMode()); assertTrue(sketch.isFloatsSketch()); assertFalse(sketch.isLevelZeroSorted()); @@ -554,7 +554,7 @@ public void checkGetWritableMemory() { assertEquals(sk.getK(), 200); assertEquals(sk.getN(), 200); assertFalse(sk.isEmpty()); - assertFalse(sk.isDirect()); + assertFalse(sk.isUpdatableMemory()); 
assertFalse(sk.isEstimationMode()); assertTrue(sk.isFloatsSketch()); assertFalse(sk.isLevelZeroSorted()); @@ -579,6 +579,16 @@ public void checkReset() { assertEquals(max2, max1); } + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllFloatsSketch sk2 = KllDirectFloatsSketch.heapify(dstMem); + assertEquals(sk2.getMinValue(), 1.0); + assertEquals(sk2.getMaxValue(), 100.0); + } + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { KllFloatsSketch sk = new KllFloatsSketch(k); for (int i = 1; i <= n; i++) { sk.update(i); } From 934e26ab30c8a9e47105597afe11f6cdd62b453a Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 6 Apr 2022 11:17:09 -0700 Subject: [PATCH 31/31] Added isSameResource(Memory). --- .../java/org/apache/datasketches/kll/KllSketch.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 804148d47..faa5d1081 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -28,6 +28,7 @@ import java.util.Random; import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -311,6 +312,18 @@ public final boolean isUpdatableMemory() { return updatablMemory; } + /** + * Returns true if the backing resource of this is identical with the backing resource + * of that. The capacities must be the same. If this is a region, + * the region offset must also be the same. 
+ * @param that A different non-null object + * @return true if the backing resource of this is the same as the backing resource + * of that. + */ + public final boolean isSameResource(final Memory that) { + return wmem.isSameResource(that); + } + /** * This resets the current sketch back to zero entries. * It retains key parameters such as k and