diff --git a/src/main/java/org/apache/datasketches/QuantilesHelper.java b/src/main/java/org/apache/datasketches/QuantilesHelper.java index c7546569c..f128a0d2c 100644 --- a/src/main/java/org/apache/datasketches/QuantilesHelper.java +++ b/src/main/java/org/apache/datasketches/QuantilesHelper.java @@ -29,7 +29,7 @@ public class QuantilesHelper { * An array of {1,1,1,0} becomes {0,1,2,3} * @param array of weights where first element is zero * @return total weight - */ //also used by KLL + */ //used by classic Quantiles and KLL public static long convertToPrecedingCummulative(final long[] array) { long subtotal = 0; for (int i = 0; i < array.length; i++) { @@ -43,15 +43,28 @@ public static long convertToPrecedingCummulative(final long[] array) { /** * Returns the linear zero-based index (position) of a value in the hypothetical sorted stream of * values of size n. - * @param phi the fractional position where: 0 ≤ φ ≤ 1.0. + * @param rank the fractional position where: 0 ≤ φ ≤ 1.0. * @param n the size of the stream * @return the index, a value between 0 and n-1. - */ //also used by KLL - public static long posOfPhi(final double phi, final long n) { - final long pos = (long) Math.floor(phi * n); + */ //used by classic Quantiles and KLL + public static long posOfRank(final double rank, final long n) { + final long pos = (long) Math.floor(rank * n); return pos == n ? n - 1 : pos; //avoids ArrayIndexOutOfBoundException } + /** + * Returns the linear zero-based index (position) of a value in the hypothetical sorted stream of + * values of size n. + * @param rank the fractional position where: 0 ≤ φ ≤ 1.0. + * @param n the size of the stream + * @return the index, a value between 0 and n-1. + * @deprecated use {@link #posOfRank(double, long)} instead. Version 3.2.0. 
+ */ //used by classic Quantiles and KLL + @Deprecated + public static long posOfPhi(final double rank, final long n) { + return posOfRank(rank, n); + } + /** * This is written in terms of a plain array to facilitate testing. * @param wtArr the cumulative weights array consisting of chunks diff --git a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java b/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java deleted file mode 100644 index be2335f79..000000000 --- a/src/main/java/org/apache/datasketches/kll/BaseKllSketch.java +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.kll; - -import static java.lang.Math.abs; -import static java.lang.Math.ceil; -import static java.lang.Math.exp; -import static java.lang.Math.log; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.Math.pow; -import static java.lang.Math.round; - -import java.util.Random; - -import org.apache.datasketches.SketchesArgumentException; - -abstract class BaseKllSketch { - - /* Serialized float sketch layout, more than one item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 ||---------------------------------N_LONG---------------------------------------| - * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<--------------data----------------| unused |numLevels|-------min K-----------| - * - * - * - * Serialized float sketch layout, Empty and Single Item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 || |-------------------data-------------------| - */ - - /* Serialized double sketch layout, more than one item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - * 1 ||---------------------------------N_LONG---------------------------------------| - * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - * 2 ||<-------------unused------------------------|numLevels|-------min K-----------| - * || | 24 | - * 3 ||<---------------------------------data----------------------------------------| - * - * Serialized double sketch layout, Empty and Single Item: - * Adr: - * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | - * || | 8 | - * 1 
||----------------------------------data----------------------------------------| - */ - - /** - * The default value of K. - */ - public static final int DEFAULT_K = 200; - static final int DEFAULT_M = 8; - static final int MIN_K = DEFAULT_M; - static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short - - // Preamble byte addresses - static final int PREAMBLE_INTS_BYTE = 0; - static final int SER_VER_BYTE = 1; - static final int FAMILY_BYTE = 2; - static final int FLAGS_BYTE = 3; - static final int K_SHORT = 4; // to 5 - static final int M_BYTE = 6; - // 7 is reserved for future use - // SINGLE ITEM ONLY - static final int DATA_START_SINGLE_ITEM = 8; - - // MULTI-ITEM - static final int N_LONG = 8; // to 15 - static final int MIN_K_SHORT = 16; // to 17 - static final int NUM_LEVELS_BYTE = 18; - - // FLOAT SKETCH 19 is reserved for future use in float sketch - static final int DATA_START_FLOAT = 20; // float sketch, not single item - - // DOUBLE SKETCH 19 to 23 is reserved for future use in double sketch - static final int DATA_START_DOUBLE = 24; // double sketch, not single item - - // Other static values - static final byte SERIAL_VERSION = 1; - static final byte SERIAL_VERSION_SINGLE = 2; // only used to specify the single-item format - static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty and single item - static final int PREAMBLE_INTS_FLOAT = 5; // not empty or single item - static final int PREAMBLE_INTS_DOUBLE = 6; // not empty or single item - - enum Flags { IS_EMPTY, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM } - - /* - * Data is stored in items_. - * The data for level i lies in positions levels_[i] through levels_[i + 1] - 1 inclusive. - * Hence levels_ must contain (numLevels_ + 1) indices. - * The valid portion of items_ is completely packed, except for level 0. - * Level 0 is filled from the top down. - * - * Invariants: - * 1) After a compaction, or an update, or a merge, all levels are sorted except for level zero. 
- * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, - * so there is room for least 1 more item in level zero. - * 3) There are no gaps except at the bottom, so if levels_[0] = 0, - * the sketch is exactly filled to capacity and must be compacted. - * 4) Sum of weights of retained items == N. - * 5) curTotalCap == items_.length == levels_[numLevels_]. - */ - - final int k_; // configured value of K - final int m_; // configured minimum buffer "width", Must always be DEFAULT_M for now. - - int minK_; // for error estimation after merging with different k - long n_; // number of items input into this sketch - int numLevels_; // one-based number of current levels, - int[] levels_; // array of index offsets into the items[]. Size = numLevels + 1. - boolean isLevelZeroSorted_; - - final boolean compatible; //compatible with quantiles sketch - static final Random random = new Random(); - - /** - * Heap constructor. - * @param k configured size of sketch. Range [m, 2^16] - * @param m minimum level size. Default is 8. - */ - BaseKllSketch(final int k, final int m, final boolean compatible) { - checkK(k); - k_ = k; - minK_ = k; - m_ = m; - numLevels_ = 1; - levels_ = new int[] {k, k}; - isLevelZeroSorted_ = false; - this.compatible = compatible; - } - - // public functions - - /** - * Returns the parameter k - * @return parameter k - */ - public int getK() { - return k_; - } - - /** - * Gets the approximate value of k to use given epsilon, the normalized rank error. - * @param epsilon the normalized rank error between zero and one. - * @param pmf if true, this function returns the value of k assuming the input epsilon - * is the desired "double-sided" epsilon for the getPMF() function. Otherwise, this function - * returns the value of k assuming the input epsilon is the desired "single-sided" - * epsilon for all the other queries. - * @return the value of k given a value of epsilon. 
- * @see KllDoublesSketch - */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials - public static int getKFromEpsilon(final double epsilon, final boolean pmf) { - //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false. - final double eps = max(epsilon, 4.7634E-5); - final double kdbl = pmf - ? exp(log(2.446 / eps) / 0.9433) - : exp(log(2.296 / eps) / 0.9723); - final double krnd = round(kdbl); - final double del = abs(krnd - kdbl); - final int k = (int) (del < 1E-6 ? krnd : ceil(kdbl)); - return max(MIN_K, min(MAX_K, k)); - } - - /** - * Returns the length of the input stream. - * @return stream length - */ - public long getN() { - return n_; - } - - /** - * Gets the approximate rank error of this sketch normalized as a fraction between zero and one. - * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @return if pmf is true, returns the normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @see KllDoublesSketch - */ - public double getNormalizedRankError(final boolean pmf) { - return getNormalizedRankError(minK_, pmf); - } - - /** - * Gets the normalized rank error given k and pmf. - * Static method version of the getNormalizedRankError(boolean). - * @param k the configuration parameter - * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. - * @return if pmf is true, the normalized rank error for the getPMF() function. - * Otherwise, it is the "single-sided" normalized rank error for all the other queries. 
- * @see KllDoublesSketch - */ - // constants were derived as the best fit to 99 percentile empirically measured max error in - // thousands of trials - public static double getNormalizedRankError(final int k, final boolean pmf) { - return pmf - ? 2.446 / pow(k, 0.9433) - : 2.296 / pow(k, 0.9723); - } - - /** - * Returns the number of retained items (samples) in the sketch. - * @return the number of retained items (samples) in the sketch - */ - public int getNumRetained() { - return levels_[numLevels_] - levels_[0]; - } - - /** - * Returns true if this sketch is empty. - * @return empty flag - */ - public boolean isEmpty() { - return n_ == 0; - } - - /** - * Returns true if this sketch is in estimation mode. - * @return estimation mode flag - */ - public boolean isEstimationMode() { - return numLevels_ > 1; - } - - /** - * Returns serialized sketch in a compact byte array form. - * @return serialized sketch in a compact byte array form. - */ - public abstract byte[] toByteArray(); - - - @Override - public String toString() { - return toString(false, false); - } - - /** - * Returns a summary of the sketch as a string. - * @param withLevels if true include information about levels - * @param withData if true include sketch data - * @return string representation of sketch summary - */ - public abstract String toString(final boolean withLevels, final boolean withData); - - // Restricted Methods - - /** - * Checks the validity of the given value k - * @param k must be greater than 7 and less than 65536. 
- */ - private static void checkK(final int k) { - if (k < MIN_K || k > MAX_K) { - throw new SketchesArgumentException( - "K must be >= " + MIN_K + " and <= " + MAX_K + ": " + k); - } - } - - /** - * Finds the first level starting with level 0 that exceeds its nominal capacity - * @return level to compact - */ - int findLevelToCompact() { // - int level = 0; - while (true) { - assert level < numLevels_; - final int pop = levels_[level + 1] - levels_[level]; - final int cap = KllHelper.levelCapacity(k_, numLevels_, level, m_); - if (pop >= cap) { - return level; - } - level++; - } - } - - int currentLevelSize(final int level) { - if (level >= numLevels_) { return 0; } - return levels_[level + 1] - levels_[level]; - } - - int getNumRetainedAboveLevelZero() { - if (numLevels_ == 1) { return 0; } - return levels_[numLevels_] - levels_[1]; - } - - // for testing - - int[] getLevels() { - return levels_; - } - - int getNumLevels() { - return numLevels_; - } - -} - diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java new file mode 100644 index 000000000..0c89ff3a6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; + +import org.apache.datasketches.Family; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements an off-heap doubles KllSketch via a WritableMemory instance of the sketch. + * + *
Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *
If the sketch is empty this returns null.
+ * + * @param splitPoints an array of m unique, monotonically increasing double values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 double values on the interval [0.0, 1.0), + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. + */ + public double[] getCDF(final double[] splitPoints) { + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, true); + } + + /** + * Returns the max value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the max value of the stream + */ + public double getMaxValue() { return getMaxDoubleValue(); } + + /** + * Returns the min value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the min value of the stream + */ + public double getMinValue() { return getMinDoubleValue(); } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of splitPoints (values). + * + *The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function. + * + *
If the sketch is empty this returns null.
+ * + * @param splitPoints an array of m unique, monotonically increasing double values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 doubles on the interval [0.0, 1.0), + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. + * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right + * splitPoint, with the exception that the last interval will include maximum value. + */ + public double[] getPMF(final double[] splitPoints) { + return KllDoublesHelper.getDoublesPmfOrCdf(this, splitPoints, false); + } + + /** + * Returns an approximation to the value of the data item + * that would be preceded by the given fraction of a hypothetical sorted + * version of the input stream so far. + * + *We note that this method has a fairly large overhead (microseconds instead of nanoseconds) + * so it should not be called multiple times to get different quantiles from the same + * sketch. Instead use getQuantiles(), which pays the overhead only once. + * + *
If the sketch is empty this returns NaN. + * + * @param fraction the specified fractional position in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * If fraction = 0.0, the true minimum value of the stream is returned. + * If fraction = 1.0, the true maximum value of the stream is returned. + * + * @return the approximation to the value at the given fraction + */ + public double getQuantile(final double fraction) { + return KllDoublesHelper.getDoublesQuantile(this, fraction); + } + + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public double getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); + } + + /** + * This is a more efficient multiple-query version of getQuantile(). + * + *
This returns an array that could have been generated by using getQuantile() with many + * different fractional ranks, but doing so would be very inefficient. + * This method incurs the internal set-up overhead once and obtains multiple quantile values in + * a single query. It is strongly recommended that this method be used instead of multiple calls + * to getQuantile(). + * + *
If the sketch is empty this returns null. + * + * @param fractions given array of fractional positions in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * These fractions must be in the interval [0.0, 1.0], inclusive. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public double[] getQuantiles(final double[] fractions) { + return KllDoublesHelper.getDoublesQuantiles(this, fractions); + } + + /** + * This is also a more efficient multiple-query version of getQuantile() and allows the caller to + * specify the number of evenly spaced fractional ranks. + * + *
If the sketch is empty this returns null. + * + * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks. + * This must be a positive integer greater than 0. A value of 1 will return the min value. + * A value of 2 will return the min and the max value. A value of 3 will return the min, + * the median and the max value, etc. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public double[] getQuantiles(final int numEvenlySpaced) { + if (isEmpty()) { return null; } + return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); + } + + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public double getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); + } + + /** + * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, + * inclusive. + * + *
The resulting approximation has a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *
If the sketch is empty this returns NaN.
+ * + * @param value to be ranked + * @return an approximate rank of the given value + */ + public double getRank(final double value) { + return KllDoublesHelper.getDoubleRank(this, value); + } + + /** + * @return the iterator for this class + */ + public KllDoublesSketchIterator iterator() { + return new KllDoublesSketchIterator(getDoubleItemsArray(), getLevelsArray(), getNumLevels()); + } + + /** + * Merges another sketch into this one. + * @param other sketch to merge into this one + */ + public void merge(final KllSketch other) { + if (!other.isDoublesSketch()) { kllSketchThrow(SRC_MUST_BE_DOUBLE); } + KllDoublesHelper.mergeDoubleImpl(this, other); + } + + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. + */ + public void update(final double value) { + KllDoublesHelper.updateDouble(this, value); + } + + @Override + double[] getDoubleItemsArray() { + final int items = getItemsArrLengthItems(); + final double[] itemsArr = new double[items]; + itemsArrUpdatable.getDoubleArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + double getDoubleItemsArrayAt(final int index) { + return itemsArrUpdatable.getDouble((long)index * Double.BYTES); + } + + @Override + float[] getFloatItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } + + @Override + float getFloatItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + double getMaxDoubleValue() { + return minMaxArrUpdatable.getDouble(Double.BYTES); + } + + @Override + float getMaxFloatValue() { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + double getMinDoubleValue() { + return minMaxArrUpdatable.getDouble(0); + } + + @Override + float getMinFloatValue() { kllSketchThrow(MUST_NOT_CALL); return Float.NaN; } + + @Override + void setDoubleItemsArray(final double[] doubleItems) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + 
itemsArrUpdatable.putDoubleArray(0, doubleItems, 0, doubleItems.length); + } + + @Override + void setDoubleItemsArrayAt(final int index, final double value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable.putDouble((long)index * Double.BYTES, value); + } + + @Override + void setFloatItemsArray(final float[] floatItems) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setFloatItemsArrayAt(final int index, final float value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMaxDoubleValue(final double value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putDouble(Double.BYTES, value); + } + + @Override + void setMaxFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMinDoubleValue(final double value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putDouble(0, value); + } + + @Override + void setMinFloatValue(final float value) { kllSketchThrow(MUST_NOT_CALL); } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java new file mode 100644 index 000000000..54abe54dd --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -0,0 +1,395 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; + +import org.apache.datasketches.Family; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import 
org.apache.datasketches.memory.WritableMemory; + +//Intentional extra blank line so the code lines up with KllDirectDoublesSketch +/** + * This class implements an off-heap floats KllSketch via a WritableMemory instance of the sketch. + * + *Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *
If the sketch is empty this returns null.
+ * + * @param splitPoints an array of m unique, monotonically increasing float values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 double values on the interval [0.0, 1.0), + * which are a consecutive approximation to the CDF of the input stream given the splitPoints. + * The value at array position j of the returned CDF array is the sum of the returned values + * in positions 0 through j of the returned PMF array. + */ + public double[] getCDF(final float[] splitPoints) { + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, true); + } + + /** + * Returns the max value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the max value of the stream + */ + public float getMaxValue() { return getMaxFloatValue(); } + + /** + * Returns the min value of the stream. + * If the sketch is empty this returns NaN. + * + * @return the min value of the stream + */ + public float getMinValue() { return getMinFloatValue(); } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of splitPoints (values). + * + *The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function. + * + *
If the sketch is empty this returns null.
+ * + * @param splitPoints an array of m unique, monotonically increasing float values + * that divide the real number line into m+1 consecutive disjoint intervals. + * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and + * exclusive of the right splitPoint, with the exception that the last interval will include + * the maximum value. + * It is not necessary to include either the min or max values in these split points. + * + * @return an array of m+1 doubles on the interval [0.0, 1.0), + * each of which is an approximation to the fraction of the total input stream values + * (the mass) that fall into one of those intervals. + * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right + * splitPoint, with the exception that the last interval will include maximum value. + */ + public double[] getPMF(final float[] splitPoints) { + return KllFloatsHelper.getFloatsPmfOrCdf(this, splitPoints, false); + } + + /** + * Returns an approximation to the value of the data item + * that would be preceded by the given fraction of a hypothetical sorted + * version of the input stream so far. + * + *We note that this method has a fairly large overhead (microseconds instead of nanoseconds) + * so it should not be called multiple times to get different quantiles from the same + * sketch. Instead use getQuantiles(), which pays the overhead only once. + * + *
If the sketch is empty this returns NaN. + * + * @param fraction the specified fractional position in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * If fraction = 0.0, the true minimum value of the stream is returned. + * If fraction = 1.0, the true maximum value of the stream is returned. + * + * @return the approximation to the value at the given fraction + */ + public float getQuantile(final double fraction) { + return KllFloatsHelper.getFloatsQuantile(this, fraction); + } + + /** + * Gets the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the lower bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileLowerBound(final double fraction) { + return getQuantile(max(0, fraction - KllHelper.getNormalizedRankError(getMinK(), false))); + } + + /** + * This is a more efficient multiple-query version of getQuantile(). + * + *
This returns an array that could have been generated by using getQuantile() with many + * different fractional ranks, but doing so would be very inefficient. + * This method incurs the internal set-up overhead once and obtains multiple quantile values in + * a single query. It is strongly recommended that this method be used instead of multiple calls + * to getQuantile(). + * + *
If the sketch is empty this returns null. + * + * @param fractions given array of fractional positions in the hypothetical sorted stream. + * These are also called normalized ranks or fractional ranks. + * These fractions must be in the interval [0.0, 1.0], inclusive. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public float[] getQuantiles(final double[] fractions) { + return KllFloatsHelper.getFloatsQuantiles(this, fractions); + } + + /** + * This is also a more efficient multiple-query version of getQuantile() and allows the caller to + * specify the number of evenly spaced fractional ranks. + * + *
If the sketch is empty this returns null. + * + * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks. + * This must be a positive integer greater than 0. A value of 1 will return the min value. + * A value of 2 will return the min and the max value. A value of 3 will return the min, + * the median and the max value, etc. + * + * @return array of approximations to the given fractions in the same order as given fractions + * array. + */ + public float[] getQuantiles(final int numEvenlySpaced) { + if (isEmpty()) { return null; } + return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced)); + } + + /** + * Gets the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. + * @param fraction the given normalized rank as a fraction + * @return the upper bound of the value interval in which the true quantile of the given rank + * exists with a confidence of at least 99%. Returns NaN if the sketch is empty. + */ + public float getQuantileUpperBound(final double fraction) { + return getQuantile(min(1.0, fraction + KllHelper.getNormalizedRankError(getMinK(), false))); + } + + /** + * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, + * inclusive. + * + *
The resulting approximation has a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function. + * + *
If the sketch is empty this returns NaN.
+ * + * @param value to be ranked + * @return an approximate rank of the given value + */ + public double getRank(final float value) { + return KllFloatsHelper.getFloatRank(this, value); + } + + /** + * @return the iterator for this class + */ + public KllFloatsSketchIterator iterator() { + return new KllFloatsSketchIterator(getFloatItemsArray(), getLevelsArray(), getNumLevels()); + } + + /** + * Merges another sketch into this one. + * @param other sketch to merge into this one + */ + public void merge(final KllSketch other) { + if (!other.isFloatsSketch()) { kllSketchThrow(SRC_MUST_BE_FLOAT); } + KllFloatsHelper.mergeFloatImpl(this, other); + } + + /** + * Updates this sketch with the given data item. + * + * @param value an item from a stream of items. NaNs are ignored. + */ + public void update(final float value) { + KllFloatsHelper.updateFloat(this, value); + } + + @Override + double[] getDoubleItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; } + + @Override + double getDoubleItemsArrayAt(final int index) { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float[] getFloatItemsArray() { + final int items = getItemsArrLengthItems(); + final float[] itemsArr = new float[items]; + itemsArrUpdatable.getFloatArray(0, itemsArr, 0, items); + return itemsArr; + } + + @Override + float getFloatItemsArrayAt(final int index) { + return itemsArrUpdatable.getFloat((long)index * Float.BYTES); + } + + @Override + double getMaxDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float getMaxFloatValue() { + return minMaxArrUpdatable.getFloat(Float.BYTES); + } + + @Override + double getMinDoubleValue() { kllSketchThrow(MUST_NOT_CALL); return Double.NaN; } + + @Override + float getMinFloatValue() { + return minMaxArrUpdatable.getFloat(0); + } + + @Override + void setDoubleItemsArray(final double[] doubleItems) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setDoubleItemsArrayAt(final int index, final double 
value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setFloatItemsArray(final float[] floatItems) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable.putFloatArray(0, floatItems, 0, floatItems.length); + } + + @Override + void setFloatItemsArrayAt(final int index, final float value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable.putFloat((long)index * Float.BYTES, value); + } + + @Override + void setMaxDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMaxFloatValue(final float value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putFloat(Float.BYTES, value); + } + + @Override + void setMinDoubleValue(final double value) { kllSketchThrow(MUST_NOT_CALL); } + + @Override + void setMinFloatValue(final float value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable.putFloat(0, value); + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java new file mode 100644 index 000000000..959d4bd3d --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectSketch.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_IMMUTABLE; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; + +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements all the methods for the Direct (off-heap) sketches that are independent + * of the sketch type (float or double). + */ +abstract class KllDirectSketch extends KllSketch { + final boolean updatableMemory; + WritableMemory levelsArrUpdatable; + WritableMemory minMaxArrUpdatable; + WritableMemory itemsArrUpdatable; + + /** + * For the direct sketches it is important that the methods implemented here are designed to + * work dynamically as the sketch grows off-heap. 
+ * @param sketchType either DOUBLE_SKETCH or FLOAT_SKETCH + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + */ + KllDirectSketch(final SketchType sketchType, final WritableMemory wmem, final MemoryRequestServer memReqSvr, + final KllMemoryValidate memVal) { + super(sketchType, wmem, memReqSvr); + updatableMemory = memVal.updatableMemory && memReqSvr != null; + levelsArrUpdatable = memVal.levelsArrUpdatable; + minMaxArrUpdatable = memVal.minMaxArrUpdatable; + itemsArrUpdatable = memVal.itemsArrUpdatable; + } + + @Override + public int getK() { + return getMemoryK(wmem); + } + + @Override + public long getN() { + return getMemoryN(wmem); + } + + @Override + public void reset() { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + final int k = getK(); + setN(0); + setMinK(k); + setNumLevels(1); + setLevelsArray(new int[] {k, k}); + setLevelZeroSorted(false); + final int newLevelsArrLen = 2 * Integer.BYTES; + final int newItemsArrLen = k; + KllHelper.memorySpaceMgmt(this, newLevelsArrLen, newItemsArrLen); + levelsArrUpdatable.putIntArray(0L, new int[] {k, k}, 0, 2); + if (sketchType == SketchType.DOUBLES_SKETCH) { + minMaxArrUpdatable.putDoubleArray(0L, new double[] {Double.NaN, Double.NaN}, 0, 2); + itemsArrUpdatable.putDoubleArray(0L, new double[k], 0, k); + } else { + minMaxArrUpdatable.putFloatArray(0L, new float[] {Float.NaN, Float.NaN}, 0, 2); + itemsArrUpdatable.putFloatArray(0L, new float[k], 0, k); + } + } + + @Override + public byte[] toUpdatableByteArray() { + final int bytes = (int) wmem.getCapacity(); + final byte[] byteArr = new byte[bytes]; + wmem.getByteArray(0, byteArr, 0, bytes); + return byteArr; + } + + int getItemsArrLengthItems() { + return getLevelsArray()[getNumLevels()]; + } + + @Override + int[] getLevelsArray() { + final int numInts = getNumLevels() + 1; + final int[] myLevelsArr = new int[numInts]; + levelsArrUpdatable.getIntArray(0, myLevelsArr, 
0, numInts); + return myLevelsArr; + } + + @Override + int getLevelsArrayAt(final int index) { + return levelsArrUpdatable.getInt((long)index * Integer.BYTES); + } + + @Override + int getM() { + return getMemoryM(wmem); + } + + @Override + int getMinK() { + return getMemoryMinK(wmem); + } + + @Override + int getNumLevels() { + return getMemoryNumLevels(wmem); + } + + @Override + void incN() { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + long n = getMemoryN(wmem); + setMemoryN(wmem, ++n); + } + + @Override + void incNumLevels() { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + int numLevels = getMemoryNumLevels(wmem); + setMemoryNumLevels(wmem, ++numLevels); + } + + @Override + boolean isLevelZeroSorted() { + return getMemoryLevelZeroSortedFlag(wmem); + } + + @Override + void setItemsArrayUpdatable(final WritableMemory itemsMem) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + itemsArrUpdatable = itemsMem; + } + + @Override + void setLevelsArray(final int[] levelsArr) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + levelsArrUpdatable.putIntArray(0, levelsArr, 0, levelsArr.length); + } + + @Override + void setLevelsArrayAt(final int index, final int value) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + levelsArrUpdatable.putInt((long)index * Integer.BYTES, value); + } + + @Override + void setLevelsArrayAtMinusEq(final int index, final int minusEq) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + final int offset = index * Integer.BYTES; + final int curV = levelsArrUpdatable.getInt(offset); + levelsArrUpdatable.putInt(offset, curV - minusEq); + } + + @Override + void setLevelsArrayAtPlusEq(final int index, final int plusEq) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + final int offset = index * Integer.BYTES; + final int curV = levelsArrUpdatable.getInt(offset); + levelsArrUpdatable.putInt(offset, curV + plusEq); + } + + @Override + void 
setLevelsArrayUpdatable(final WritableMemory levelsMem) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + levelsArrUpdatable = levelsMem; + } + + @Override + void setLevelZeroSorted(final boolean sorted) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + setMemoryLevelZeroSortedFlag(wmem, sorted); + } + + @Override + void setMinK(final int minK) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + setMemoryMinK(wmem, minK); + } + + @Override + void setMinMaxArrayUpdatable(final WritableMemory minMaxMem) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + minMaxArrUpdatable = minMaxMem; + } + + @Override + void setN(final long n) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + setMemoryN(wmem, n); + } + + @Override + void setNumLevels(final int numLevels) { + if (!updatableMemory) { kllSketchThrow(TGT_IS_IMMUTABLE); } + setMemoryNumLevels(wmem, numLevels); + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 9b738553e..25a71f699 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -19,6 +19,8 @@ package org.apache.datasketches.kll; +import static java.lang.Math.max; +import static java.lang.Math.min; import static org.apache.datasketches.Util.isEven; import static org.apache.datasketches.Util.isOdd; @@ -32,23 +34,193 @@ * @author Kevin Lang * @author Alexander Saydakov */ -class KllDoublesHelper { +final class KllDoublesHelper { + + static double getDoubleRank(final KllSketch mine, final double value) { + if (mine.isEmpty()) { return Double.NaN; } + int level = 0; + int weight = 1; + long total = 0; + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + final int[] myLevelsArr = mine.getLevelsArray(); + while (level < mine.getNumLevels()) { + final int fromIndex = 
myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + for (int i = fromIndex; i < toIndex; i++) { + if (myDoubleItemsArr[i] < value) { + total += weight; + } else if (level > 0 || mine.isLevelZeroSorted()) { + break; // levels above 0 are sorted, no point comparing further + } + } + level++; + weight *= 2; + } + return (double) total / mine.getN(); + } - /** - * Checks the sequential validity of the given array of double values. - * They must be unique, monotonically increasing and not NaN. - * @param values the given array of values - */ - static void validateDoubleValues(final double[] values) { - for (int i = 0; i < values.length; i++) { - if (!Double.isFinite(values[i])) { - throw new SketchesArgumentException("Values must be finite"); + static double[] getDoublesPmfOrCdf(final KllSketch mine, final double[] splitPoints, final boolean isCdf) { + if (mine.isEmpty()) { return null; } + validateDoubleValues(splitPoints); + final double[] buckets = new double[splitPoints.length + 1]; + final int myNumLevels = mine.getNumLevels(); + final int[] myLevelsArr = mine.getLevelsArray(); + int level = 0; + int weight = 1; + while (level < myNumLevels) { + final int fromIndex = myLevelsArr[level]; + final int toIndex = myLevelsArr[level + 1]; // exclusive + if (level == 0 && !mine.isLevelZeroSorted()) { + KllDoublesHelper.incrementDoublesBucketsUnsortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); + } else { + KllDoublesHelper.incrementDoublesBucketsSortedLevel(mine, fromIndex, toIndex, weight, splitPoints, buckets); } - if (i < values.length - 1 && values[i] >= values[i + 1]) { - throw new SketchesArgumentException( - "Values must be unique and monotonically increasing"); + level++; + weight *= 2; + } + // normalize and, if CDF, convert to cumulative + if (isCdf) { + double subtotal = 0; + for (int i = 0; i < buckets.length; i++) { + subtotal += buckets[i]; + buckets[i] = subtotal / mine.getN(); + } + } else { + for (int i = 0; i 
< buckets.length; i++) { + buckets[i] /= mine.getN(); + } + } + return buckets; + } + + static double getDoublesQuantile(final KllSketch mine, final double fraction) { + if (mine.isEmpty()) { return Double.NaN; } + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + //These two assumptions make KLL compatible with the previous classic Quantiles Sketch + if (fraction == 0.0) { return mine.getMinDoubleValue(); } + if (fraction == 1.0) { return mine.getMaxDoubleValue(); } + final KllDoublesQuantileCalculator quant = KllDoublesHelper.getDoublesQuantileCalculator(mine); + return quant.getQuantile(fraction); + } + + static double[] getDoublesQuantiles(final KllSketch mine, final double[] fractions) { + if (mine.isEmpty()) { return null; } + KllDoublesQuantileCalculator quant = null; + final double[] quantiles = new double[fractions.length]; + for (int i = 0; i < fractions.length; i++) { + final double fraction = fractions[i]; + if (fraction < 0.0 || fraction > 1.0) { + throw new SketchesArgumentException("Fraction cannot be less than zero nor greater than 1.0"); + } + if (fraction == 0.0) { quantiles[i] = mine.getMinDoubleValue(); } + else if (fraction == 1.0) { quantiles[i] = mine.getMaxDoubleValue(); } + else { + if (quant == null) { + quant = KllDoublesHelper.getDoublesQuantileCalculator(mine); + } + quantiles[i] = quant.getQuantile(fraction); } } + return quantiles; + } + + static void mergeDoubleImpl(final KllSketch mine, final KllSketch other) { + if (other.isEmpty()) { return; } + final long finalN = mine.getN() + other.getN(); + //update this sketch with level0 items from the other sketch + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + final int otherNumLevels = other.getNumLevels(); + final int[] otherLevelsArr = other.getLevelsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + KllDoublesHelper.updateDouble(mine, 
otherDoubleItemsArr[i]); + } + // after the level 0 update, we capture the key mutable variables + final double myMin = mine.getMinDoubleValue(); + final double myMax = mine.getMaxDoubleValue(); + final int myMinK = mine.getMinK(); + + final int myCurNumLevels = mine.getNumLevels(); + final int[] myCurLevelsArr = mine.getLevelsArray(); + final double[] myCurDoubleItemsArr = mine.getDoubleItemsArray(); + + final int myNewNumLevels; + final int[] myNewLevelsArr; + final double[] myNewDoubleItemsArr; + + if (otherNumLevels > 1) { //now merge other levels if they exist + final int tmpSpaceNeeded = mine.getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); + final double[] workbuf = new double[tmpSpaceNeeded]; + final int ub = KllHelper.ubOnNumLevels(finalN); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); + + populateDoubleWorkArrays(mine, other, workbuf, worklevels, provisionalNumLevels); + + // notice that workbuf is being used as both the input and output + final int[] result = generalDoublesCompress(mine.getK(), mine.getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, mine.isLevelZeroSorted(), KllSketch.random); + final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for the "self" sketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; //was finalNumLevels + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY (was newbuf) + myNewDoubleItemsArr = (targetItemCount == myCurDoubleItemsArr.length) + ? 
myCurDoubleItemsArr + : new double[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + //shift the new items array + System.arraycopy(workbuf, outlevels[0], myNewDoubleItemsArr, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (mine.updatablMemory) { + mine.wmem = KllHelper.memorySpaceMgmt(mine, myNewLevelsArr.length, myNewDoubleItemsArr.length); + } + + } else { + myNewNumLevels = myCurNumLevels; + myNewLevelsArr = myCurLevelsArr; + myNewDoubleItemsArr = myCurDoubleItemsArr; + } + + //Update Preamble: + mine.setN(finalN); + if (other.isEstimationMode()) { //otherwise the merge brings over exact items. + mine.setMinK(min(myMinK, other.getMinK())); + } + + //Update min, max values + final double otherMin = other.getMinDoubleValue(); + final double otherMax = other.getMaxDoubleValue(); + mine.setMinDoubleValue(resolveDoubleMinValue(myMin, otherMin)); + mine.setMaxDoubleValue(resolveDoubleMaxValue(myMax, otherMax)); + + //Update numLevels, levelsArray, items + mine.setNumLevels(myNewNumLevels); + mine.setLevelsArray(myNewLevelsArr); + mine.setDoubleItemsArray(myNewDoubleItemsArr); + assert KllHelper.sumTheSampleWeights(mine.getNumLevels(), mine.getLevelsArray()) == mine.getN(); } static void mergeSortedDoubleArrays( @@ -82,6 +254,62 @@ static void mergeSortedDoubleArrays( assert b == limB; } + /** + * Validation Method. 
This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = start + offset; + for (int i = start; i < (start + half_length); i++) { + buf[i] = buf[j]; + j += 2; + } + } + + /** + * Validation Method. This must be modified to test validation + * @param buf the items array + * @param start data start + * @param length items length + * @param random instance of Random + */ + static void randomlyHalveUpDoubles(final double[] buf, final int start, final int length, final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = (start + length) - 1 - offset; + for (int i = (start + length) - 1; i >= (start + half_length); i--) { + buf[i] = buf[j]; + j -= 2; + } + } + + static void updateDouble(final KllSketch mine, final double value) { + if (Double.isNaN(value)) { return; } + if (mine.isEmpty()) { + mine.setMinDoubleValue(value); + mine.setMaxDoubleValue(value); + } else { + if (value < mine.getMinDoubleValue()) { mine.setMinDoubleValue(value); } + if (value > mine.getMaxDoubleValue()) { mine.setMaxDoubleValue(value); } + } + if (mine.getLevelsArrayAt(0) == 0) { KllHelper.compressWhileUpdatingSketch(mine); } + mine.incN(); + mine.setLevelZeroSorted(false); + final int nextPos = mine.getLevelsArrayAt(0) - 1; + assert mine.getLevelsArrayAt(0) >= 0; + mine.setLevelsArrayAt(0, nextPos); + mine.setDoubleItemsArrayAt(nextPos, value); + } + /** * Compression algorithm used to 
merge higher levels. *Here is what we do for each level:
@@ -112,9 +340,10 @@ static void mergeSortedDoubleArrays( * @param outBuf the same array as inBuf * @param outLevels the same size as inLevels * @param isLevelZeroSorted true if this.level 0 is sorted + * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ - static int[] generalDoublesCompress( + private static int[] generalDoublesCompress( final int k, final int m, final int numLevelsIn, @@ -192,52 +421,130 @@ static int[] generalDoublesCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } - //This must be modified for validation - static void randomlyHalveDownDoubles(final double[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = start + offset; - for (int i = start; i < (start + half_length); i++) { - buf[i] = buf[j]; - j += 2; + private static KllDoublesQuantileCalculator getDoublesQuantileCalculator(final KllSketch mine) { + final int[] myLevelsArr = mine.getLevelsArray(); + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + if (!mine.isLevelZeroSorted()) { + Arrays.sort(mine.getDoubleItemsArray(), myLevelsArr[0], myLevelsArr[1]); + mine.setLevelZeroSorted(true); } + return new KllDoublesQuantileCalculator(myDoubleItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN()); } - //This must be modified for validation - static void randomlyHalveUpDoubles(final double[] buf, final 
int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = (start + length) - 1 - offset; - for (int i = (start + length) - 1; i >= (start + half_length); i--) { - buf[i] = buf[j]; - j -= 2; + private static void incrementDoublesBucketsSortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket + } else { + j++; // no more samples for this bucket + } + } + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } + } + + private static void incrementDoublesBucketsUnsortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final double[] splitPoints, final double[] buckets) { + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myDoubleItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; + } + } + + private static void populateDoubleWorkArrays(final KllSketch mine, final KllSketch other, final double[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + worklevels[0] = 0; + final int[] myLevelsArr = mine.getLevelsArray(); + final int[] 
otherLevelsArr = other.getLevelsArray(); + final double[] myDoubleItemsArr = mine.getDoubleItemsArray(); + final double[] otherDoubleItemsArr = other.getDoubleItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = KllHelper.currentLevelSize(0, mine.getNumLevels(),myLevelsArr); + System.arraycopy(myDoubleItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, mine.getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if (selfPop > 0 && otherPop == 0) { + System.arraycopy(myDoubleItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + mergeSortedDoubleArrays(myDoubleItemsArr, myLevelsArr[lvl], selfPop, otherDoubleItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } + } + } + + private static double resolveDoubleMaxValue(final double myMax, final double otherMax) { + if (Double.isNaN(myMax) && Double.isNaN(otherMax)) { return Double.NaN; } + if (Double.isNaN(myMax)) { return otherMax; } + if (Double.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + private static double resolveDoubleMinValue(final double myMin, final double otherMin) { + if (Double.isNaN(myMin) && Double.isNaN(otherMin)) { return Double.NaN; } + if (Double.isNaN(myMin)) { return otherMin; } + if (Double.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + + /** + * Validation Method. + * Checks the sequential validity of the given array of double values. 
+ * They must be unique, monotonically increasing and not NaN. + * @param values the given array of values + */ + private static void validateDoubleValues(final double[] values) { + for (int i = 0; i < values.length; i++) { + if (!Double.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } } } /* + * Validation Method. * The following must be enabled for use with the KllDoublesValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two Validation Methods * above need to be modified as commented. */ - // static int nextOffset = 0; // // private static int deterministicOffset() { @@ -247,4 +554,3 @@ static void randomlyHalveUpDoubles(final double[] buf, final int start, final in // } } - diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java index ba269836f..7870002f1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllDoublesQuantileCalculator { numLevels_ = 0; //not used by test } - double getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private double approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final double[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final double[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,5 +132,39 @@ private static void tandemMerge( } } -} + double getQuantile(final double rank) { + final long pos = QuantilesHelper.posOfRank(rank, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private double approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + private void populateFromSketch(final double[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final 
int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 1503ddb60..c5aadebf0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -21,117 +21,69 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.Util.isOdd; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; -import java.util.Arrays; - -import org.apache.datasketches.Family; -import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** - * Please refer to the documentation in the package-info:Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
Here is what we do for each level:
@@ -112,9 +340,10 @@ static void mergeSortedFloatArrays( * @param outBuf the same array as inBuf * @param outLevels the same size as inLevels * @param isLevelZeroSorted true if this.level 0 is sorted + * @param random instance of java.util.Random * @return int array of: {numLevels, targetItemCount, currentItemCount) */ - static int[] generalFloatsCompress( + private static int[] generalFloatsCompress( final int k, final int m, final int numLevelsIn, @@ -192,52 +421,130 @@ static int[] generalFloatsCompress( numLevels++; targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m); } - } // end of code for compacting a level // determine whether we have processed all levels yet (including any new levels that we created) - if (curLevel == (numLevels - 1)) { doneYet = true; } - } // end of loop over levels assert (outLevels[numLevels] - outLevels[0]) == currentItemCount; - return new int[] {numLevels, targetItemCount, currentItemCount}; } - //This must be modified for validation - static void randomlyHalveDownFloats(final float[] buf, final int start, final int length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = start + offset; - for (int i = start; i < (start + half_length); i++) { - buf[i] = buf[j]; - j += 2; + private static KllFloatsQuantileCalculator getFloatsQuantileCalculator(final KllSketch mine) { + final int[] myLevelsArr = mine.getLevelsArray(); + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + if (!mine.isLevelZeroSorted()) { + Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]); + mine.setLevelZeroSorted(true); } + return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN()); } - //This must be modified for validation - static void randomlyHalveUpFloats(final float[] buf, final int start, final int 
length, final Random random) { - assert isEven(length); - final int half_length = length / 2; - final int offset = random.nextInt(2); // disable for validation - //final int offset = deterministicOffset(); // enable for validation - int j = (start + length) - 1 - offset; - for (int i = (start + length) - 1; i >= (start + half_length); i--) { - buf[i] = buf[j]; - j -= 2; + private static void incrementFloatBucketsSortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + int i = fromIndex; + int j = 0; + while (i < toIndex && j < splitPoints.length) { + if (myFloatItemsArr[i] < splitPoints[j]) { + buckets[j] += weight; // this sample goes into this bucket + i++; // move on to next sample and see whether it also goes into this bucket + } else { + j++; // no more samples for this bucket + } + } + // now either i == toIndex (we are out of samples), or + // j == numSplitPoints (we are out of buckets, but there are more samples remaining) + // we only need to do something in the latter case + if (j == splitPoints.length) { + buckets[j] += weight * (toIndex - i); + } + } + + private static void incrementFloatBucketsUnsortedLevel( + final KllSketch mine, final int fromIndex, final int toIndex, + final int weight, final float[] splitPoints, final double[] buckets) { + final float[] myFloatItemsArr = mine.getFloatItemsArray(); + for (int i = fromIndex; i < toIndex; i++) { + int j; + for (j = 0; j < splitPoints.length; j++) { + if (myFloatItemsArr[i] < splitPoints[j]) { + break; + } + } + buckets[j] += weight; + } + } + + private static void populateFloatWorkArrays(final KllSketch mine, final KllSketch other, final float[] workbuf, + final int[] worklevels, final int provisionalNumLevels) { + worklevels[0] = 0; + final int[] myLevelsArr = mine.getLevelsArray(); + final int[] otherLevelsArr = other.getLevelsArray(); + final 
float[] myFloatItemsArr = mine.getFloatItemsArray(); + final float[] otherFloatItemsArr = other.getFloatItemsArray(); + + // Note: the level zero data from "other" was already inserted into "self" + final int selfPopZero = KllHelper.currentLevelSize(0, mine.getNumLevels(), myLevelsArr); + System.arraycopy( myFloatItemsArr, myLevelsArr[0], workbuf, worklevels[0], selfPopZero); + worklevels[1] = worklevels[0] + selfPopZero; + + for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { + final int selfPop = KllHelper.currentLevelSize(lvl, mine.getNumLevels(), myLevelsArr); + final int otherPop = KllHelper.currentLevelSize(lvl, other.getNumLevels(), otherLevelsArr); + worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + + if (selfPop > 0 && otherPop == 0) { + System.arraycopy( myFloatItemsArr, myLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + } else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + } else if (selfPop > 0 && otherPop > 0) { + mergeSortedFloatArrays( myFloatItemsArr, myLevelsArr[lvl], selfPop, otherFloatItemsArr, + otherLevelsArr[lvl], otherPop, workbuf, worklevels[lvl]); + } + } + } + + private static float resolveFloatMaxValue(final float myMax, final float otherMax) { + if (Float.isNaN(myMax) && Float.isNaN(otherMax)) { return Float.NaN; } + if (Float.isNaN(myMax)) { return otherMax; } + if (Float.isNaN(otherMax)) { return myMax; } + return max(myMax, otherMax); + } + + private static float resolveFloatMinValue(final float myMin, final float otherMin) { + if (Float.isNaN(myMin) && Float.isNaN(otherMin)) { return Float.NaN; } + if (Float.isNaN(myMin)) { return otherMin; } + if (Float.isNaN(otherMin)) { return myMin; } + return min(myMin, otherMin); + } + + /** + * Validation Method. + * Checks the sequential validity of the given array of float values. + * They must be unique, monotonically increasing and not NaN. 
+ * @param values the given array of values + */ + private static void validateFloatValues(final float[] values) { + for (int i = 0; i < values.length; i++) { + if (!Float.isFinite(values[i])) { + throw new SketchesArgumentException("Values must be finite"); + } + if (i < values.length - 1 && values[i] >= values[i + 1]) { + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing"); + } } } /* + * Validation Method. * The following must be enabled for use with the KllFloatsValidationTest, - * which is only enabled for manual testing. In addition, the two methods + * which is only enabled for manual testing. In addition, two methods * above need to be modified as commented. */ - // static int nextOffset = 0; // // private static int deterministicOffset() { diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java index 97f628e29..87539fc0c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java @@ -58,41 +58,6 @@ final class KllFloatsQuantileCalculator { numLevels_ = 0; //not used by test } - float getQuantile(final double phi) { //phi is normalized rank [0,1]. 
- final long pos = QuantilesHelper.posOfPhi(phi, n_); - return approximatelyAnswerPositonalQuery(pos); - } - - private float approximatelyAnswerPositonalQuery(final long pos) { - assert pos >= 0; - assert pos < n_; - final int index = QuantilesHelper.chunkContainingPos(weights_, pos); - return items_[index]; - } - - private void populateFromSketch(final float[] srcItems, final int[] srcLevels, - final int numLevels, final int numItems) { - final int offset = srcLevels[0]; - System.arraycopy(srcItems, offset, items_, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < numLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(weights_, fromIndex, toIndex, weight); - levels_[dstLevel] = fromIndex; - levels_[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - weights_[numItems] = 0; - numLevels_ = dstLevel; - } - private static void blockyTandemMergeSort(final float[] items, final long[] weights, final int[] levels, final int numLevels) { if (numLevels == 1) { return; } @@ -167,4 +132,39 @@ private static void tandemMerge( } } + float getQuantile(final double rank) { + final long pos = QuantilesHelper.posOfRank(rank, n_); + return approximatelyAnswerPositonalQuery(pos); + } + + private float approximatelyAnswerPositonalQuery(final long pos) { + assert pos >= 0; + assert pos < n_; + final int index = QuantilesHelper.chunkContainingPos(weights_, pos); + return items_[index]; + } + + private void populateFromSketch(final float[] srcItems, final int[] srcLevels, + final int numLevels, final int numItems) { + final int offset = srcLevels[0]; + System.arraycopy(srcItems, offset, items_, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < numLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int 
toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(weights_, fromIndex, toIndex, weight); + levels_[dstLevel] = fromIndex; + levels_[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + weights_[numItems] = 0; + numLevels_ = dstLevel; + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 7d2639fc8..2ef1c3de4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,32 +21,41 @@ import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.Util.isOdd; +import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; +import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL; +import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; -import java.util.Arrays; - -import org.apache.datasketches.Family; -import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** - * Please refer to the documentation in the package-info:Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
The intent of the design of this class was to isolate the detailed knowledge of the bit and + * byte layout of the serialized form of the sketches derived from the base sketch classes into one place. + * This allows the possibility of the introduction of different serialization + * schemes with minimal impact on the rest of the library.
+ * + *+ * LAYOUT: The low significance bytes of this long based data structure are on the right. + * The multi-byte primitives are stored in native byte order. + * The single byte fields are treated as unsigned.
+ * + *An empty sketch requires only 8 bytes, which is only preamble. + * A serialized, non-empty KllDoublesSketch requires at least 16 bytes of preamble. + * A serialized, non-empty KllFloatsSketch requires at least 12 bytes of preamble.
+ * + *{@code + * Serialized float sketch layout, more than one item: + * Adr: + * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | + * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | + * 1 ||---------------------------------N_LONG---------------------------------------| + * || | | | 20 | 19 | 18 | 17 | 16 | + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|------Min K------------| + * || | | | | | | | | + * ? ||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| + * || | | | | | | | | + * ? ||<-----Float Items Arr Start-------]|[<---------Min/Max Arr End----------------| + * || | | | | | | | | + * ? || | | | |[<-------Float Items Arr End--------------| + * + * Serialized float sketch layout, Empty (8 bytes) and Single Item (12 bytes): + * Adr: + * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | + * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | + * 1 || |-------------Single Item------------------| + * + * + * + * Serialized double sketch layout, more than one item: + * Adr: + * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | + * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | + * 1 ||---------------------------------N_LONG---------------------------------------| + * || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | + * 2 ||<-------Levels Arr Start----------]| unused |NumLevels|------Min K------------| + * || | | | | | | | | + * ? ||<-------Min/Max Arr Start---------]|[<----------Levels Arr End----------------| + * || | | | | | | | | + * ? ||<----Double Items Arr Start-------]|[<---------Min/Max Arr End----------------| + * || | | | | | | | | + * ? 
|| | | | |[<------Double Items Arr End--------------| + * + * Serialized double sketch layout, Empty (8 bytes) and Single Item (16 bytes): + * Adr: + * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | + * || | 8 | + * 1 ||------------------------------Single Item-------------------------------------| + * + * The structure of the data block depends on Layout: + * + * For FLOAT_SINGLE_COMPACT or DOUBLE_SINGLE_COMPACT: + * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8 + * + * For FLOAT_FULL_COMPACT: + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of numLevels integers; + * Followed by Float Min_Value, then Float Max_Value + * Followed by an array of Floats of length retainedItems() + * + * For DOUBLE_FULL_COMPACT + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of numLevels integers; + * Followed by Double Min_Value, then Double Max_Value + * Followed by an array of Doubles of length retainedItems() + * + * For FLOAT_UPDATABLE + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of (numLevels + 1) integers; + * Followed by Float Min_Value, then Float Max_Value + * Followed by an array of Floats of length KllHelper.computeTotalItemCapacity(...). + * + * For DOUBLE_UPDATABLE + * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of (numLevels + 1) integers; + * Followed by Double Min_Value, then Double Max_Value + * Followed by an array of Doubles of length KllHelper.computeTotalItemCapacity(...). 
+ * + * }+ * + * @author Lee Rhodes + */ +final class KllPreambleUtil { + + private KllPreambleUtil() {} + + static final String LS = System.getProperty("line.separator"); + + // Preamble byte addresses + static final int PREAMBLE_INTS_BYTE_ADR = 0; + static final int SER_VER_BYTE_ADR = 1; + static final int FAMILY_BYTE_ADR = 2; + static final int FLAGS_BYTE_ADR = 3; + static final int K_SHORT_ADR = 4; // to 5 + static final int M_BYTE_ADR = 6; + // 7 is reserved for future use + // SINGLE ITEM ONLY + static final int DATA_START_ADR_SINGLE_ITEM = 8; + + // MULTI-ITEM + static final int N_LONG_ADR = 8; // to 15 + static final int MIN_K_SHORT_ADR = 16; // to 17 + static final int NUM_LEVELS_BYTE_ADR = 18; + + // 19 is reserved for future use + static final int DATA_START_ADR = 20; // Full Sketch, not single item + + // Other static values + static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format + static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format + static final byte SERIAL_VERSION_UPDATABLE = 3; // + static final int PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item + static final int PREAMBLE_INTS_FULL = 5; // Full preamble, not empty nor single item + + // Flag bit masks + static final int EMPTY_BIT_MASK = 1; + static final int LEVEL_ZERO_SORTED_BIT_MASK = 2; + static final int SINGLE_ITEM_BIT_MASK = 4; + static final int DOUBLES_SKETCH_BIT_MASK = 8; + static final int UPDATABLE_BIT_MASK = 16; + + enum Layout { + FLOAT_FULL_COMPACT, FLOAT_EMPTY_COMPACT, FLOAT_SINGLE_COMPACT, + DOUBLE_FULL_COMPACT, DOUBLE_EMPTY_COMPACT, DOUBLE_SINGLE_COMPACT, + FLOAT_UPDATABLE, DOUBLE_UPDATABLE } + + /** + * Returns a human readable string summary of the internal state of the given byte array. + * Used primarily in testing. + * + * @param byteArr the given byte array. + * @return the summary string. 
+ */ + static String toString(final byte[] byteArr) { + final Memory mem = Memory.wrap(byteArr); + return toString(mem); + } + + /** + * Returns a human readable string summary of the internal state of the given Memory. + * Used primarily in testing. + * + * @param mem the given Memory + * @return the summary string. + */ + static String toString(final Memory mem) { + return memoryToString(mem); + } + + static String memoryToString(final Memory mem) { + final KllMemoryValidate memChk = new KllMemoryValidate(mem); + final int flags = memChk.flags & 0XFF; + final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + + zeroPad(Integer.toBinaryString(flags), 8); + final int preInts = memChk.preInts; + final StringBuilder sb = new StringBuilder(); + sb.append(Util.LS).append("### KLL SKETCH MEMORY SUMMARY:").append(LS); + sb.append("Byte 0 : Preamble Ints : ").append(preInts).append(LS); + sb.append("Byte 1 : SerVer : ").append(memChk.serVer).append(LS); + sb.append("Byte 2 : FamilyID : ").append(memChk.familyID).append(LS); + sb.append(" FamilyName : ").append(memChk.famName).append(LS); + sb.append("Byte 3 : Flags Field : ").append(flagsStr).append(LS); + sb.append(" Bit Flag Name").append(LS); + sb.append(" 0 EMPTY COMPACT : ").append(memChk.empty).append(LS); + sb.append(" 1 LEVEL_ZERO_SORTED : ").append(memChk.level0Sorted).append(LS); + sb.append(" 2 SINGLE_ITEM COMPACT: ").append(memChk.singleItem).append(LS); + sb.append(" 3 DOUBLES_SKETCH : ").append(memChk.doublesSketch).append(LS); + sb.append(" 4 UPDATABLE : ").append(memChk.updatableMemory).append(LS); + sb.append("Bytes 4-5 : K : ").append(memChk.k).append(LS); + sb.append("Byte 6 : Min Level Cap, M : ").append(memChk.m).append(LS); + sb.append("Byte 7 : (Reserved) : ").append(LS); + + switch (memChk.layout) { + case DOUBLE_FULL_COMPACT: + case FLOAT_FULL_COMPACT: + case FLOAT_UPDATABLE: + case DOUBLE_UPDATABLE: + { + sb.append("Bytes 8-15: N : ").append(memChk.n).append(LS); + 
sb.append("Bytes 16-17: MinK : ").append(memChk.minK).append(LS); + sb.append("Byte 18 : NumLevels : ").append(memChk.numLevels).append(LS); + break; + } + case FLOAT_EMPTY_COMPACT: + case FLOAT_SINGLE_COMPACT: + case DOUBLE_EMPTY_COMPACT: + case DOUBLE_SINGLE_COMPACT: + { + sb.append("Assumed : N : ").append(memChk.n).append(LS); + sb.append("Assumed : MinK : ").append(memChk.minK).append(LS); + sb.append("Assumed : NumLevels : ").append(memChk.numLevels).append(LS); + break; + } + default: break; //can never happen + } + sb.append("PreambleBytes : ").append(preInts * 4).append(LS); + sb.append("Sketch Bytes : ").append(memChk.sketchBytes).append(LS); + sb.append("Memory Capacity Bytes : ").append(mem.getCapacity()).append(LS); + sb.append("### END KLL Sketch Memory Summary").append(LS); + return sb.toString(); + } + + static int getMemoryPreInts(final Memory mem) { + return mem.getByte(PREAMBLE_INTS_BYTE_ADR) & 0XFF; + } + + static int getMemorySerVer(final Memory mem) { + return mem.getByte(SER_VER_BYTE_ADR) & 0XFF; + } + + static int getMemoryFamilyID(final Memory mem) { + return mem.getByte(FAMILY_BYTE_ADR) & 0XFF; + } + + static int getMemoryFlags(final Memory mem) { + return mem.getByte(FLAGS_BYTE_ADR) & 0XFF; + } + + static boolean getMemoryEmptyFlag(final Memory mem) { + return (getMemoryFlags(mem) & EMPTY_BIT_MASK) != 0; + } + + static boolean getMemoryLevelZeroSortedFlag(final Memory mem) { + return (getMemoryFlags(mem) & LEVEL_ZERO_SORTED_BIT_MASK) != 0; + } + + static boolean getMemorySingleItemFlag(final Memory mem) { + return (getMemoryFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; + } + + static boolean getMemoryDoubleSketchFlag(final Memory mem) { + return (getMemoryFlags(mem) & DOUBLES_SKETCH_BIT_MASK) != 0; + } + + static boolean getMemoryUpdatableFlag(final Memory mem) { + return (getMemoryFlags(mem) & UPDATABLE_BIT_MASK) != 0; + } + + static int getMemoryK(final Memory mem) { + return mem.getShort(K_SHORT_ADR) & 0XFFFF; + } + + static int 
getMemoryM(final Memory mem) { + return mem.getByte(M_BYTE_ADR) & 0XFF; + } + + static long getMemoryN(final Memory mem) { + return mem.getLong(N_LONG_ADR); + } + + static int getMemoryMinK(final Memory mem) { + return mem.getShort(MIN_K_SHORT_ADR) & 0XFFFF; + } + + static int getMemoryNumLevels(final Memory mem) { + return mem.getByte(NUM_LEVELS_BYTE_ADR) & 0XFF; + } + + static void setMemoryPreInts(final WritableMemory wmem, final int value) { + wmem.putByte(PREAMBLE_INTS_BYTE_ADR, (byte) value); + } + + static void setMemorySerVer(final WritableMemory wmem, final int value) { + wmem.putByte(SER_VER_BYTE_ADR, (byte) value); + } + + static void setMemoryFamilyID(final WritableMemory wmem, final int value) { + wmem.putByte(FAMILY_BYTE_ADR, (byte) value); + } + + static void setMemoryFlags(final WritableMemory wmem, final int value) { + wmem.putByte(FLAGS_BYTE_ADR, (byte) value); + } + + static void setMemoryEmptyFlag(final WritableMemory wmem, final boolean empty) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, empty ? flags | EMPTY_BIT_MASK : flags & ~EMPTY_BIT_MASK); + } + + static void setMemoryLevelZeroSortedFlag(final WritableMemory wmem, final boolean levelZeroSorted) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, levelZeroSorted ? flags | LEVEL_ZERO_SORTED_BIT_MASK : flags & ~LEVEL_ZERO_SORTED_BIT_MASK); + } + + static void setMemorySingleItemFlag(final WritableMemory wmem, final boolean singleItem) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); + } + + static void setMemoryDoubleSketchFlag(final WritableMemory wmem, final boolean doubleSketch) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, doubleSketch ? 
flags | DOUBLES_SKETCH_BIT_MASK : flags & ~DOUBLES_SKETCH_BIT_MASK); + } + + static void setMemoryUpdatableFlag(final WritableMemory wmem, final boolean updatable) { + final int flags = getMemoryFlags(wmem); + setMemoryFlags(wmem, updatable ? flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK); + } + + static void setMemoryK(final WritableMemory wmem, final int value) { + wmem.putShort(K_SHORT_ADR, (short) value); + } + + static void setMemoryM(final WritableMemory wmem, final int value) { + wmem.putByte(M_BYTE_ADR, (byte) value); + } + + static void setMemoryN(final WritableMemory wmem, final long value) { + wmem.putLong(N_LONG_ADR, value); + } + + static void setMemoryMinK(final WritableMemory wmem, final int value) { + wmem.putShort(MIN_K_SHORT_ADR, (short) value); + } + + static void setMemoryNumLevels(final WritableMemory wmem, final int value) { + wmem.putByte(NUM_LEVELS_BYTE_ADR, (byte) value); + } + +} + diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java new file mode 100644 index 000000000..faa5d1081 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; + +import java.util.Random; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/* + * Sampled stream data (floats or doubles) is stored as an array or as part of a Memory object. + * This array is partitioned into sections called levels and the indices into the array of items + * are tracked by a small integer array called levels or levels array. + * The data for level i lies in positions levelsArray[i] through levelsArray[i + 1] - 1 inclusive. + * Hence, the levelsArray must contain (numLevels + 1) indices. + * The valid portion of items array is completely packed and sorted, except for level 0, + * which is filled from the top down. Any items below the index levelsArray[0] are garbage and will be + * overwritten by subsequent updates. + * + * Invariants: + * 1) After a compaction, or an update, or a merge, every level is sorted except for level zero. + * 2) After a compaction, (sum of capacities) - (sum of items) >= 1, + * so there is room for at least 1 more item in level zero. + * 3) There are no gaps except at the bottom, so if levels_[0] = 0, + * the sketch is exactly filled to capacity and must be compacted or the itemsArray and levelsArray + * must be expanded to include more levels. + * 4) Sum of weights of all retained items == N.
+ * 5) Current total item capacity = itemsArray.length = levelsArray[numLevels]. + */ + +/** + * This class is the root of the KLL sketch class hierarchy. It includes the public API that is independent + * of either sketch type (float or double) and independent of whether the sketch is targeted for use on the + * heap or Direct (off-heap). + * + *
Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
The normalized rank (rank) of any specific value is defined as its * absolute rank divided by N. - * Thus, the normalized rank is a value between zero and one. + * Thus, the normalized rank is a value in the interval [0.0, 1.0). * In the documentation and Javadocs for this sketch absolute rank is never used so any * reference to just rank should be interpreted to mean normalized rank. * *
This sketch is configured with a parameter k, which affects the size of the sketch * and its estimation error. * - *
The estimation error is commonly called epsilon (or eps) and is a fraction - * between zero and one. Larger values of k result in smaller values of epsilon. - * Epsilon is always with respect to the rank and cannot be applied to the + *
In the research literature, the estimation error is commonly called epsilon + * (or eps) and is a fraction between zero and one. + * Larger values of k result in smaller values of epsilon. + * The epsilon error is always with respect to the rank and cannot be applied to the * corresponding values. * *
The relationship between the normalized rank and the corresponding values can be viewed @@ -147,6 +148,25 @@ *
The current implementations of the KLL sketch in the DataSketches Java library component include:
+ * + *Please visit our website: DataSketches Home Page for more + * information.
+ * * @author Kevin Lang * @author Alexander Saydakov * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java index 40d4d3501..307917d12 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java @@ -70,12 +70,12 @@ final class DoublesAuxiliary { /** * Get the estimated quantile given a fractional rank. - * @param fRank the fractional rank where: 0 ≤ fRank ≤ 1.0. + * @param rank the normalized rank where: 0 ≤ rank ≤ 1.0. * @return the estimated quantile */ - double getQuantile(final double fRank) { - checkFractionalRankBounds(fRank); - final long pos = QuantilesHelper.posOfPhi(fRank, auxN_); + double getQuantile(final double rank) { + checkFractionalRankBounds(rank); + final long pos = QuantilesHelper.posOfRank(rank, auxN_); return approximatelyAnswerPositionalQuery(pos); } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java index 9a617d431..4905fc221 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java @@ -79,13 +79,13 @@ final class ItemsAuxiliaryThe intent of the design of this class was to isolate the detailed knowledge of the bit and - * byte layout of the serialized form of the sketches derived from the Sketch class into one place. + * byte layout of the serialized form of the sketches derived from the base sketch classes into one place. * This allows the possibility of the introduction of different serialization * schemes with minimal impact on the rest of the library.
* *- * MAP: Low significance bytes of this long data structure are on the right. However, the - * multi-byte integers (int and long) are stored in native byte order. The - * byte values are treated as unsigned.
+ * LAYOUT: The low significance bytes of this long-based data structure are on the right. + * The multi-byte primitives are stored in native byte order. + * The single-byte fields are treated as unsigned. * *An empty ItemsSketch, on-heap DoublesSketch or compact off-heap DoublesSketch only require 8 - * bytes. An off-heap UpdateDoublesSketch and all non-empty skethces require at least 16 bytes of + * bytes. An off-heap UpdateDoublesSketch and all non-empty sketches require at least 16 bytes of * preamble.
* - *+ ** * @author Lee Rhodes */ diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java new file mode 100644 index 000000000..4c7033342 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.datasketches.kll;
+
+import org.apache.datasketches.memory.DefaultMemoryRequestServer;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class KllDirectDoublesSketchIteratorTest { // iterator checks on sketches wrapped around WritableMemory
+  private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer();
+
+  @Test
+  public void emptySketch() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    KllDoublesSketchIterator it = sketch.iterator();
+    Assert.assertFalse(it.next()); // nothing was fed, so the first advance must fail
+  }
+
+  @Test
+  public void oneItemSketch() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(0);
+    KllDoublesSketchIterator it = sketch.iterator();
+    Assert.assertTrue(it.next());
+    Assert.assertEquals(it.getValue(), 0.0); // double literal: this is the doubles sketch
+    Assert.assertEquals(it.getWeight(), 1);
+    Assert.assertFalse(it.next()); // exactly one entry
+  }
+
+  @Test
+  public void bigSketches() {
+    for (int n = 1000; n < 100000; n += 2000) {
+      final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+      for (int i = 0; i < n; i++) {
+        sketch.update(i);
+      }
+      KllDoublesSketchIterator it = sketch.iterator();
+      int count = 0;
+      long weight = 0; // accumulate in a long so the compound assignment never narrows
+      while (it.next()) {
+        count++;
+        weight += it.getWeight();
+      }
+      Assert.assertEquals(count, sketch.getNumRetained()); // one iteration step per retained item
+      Assert.assertEquals(weight, n); // the weights must add up to the stream length
+    }
+  }
+
+  private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { // values 1..n, then wrapped
+    KllDoublesSketch sk = new KllDoublesSketch(k);
+    for (int i = 1; i <= n; i++) { sk.update(i); }
+    byte[] byteArr = sk.toUpdatableByteArray();
+    WritableMemory wmem = WritableMemory.writableWrap(byteArr);
+
+    KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr);
+    return ddsk;
+  }
+
+}
+
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
new file mode 100644
index 000000000..e07d7d3c7
--- /dev/null
+++ 
b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
@@ -0,0 +1,643 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.kll;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.memory.DefaultMemoryRequestServer;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
+
+@SuppressWarnings("javadoc")
+public class KllDirectDoublesSketchTest { // functional tests for doubles sketches wrapped around WritableMemory
+
+  private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8
+  private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128
+  private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256
+  private static final double NUMERIC_NOISE_TOLERANCE = 1E-6;
+  private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer();
+
+  @Test
+  public void empty() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(Double.NaN); // this must not change anything
+    assertTrue(sketch.isEmpty());
+    assertEquals(sketch.getN(), 0);
+    assertEquals(sketch.getNumRetained(), 0);
+    assertTrue(Double.isNaN(sketch.getRank(0)));
+    assertTrue(Double.isNaN(sketch.getMinValue()));
+    assertTrue(Double.isNaN(sketch.getMaxValue()));
+    assertTrue(Double.isNaN(sketch.getQuantile(0.5)));
+    assertNull(sketch.getQuantiles(new double[] {0}));
+    assertNull(sketch.getPMF(new double[] {0}));
+    assertNotNull(sketch.toString(true, true));
+    assertNotNull(sketch.toString());
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void getQuantileInvalidArg() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(1);
+    sketch.getQuantile(-1.0); // rank below 0 is expected to be rejected
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void getQuantilesInvalidArg() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(1);
+    sketch.getQuantiles(new double[] {2.0}); // rank above 1 is expected to be rejected
+  }
+
+  @Test
+  public void oneItem() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(1);
+    assertFalse(sketch.isEmpty());
+    assertEquals(sketch.getN(), 1);
+    assertEquals(sketch.getNumRetained(), 1);
+    assertEquals(sketch.getRank(1), 0.0);
+    assertEquals(sketch.getRank(2), 1.0);
+    assertEquals(sketch.getMinValue(), 1.0);
+    assertEquals(sketch.getMaxValue(), 1.0);
+    assertEquals(sketch.getQuantile(0.5), 1.0);
+  }
+
+  @Test
+  public void manyItemsEstimationMode() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    final int n = 1_000_000;
+
+    for (int i = 0; i < n; i++) {
+      sketch.update(i);
+    }
+    assertEquals(sketch.getN(), n);
+
+    // test getRank
+    for (int i = 0; i < n; i++) {
+      final double trueRank = (double) i / n;
+      assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i);
+    }
+
+    // test getPMF
+    final double[] pmf = sketch.getPMF(new double[] {n / 2.0}); // split at median
+    assertEquals(pmf.length, 2);
+    assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
+    assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
+
+    assertEquals(sketch.getMinValue(), 0.0); // min value is exact
+    assertEquals(sketch.getQuantile(0), 0.0); // min value is exact
+    assertEquals(sketch.getMaxValue(), n - 1.0); // max value is exact
+    assertEquals(sketch.getQuantile(1), n - 1.0); // max value is exact
+
+    // check at every 0.1 percentage point
+    final double[] fractions = new double[1001];
+    final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter
+    for (int i = 0; i <= 1000; i++) {
+      fractions[i] = (double) i / 1000;
+      reverseFractions[1000 - i] = fractions[i];
+    }
+    final double[] quantiles = sketch.getQuantiles(fractions);
+    final double[] reverseQuantiles = sketch.getQuantiles(reverseFractions);
+    double previousQuantile = 0;
+    for (int i = 0; i <= 1000; i++) {
+      final double quantile = sketch.getQuantile(fractions[i]);
+      assertEquals(quantile, quantiles[i]);
+      assertEquals(quantile, reverseQuantiles[1000 - i]);
+      assertTrue(previousQuantile <= quantile); // quantiles must be non-decreasing in rank
+      previousQuantile = quantile;
+    }
+  }
+
+  @Test
+  public void getRankGetCdfGetPmfConsistency() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    final int n = 1000;
+    final double[] values = new double[n];
+    for (int i = 0; i < n; i++) {
+      sketch.update(i);
+      values[i] = i;
+    }
+    final double[] ranks = sketch.getCDF(values);
+    final double[] pmf = sketch.getPMF(values);
+    double sumPmf = 0;
+    for (int i = 0; i < n; i++) {
+      assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE,
+          "rank vs CDF for value " + i);
+      sumPmf += pmf[i];
+      assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i);
+    }
+    sumPmf += pmf[n]; // both result arrays carry one extra trailing bucket
+    assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
+    assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
+  }
+
+  @Test
+  public void merge() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0);
+    final int n = 10_000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i * 1.0);
+      sketch2.update((2 * n - i - 1) * 1.0);
+    }
+
+    assertEquals(sketch1.getMinValue(), 0.0);
+    assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0);
+
+    assertEquals(sketch2.getMinValue(), n * 1.0);
+    assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0);
+
+    sketch1.merge(sketch2);
+
+    assertFalse(sketch1.isEmpty());
+    assertEquals(sketch1.getN(), 2L * n);
+    assertEquals(sketch1.getMinValue(), 0.0);
+    assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0);
+    assertEquals(sketch1.getQuantile(0.5), n * 1.0, n * PMF_EPS_FOR_K_256);
+  }
+
+  @Test
+  public void mergeLowerK() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0);
+    final int n = 10_000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i);
+      sketch2.update(2 * n - i - 1);
+    }
+
+    assertEquals(sketch1.getMinValue(), 0.0);
+    assertEquals(sketch1.getMaxValue(), n - 1.0);
+
+    assertEquals(sketch2.getMinValue(), n);
+    assertEquals(sketch2.getMaxValue(), 2.0 * n - 1.0);
+
+    assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
+    assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
+    sketch1.merge(sketch2);
+
+    // sketch1 must get "contaminated" by the lower K in sketch2
+    assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false));
+    assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true));
+
+    assertFalse(sketch1.isEmpty());
+    assertEquals(sketch1.getN(), 2 * n);
+    assertEquals(sketch1.getMinValue(), 0);
+    assertEquals(sketch1.getMaxValue(), 2.0 * n - 1.0);
+    assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128);
+  }
+
+  @Test
+  public void mergeEmptyLowerK() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0);
+    final int n = 10000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i);
+    }
+
+    // rank error should not be affected by a merge with an empty sketch with lower K
+    final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+    sketch1.merge(sketch2);
+    assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+
+    assertFalse(sketch1.isEmpty());
+    assertEquals(sketch1.getN(), n);
+    assertEquals(sketch1.getMinValue(), 0);
+    assertEquals(sketch1.getMaxValue(), n - 1.0);
+    assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256);
+
+    //merge the other way
+    sketch2.merge(sketch1);
+    assertFalse(sketch2.isEmpty()); // check the merge target; sketch1 was already verified above
+    assertEquals(sketch2.getN(), n);
+    assertEquals(sketch2.getMinValue(), 0);
+    assertEquals(sketch2.getMaxValue(), n - 1.0);
+    assertEquals(sketch2.getQuantile(0.5), n / 2.0, n * PMF_EPS_FOR_K_128); // sketch2 was built with k=128
+  }
+
+  @Test
+  public void mergeExactModeLowerK() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(256, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(128, 0);
+    final int n = 10000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i);
+    }
+    sketch2.update(1);
+
+    // rank error should not be affected by a merge with a sketch in exact mode with lower K
+    final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+    sketch1.merge(sketch2);
+    assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+  }
+
+  @Test
+  public void mergeMinMinValueFromOther() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0);
+    sketch1.update(1);
+    sketch2.update(2);
+    sketch2.merge(sketch1);
+    assertEquals(sketch2.getMinValue(), 1.0); // the smaller minimum comes from the merged-in sketch
+  }
+
+  @Test
+  public void mergeMinAndMaxFromOther() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final KllDirectDoublesSketch sketch2 = getDDSketch(200, 0);
+    for (int i = 1; i <= 1_000_000; i++) {
+      sketch1.update(i);
+    }
+    sketch2.merge(sketch1);
+    assertEquals(sketch2.getMinValue(), 1);
+    assertEquals(sketch2.getMaxValue(), 1_000_000);
+  }
+
+  @SuppressWarnings("unused")
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void kTooSmall() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(KllSketch.DEFAULT_M - 1, 0);
+  }
+
+  @SuppressWarnings("unused")
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void kTooLarge() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(KllSketch.MAX_K + 1, 0);
+  }
+
+  @Test
+  public void minK() {
+    final KllDirectDoublesSketch sketch = getDDSketch(KllSketch.DEFAULT_M, 0);
+    for (int i = 0; i < 1000; i++) {
+      sketch.update(i);
+    }
+    assertEquals(sketch.getK(), KllSketch.DEFAULT_M);
+    assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8);
+  }
+
+  @Test
+  public void maxK() {
+    final KllDirectDoublesSketch sketch = getDDSketch(KllSketch.MAX_K, 0);
+    for (int i = 0; i < 1000; i++) {
+      sketch.update(i);
+    }
+    assertEquals(sketch.getK(), KllSketch.MAX_K);
+    assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256);
+  }
+
+  @Test
+  public void serializeDeserializeEmptyViaCompactHeapify() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final byte[] bytes = sketch1.toByteArray();
+    final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes));
+    assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes());
+    assertTrue(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
+    assertEquals(sketch2.getN(), sketch1.getN());
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertTrue(Double.isNaN(sketch2.getMinValue()));
+    assertTrue(Double.isNaN(sketch2.getMaxValue()));
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes());
+  }
+
+  @Test
+  public void serializeDeserializeEmptyViaUpdatableWritableWrap() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final byte[] bytes = sketch1.toUpdatableByteArray();
+    final KllDirectDoublesSketch sketch2 =
+        KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr);
+    assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes());
+    assertTrue(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
+    assertEquals(sketch2.getN(), sketch1.getN());
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertTrue(Double.isNaN(sketch2.getMinValue()));
+    assertTrue(Double.isNaN(sketch2.getMaxValue()));
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes());
+  }
+
+  @Test
+  public void serializeDeserializeOneItemViaCompactHeapify() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    sketch1.update(1);
+    final byte[] bytes = sketch1.toByteArray();
+    final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes));
+    assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes());
+    assertFalse(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), 1);
+    assertEquals(sketch2.getN(), 1);
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertFalse(Double.isNaN(sketch2.getMinValue()));
+    assertFalse(Double.isNaN(sketch2.getMaxValue()));
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES);
+  }
+
+  @Test
+  public void serializeDeserializeOneItemViaUpdatableWritableWrap() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    sketch1.update(1);
+    final byte[] bytes = sketch1.toUpdatableByteArray();
+    final KllDirectDoublesSketch sketch2 =
+        KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr);
+    assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes());
+    assertFalse(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), 1);
+    assertEquals(sketch2.getN(), 1);
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertFalse(Double.isNaN(sketch2.getMinValue()));
+    assertFalse(Double.isNaN(sketch2.getMaxValue()));
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES);
+  }
+
+  @Test
+  public void serializeDeserializeFullViaCompactHeapify() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final int n = 1000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i);
+    }
+    final byte[] bytes = sketch1.toByteArray();
+    final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes));
+    assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes());
+    assertFalse(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
+    assertEquals(sketch2.getN(), sketch1.getN());
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertEquals(sketch2.getMinValue(), sketch1.getMinValue());
+    assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue());
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes());
+  }
+
+  @Test
+  public void serializeDeserializeFullViaUpdatableWritableWrap() {
+    final KllDirectDoublesSketch sketch1 = getDDSketch(200, 0);
+    final int n = 1000;
+    for (int i = 0; i < n; i++) {
+      sketch1.update(i);
+    }
+    final byte[] bytes = sketch1.toUpdatableByteArray();
+    final KllDirectDoublesSketch sketch2 =
+        KllDirectDoublesSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr);
+    assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes());
+    assertFalse(sketch2.isEmpty());
+    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
+    assertEquals(sketch2.getN(), sketch1.getN());
+    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
+    assertEquals(sketch2.getMinValue(), sketch1.getMinValue());
+    assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue());
+    assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes());
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void outOfOrderSplitPoints() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(0);
+    sketch.getCDF(new double[] {1, 0}); // descending split points are expected to be rejected
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void nanSplitPoint() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(0);
+    sketch.getCDF(new double[] {Double.NaN}); // a NaN split point is expected to be rejected
+  }
+
+  @Test
+  public void getQuantiles() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 0);
+    sketch.update(1);
+    sketch.update(2);
+    sketch.update(3);
+    final double[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1});
+    final double[] quantiles2 = sketch.getQuantiles(3);
+    assertEquals(quantiles1, quantiles2);
+    assertEquals(quantiles1[0], 1.0);
+    assertEquals(quantiles1[1], 2.0);
+    assertEquals(quantiles1[2], 3.0);
+  }
+
+  @Test
+  public void checkSimpleMergeDirect() { //used for troubleshooting
+    int k = 20;
+    int n1 = 21;
+    int n2 = 43;
+    KllDoublesSketch sk1 = new KllDoublesSketch(k);
+    KllDoublesSketch sk2 = new KllDoublesSketch(k);
+    for (int i = 1; i <= n1; i++) {
+      sk1.update(i);
+    }
+    for (int i = 1; i <= n2; i++) {
+      sk2.update(i + 100);
+    }
+    println("SK1:");
+    println(sk1.toString(true, true));
+    println("SK2:");
+    println(sk2.toString(true, true));
+    WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray());
+    WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray());
+    KllDirectDoublesSketch dsk1 = KllDirectDoublesSketch.writableWrap(wmem1, memReqSvr);
+    KllDirectDoublesSketch dsk2 = KllDirectDoublesSketch.writableWrap(wmem2, memReqSvr);
+    println("BEFORE MERGE");
+    println(dsk1.toString(true, true));
+    dsk1.merge(dsk2);
+    println("AFTER MERGE");
+    println(dsk1.toString(true, true));
+  }
+
+  @Test
+  public void checkSketchInitializeDirectDoubleUpdatableMem() {
+    int k = 20; //don't change this
+    KllDirectDoublesSketch sk;
+    KllDoublesSketch sk2;
+    byte[] compBytes;
+    WritableMemory wmem;
+
+    println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE");
+    sk2 = new KllDoublesSketch(k);
+    for (int i = 1; i <= k + 1; i++) { sk2.update(i); }
+    //println(sk2.toString(true, true));
+    compBytes = sk2.toUpdatableByteArray();
+    wmem = WritableMemory.writableWrap(compBytes);
+    println(KllPreambleUtil.toString(wmem));
+    sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr);
+    assertEquals(sk.getK(), k);
+    assertEquals(sk.getN(), k + 1);
+    assertEquals(sk.getNumRetained(), 11);
+    assertFalse(sk.isEmpty());
+    assertTrue(sk.isEstimationMode());
+    assertEquals(sk.getMinK(), k);
+    assertEquals(sk.getDoubleItemsArray().length, 33);
+    assertEquals(sk.getLevelsArray().length, 3);
+    assertEquals(sk.getMaxDoubleValue(), 21.0);
+    assertEquals(sk.getMinDoubleValue(), 1.0);
+    assertEquals(sk.getNumLevels(), 2);
+    assertFalse(sk.isLevelZeroSorted());
+
+    println("#### CASE: DOUBLE EMPTY DIRECT FROM UPDATABLE"); // this case wraps memory, it does not heapify
+    sk2 = new KllDoublesSketch(k);
+    //println(sk.toString(true, true));
+    compBytes = sk2.toUpdatableByteArray();
+    wmem = WritableMemory.writableWrap(compBytes);
+    println(KllPreambleUtil.toString(wmem));
+    sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr);
+    assertEquals(sk.getK(), k);
+    assertEquals(sk.getN(), 0);
+    assertEquals(sk.getNumRetained(), 0);
+    assertTrue(sk.isEmpty());
+    assertFalse(sk.isEstimationMode());
+    assertEquals(sk.getMinK(), k);
+    assertEquals(sk.getDoubleItemsArray().length, 20);
+    assertEquals(sk.getLevelsArray().length, 2);
+    assertEquals(sk.getMaxDoubleValue(), Double.NaN);
+    assertEquals(sk.getMinDoubleValue(), Double.NaN);
+    assertEquals(sk.getNumLevels(), 1);
+    assertFalse(sk.isLevelZeroSorted());
+
+    println("#### CASE: DOUBLE SINGLE DIRECT FROM UPDATABLE"); // this case wraps memory, it does not heapify
+    sk2 = new KllDoublesSketch(k);
+    sk2.update(1);
+    //println(sk.toString(true, true));
+    compBytes = sk2.toUpdatableByteArray();
+    wmem = WritableMemory.writableWrap(compBytes);
+    println(KllPreambleUtil.toString(wmem));
+    sk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr);
+    assertEquals(sk.getK(), k);
+    assertEquals(sk.getN(), 1);
+    assertEquals(sk.getNumRetained(), 1);
+    assertFalse(sk.isEmpty());
+    assertFalse(sk.isEstimationMode());
+    assertEquals(sk.getMinK(), k);
+    assertEquals(sk.getDoubleItemsArray().length, 20);
+    assertEquals(sk.getLevelsArray().length, 2);
+    assertEquals(sk.getMaxDoubleValue(), 1.0);
+    assertEquals(sk.getMinDoubleValue(), 1.0);
+    assertEquals(sk.getNumLevels(), 1);
+    assertFalse(sk.isLevelZeroSorted());
+  }
+
+  @Test
+  public void checkGetWritableMemory() {
+    final KllDirectDoublesSketch sketch = getDDSketch(200, 200);
+    assertEquals(sketch.getK(), 200);
+    assertEquals(sketch.getN(), 200);
+    assertFalse(sketch.isEmpty());
+    assertTrue(sketch.isUpdatableMemory());
+    assertFalse(sketch.isEstimationMode());
+    assertTrue(sketch.isDoublesSketch());
+    assertFalse(sketch.isLevelZeroSorted());
+    assertFalse(sketch.isFloatsSketch());
+
+    final WritableMemory wmem = sketch.getWritableMemory();
+    final KllDoublesSketch sk = KllDoublesSketch.heapify(wmem);
+    assertEquals(sk.getK(), 200);
+    assertEquals(sk.getN(), 200);
+    assertFalse(sk.isEmpty());
+    assertFalse(sk.isUpdatableMemory());
+    assertFalse(sk.isEstimationMode());
+    assertTrue(sk.isDoublesSketch());
+    assertFalse(sk.isLevelZeroSorted());
+    assertFalse(sk.isFloatsSketch());
+  }
+
+  @Test
+  public void checkReset() {
+    WritableMemory dstMem = WritableMemory.allocate(6000);
+    KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr);
+    for (int i = 1; i <= 100; i++) { sk.update(i); }
+    long n1 = sk.getN();
+    double min1 = sk.getMinValue();
+    double max1 = sk.getMaxValue();
+    sk.reset();
+    for (int i = 1; i <= 100; i++) { sk.update(i); } // same stream again after the reset
+    long n2 = sk.getN();
+    double min2 = sk.getMinValue();
+    double max2 = sk.getMaxValue();
+    assertEquals(n2, n1);
+    assertEquals(min2, min1);
+    assertEquals(max2, max1);
+  }
+
+  @Test
+  public void checkHeapify() {
+    WritableMemory dstMem = WritableMemory.allocate(6000);
+    KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr);
+    for (int i = 1; i <= 100; i++) { sk.update(i); }
+    KllDoublesSketch sk2 = KllDirectDoublesSketch.heapify(dstMem);
+    assertEquals(sk2.getMinValue(), 1.0);
+    assertEquals(sk2.getMaxValue(), 100.0);
+  }
+
+  @Test
+  public void checkMergeKllDoublesSketch() {
+    WritableMemory dstMem = WritableMemory.allocate(6000);
+    KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr);
+    for (int i = 1; i <= 21; i++) { sk.update(i); }
+    KllDoublesSketch sk2 = new KllDoublesSketch(20);
+    for (int i = 1; i <= 21; i++) { sk2.update(i + 100); }
+    sk.merge(sk2); // smoke test: heap sketch merged into the direct sketch, no assertions
+  }
+
+  @Test
+  public void checkReverseMergeKllDoubleSketch() {
+    WritableMemory dstMem = WritableMemory.allocate(6000);
+    KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(20, dstMem, memReqSvr);
+    for (int i = 1; i <= 21; i++) { sk.update(i); }
+    KllDoublesSketch sk2 = new KllDoublesSketch(20);
+    for (int i = 1; i <= 21; i++) { sk2.update(i + 100); }
+    sk2.merge(sk); // smoke test: direct sketch merged into the heap sketch, no assertions
+  }
+
+//  @Test
+//  public void checkWrapKllDoubleSketch() {
+//    KllDoublesSketch sk = new KllDoublesSketch(20);
+//    for (int i = 1; i <= 21; i++ ) { sk.update(i); }
+//    Memory srcMem = Memory.wrap(sk.toByteArray());
+//    KllDirectDoublesSketch sk2 = KllDirectDoublesSketch.writableWrap(srcMem, memReqSvr);
+//  }
+
+  private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { // values 1..n, then wrapped
+    KllDoublesSketch sk = new KllDoublesSketch(k);
+    for (int i = 1; i <= n; i++) { sk.update(i); }
+    byte[] byteArr = sk.toUpdatableByteArray();
+    WritableMemory wmem = WritableMemory.writableWrap(byteArr);
+    KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr);
+    return ddsk;
+  }
+
+  @Test
+  public void printlnTest() {
+    println("PRINTING: " + this.getClass().getName());
+  }
+
+  /**
+   * @param s value to print
+   */
+  static void println(final String s) {
+    //System.out.println(s); //disable here
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java
new file mode 100644
index 000000000..9b54a7a2a
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchIteratorTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.kll;
+
+import org.apache.datasketches.memory.DefaultMemoryRequestServer;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class KllDirectFloatsSketchIteratorTest { // iterator checks on sketches wrapped around WritableMemory
+  private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer();
+
+  @Test
+  public void emptySketch() {
+    final KllDirectFloatsSketch sketch = getDFSketch(200, 0);
+    KllFloatsSketchIterator it = sketch.iterator();
+    Assert.assertFalse(it.next()); // nothing was fed, so the first advance must fail
+  }
+
+  @Test
+  public void oneItemSketch() {
+    final KllDirectFloatsSketch sketch = getDFSketch(200, 0);
+    sketch.update(0);
+    KllFloatsSketchIterator it = sketch.iterator();
+    Assert.assertTrue(it.next());
+    Assert.assertEquals(it.getValue(), 0f);
+    Assert.assertEquals(it.getWeight(), 1);
+    Assert.assertFalse(it.next()); // exactly one entry
+  }
+
+  @Test
+  public void bigSketches() {
+    for (int n = 1000; n < 100000; n += 2000) {
+      final KllDirectFloatsSketch sketch = getDFSketch(200, 0);
+      for (int i = 0; i < n; i++) {
+        sketch.update(i);
+      }
+      KllFloatsSketchIterator it = sketch.iterator();
+      int count = 0;
+      long weight = 0; // accumulate in a long so the compound assignment never narrows
+      while (it.next()) {
+        count++;
+        weight += it.getWeight();
+      }
+      Assert.assertEquals(count, sketch.getNumRetained()); // one iteration step per retained item
+      Assert.assertEquals(weight, n); // the weights must add up to the stream length
+    }
+  }
+
+  private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { // values 1..n, then wrapped
+    KllFloatsSketch sk = new KllFloatsSketch(k);
+    for (int i = 1; i <= n; i++) { sk.update(i); }
+    byte[] byteArr = sk.toUpdatableByteArray();
+    WritableMemory wmem = WritableMemory.writableWrap(byteArr);
+
+    KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr);
+    return dfsk;
+  }
+
+}
+
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java
new file mode 100644
index 000000000..025004380
--- /dev/null
+++ 
b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -0,0 +1,614 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class KllDirectFloatsSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final 
KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(Float.NaN); // this must not change anything + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + assertTrue(Double.isNaN(sketch.getRank(0))); + assertTrue(Float.isNaN(sketch.getMinValue())); + assertTrue(Float.isNaN(sketch.getMaxValue())); + assertTrue(Float.isNaN(sketch.getQuantile(0.5))); + assertNull(sketch.getQuantiles(new double[] {0})); + assertNull(sketch.getPMF(new float[] {0})); + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneItem() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1), 0.0); + assertEquals(sketch.getRank(2), 1.0); + assertEquals(sketch.getMinValue(), 1f); + assertEquals(sketch.getMaxValue(), 1f); + assertEquals(sketch.getQuantile(0.5), 1f); + } + + @Test + public void manyItemsEstimationMode() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new float[] {n / 2}); // split at 
median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinValue(), 0f); // min value is exact + assertEquals(sketch.getQuantile(0), 0f); // min value is exact + assertEquals(sketch.getMaxValue(), n - 1f); // max value is exact + assertEquals(sketch.getQuantile(1), n - 1f); // max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = fractions[i]; + } + final float[] quantiles = sketch.getQuantiles(fractions); + final float[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final double quantile = sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + final int n = 1000; + final float[] values = new float[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + 
final KllDirectFloatsSketch sketch2 = getDFSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i * 1.0F); + sketch2.update((2 * n - i - 1) * 1.0F); + } + + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0); + + assertEquals(sketch2.getMinValue(), n * 1.0); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0F); + assertEquals(sketch1.getQuantile(0.5), n * 1.0F, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinValue(), 0.0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + + assertEquals(sketch2.getMinValue(), n); + assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = 
getDFSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinValue(), 0f); + assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllDirectFloatsSketch sketch1 = getDFSketch(256, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final KllDirectFloatsSketch sketch2 = getDFSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1.0F); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllDirectFloatsSketch sketch1 = getDFSketch(8, 0); //was 200 + final KllDirectFloatsSketch sketch2 = getDFSketch(8, 0); //was 200 + for (int i = 1; i <= 9; i++) { //was 1_000_000 + sketch1.update(i); + } 
+ //System.out.println(sketch1.toString(true, true)); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 9F); //was 1_000_000 + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + final KllDirectFloatsSketch sketch1 = getDFSketch(KllSketch.DEFAULT_M - 1, 0); + } + + @SuppressWarnings("unused") + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + final KllDirectFloatsSketch sketch1 = getDFSketch(KllSketch.MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllDirectFloatsSketch sketch = getDFSketch(KllSketch.DEFAULT_M, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllDirectFloatsSketch sketch = getDFSketch(KllSketch.MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmptyViaCompactHeapify() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void 
serializeDeserializeEmptyViaUpdatableWritableWrap() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(Double.isNaN(sketch2.getMinValue())); + assertTrue(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeOneItemViaCompactHeapify() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); + } + + @Test + public void serializeDeserializeOneItemViaUpdatableWritableWrap() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, 
sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertFalse(Double.isNaN(sketch2.getMinValue())); + assertFalse(Double.isNaN(sketch2.getMaxValue())); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); + } + + @Test + public void serializeDeserializeFullViaCompactHeapify() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { sketch1.update(i); } + final byte[] bytes = sketch1.toByteArray(); + final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test + public void serializeDeserializeFullViaUpdatableWritableWrap() { + final KllDirectFloatsSketch sketch1 = getDFSketch(200, 0); + final int n = 1000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + final byte[] bytes = sketch1.toUpdatableByteArray(); + final KllDirectFloatsSketch sketch2 = + KllDirectFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.getCurrentUpdatableSerializedSizeBytes()); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + 
assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); + assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(0); + sketch.getCDF(new float[] {1, 0}); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void nanSplitPoint() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(0); + sketch.getCDF(new float[] {Float.NaN}); + } + + @Test + public void getQuantiles() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 0); + sketch.update(1); + sketch.update(2); + sketch.update(3); + final float[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); + final float[] quantiles2 = sketch.getQuantiles(3); + assertEquals(quantiles1, quantiles2); + assertEquals(quantiles1[0], 1f); + assertEquals(quantiles1[1], 2f); + assertEquals(quantiles1[2], 3f); + } + + @Test + public void checkSimpleMergeDirect() { //used for troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllFloatsSketch sk1 = new KllFloatsSketch(k); + KllFloatsSketch sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(sk1.toUpdatableByteArray()); + WritableMemory wmem2 = WritableMemory.writableWrap(sk2.toUpdatableByteArray()); + KllDirectFloatsSketch dsk1 = KllDirectFloatsSketch.writableWrap(wmem1, memReqSvr); + KllDirectFloatsSketch dsk2 = KllDirectFloatsSketch.writableWrap(wmem2, memReqSvr); + 
println("BEFORE MERGE"); + println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectDoubleUpdatableMem() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL DIRECT FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0); + assertEquals(sk.getMinFloatValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Double.NaN); + assertEquals(sk.getMinFloatValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + 
assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0); + assertEquals(sk.getMinFloatValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllDirectFloatsSketch sketch = getDFSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isUpdatableMemory()); + assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isFloatsSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isDoublesSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllFloatsSketch sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isUpdatableMemory()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isFloatsSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isDoublesSketch()); + } + + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(3000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + float min1 = sk.getMinValue(); + float max1 = 
sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + float min2 = sk.getMinValue(); + float max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllFloatsSketch sk2 = KllDirectFloatsSketch.heapify(dstMem); + assertEquals(sk2.getMinValue(), 1.0); + assertEquals(sk2.getMaxValue(), 100.0); + } + + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java index 391052a64..64a995038 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java @@ -61,6 +61,4 @@ public void bigSketches() { } } - } - diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 7eeea733d..b149d3c1c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ 
b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -77,19 +77,20 @@ public void oneItem() { assertEquals(sketch.getNumRetained(), 1); assertEquals(sketch.getRank(1), 0.0); assertEquals(sketch.getRank(2), 1.0); - assertEquals(sketch.getMinValue(), 1f); - assertEquals(sketch.getMaxValue(), 1f); - assertEquals(sketch.getQuantile(0.5), 1f); + assertEquals(sketch.getMinValue(), 1.0); + assertEquals(sketch.getMaxValue(), 1.0); + assertEquals(sketch.getQuantile(0.5), 1.0); } @Test public void manyItemsEstimationMode() { final KllDoublesSketch sketch = new KllDoublesSketch(); - final int n = 1000000; + final int n = 1_000_000; + for (int i = 0; i < n; i++) { sketch.update(i); - assertEquals(sketch.getN(), i + 1); } + assertEquals(sketch.getN(), n); // test getRank for (int i = 0; i < n; i++) { @@ -125,7 +126,7 @@ public void manyItemsEstimationMode() { assertTrue(previousQuantile <= quantile); previousQuantile = quantile; } -} + } @Test public void getRankGetCdfGetPmfConsistency() { @@ -156,23 +157,23 @@ public void merge() { final KllDoublesSketch sketch2 = new KllDoublesSketch(); final int n = 10000; for (int i = 0; i < n; i++) { - sketch1.update(i); - sketch2.update(2 * n - i - 1); + sketch1.update(i * 1.0); + sketch2.update((2 * n - i - 1) * 1.0); } - assertEquals(sketch1.getMinValue(), 0.0f); - assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getMinValue(), 0.0); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0); - assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + assertEquals(sketch2.getMinValue(), n * 1.0); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0); sketch1.merge(sketch2); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2L * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1); - assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0.0); + 
assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0); + assertEquals(sketch1.getQuantile(0.5), n * 1.0, n * PMF_EPS_FOR_K_256); } @Test @@ -189,7 +190,7 @@ public void mergeLowerK() { assertEquals(sketch1.getMaxValue(), n - 1f); assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 2f * n - 1f); + assertEquals(sketch2.getMaxValue(), 2f * n - 1.0); assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -201,8 +202,8 @@ public void mergeLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2 * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1f); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), 2f * n - 1.0); assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); } @@ -222,17 +223,17 @@ public void mergeEmptyLowerK() { assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256); //merge the other way sketch2.merge(sketch1); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), n - 1f); - assertEquals(sketch1.getQuantile(0.5), n / 2f, n / 2 * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMaxValue(), n - 1.0); + assertEquals(sketch1.getQuantile(0.5), n / 2.0, n / 2 * PMF_EPS_FOR_K_256); } @Test @@ -258,50 +259,50 @@ public void mergeMinMinValueFromOther() { sketch1.update(1); sketch2.update(2); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 1.0F); + 
assertEquals(sketch2.getMinValue(), 1.0); } @Test public void mergeMinAndMaxFromOther() { final KllDoublesSketch sketch1 = new KllDoublesSketch(); - for (int i = 0; i < 1000000; i++) { + for (int i = 1; i <= 1_000_000; i++) { sketch1.update(i); } final KllDoublesSketch sketch2 = new KllDoublesSketch(); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 0F); - assertEquals(sketch2.getMaxValue(), 999999F); + assertEquals(sketch2.getMinValue(), 1); + assertEquals(sketch2.getMaxValue(), 1_000_000); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllDoublesSketch(BaseKllSketch.MIN_K - 1); + new KllDoublesSketch(KllSketch.DEFAULT_M - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllDoublesSketch(BaseKllSketch.MAX_K + 1); + new KllDoublesSketch(KllSketch.MAX_K + 1); } @Test public void minK() { - final KllDoublesSketch sketch = new KllDoublesSketch(BaseKllSketch.MIN_K); + final KllDoublesSketch sketch = new KllDoublesSketch(KllSketch.DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MIN_K); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void maxK() { - final KllDoublesSketch sketch = new KllDoublesSketch(BaseKllSketch.MAX_K); + final KllDoublesSketch sketch = new KllDoublesSketch(KllSketch.MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -310,14 +311,14 @@ public void serializeDeserializeEmpty() { final KllDoublesSketch sketch1 = new KllDoublesSketch(); final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - 
assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertTrue(Double.isNaN(sketch2.getMinValue())); assertTrue(Double.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test @@ -326,16 +327,26 @@ public void serializeDeserializeOneItem() { sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); assertEquals(sketch2.getN(), 1); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertFalse(Double.isNaN(sketch2.getMinValue())); assertFalse(Double.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), 8 + Double.BYTES); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES); } + //@Test //not implemented from C++ yet + //public void deserializeOneItemV1() throws Exception { + // final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk"); + // final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(bytes)); + // assertFalse(sketch.isEmpty()); + // assertFalse(sketch.isEstimationMode()); + // assertEquals(sketch.getN(), 1); + // assertEquals(sketch.getNumRetained(), 1); + //} + @Test public void serializeDeserialize() { final KllDoublesSketch sketch1 = new 
KllDoublesSketch(); @@ -345,14 +356,14 @@ public void serializeDeserialize() { } final byte[] bytes = sketch1.toByteArray(); final KllDoublesSketch sketch2 = KllDoublesSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -369,33 +380,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getMaxSerializedSizeBytes() { - final int sizeBytes = - KllDoublesSketch.getMaxSerializedSizeBytes(BaseKllSketch.DEFAULT_K, 1_000_000_000); - assertEquals(sizeBytes, 6184); - } - - @Test - public void checkUbOnNumLevels() { - assertEquals(KllHelper.ubOnNumLevels(0), 1); - } - - @Test - public void checkIntCapAux() { - int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); - assertEquals(lvlCap, 8); - lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); - assertEquals(lvlCap, 10); - } - - @Test - public void checkSuperLargeKandLevels() { - //This is beyond what the sketch can be configured for. 
- final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); - assertEquals(size, 1_610_612_846); - } - @Test public void getQuantiles() { final KllDoublesSketch sketch = new KllDoublesSketch(); @@ -405,9 +389,26 @@ public void getQuantiles() { final double[] quantiles1 = sketch.getQuantiles(new double[] {0, 0.5, 1}); final double[] quantiles2 = sketch.getQuantiles(3); assertEquals(quantiles1, quantiles2); - assertEquals(quantiles1[0], 1f); - assertEquals(quantiles1[1], 2f); - assertEquals(quantiles1[2], 3f); + assertEquals(quantiles1[0], 1.0); + assertEquals(quantiles1[1], 2.0); + assertEquals(quantiles1[2], 3.0); + } + + @Test + public void checkReset() { + KllDoublesSketch sk = new KllDoublesSketch(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + double min1 = sk.getMinValue(); + double max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + double min2 = sk.getMinValue(); + double max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); } } diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java index ec1087d70..61d33f44b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesValidationTest.java @@ -217,8 +217,8 @@ public void checkTestResults() { } int numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); - int[] levels = sketch.getLevels(); - long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); + int[] levels = sketch.getLevelsArray(); + long hashedSamples = simpleHashOfSubArray(sketch.getDoubleItemsArray(), levels[0], numSamples); System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; assert correctResultsWithReset[(7 * testI) + 5] == 
numSamples; diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java index 7732efce8..33d829fcc 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java @@ -22,7 +22,6 @@ import org.testng.Assert; import org.testng.annotations.Test; -@SuppressWarnings("javadoc") public class KllFloatsSketchIteratorTest { @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index fd2313b03..e1a35f584 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -86,7 +86,8 @@ public void oneItem() { @Test public void manyItemsEstimationMode() { final KllFloatsSketch sketch = new KllFloatsSketch(); - final int n = 1000000; + final int n = 1_000_000; + for (int i = 0; i < n; i++) { sketch.update(i); assertEquals(sketch.getN(), i + 1); @@ -99,7 +100,7 @@ public void manyItemsEstimationMode() { } // test getPMF - final double[] pmf = sketch.getPMF(new float[] {n / 2}); // split at median + final double[] pmf = sketch.getPMF(new float[] {n / 2.0F}); // split at median assertEquals(pmf.length, 2); assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); @@ -157,23 +158,23 @@ public void merge() { final KllFloatsSketch sketch2 = new KllFloatsSketch(); final int n = 10000; for (int i = 0; i < n; i++) { - sketch1.update(i); - sketch2.update(2 * n - i - 1); + sketch1.update(i * 1.0f); + sketch2.update((2 * n - i - 1) * 1.0f); } assertEquals(sketch1.getMinValue(), 0.0f); - assertEquals(sketch1.getMaxValue(), n - 1f); + assertEquals(sketch1.getMaxValue(), (n - 1) * 1.0f); - assertEquals(sketch2.getMinValue(), n); - assertEquals(sketch2.getMaxValue(), 
2f * n - 1f); + assertEquals(sketch2.getMinValue(), n * 1.0f); + assertEquals(sketch2.getMaxValue(), (2 * n - 1) * 1.0f); sketch1.merge(sketch2); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), 2L * n); - assertEquals(sketch1.getMinValue(), 0f); - assertEquals(sketch1.getMaxValue(), 2f * n - 1); - assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + assertEquals(sketch1.getMinValue(), 0.0f); + assertEquals(sketch1.getMaxValue(), (2 * n - 1) * 1.0f); + assertEquals(sketch1.getQuantile(0.5), n * 1.0f, n * PMF_EPS_FOR_K_256); } @Test @@ -265,44 +266,44 @@ public void mergeMinMinValueFromOther() { @Test public void mergeMinAndMaxFromOther() { final KllFloatsSketch sketch1 = new KllFloatsSketch(); - for (int i = 0; i < 1000000; i++) { + for (int i = 1; i <= 1_000_000; i++) { sketch1.update(i); } - final KllFloatsSketch sketch2 = new KllFloatsSketch(); + final KllFloatsSketch sketch2 = new KllFloatsSketch(10); sketch2.merge(sketch1); - assertEquals(sketch2.getMinValue(), 0F); - assertEquals(sketch2.getMaxValue(), 999999F); + assertEquals(sketch2.getMinValue(), 1F); + assertEquals(sketch2.getMaxValue(), 1_000_000F); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooSmall() { - new KllFloatsSketch(BaseKllSketch.MIN_K - 1); + new KllFloatsSketch(KllSketch.DEFAULT_M - 1); } @SuppressWarnings("unused") @Test(expectedExceptions = SketchesArgumentException.class) public void kTooLarge() { - new KllFloatsSketch(BaseKllSketch.MAX_K + 1); + new KllFloatsSketch(KllSketch.MAX_K + 1); } @Test public void minK() { - final KllFloatsSketch sketch = new KllFloatsSketch(BaseKllSketch.MIN_K); + final KllFloatsSketch sketch = new KllFloatsSketch(KllSketch.DEFAULT_M); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MIN_K); + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); } @Test public void 
maxK() { - final KllFloatsSketch sketch = new KllFloatsSketch(BaseKllSketch.MAX_K); + final KllFloatsSketch sketch = new KllFloatsSketch(KllSketch.MAX_K); for (int i = 0; i < 1000; i++) { sketch.update(i); } - assertEquals(sketch.getK(), BaseKllSketch.MAX_K); + assertEquals(sketch.getK(), KllSketch.MAX_K); assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); } @@ -311,14 +312,14 @@ public void serializeDeserializeEmpty() { final KllFloatsSketch sketch1 = new KllFloatsSketch(); final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertTrue(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertTrue(Float.isNaN(sketch2.getMinValue())); assertTrue(Float.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test @@ -327,14 +328,14 @@ public void serializeDeserializeOneItem() { sketch1.update(1); final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), 1); assertEquals(sketch2.getN(), 1); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertFalse(Float.isNaN(sketch2.getMinValue())); assertFalse(Float.isNaN(sketch2.getMaxValue())); - assertEquals(sketch2.getSerializedSizeBytes(), 8 + 
Float.BYTES); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), 8 + Float.BYTES); } @Test @@ -356,14 +357,14 @@ public void serializeDeserialize() { } final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); - assertEquals(bytes.length, sketch1.getSerializedSizeBytes()); + assertEquals(bytes.length, sketch1.getCurrentCompactSerializedSizeBytes()); assertFalse(sketch2.isEmpty()); assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); assertEquals(sketch2.getN(), sketch1.getN()); assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); assertEquals(sketch2.getMinValue(), sketch1.getMinValue()); assertEquals(sketch2.getMaxValue(), sketch1.getMaxValue()); - assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes()); + assertEquals(sketch2.getCurrentCompactSerializedSizeBytes(), sketch1.getCurrentCompactSerializedSizeBytes()); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -380,33 +381,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getMaxSerializedSizeBytes() { - final int sizeBytes = - KllFloatsSketch.getMaxSerializedSizeBytes(BaseKllSketch.DEFAULT_K, 1_000_000_000); - assertEquals(sizeBytes, 3160); - } - - @Test - public void checkUbOnNumLevels() { - assertEquals(KllHelper.ubOnNumLevels(0), 1); - } - - @Test - public void checkIntCapAux() { - int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); - assertEquals(lvlCap, 8); - lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); - assertEquals(lvlCap, 10); - } - - @Test - public void checkSuperLargeKandLevels() { - //This is beyond what the sketch can be configured for. 
- final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); - assertEquals(size, 1_610_612_846); - } - @Test public void getQuantiles() { final KllFloatsSketch sketch = new KllFloatsSketch(); @@ -421,4 +395,36 @@ public void getQuantiles() { assertEquals(quantiles1[2], 3f); } + @SuppressWarnings("deprecation") + @Test + public void checkDeprecatedMethods() { + final int k = 200; + final int n = 200; + int bytes = KllSketch.getMaxSerializedSizeBytes(k, n); //assumed float before + assertEquals(bytes, 832); + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + final byte[] byteArr = sk.toByteArray(); + assertEquals(byteArr.length, 832); + bytes = sk.getSerializedSizeBytes(); + assertEquals(bytes, 832); + } + + @Test + public void checkReset() { + KllFloatsSketch sk = new KllFloatsSketch(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + float min1 = sk.getMinValue(); + float max1 = sk.getMaxValue(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + float min2 = sk.getMinValue(); + float max2 = sk.getMaxValue(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java index 71de641ed..9d3227c5b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsValidationTest.java @@ -218,8 +218,8 @@ public void checkTestResults() { } int numLevels = sketch.getNumLevels(); int numSamples = sketch.getNumRetained(); - int[] levels = sketch.getLevels(); - long hashedSamples = simpleHashOfSubArray(sketch.getItems(), levels[0], numSamples); + int[] levels = sketch.getLevelsArray(); + long hashedSamples = simpleHashOfSubArray(sketch.getFloatItemsArray(), levels[0], numSamples); 
System.out.print(testI); assert correctResultsWithReset[(7 * testI) + 4] == numLevels; assert correctResultsWithReset[(7 * testI) + 5] == numSamples; diff --git a/src/test/java/org/apache/datasketches/kll/KllHelperTest.java b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java new file mode 100644 index 000000000..791bdd5c5 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllHelperTest.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllHelper.checkM; +import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.kll.KllSketch.SketchType; +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +@SuppressWarnings("unused") +public class KllHelperTest { + + /** + * Println Object o + * @param o object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); + } + + @Test + public void checkCheckM() { + try { + checkM(0); + } catch (SketchesArgumentException e) {} + try { + checkM(3); + } catch (SketchesArgumentException e) {} + try { + checkM(10); + } catch (SketchesArgumentException e) {} + } + + @Test + public void checkGetKFromEps() { + final int k = KllSketch.DEFAULT_K; + final double eps = KllHelper.getNormalizedRankError(k, false); + final double epsPmf = KllHelper.getNormalizedRankError(k, true); + final int kEps = KllSketch.getKFromEpsilon(eps, false); + final int kEpsPmf = KllSketch.getKFromEpsilon(epsPmf, true); + assertEquals(kEps, k); + assertEquals(kEpsPmf, k); + } + + @Test + public void checkIntCapAux() { + int lvlCap = KllHelper.levelCapacity(10, 61, 0, 8); + assertEquals(lvlCap, 8); + lvlCap = KllHelper.levelCapacity(10, 61, 60, 8); + assertEquals(lvlCap, 10); + } + + @Test + public void checkSuperLargeKandLevels() { + //This is beyond what the sketch can be configured for. 
+ final int size = KllHelper.computeTotalItemCapacity(1 << 29, 8, 61); + assertEquals(size, 1_610_612_846); + } + + @Test + public void checkUbOnNumLevels() { + assertEquals(KllHelper.ubOnNumLevels(0), 1); + } + + @Test + public void checkUpdatableSerDe() { + KllDoublesSketch sk = new KllDoublesSketch(200); + for (int i = 1; i <= 533; i++) { sk.update(i); } + int retained = sk.getNumRetained(); + int numLevels = ((KllHeapSketch)sk).getNumLevels(); + println("NumLevels: " + numLevels); + println("NumRetained: " + retained); + + byte[] compByteArr1 = sk.toByteArray(); + int compBytes1 = compByteArr1.length; + println("compBytes1: " + compBytes1); + + byte[] upByteArr1 = sk.toUpdatableByteArray(); + int upBytes1 = upByteArr1.length; + println("upBytes1: " + upBytes1); + + Memory mem; + KllDoublesSketch sk2; + + mem = Memory.wrap(compByteArr1); + sk2 = KllDoublesSketch.heapify(mem); + byte[] compByteArr2 = sk2.toByteArray(); + int compBytes2 = compByteArr2.length; + println("compBytes2: " + compBytes2); + assertEquals(compBytes1, compBytes2); + assertEquals(sk2.getNumRetained(), retained); + + mem = Memory.wrap(compByteArr2); + sk2 = KllDoublesSketch.heapify(mem); + byte[] upByteArr2 = sk2.toUpdatableByteArray(); + int upBytes2 = upByteArr2.length; + println("upBytes2: " + upBytes2); + assertEquals(upBytes1, upBytes2); + assertEquals(sk2.getNumRetained(), retained); + } + + + @Test + public void getMaxCompactDoublesSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, DOUBLES_SKETCH, false); + assertEquals(sizeBytes, 5704); + } + + @Test + public void getMaxCompactFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, FLOATS_SKETCH, false); + assertEquals(sizeBytes, 2908); + } + + @Test + public void getMaxUpdatableDoubleSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, 
DOUBLES_SKETCH, true); + assertEquals(sizeBytes, 5708); + } + + @Test + public void getMaxUpdatableFloatsSerializedSizeBytes() { + final int sizeBytes = KllSketch.getMaxSerializedSizeBytes(KllSketch.DEFAULT_K, 1L << 30, FLOATS_SKETCH, true); + assertEquals(sizeBytes, 2912); + } + + @Test + public void getStatsAtNumLevels() { + int k = 200; + int m = 8; + int numLevels = 23; + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, m, numLevels, false); + assertEquals(lvlStats.items, 697); + assertEquals(lvlStats.n, 1257766904); + } + + @Test + public void getStatsAtNumLevels2() { + int k = 20; + int m = 8; + int numLevels = 2; + KllHelper.LevelStats lvlStats = KllHelper.getFinalSketchStatsAtNumLevels(k, KllSketch.DEFAULT_M, numLevels, false); + assertEquals(lvlStats.numLevels, 2); + assertEquals(lvlStats.items, 33); + } + + @Test + public void testGetAllLevelStats() { + long n = 1L << 30; + int k = 200; + int m = 8; + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, m, n, DOUBLES_SKETCH, false); + assertEquals(gStats.compactBytes, 5704); + } + + @Test + public void testGetAllLevelStats2() { + long n = 533; + int k = 200; + int m = 8; + KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, DOUBLES_SKETCH, false); + assertEquals(gStats.numLevels, 2); + assertEquals(gStats.maxItems, 333); + + } +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java new file mode 100644 index 000000000..324954156 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllPreambleUtil.*; + +import org.apache.datasketches.Family; +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +@SuppressWarnings("unused") +public class KllMemoryValidateTest { + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFamily() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryFamilyID(wmem, Family.KLL.getID() - 1); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL - 1); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidEmptyAndSingle() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryFlags(wmem, EMPTY_BIT_MASK | 
SINGLE_ITEM_BIT_MASK); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidUpdatableAndSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryFlags(wmem, UPDATABLE_BIT_MASK); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSingleAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSingleAndSerVer() { + KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidEmptyDoublesAndPreIntsFull() { + KllDoublesSketch sk = new KllDoublesSketch(); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidSingleDoubleCompactAndSerVer() { + KllDoublesSketch sk = new KllDoublesSketch(); + sk.update(1); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + KllMemoryValidate 
memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidDoubleUpdatableAndPreInts() { + KllDoublesSketch sk = new KllDoublesSketch(); + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFloatFullAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); sk.update(2); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidFloatUpdatableFullAndPreInts() { + KllFloatsSketch sk = new KllFloatsSketch(); + sk.update(1); sk.update(2); + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_EMPTY_SINGLE); + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInvalidDoubleCompactSingleAndPreInts() { + KllDoublesSketch sk = new KllDoublesSketch(); + sk.update(1); + byte[] byteArr = sk.toByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + setMemoryPreInts(wmem, PREAMBLE_INTS_FULL);//should be 2, single + KllMemoryValidate memVal = new KllMemoryValidate(wmem); + } + +} + diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java new file mode 100644 index 000000000..eedf5e44e --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectDoublesTest.java @@ -0,0 +1,440 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.util.Objects; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class MiscDirectDoublesTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllDirectDoublesSketch sk = getDDSketch(200, 0); + for (int i = 0; i < 1000; i++) { + sk.update(i); + } + final double eps = sk.getNormalizedRankError(false); + final double est = sk.getQuantile(0.5); + final double ub = sk.getQuantileUpperBound(0.5); + final double lb = sk.getQuantileLowerBound(0.5); + assertEquals(ub, sk.getQuantile(.5 + eps)); + assertEquals(lb, sk.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + } + + @Test + public void checkMisc() { + final KllDirectDoublesSketch sk = getDDSketch(8, 
0); + assertTrue(Objects.isNull(sk.getQuantiles(10))); + //sk.toString(true, true); + for (int i = 0; i < 20; i++) { sk.update(i); } + //sk.toString(true, true); + //sk.toByteArray(); + final double[] items = sk.getDoubleItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + //@Test //enable static println(..) for visual checking + public void visualCheckToString() { + final KllDirectDoublesSketch sk = getDDSketch(20, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllDirectDoublesSketch sk2 = getDDSketch(20, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + //@Test + public void viewCompactions() { + final KllDirectDoublesSketch sk = getDDSketch(20, 0); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDirectDoublesSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeDoubleHeap() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + + //println("#### CASE: DOUBLE FULL HEAP"); + sk = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + 
assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP EMPTY"); + sk = getDDSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE HEAP SINGLE"); + sk = getDDSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyCompactMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = 
KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDDSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + 
assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyUpdatableMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDDSketch(k, 0); 
+ sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringDoubleUpdatable() { + int k = 20; //don't change this + KllDirectDoublesSketch sk; + KllDirectDoublesSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: DOUBLE FULL UPDATABLE"); + sk = getDDSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE EMPTY UPDATABLE"); + sk = getDDSketch(k, 0); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE SINGLE UPDATABL"); + sk = getDDSketch(k, 0); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDirectDoublesSketch sk1 = getDDSketch(k, 0); + KllDirectDoublesSketch sk2 = getDDSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxValue(), 121.0); + assertEquals(sk1.getMinValue(), 1.0); + } + + @Test + public void checkSizes() { + KllDirectDoublesSketch sk = getDDSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = sk.toUpdatableByteArray(); + int size1 = sk.getCurrentUpdatableSerializedSizeBytes(); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.getCurrentCompactSerializedSizeBytes(); + assertEquals(size2, byteArr2.length); + } + + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(6000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 10000.0); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllDirectDoublesSketch sk = KllDirectDoublesSketch.newInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 200.0); + } + + private static KllDirectDoublesSketch getDDSketch(final int k, final int n) { + KllDoublesSketch sk = new KllDoublesSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = 
sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllDirectDoublesSketch ddsk = KllDirectDoublesSketch.writableWrap(wmem, memReqSvr); + return ddsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java new file mode 100644 index 000000000..597ebe5dc --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/MiscDirectFloatsTest.java @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.util.Objects; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class MiscDirectFloatsTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllDirectFloatsSketch sk = getDFSketch(200, 0); + for (int i = 0; i < 1000; i++) { + sk.update(i); + } + final double eps = sk.getNormalizedRankError(false); + final float est = sk.getQuantile(0.5); + final float ub = sk.getQuantileUpperBound(0.5); + final float lb = sk.getQuantileLowerBound(0.5); + assertEquals(ub, sk.getQuantile(.5 + eps)); + assertEquals(lb, sk.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + } + + @Test + public void checkMisc() { + final KllDirectFloatsSketch sk = getDFSketch(8, 0); + assertTrue(Objects.isNull(sk.getQuantiles(10))); + //sk.toString(true, true); + for (int i = 0; i < 20; i++) { sk.update(i); } + //sk.toString(true, true); + //sk.toByteArray(); + final float[] items = sk.getFloatItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { + final KllDirectFloatsSketch sk = getDFSketch(20, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllDirectFloatsSketch sk2 = getDFSketch(20, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + //@Test + public void viewCompactions() { + final KllDirectFloatsSketch sk = getDFSketch(20, 0); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDirectFloatsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeFloatHeap() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + + //println("#### CASE: FLOAT FULL HEAP"); + sk = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: FLOAT HEAP EMPTY"); + sk = getDFSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + 
assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: FLOAT HEAP SINGLE"); + sk = getDFSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyCompactMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0f); + assertEquals(sk.getNumLevels(), 2); + 
assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDFSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyUpdatableMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: FLOAT FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = 
sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: FLOAT EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: FLOAT SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDFSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + 
assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringFloatUpdatable() { + int k = 20; //don't change this + KllDirectFloatsSketch sk; + KllDirectFloatsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: FLOAT FULL UPDATABLE"); + sk = getDFSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: FLOAT EMPTY UPDATABLE"); + sk = getDFSketch(k, 0); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: FLOAT SINGLE UPDATABL"); + sk = getDFSketch(k, 0); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDirectFloatsSketch sk1 = getDFSketch(k, 0); + KllDirectFloatsSketch sk2 = getDFSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxValue(), 121.0F); + assertEquals(sk1.getMinValue(), 1.0F); + } + + @Test + public void checkSizes() { + KllDirectFloatsSketch sk = getDFSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = sk.toUpdatableByteArray(); + int size1 = sk.getCurrentUpdatableSerializedSizeBytes(); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.getCurrentCompactSerializedSizeBytes(); + assertEquals(size2, byteArr2.length); + } + + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(3000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0F); + 
assertEquals(sk.getMaxValue(), 10000.0F); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllDirectFloatsSketch sk = KllDirectFloatsSketch.newInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinValue(), 1.0); + assertEquals(sk.getMaxValue(), 200.0); + } + + private static KllDirectFloatsSketch getDFSketch(final int k, final int n) { + KllFloatsSketch sk = new KllFloatsSketch(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toUpdatableByteArray(); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllDirectFloatsSketch dfsk = KllDirectFloatsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java index 791684eb9..276f52776 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscDoublesTest.java @@ -20,12 +20,13 @@ package org.apache.datasketches.kll; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import java.util.Objects; -import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.SketchesArgumentException; import org.testng.annotations.Test; /** @@ -33,17 +34,7 @@ */ @SuppressWarnings("javadoc") public class MiscDoublesTest { - - @Test - public void checkGetKFromEps() { - final int k = BaseKllSketch.DEFAULT_K; - final double eps = 
BaseKllSketch.getNormalizedRankError(k, false); - final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); - final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); - assertEquals(kEps, k); - assertEquals(kEpsPmf, k); - } + static final String LS = System.getProperty("line.separator"); @Test public void checkBounds() { @@ -66,7 +57,7 @@ public void checkBounds() { public void checkHeapifyExceptions1() { KllDoublesSketch sk = new KllDoublesSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt M + wmem.putByte(6, (byte)3); //corrupt with odd M KllDoublesSketch.heapify(wmem); } @@ -106,31 +97,399 @@ public void checkHeapifyExceptions5() { @Test public void checkMisc() { - KllDoublesSketch sk = new KllDoublesSketch(8, true); + KllDoublesSketch sk = new KllDoublesSketch(8); assertTrue(Objects.isNull(sk.getQuantiles(10))); sk.toString(true, true); for (int i = 0; i < 20; i++) { sk.update(i); } sk.toString(true, true); sk.toByteArray(); - final double[] items = sk.getItems(); + final double[] items = sk.getDoubleItemsArray(); assertEquals(items.length, 16); - final int[] levels = sk.getLevels(); + final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); assertEquals(sk.getNumLevels(), 2); } - //@Test //requires visual check - public void visualCheck() { + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { final KllDoublesSketch sketch = new KllDoublesSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } - println(sketch.toString(true, true)); + final String s1 = sketch.toString(true, true); + println(s1); final KllDoublesSketch sketch2 = new KllDoublesSketch(20); for (int i = 0; i < 400; i++) { sketch2.update(i + 1); } println("\n" + sketch2.toString(true, true)); sketch2.merge(sketch); - println("\n" + sketch2.toString(true, true)); + final String s2 = sketch2.toString(true, true); + println(LS + s2); + } + + @Test + public void viewCompactions() { + KllDoublesSketch sk = new KllDoublesSketch(20); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllDoublesSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkGrowLevels() { + KllDoublesSketch sk = new KllDoublesSketch(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray()[2], 33); + } + + @Test + public void checkSketchInitializeDoubleHeap() { + int k = 20; //don't change this + KllDoublesSketch sk; + + println("#### CASE: DOUBLE FULL HEAP"); + sk = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE HEAP EMPTY"); + sk = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE HEAP SINGLE"); + sk = new KllDoublesSketch(k); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeDoubleHeapifyCompactMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL HEAPIFIED FROM COMPACT"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + 
assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM COMPACT"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM COMPACT"); + sk2 = new KllDoublesSketch(k); + sk2.update(1); + println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); 
+ } + + @Test + public void checkSketchInitializeDoubleHeapifyUpdatableMem() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: DOUBLE FULL HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxDoubleValue(), 21.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), Double.NaN); + assertEquals(sk.getMinDoubleValue(), Double.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: DOUBLE SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllDoublesSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = 
sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllDoublesSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getDoubleItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxDoubleValue(), 1.0); + assertEquals(sk.getMinDoubleValue(), 1.0); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringDoubleCompact() { + int k = 20; // don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: DOUBLE FULL COMPACT"); + sk = new KllDoublesSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: DOUBLE EMPTY COMPACT"); + sk = new KllDoublesSketch(20); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: DOUBLE SINGLE COMPACT"); + sk = new KllDoublesSketch(20); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test + public void checkMemoryToStringDoubleUpdatable() { + int k = 20; //don't change this + KllDoublesSketch sk; + KllDoublesSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: DOUBLE FULL UPDATABLE"); + sk = new KllDoublesSketch(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE EMPTY UPDATABLE"); + sk = new KllDoublesSketch(k); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: DOUBLE SINGLE UPDATABL"); + sk = new KllDoublesSketch(k); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllDoublesSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllDoublesSketch sk1 = new KllDoublesSketch(k); + KllDoublesSketch sk2 = new KllDoublesSketch(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java index f7a0aeebc..a53199a20 100644 --- a/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/MiscFloatsTest.java @@ -20,13 +20,13 @@ package org.apache.datasketches.kll; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import java.util.Objects; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; - import org.testng.annotations.Test; /** @@ -34,17 +34,7 @@ */ @SuppressWarnings("javadoc") public class MiscFloatsTest { - - @Test - public void checkGetKFromEps() { - final int k = BaseKllSketch.DEFAULT_K; - final 
double eps = BaseKllSketch.getNormalizedRankError(k, false); - final double epsPmf = BaseKllSketch.getNormalizedRankError(k, true); - final int kEps = BaseKllSketch.getKFromEpsilon(eps, false); - final int kEpsPmf = BaseKllSketch.getKFromEpsilon(epsPmf, true); - assertEquals(kEps, k); - assertEquals(kEpsPmf, k); - } + static final String LS = System.getProperty("line.separator"); @Test public void checkBounds() { @@ -67,7 +57,7 @@ public void checkBounds() { public void checkHeapifyExceptions1() { KllFloatsSketch sk = new KllFloatsSketch(); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); - wmem.putByte(6, (byte)4); //corrupt M + wmem.putByte(6, (byte)3); //corrupt with odd M KllFloatsSketch.heapify(wmem); } @@ -107,32 +97,405 @@ public void checkHeapifyExceptions5() { @Test public void checkMisc() { - KllFloatsSketch sk = new KllFloatsSketch(8, true); + KllFloatsSketch sk = new KllFloatsSketch(8); assertTrue(Objects.isNull(sk.getQuantiles(10))); sk.toString(true, true); for (int i = 0; i < 20; i++) { sk.update(i); } sk.toString(true, true); sk.toByteArray(); - final float[] items = sk.getItems(); + final float[] items = sk.getFloatItemsArray(); assertEquals(items.length, 16); - final int[] levels = sk.getLevels(); + final int[] levels = sk.getLevelsArray(); assertEquals(levels.length, 3); assertEquals(sk.getNumLevels(), 2); } - //@Test //requires visual check - public void checkNumRetainedAboveLevelZero() { + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { final KllFloatsSketch sketch = new KllFloatsSketch(20); for (int i = 0; i < 10; i++) { sketch.update(i + 1); } final String s1 = sketch.toString(true, true); println(s1); + final KllFloatsSketch sketch2 = new KllFloatsSketch(20); - for (int i = 0; i < 400; i++) { - sketch2.update(i + 1); - } + for (int i = 0; i < 400; i++) { sketch2.update(i + 1); } + println("\n" + sketch2.toString(true, true)); + sketch2.merge(sketch); final String s2 = sketch2.toString(true, true); - println(s2); + println(LS + s2); + } + + @Test + public void viewCompactions() { + KllFloatsSketch sk = new KllFloatsSketch(20); + show(sk, 20); + show(sk, 21); //compaction 1 + show(sk, 43); + show(sk, 44); //compaction 2 + show(sk, 54); + show(sk, 55); //compaction 3 + show(sk, 73); + show(sk, 74); //compaction 4 + show(sk, 88); + show(sk, 89); //compaction 5 + show(sk, 96); + show(sk, 97); //compaction 6 + show(sk, 108); + } + + private static void show(final KllFloatsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkGrowLevels() { + KllFloatsSketch sk = new KllFloatsSketch(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray()[2], 33); + } + + @Test + public void checkSketchInitializeFloatHeap() { + int k = 20; //don't change this + KllFloatsSketch sk; + + println("#### CASE: FLOAT FULL HEAP"); + sk = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT HEAP EMPTY"); + sk = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT HEAP SINGLE"); + sk = new KllFloatsSketch(k); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeFloatHeapifyCompactMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k 
+ 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void 
checkSketchInitializeFloatHeapifyUpdatableMem() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: FLOAT FULL HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 33); + assertEquals(sk.getLevelsArray().length, 3); + assertEquals(sk.getMaxFloatValue(), 21.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), Float.NaN); + assertEquals(sk.getMinFloatValue(), Float.NaN); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: FLOAT SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = new KllFloatsSketch(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = sk2.toUpdatableByteArray(); + wmem = 
WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem)); + sk = KllFloatsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getFloatItemsArray().length, 20); + assertEquals(sk.getLevelsArray().length, 2); + assertEquals(sk.getMaxFloatValue(), 1.0F); + assertEquals(sk.getMinFloatValue(), 1.0F); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringFloatCompact() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: FLOAT FULL COMPACT"); + sk = new KllFloatsSketch(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: FLOAT EMPTY COMPACT"); + sk = new KllFloatsSketch(k); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: FLOAT SINGLE COMPACT"); + sk = new KllFloatsSketch(k); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test + public void checkMemoryToStringFloatUpdatable() { + int k = 20; //don't change this + KllFloatsSketch sk; + KllFloatsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: FLOAT FULL UPDATABLE"); + sk = new KllFloatsSketch(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: FLOAT EMPTY UPDATABLE"); + sk = new KllFloatsSketch(k); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: FLOAT SINGLE UPDATABLE"); + sk = new KllFloatsSketch(k); + sk.update(1); + upBytes = sk.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.memoryToString(wmem); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllFloatsSketch.heapify(wmem); + upBytes2 = sk2.toUpdatableByteArray(); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.memoryToString(wmem); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int m = 4; + int n1 = 21; + int n2 = 43; + KllFloatsSketch sk1 = new KllFloatsSketch(k, m); + KllFloatsSketch sk2 = new KllFloatsSketch(k, m); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + } + + @Test + public void checkOtherM() { + } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java index 7c017edfb..5513f7d6a 100644 --- a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java @@ -855,9 +855,9 @@ public void checkPutMemoryTooSmall() { } @Test - public void checkAuxPosOfPhi() throws Exception { + public void checkAuxPosOfRank() throws Exception { long n = 10; - long returnValue = QuantilesHelper.posOfPhi(1.0, 10); + long returnValue = QuantilesHelper.posOfRank(1.0, 10); //println("" + returnValue); assertEquals(returnValue, n-1); } diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 3fef29680..fe398312f 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -173,7 +173,7 @@ under the License.{@code * Long || Start Byte Adr: Common for both DoublesSketch and ItemsSketch * Adr: * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | @@ -66,7 +66,7 @@ * * || 39 | 38 | 37 | 36 | 35 | 34 | 33 | 32 | * 4 ||---------------------------START OF COMBINED BUfFER----------------------------| - *+ * }