Skip to content
Permalink
Browse files
Interim 11. Almost done. KllDirectDoublesSketch working and tested.
Next: Create the KllDirectFloatsSketch and tests.
  • Loading branch information
leerho committed Mar 25, 2022
1 parent 75cff23 commit 60cbfaabd43ab33eb542148eb4dbd8fbef98a264
Showing 19 changed files with 2,708 additions and 1,198 deletions.
@@ -19,136 +19,233 @@

package org.apache.datasketches.kll;

import org.apache.datasketches.kll.KllPreambleUtil.SketchType;
import static java.lang.Math.max;
import static java.lang.Math.min;

import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;

/**
* This class implements an off-heap doubles KllSketch via a WritableMemory instance of the sketch.
*
* <p>Please refer to the documentation in the package-info:<br>
* {@link org.apache.datasketches.kll}</p>
*
* @author Lee Rhodes, Kevin Lang
*/
public class KllDirectDoublesSketch extends KllDirectSketch {
public final class KllDirectDoublesSketch extends KllDirectSketch {


/**
 * Constructs this direct doubles sketch by handing the sketch type, the backing memory and the
 * memory request server to the KllDirectSketch constructor.
 *
 * @param wmem the current WritableMemory
 * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
 */
public KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr) {
  super(SketchType.DOUBLES_SKETCH, wmem, memReqSvr);
}

//public int getNumRetained()
@SuppressWarnings("unused")

/**
 * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
 * cumulative analog of the PMF, of the input stream given a set of splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
 * getNormalizedRankError(false) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and
 * exclusive of the right splitPoint, with the exception that the last interval will include
 * the maximum value.
 * It is not necessary to include either the min or max values in these split points.
 *
 * @return an array of m+1 double values, which are a consecutive approximation to the CDF
 * of the input stream given the splitPoints. The value at array position j of the returned
 * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
 * array.
 */
public double[] getCDF(final double[] splitPoints) {
  // 'true' asks the shared PMF/CDF helper for the cumulative form (getPMF passes 'false').
  return getDoublesPmfOrCdf(splitPoints, true);
}

/**
 * Gets the largest value seen in the stream.
 *
 * <p>If the sketch is empty this returns NaN.</p>
 *
 * @return the max value of the stream
 */
public double getMaxValue() {
  final double maxValue = getMaxDoubleValue();
  return maxValue;
}

/**
 * Gets the smallest value seen in the stream.
 *
 * <p>If the sketch is empty this returns NaN.</p>
 *
 * @return the min value of the stream
 */
public double getMinValue() {
  final double minValue = getMinDoubleValue();
  return minValue;
}

/**
 * Returns an approximation to the Probability Mass Function (PMF) of the input stream
 * for the given splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
 * getNormalizedRankError(true) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * Each interval includes its left splitPoint (or the minimum value) and excludes its right
 * splitPoint, except for the last interval, which also includes the maximum value.
 * It is not necessary to include either the min or max values in these split points.
 *
 * @return an array of m+1 doubles, each of which approximates the fraction of the input stream
 * values (the mass) that falls into the corresponding interval.
 */
public double[] getPMF(final double[] splitPoints) {
  // The shared helper serves both queries; getCDF passes 'true', this method passes 'false'.
  final boolean isCdf = false;
  return getDoublesPmfOrCdf(splitPoints, isCdf);
}

/**
 * Returns an approximation to the value of the data item that would be preceded by the given
 * fraction of a hypothetical sorted version of the input stream so far.
 *
 * <p>This method has a fairly large overhead (microseconds instead of nanoseconds), so it
 * should not be called repeatedly to obtain different quantiles from the same sketch.
 * Use getQuantiles() instead, which pays the overhead only once.</p>
 *
 * <p>If the sketch is empty this returns NaN.</p>
 *
 * @param fraction the specified fractional position in the hypothetical sorted stream.
 * These are also called normalized ranks or fractional ranks.
 * If fraction = 0.0, the true minimum value of the stream is returned.
 * If fraction = 1.0, the true maximum value of the stream is returned.
 *
 * @return the approximation to the value at the given fraction
 */
public double getQuantile(final double fraction) {
  final double quantile = getDoublesQuantile(fraction);
  return quantile;
}

/**
 * Gets the lower bound of the value interval in which the true quantile of the given rank
 * exists with a confidence of at least 99%.
 *
 * @param fraction the given normalized rank as a fraction
 * @return the lower bound of the value interval in which the true quantile of the given rank
 * exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
 */
public double getQuantileLowerBound(final double fraction) {
  final double rankError = KllHelper.getNormalizedRankError(getDyMinK(), false);
  // Shift the rank down by the error, but never below rank 0.
  final double lowerRank = max(0.0, fraction - rankError);
  return getQuantile(lowerRank);
}

/**
 * This is a more efficient multiple-query version of getQuantile().
 *
 * <p>The returned array could also be produced by calling getQuantile() once per fractional
 * rank, but that would be very inefficient. This method incurs the internal set-up overhead
 * once and obtains multiple quantile values in a single query. It is strongly recommended that
 * this method be used instead of multiple calls to getQuantile().</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param fractions given array of fractional positions in the hypothetical sorted stream.
 * These are also called normalized ranks or fractional ranks.
 * These fractions must be in the interval [0.0, 1.0], inclusive.
 *
 * @return array of approximations to the given fractions in the same order as given fractions
 * array.
 */
public double[] getQuantiles(final double[] fractions) {
  final double[] quantiles = getDoublesQuantiles(fractions);
  return quantiles;
}

/**
 * This is also a more efficient multiple-query version of getQuantile() and allows the caller
 * to specify the number of evenly spaced fractional ranks.
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
 * This must be a positive integer greater than 0. A value of 1 will return the min value.
 * A value of 2 will return the min and the max value. A value of 3 will return the min,
 * the median and the max value, etc.
 *
 * @return array of approximate quantiles, one per fractional rank, in the order produced by
 * evenlySpaced(0.0, 1.0, numEvenlySpaced).
 */
public double[] getQuantiles(final int numEvenlySpaced) {
  if (isEmpty()) {
    return null;
  }
  final double[] fractions =
      org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced);
  return getQuantiles(fractions);
}

/**
 * Gets the upper bound of the value interval in which the true quantile of the given rank
 * exists with a confidence of at least 99%.
 *
 * @param fraction the given normalized rank as a fraction
 * @return the upper bound of the value interval in which the true quantile of the given rank
 * exists with a confidence of at least 99%. Returns NaN if the sketch is empty.
 */
public double getQuantileUpperBound(final double fraction) {
  final double rankError = KllHelper.getNormalizedRankError(getDyMinK(), false);
  // Shift the rank up by the error, but never above rank 1.
  final double upperRank = min(1.0, fraction + rankError);
  return getQuantile(upperRank);
}

/**
 * Returns an approximation to the normalized (fractional) rank of the given value, from 0 to 1
 * inclusive.
 *
 * <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
 * getNormalizedRankError(false) function.</p>
 *
 * <p>If the sketch is empty this returns NaN.</p>
 *
 * @param value to be ranked
 * @return an approximate rank of the given value
 */
public double getRank(final double value) {
  final double rank = getDoubleRank(value);
  return rank;
}

/**
 * Creates an iterator over this sketch, built from its current double items array, levels
 * array and number of levels.
 *
 * @return the iterator for this class
 */
public KllDoublesSketchIterator iterator() {
  final double[] items = getDoubleItemsArray();
  final int[] levels = getLevelsArray();
  final int numLevels = getNumLevels();
  return new KllDoublesSketchIterator(items, levels, numLevels);
}

/**
 * Merges another sketch into this one.
 *
 * <p>The other sketch is first checked with isDirect() and then with isDoublesSketch();
 * a failed check is reported through kllSketchThrow with code 32 or 33, respectively.</p>
 *
 * @param other sketch to merge into this one
 */
public void merge(final KllSketch other) {
  final boolean direct = other.isDirect();
  if (!direct) {
    kllSketchThrow(32);
  }
  // Queried only after the direct check, as in the original ordering.
  final boolean doubles = other.isDoublesSketch();
  if (!doubles) {
    kllSketchThrow(33);
  }
  mergeDoubleImpl(other);
}

/**
 * Returns the byte array produced by toCompactByteArrayImpl().
 *
 * @return the byte array returned by toCompactByteArrayImpl()
 */
@Override
public byte[] toByteArray() {
  return toCompactByteArrayImpl();
}

/**
 * Returns the string produced by toStringImpl(withLevels, withData).
 *
 * @param withLevels forwarded unchanged to toStringImpl; presumably selects whether level
 * information is included (confirm against toStringImpl)
 * @param withData forwarded unchanged to toStringImpl; presumably selects whether the retained
 * data is included (confirm against toStringImpl)
 * @return the string returned by toStringImpl
 */
@Override
public String toString(final boolean withLevels, final boolean withData) {
  return toStringImpl(withLevels, withData);
}

// Placeholder override: always returns null; no updatable byte image is produced here.
// NOTE(review): toByteArray() delegates to toCompactByteArrayImpl(); confirm whether a matching
// updatable implementation exists that this method should delegate to.
@Override
public byte[] toUpdatableByteArray() {
return null;
}

// Placeholder override: always returns null.
// NOTE(review): iterator() passes this result to the KllDoublesSketchIterator constructor, so a
// null here reaches the iterator; confirm this stub is superseded elsewhere.
@Override
double[] getDoubleItemsArray() {
return null;
}

// Placeholder override of the float-typed accessor: always returns null.
// NOTE(review): presumably unused for a doubles sketch; confirm against callers.
@Override
float[] getFloatItemsArray() {
return null;
}

// Placeholder override: always returns the constant 0.
// NOTE(review): getMaxValue() returns this value directly and documents NaN for an empty
// sketch, which this stub does not provide; confirm it is superseded elsewhere.
@Override
double getMaxDoubleValue() {
return 0;
}

// Placeholder override of the float-typed accessor: always returns the constant 0.
// NOTE(review): presumably unused for a doubles sketch; confirm against callers.
@Override
float getMaxFloatValue() {
return 0;
}

// Placeholder override: always returns the constant 0.
// NOTE(review): getMinValue() returns this value directly and documents NaN for an empty
// sketch, which this stub does not provide; confirm it is superseded elsewhere.
@Override
double getMinDoubleValue() {
return 0;
}

// Placeholder override of the float-typed accessor: always returns the constant 0.
// NOTE(review): presumably unused for a doubles sketch; confirm against callers.
@Override
float getMinFloatValue() {
return 0;
}

// Placeholder override: the body is empty, so the given array is ignored.
// NOTE(review): the parameter is named floatItems although its type is double[].
@Override
void setDoubleItemsArray(final double[] floatItems) {
}

// Placeholder override of the float-typed setter: the body is empty, so the given array is
// ignored.
@Override
void setFloatItemsArray(final float[] floatItems) {
}

// Placeholder override: the body is empty, so the given value is not stored anywhere.
@Override
void setMaxDoubleValue(final double value) {
}

// Placeholder override of the float-typed setter: intentionally empty, like the neighbouring
// setter stubs, so the given value is ignored.
@Override
void setMaxFloatValue(final float value) {
}

// Placeholder override: the body is empty, so the given value is not stored anywhere.
@Override
void setMinDoubleValue(final double value) {
}

@Override
void setMinFloatValue(final float value) {
public void update(final double value) {
updateDouble(value);
}

// Placeholder override: the body is empty, so the given levels array is ignored.
@Override
void setLevelsArray(final int[] levelsArr) {

}

//int getDyMinK

//int[] getLevelsArray

//int getLevelsArrayAt()

//int getNumLevels

//void incN()

//void incNumLevels()

//boolean isLevelZeroSorted()

//void setDyMinK()

//void updateLevelsArray()

//void setLevelsArrayAt()

//void setLevelsArrayAtMinusEq()

//void setLevelsArrayAtPlusEq()

//void setLevelZeroSorted()

//void setN()

//void setNumLevels()

//int getItemsDataStartBytes()

//int getItemsArrLengthItems()

//int getLevelsArrLengthints()


}

0 comments on commit 60cbfaa

Please sign in to comment.