Skip to content

Commit

Permalink
Interim 2
Browse files Browse the repository at this point in the history
Completed changes to Sorted Views
  • Loading branch information
leerho committed Aug 17, 2022
1 parent 70f181d commit e2d8f07
Show file tree
Hide file tree
Showing 13 changed files with 131 additions and 59 deletions.
32 changes: 23 additions & 9 deletions src/main/java/org/apache/datasketches/DoublesSortedView.java
Expand Up @@ -47,28 +47,42 @@ public interface DoublesSortedView extends SortedView {
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>If <i>isCdf</i> is true, the points in the returned array are monotonically increasing and end with the
* <p>The points in the returned array are monotonically increasing and end with the
* value 1.0. Each value represents a point along the cumulative distribution function that approximates
* the CDF of the input data stream. Therefore, each point represents the fractional density of the distribution
* from zero to the given point. For example, if one of the returned values is 0.5, then the splitPoint corresponding
* to that value would be the median of the distribution.</p>
*
* <p>If <i>isCdf</i> is false, the points in the returned array are not monotonic and represent the discrete
* derivative of the CDF, or the Probability Mass Function (PMF). Each returned point represents the fractional
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a CDF.
*/
double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit);

/**
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>The points in the returned array are not monotonic and represent the discrete derivative of the CDF,
* which is also called the Probability Mass Function (PMF). Each returned point represents the fractional
* area of the total distribution which lies between the previous point (or zero) and the given point, which
* corresponds to the given splitPoint.</p>
*
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, unique, monotonic array
* of values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param isCdf if true, a CDF will be returned, otherwise, a PMF will be returned.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the data distribution
* as a CDF or PMF.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a PMF.
*/
double[] getPmfOrCdf(double[] splitPoints, boolean isCdf, QuantileSearchCriteria searchCrit);
double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit);


/**
Expand Down
36 changes: 25 additions & 11 deletions src/main/java/org/apache/datasketches/FloatsSortedView.java
Expand Up @@ -47,28 +47,42 @@ public interface FloatsSortedView extends SortedView {
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>If <i>isCdf</i> is true, the points in the returned array are monotonically increasing and end with the
* <p>The points in the returned array are monotonically increasing and end with the
* value 1.0. Each value represents a point along the cumulative distribution function that approximates
* the CDF of the input data stream. Therefore, each point represents the fractional density of the distribution
* between from zero. For example, if one of the returned values is 0.5, then the splitPoint corresponding to that
* value would be the median of the distribution.</p>
* from zero to the given point. For example, if one of the returned values is 0.5, then the splitPoint corresponding
* to that value would be the median of the distribution.</p>
*
* <p>If <i>isCdf</i> is false, the points in the returned array are not monotonic and represent the discrete
* derivative of the CDF, or the Probability Mass Function (PMF). Each returned point represents the fractional
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a CDF.
*/
double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit);

/**
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>The points in the returned array are not monotonic and represent the discrete derivative of the CDF,
* which is also called the Probability Mass Function (PMF). Each returned point represents the fractional
* area of the total distribution which lies between the previous point (or zero) and the given point, which
* corresponds to the given splitPoint.</p>
*
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, unique, monotonic array
* of values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param isCdf if true, a CDF will be returned, otherwise, a PMF will be returned.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the data distribution
* as a CDF or PMF.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a PMF.
*/
double[] getPmfOrCdf(float[] splitPoints, boolean isCdf, QuantileSearchCriteria searchCrit);
double[] getPMF(float[] splitPoints, QuantileSearchCriteria searchCrit);

/**
* Returns the array of values
Expand Down
42 changes: 28 additions & 14 deletions src/main/java/org/apache/datasketches/GenericSortedView.java
Expand Up @@ -47,28 +47,42 @@ public interface GenericSortedView<T> extends SortedView {
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>If <i>isCdf</i> is true, the points in the returned array are monotonically increasing and end with the
* <p>The points in the returned array are monotonically increasing and end with the
* value 1.0. Each value represents a point along the cumulative distribution function that approximates
* the CDF of the input data stream. Therefore, each point represents the fractional density of the distribution
* from zero to the given point. For example, if one of the returned values is 0.5, then the splitPoint
* corresponding to that value would be the median of the distribution.</p>
* from zero to the given point. For example, if one of the returned values is 0.5, then the splitPoint corresponding
* to that value would be the median of the distribution.</p>
*
* <p>If <i>isCdf</i> is false, the points in the returned array are not monotonic and represent the discrete
* derivative of the CDF, or the Probability Mass Function (PMF). Each returned point represents the fractional
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a CDF.
*/
double[] getCDF(T[] splitPoints, QuantileSearchCriteria searchCrit);

/**
* Returns an array of values where each value is a number in the range [0.0, 1.0].
* The size of this array is one larger than the size of the input splitPoints array.
*
* <p>The points in the returned array are not monotonic and represent the discrete derivative of the CDF,
* which is also called the Probability Mass Function (PMF). Each returned point represents the fractional
* area of the total distribution which lies between the previous point (or zero) and the given point, which
* corresponds to the given splitPoint.</p>
*
* @param splitPoints the given array of quantile items or splitPoints. This is a sorted, unique, monotonic array
* of items in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* @param splitPoints the given array of quantile values or splitPoints. This is a sorted, monotonic array of unique
* values in the range of (minValue, maxValue). This array should not include either the minValue or the maxValue.
* The returned array will have one extra interval representing the very top of the distribution.
* @param isCdf if true, a CDF will be returned, otherwise, a PMF will be returned.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top item and exclude its
* bottom item. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top item retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the data distribution
* as a CDF or PMF.
* @param searchCrit if INCLUSIVE, each interval within the distribution will include its top value and exclude its
* bottom value. Otherwise, it will be the reverse. The only exception is that the top portion will always include
* the top value retained by the sketch.
* @return an array of points that correspond to the given splitPoints, and represents the input data distribution
* as a PMF.
*/
double[] getPmfOrCdf(T[] splitPoints, boolean isCdf, QuantileSearchCriteria searchCrit);
double[] getPMF(T[] splitPoints, QuantileSearchCriteria searchCrit);

/**
* Returns the array of items.
Expand Down
Expand Up @@ -225,7 +225,7 @@ public double[] getCDF(final double[] splitPoints) {
public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return kllDoublesSV.getPmfOrCdf(splitPoints, true, searchCrit);
return kllDoublesSV.getCDF(splitPoints, searchCrit);
}

/**
Expand Down Expand Up @@ -264,7 +264,7 @@ public double[] getPMF(final double[] splitPoints) {
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return kllDoublesSV.getPmfOrCdf(splitPoints, false, searchCrit);
return kllDoublesSV.getPMF(splitPoints, searchCrit);
}

/**
Expand Down
Expand Up @@ -101,16 +101,22 @@ public double getRank(final double value, final QuantileSearchCriteria searchCri
}

@Override
public double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf, final QuantileSearchCriteria searchCrit) {
public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
validateDoubleValues(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
for (int i = 0; i < len - 1; i++) {
buckets[i] = getRank(splitPoints[i], searchCrit);
}
buckets[len - 1] = 1.0;
if (isCdf) { return buckets; }
for (int i = len; i-- > 1;) {
return buckets;
}

@Override
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
buckets[i] -= buckets[i - 1];
}
return buckets;
Expand Down
Expand Up @@ -225,7 +225,7 @@ public double[] getCDF(final float[] splitPoints) {
public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return kllFloatsSV.getPmfOrCdf(splitPoints, true, searchCrit);
return kllFloatsSV.getCDF(splitPoints, searchCrit);
}

/**
Expand Down Expand Up @@ -264,7 +264,7 @@ public double[] getPMF(final float[] splitPoints) {
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return kllFloatsSV.getPmfOrCdf(splitPoints, false, searchCrit);
return kllFloatsSV.getPMF(splitPoints, searchCrit);
}

/**
Expand Down
Expand Up @@ -101,16 +101,22 @@ public double getRank(final float value, final QuantileSearchCriteria searchCrit
}

@Override
public double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf, final QuantileSearchCriteria searchCrit) {
public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
validateFloatValues(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
for (int i = 0; i < len - 1; i++) {
buckets[i] = getRank(splitPoints[i], searchCrit);
}
buckets[len - 1] = 1.0;
if (isCdf) { return buckets; }
for (int i = len; i-- > 1;) {
return buckets;
}

@Override
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
buckets[i] -= buckets[i - 1];
}
return buckets;
Expand Down
Expand Up @@ -251,7 +251,7 @@ public double[] getCDF(final double[] splitPoints) {
public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return classicQdsSV.getPmfOrCdf(splitPoints, true, searchCrit);
return classicQdsSV.getCDF(splitPoints, searchCrit);
}

/**
Expand Down Expand Up @@ -290,7 +290,7 @@ public double[] getPMF(final double[] splitPoints) {
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return classicQdsSV.getPmfOrCdf(splitPoints, false, searchCrit);
return classicQdsSV.getPMF(splitPoints, searchCrit);
}

/**
Expand Down
Expand Up @@ -106,16 +106,22 @@ public double getRank(final double value, final QuantileSearchCriteria searchCri
}

@Override
public double[] getPmfOrCdf(final double[] splitPoints, final boolean isCdf, final QuantileSearchCriteria searchCrit) {
public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
Util.checkSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
for (int i = 0; i < len - 1; i++) {
buckets[i] = getRank(splitPoints[i], searchCrit);
}
buckets[len - 1] = 1.0;
if (isCdf) { return buckets; }
for (int i = len; i-- > 1;) {
return buckets;
}

@Override
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
buckets[i] -= buckets[i - 1];
}
return buckets;
Expand Down
Expand Up @@ -288,7 +288,7 @@ public double[] getCDF(final T[] splitPoints) {
public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { return null; }
refreshSortedView();
return classicQisSV.getPmfOrCdf(splitPoints, true, searchCrit);
return classicQisSV.getCDF(splitPoints, searchCrit);
}

/**
Expand Down Expand Up @@ -327,7 +327,7 @@ public double[] getPMF(final T[] splitPoints) {
public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { return null; }
refreshSortedView();
return classicQisSV.getPmfOrCdf(splitPoints, false, searchCrit);
return classicQisSV.getPMF(splitPoints, searchCrit);
}

/**
Expand Down
Expand Up @@ -122,16 +122,22 @@ public double getRank(final T item, final QuantileSearchCriteria searchCrit) {
}

@Override
public double[] getPmfOrCdf(final T[] splitPoints, final boolean isCdf, final QuantileSearchCriteria searchCrit) {
public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) {
checkSplitPoints(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
for (int i = 0; i < len - 1; i++) {
buckets[i] = getRank(splitPoints[i], searchCrit);
}
buckets[len - 1] = 1.0;
if (isCdf) { return buckets; }
for (int i = len; i-- > 1;) {
return buckets;
}

@Override
public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
buckets[i] -= buckets[i - 1];
}
return buckets;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/apache/datasketches/req/ReqSketch.java
Expand Up @@ -236,7 +236,7 @@ public double[] getCDF(final float[] splitPoints) {
public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { return null; }
refreshSortedView();
return reqSV.getPmfOrCdf(splitPoints, true, searchCrit);
return reqSV.getCDF(splitPoints, searchCrit);
}

@Override
Expand Down Expand Up @@ -268,7 +268,7 @@ public double[] getPMF(final float[] splitPoints) {
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (this.isEmpty()) { return null; }
refreshSortedView();
return reqSV.getPmfOrCdf(splitPoints, false, searchCrit);
return reqSV.getPMF(splitPoints, searchCrit);
}

@Override
Expand Down

0 comments on commit e2d8f07

Please sign in to comment.