diff --git a/src/main/java/org/apache/datasketches/InequalitySearch.java b/src/main/java/org/apache/datasketches/InequalitySearch.java index bb2c3c22d..278a33832 100644 --- a/src/main/java/org/apache/datasketches/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/InequalitySearch.java @@ -85,12 +85,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { @Override int resolve(final int lo, final int hi, final int low, final int high) { - if (lo >= high) { return high; } - return -1; + return (lo >= high) ? high : -1; } @Override - String desc(final double[] arr, final int low, final int high, final double v, final int idx) { + public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -104,7 +103,7 @@ String desc(final double[] arr, final int low, final int high, final double v, f } @Override - String desc(final float[] arr, final int low, final int high, final float v, final int idx) { + public String desc(final float[] arr, final int low, final int high, final float v, final int idx) { if (idx == -1) { return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -118,7 +117,7 @@ String desc(final float[] arr, final int low, final int high, final float v, fin } @Override - String desc(final long[] arr, final int low, final int high, final long v, final int idx) { + public String desc(final long[] arr, final int low, final int high, final long v, final int idx) { if (idx == -1) { return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -176,12 +175,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { @Override int resolve(final int lo, final int hi, final int low, final int high) { - if (lo >= high) { return high; } - return -1; + return (lo >= high) ? 
high : -1; } @Override - String desc(final double[] arr, final int low, final int high, final double v, final int idx) { + public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -195,7 +193,7 @@ String desc(final double[] arr, final int low, final int high, final double v, f } @Override - String desc(final float[] arr, final int low, final int high, final float v, final int idx) { + public String desc(final float[] arr, final int low, final int high, final float v, final int idx) { if (idx == -1) { return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -209,7 +207,7 @@ String desc(final float[] arr, final int low, final int high, final float v, fin } @Override - String desc(final long[] arr, final int low, final int high, final long v, final int idx) { + public String desc(final long[] arr, final int low, final int high, final long v, final int idx) { if (idx == -1) { return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1"; } @@ -267,7 +265,7 @@ int resolve(final int lo, final int hi, final int low, final int high) { } @Override - String desc(final double[] arr, final int low, final int high, final double v, final int idx) { + public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { if (v > arr[high]) { return "EQ: " + v + " > arr[" + high + "]; return -1"; @@ -281,7 +279,7 @@ String desc(final double[] arr, final int low, final int high, final double v, f } @Override - String desc(final float[] arr, final int low, final int high, final float v, final int idx) { + public String desc(final float[] arr, final int low, final int high, final float v, final int idx) { if (idx == -1) { if (v > arr[high]) { return "EQ: " + v + " > arr[" + high + "]; return -1"; @@ -295,7 +293,7 @@ String desc(final float[] arr, final int low, final int 
high, final float v, fin } @Override - String desc(final long[] arr, final int low, final int high, final long v, final int idx) { + public String desc(final long[] arr, final int low, final int high, final long v, final int idx) { if (idx == -1) { if (v > arr[high]) { return "EQ: " + v + " > arr[" + high + "]; return -1"; @@ -353,12 +351,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { @Override int resolve(final int lo, final int hi, final int low, final int high) { - if (hi <= low) { return low; } - return -1; + return (hi <= low) ? low : -1; } @Override - String desc(final double[] arr, final int low, final int high, final double v, final int idx) { + public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -372,7 +369,7 @@ String desc(final double[] arr, final int low, final int high, final double v, f } @Override - String desc(final float[] arr, final int low, final int high, final float v, final int idx) { + public String desc(final float[] arr, final int low, final int high, final float v, final int idx) { if (idx == -1) { return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -386,7 +383,7 @@ String desc(final float[] arr, final int low, final int high, final float v, fin } @Override - String desc(final long[] arr, final int low, final int high, final long v, final int idx) { + public String desc(final long[] arr, final int low, final int high, final long v, final int idx) { if (idx == -1) { return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -444,12 +441,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { @Override int resolve(final int lo, final int hi, final int low, final int high) { - if (hi <= low) { return low; } - return -1; + return (hi <= low) ? 
low : -1; } @Override - String desc(final double[] arr, final int low, final int high, final double v, final int idx) { + public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -463,7 +459,7 @@ String desc(final double[] arr, final int low, final int high, final double v, f } @Override - String desc(final float[] arr, final int low, final int high, final float v, final int idx) { + public String desc(final float[] arr, final int low, final int high, final float v, final int idx) { if (idx == -1) { return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -477,7 +473,7 @@ String desc(final float[] arr, final int low, final int high, final float v, fin } @Override - String desc(final long[] arr, final int low, final int high, final long v, final int idx) { + public String desc(final long[] arr, final int low, final int high, final long v, final int idx) { if (idx == -1) { return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return -1"; } @@ -580,7 +576,7 @@ String desc(final long[] arr, final int low, final int high, final long v, final * @param idx the resolved index from the search * @return the descriptive string. */ - abstract String desc(double[] arr, int low, int high, double v, int idx); + public abstract String desc(double[] arr, int low, int high, double v, int idx); /** * Optional call that describes the details of the results of the search. @@ -592,7 +588,7 @@ String desc(final long[] arr, final int low, final int high, final long v, final * @param idx the resolved index from the search * @return the descriptive string. */ - abstract String desc(float[] arr, int low, int high, float v, int idx); + public abstract String desc(float[] arr, int low, int high, float v, int idx); /** * Optional call that describes the details of the results of the search. 
@@ -604,7 +600,7 @@ String desc(final long[] arr, final int low, final int high, final long v, final * @param idx the resolved index from the search * @return the descriptive string. */ - abstract String desc(long[] arr, int low, int high, long v, int idx); + public abstract String desc(long[] arr, int low, int high, long v, int idx); /** * Binary Search for the index of the double value in the given search range that satisfies diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index db4c2b14a..e756cb99e 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -223,6 +223,12 @@ abstract class BaseReqSketch { */ public abstract int getSerializationBytes(); + /** + * Gets the sorted view of the current state of this sketch + * @return the sorted view of the current state of this sketch + */ + public abstract ReqSketchSortedView getSortedView(); + /** * Returns true if this sketch is empty. * @return empty flag @@ -239,8 +245,10 @@ abstract class BaseReqSketch { * Returns the current comparison criterion. If true the value comparison criterion is * ≤, otherwise it will be the default, which is <. * @return the current comparison criterion - * @deprecated + * @deprecated in the future the ltEq comparison parameter will not be saved at the class level in preference to + * the comparison parameter being specified for each API call. This method will be removed. */ + @Deprecated public abstract boolean isLessThanOrEqual(); /** @@ -266,14 +274,15 @@ abstract class BaseReqSketch { /** * Sets the chosen criterion for value comparison - * @deprecated - * * @param ltEq (Less-than-or Equals) If true, the sketch will use the ≤ criterion for comparing * values. Otherwise, the criterion is strictly <, the default. * This can be set anytime prior to a getRank(float) or getQuantile(double) or * equivalent query. 
* @return this + * @deprecated in the future the ltEq comparison parameter will not be saved at the class level in preference to + * the comparison parameter being specified for each API call. This method will be removed. */ + @Deprecated public abstract ReqSketch setLessThanOrEqual(final boolean ltEq); /** @@ -301,7 +310,7 @@ abstract class BaseReqSketch { * items of the compactor and the current nominal capacity of the compactor. * @param fmt the format string for the data items; example: "%4.0f". * @param allData all the retained items for the sketch will be output by - * compactory level. Otherwise, just a summary will be output. + * compactor level. Otherwise, just a summary will be output. * @return a detailed view of the compactors and their data */ public abstract String viewCompactorDetail(String fmt, boolean allData); diff --git a/src/main/java/org/apache/datasketches/req/ReqAuxiliary.java b/src/main/java/org/apache/datasketches/req/ReqAuxiliary.java deleted file mode 100644 index c329ba481..000000000 --- a/src/main/java/org/apache/datasketches/req/ReqAuxiliary.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.req; - -import java.util.Arrays; -import java.util.List; - -import org.apache.datasketches.InequalitySearch; - -/** - * Supports searches for quantiles - * @author Lee Rhodes - */ -class ReqAuxiliary { - private static final String LS = System.getProperty("line.separator"); - private float[] items; - private long[] weights; - private final boolean hra; //used in merge - private final long N; - - ReqAuxiliary(final ReqSketch sk) { - hra = sk.getHighRankAccuracy(); - N = sk.getN(); - buildAuxTable(sk); - } - - //Testing only! Allows testing of support methods without a sketch. - ReqAuxiliary(final float[] items, final long[] weights, final boolean hra, final long N) { - this.hra = hra; - this.N = N; - this.items = items; - this.weights = weights; - } - - private void buildAuxTable(final ReqSketch sk) { - final List compactors = sk.getCompactors(); - final int numComp = compactors.size(); - final int totalItems = sk.getRetainedItems(); - items = new float[totalItems]; - weights = new long[totalItems]; - int auxCount = 0; - for (int i = 0; i < numComp; i++) { - final ReqCompactor c = compactors.get(i); - final FloatBuffer bufIn = c.getBuffer(); - final long weight = 1 << c.getLgWeight(); - final int bufInLen = bufIn.getCount(); - mergeSortIn(bufIn, weight, auxCount); - auxCount += bufInLen; - } - createCumulativeWeights(); - dedup(); - } - - private void createCumulativeWeights() { - final int len = items.length; - for (int i = 1; i < len; i++) { - weights[i] += weights[i - 1]; - } - assert weights[len - 1] == N; - } - - void dedup() { - final int itemsLen = items.length; - final float[] itemsB = new float[itemsLen]; - final long[] wtsB = new long[itemsLen]; - int bidx = 0; - int i = 0; - while (i < itemsLen) { - int j = i + 1; - int hidup = j; - while (j < itemsLen && items[i] == items[j]) { - hidup = j++; - } - if (j - i == 1) { //no dups - itemsB[bidx] = items[i]; - wtsB[bidx++] = weights[i]; - i++; - continue; - } else { - 
itemsB[bidx] = items[hidup]; //lgtm [java/index-out-of-bounds] - wtsB[bidx++] = weights[hidup]; - i = j; - continue; - } - } - items = Arrays.copyOf(itemsB, bidx); - weights = Arrays.copyOf(wtsB, bidx); - } - - //Specially modified version of FloatBuffer.mergeSortIn(). Here spaceAtBottom is always false and - // the ultimate array size has already been set. However, this must simultaneously deal with - // sorting the weights as well. Also used in test. - void mergeSortIn(final FloatBuffer bufIn, final long weight, final int auxCount) { - if (!bufIn.isSorted()) { bufIn.sort(); } - final float[] arrIn = bufIn.getArray(); //may be larger than its item count. - final int bufInLen = bufIn.getCount(); - final int totLen = auxCount + bufInLen; - int i = auxCount - 1; - int j = bufInLen - 1; - int h = hra ? bufIn.getCapacity() - 1 : bufInLen - 1; - for (int k = totLen; k-- > 0; ) { - if (i >= 0 && j >= 0) { //both valid - if (items[i] >= arrIn[h]) { - items[k] = items[i]; - weights[k] = weights[i--]; - } else { - items[k] = arrIn[h--]; j--; - weights[k] = weight; - } - } else if (i >= 0) { //i is valid - items[k] = items[i]; - weights[k] = weights[i--]; - } else if (j >= 0) { //j is valid - items[k] = arrIn[h--]; j--; - weights[k] = weight; - } else { - break; - } - } - } - - /** - * Gets the quantile based on the given normalized rank, - * which must be in the range [0.0, 1.0], inclusive. - * @param normRank the given normalized rank - * @param ltEq determines the search method used. - * @return the quantile based on given normalized rank and ltEq. - */ - float getQuantile(final double normRank, final boolean ltEq) { - final int len = weights.length; - final long rank = (int)(normRank * N); - //Note that when ltEq=false, GT matches KLL & Quantiles behavior. - final InequalitySearch crit = ltEq ? 
InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(weights, 0, len - 1, rank, crit); - if (index == -1) { - return items[len - 1]; //resolves high end (GE & GT) -1 only! - } - return items[index]; - } - - //used for testing - - Row getRow(final int index) { - return new Row(items[index], weights[index]); - } - - static class Row { - float item; - long weight; - - Row(final float item, final long weight) { - this.item = item; - this.weight = weight; - } - } - - String toString(final int precision, final int fieldSize) { - final StringBuilder sb = new StringBuilder(); - final int p = precision; - final int z = fieldSize; - final String ff = "%" + z + "." + p + "f"; - final String sf = "%" + z + "s"; - final String df = "%" + z + "d"; - final String dfmt = ff + df + LS; - final String sfmt = sf + sf + LS; - sb.append("Aux Detail").append(LS); - sb.append(String.format(sfmt, "Item", "Weight")); - final int totalCount = items.length; - for (int i = 0; i < totalCount; i++) { - final Row row = getRow(i); - sb.append(String.format(dfmt, row.item, row.weight)); - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/req/ReqSketch.java b/src/main/java/org/apache/datasketches/req/ReqSketch.java index e84ebafbe..1e7807c89 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/ReqSketch.java @@ -89,7 +89,7 @@ public class ReqSketch extends BaseReqSketch { private int retItems = 0; //number of retained items in the sketch private int maxNomSize = 0; //sum of nominal capacities of all compactors //Objects - private ReqAuxiliary aux = null; + private ReqSketchSortedView rssv = null; private List compactors = new ArrayList<>(); private ReqDebug reqDebug = null; //user config, default: null, can be set after construction. 
private final CompactorReturn cReturn = new CompactorReturn(); //used in compress() @@ -129,12 +129,12 @@ public class ReqSketch extends BaseReqSketch { maxValue = other.maxValue; ltEq = other.ltEq; reqDebug = other.reqDebug; - //aux does not need to be copied + //rssv does not need to be copied for (int i = 0; i < other.getNumLevels(); i++) { compactors.add(new ReqCompactor(other.compactors.get(i))); } - aux = null; + rssv = null; } /** @@ -188,12 +188,12 @@ private void compress() { //we specifically decided not to do lazy compression. } } - aux = null; + rssv = null; if (reqDebug != null) { reqDebug.emitCompressDone(); } } - ReqAuxiliary getAux() { - return aux; + public ReqSketchSortedView getSortedView() { + return (rssv != null) ? rssv : new ReqSketchSortedView(this); } @Override @@ -247,9 +247,10 @@ private long[] getCounts(final float[] values, final boolean inclusive) { } /** - * @deprecated * @return ltEq flag + * @deprecated */ + @Deprecated boolean getLtEq() { return ltEq; } @@ -331,10 +332,10 @@ public float getQuantile(final double normRank, final boolean inclusive) { throw new SketchesArgumentException( "Normalized rank must be in the range [0.0, 1.0]: " + normRank); } - if (aux == null) { - aux = new ReqAuxiliary(this); + if (rssv == null) { + rssv = new ReqSketchSortedView(this); } - return aux.getQuantile(normRank, inclusive); + return rssv.getQuantile(normRank, inclusive); } @Override @@ -490,7 +491,7 @@ public ReqSketch merge(final ReqSketch other) { compress(); } assert retItems < maxNomSize; - aux = null; + rssv = null; return this; } @@ -501,7 +502,7 @@ public ReqSketch reset() { maxNomSize = 0; minValue = Float.NaN; maxValue = Float.NaN; - aux = null; + rssv = null; compactors = new ArrayList<>(); grow(); return this; @@ -536,24 +537,24 @@ public String toString() { } @Override - public void update(final float item) { - if (Float.isNaN(item)) { return; } + public void update(final float value) { + if (Float.isNaN(value)) { return; } if 
(isEmpty()) { - minValue = item; - maxValue = item; + minValue = value; + maxValue = value; } else { - if (item < minValue) { minValue = item; } - if (item > maxValue) { maxValue = item; } + if (value < minValue) { minValue = value; } + if (value > maxValue) { maxValue = value; } } final FloatBuffer buf = compactors.get(0).getBuffer(); - buf.append(item); + buf.append(value); retItems++; totalN++; if (retItems >= maxNomSize) { buf.sort(); compress(); } - aux = null; + rssv = null; } /** diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java new file mode 100644 index 000000000..e71e08f2b --- /dev/null +++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.req; + +import java.util.Arrays; +import java.util.List; + +import org.apache.datasketches.InequalitySearch; + +/** + * Supports searches for quantiles, Ranks, Iterator and Sorted View + * @author Lee Rhodes + */ +public class ReqSketchSortedView { + private static final String LS = System.getProperty("line.separator"); + private float[] values; + private long[] cumWeights; + private final boolean hra; //used in merge + private final long N; + + ReqSketchSortedView(final ReqSketch sk) { + hra = sk.getHighRankAccuracy(); + N = sk.getN(); + buildAuxTable(sk); + } + + /** + * Testing only! Allows testing of mergeSortIn without a sketch. + * Arrays must be appropriately sized. + * @param values given values + * @param natRanks currently not used for the test. + * @param hra hra vs lra + * @param N total stream size in number of values presented to the sketch. + */ + ReqSketchSortedView(final float[] values, final long[] natRanks, final boolean hra, final long N) { + this.hra = hra; + this.N = N; + this.values = values; + this.cumWeights = natRanks; + } + + /** + * Gets the quantile based on the given normalized rank, + * which must be in the range [0.0, 1.0], inclusive. + * @param normRank the given normalized rank + * @param inclusive determines the search criterion used. + * @return the quantile + */ + public float getQuantile(final double normRank, final boolean inclusive) { + final int len = cumWeights.length; + final long rank = (int)(normRank * N); + final InequalitySearch crit = inclusive ? InequalitySearch.GE : InequalitySearch.GT; + final int index = InequalitySearch.find(cumWeights, 0, len - 1, rank, crit); + if (index == -1) { + return values[len - 1]; //GT: normRank >= 1.0; GE: normRank > 1.0 + } + return values[index]; + } + + /** + * Gets the normalized rank based on the given value. + * @param value the given value + * @param ltEq determines the search criterion used. 
+ * @return the normalized rank + */ + public double getRank(final float value, final boolean ltEq) { + final int len = values.length; + final InequalitySearch crit = ltEq ? InequalitySearch.LE : InequalitySearch.LT; + final int index = InequalitySearch.find(values, 0, len - 1, value, crit); + if (index == -1) { + return 0; //LT: value <= minValue; LE: value < minValue + } + return (double)cumWeights[index] / N; + } + + public ReqSketchSortedViewIterator iterator() { + return new ReqSketchSortedViewIterator(values, cumWeights); + } + + public String toString(final int precision, final int fieldSize) { + final StringBuilder sb = new StringBuilder(); + final int p = precision; + final int z = Math.max(fieldSize, 6); + final String ff = "%" + z + "." + p + "f"; + final String sf = "%" + z + "s"; + final String df = "%" + z + "d"; + final String dfmt = ff + df + LS; + final String sfmt = sf + sf + LS; + sb.append("Sorted View Data:").append(LS + LS); + sb.append(String.format(sfmt, "Value", "CumWeight")); + final int totalCount = values.length; + for (int i = 0; i < totalCount; i++) { + final Row row = getRow(i); + sb.append(String.format(dfmt, row.value, row.cumWeight)); + } + return sb.toString(); + } + + private void buildAuxTable(final ReqSketch sk) { + final List compactors = sk.getCompactors(); + final int numComp = compactors.size(); + final int totalValues = sk.getRetainedItems(); + values = new float[totalValues]; + cumWeights = new long[totalValues]; + int count = 0; + for (int i = 0; i < numComp; i++) { + final ReqCompactor c = compactors.get(i); + final FloatBuffer bufIn = c.getBuffer(); + final long bufWeight = 1 << c.getLgWeight(); + final int bufInLen = bufIn.getCount(); + mergeSortIn(bufIn, bufWeight, count); + count += bufInLen; + } + createCumulativeNativeRanks(); + dedup(); + } + + private void createCumulativeNativeRanks() { + final int len = values.length; + for (int i = 1; i < len; i++) { + cumWeights[i] += cumWeights[i - 1]; + } + assert 
cumWeights[len - 1] == N; + } + + private void dedup() { + final int valuesLen = values.length; + final float[] valuesB = new float[valuesLen]; + final long[] natRanksB = new long[valuesLen]; + int bidx = 0; + int i = 0; + while (i < valuesLen) { + int j = i + 1; + int hidup = j; + while (j < valuesLen && values[i] == values[j]) { + hidup = j++; + } + if (j - i == 1) { //no dups + valuesB[bidx] = values[i]; + natRanksB[bidx++] = cumWeights[i]; + i++; + continue; + } else { + valuesB[bidx] = values[hidup]; + natRanksB[bidx++] = cumWeights[hidup]; + i = j; + continue; + } + } + values = Arrays.copyOf(valuesB, bidx); + cumWeights = Arrays.copyOf(natRanksB, bidx); + } + + + /** + * Specially modified version of FloatBuffer.mergeSortIn(). Here spaceAtBottom is always false and + * the ultimate array size has already been set. However, this must simultaneously deal with + * sorting the base FloatBuffer as well. Also used in test. + * + * @param bufIn given FloatBuffer. If not sorted it will be sorted here. + * @param bufWeight associated weight of input FloatBuffer + * @param count tracks number of values inserted into the class arrays + */ + void mergeSortIn(final FloatBuffer bufIn, final long bufWeight, final int count) { + if (!bufIn.isSorted()) { bufIn.sort(); } + final float[] arrIn = bufIn.getArray(); //may be larger than its value count. + final int bufInLen = bufIn.getCount(); + final int totLen = count + bufInLen; + int i = count - 1; + int j = bufInLen - 1; + int h = hra ? 
bufIn.getCapacity() - 1 : bufInLen - 1; + for (int k = totLen; k-- > 0; ) { + if (i >= 0 && j >= 0) { //both valid + if (values[i] >= arrIn[h]) { + values[k] = values[i]; + cumWeights[k] = cumWeights[i--]; //not yet natRanks, just individual wts + } else { + values[k] = arrIn[h--]; j--; + cumWeights[k] = bufWeight; + } + } else if (i >= 0) { //i is valid + values[k] = values[i]; + cumWeights[k] = cumWeights[i--]; + } else if (j >= 0) { //j is valid + values[k] = arrIn[h--]; j--; + cumWeights[k] = bufWeight; + } else { + break; + } + } + } + + //used for testing + + Row getRow(final int index) { + return new Row(values[index], cumWeights[index]); + } + + static class Row { + float value; + long cumWeight; + + Row(final float value, final long cumWeight) { + this.value = value; + this.cumWeight = cumWeight; + } + } + +} diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java new file mode 100644 index 000000000..d724d9be5 --- /dev/null +++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java @@ -0,0 +1,110 @@ +/* + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.req; + +/** + * Iterator over ReqSketchSortedView. + * + *

<p>The recommended iteration loop:</p>

+ *
<pre>{@code
+ *   ReqSketchSortedViewIterator itr = sketch.getSortedView().iterator();
+ *   while (itr.next()) {
+ *     float v = itr.getValue();
+ *     ...
+ *   }
+ * }</pre>
+ */ +public class ReqSketchSortedViewIterator { + + private final float[] values; + private final long[] cumWeights; + private int index; + + ReqSketchSortedViewIterator(final float[] values, final long[] cumWeights) { + this.values = values; + this.cumWeights = cumWeights; + index = -1; + } + + /** + * Gets the current value. + * + *

<p>Don't call this before calling next() for the first time + * or after getting false from next().</p>

+ * @return the current value + */ + public float getValue() { + return values[index]; + } + + /** + * Gets the cumulative weight for the current value. + * + *

<p>Don't call this before calling next() for the first time + * or after getting false from next().</p>

+ * @param inclusive If true, includes the weight of the current value. + * Otherwise, returns the cumulative weight of the previous value. + * @return cumulative weight for the current value. + */ + public long getCumulativeWeight(final boolean inclusive) { + return inclusive ? cumWeights[index] + : (index == 0) ? 0 : cumWeights[index - 1]; + } + + /** + * Gets the normalized rank for the current value or previous value. + * + *

<p>Don't call this before calling next() for the first time + * or after getting false from next().</p>

+ * @param inclusive if true, returns the normalized rank of the current value. + * Otherwise, returns the normalized rank of the previous value. + * @return normalized rank for the current value or previous value. + */ + public double getNormalizedRank(final boolean inclusive) { + final double N = cumWeights[ cumWeights.length - 1]; + return getCumulativeWeight(inclusive) / N; + } + + /** + * Gets the weight of the current value. + * + *

<p>Don't call this before calling next() for the first time + * or after getting false from next().</p>

+ * @return item weight of the current value. + */ + public long getWeight() { + if (index == 0) { return cumWeights[0]; } + return cumWeights[index] - cumWeights[index - 1]; + } + + /** + * Advancing the iterator and checking existence of the next element + * is combined here for efficiency. This results in an undefined + * state of the iterator before the first call of this method. + * @return true if the next element exists + */ + public boolean next() { + index++; + return index < values.length; + } + +} + diff --git a/src/test/java/org/apache/datasketches/CrossCheckQuantiles.java b/src/test/java/org/apache/datasketches/CrossCheckQuantiles.java new file mode 100644 index 000000000..4b01dd2a2 --- /dev/null +++ b/src/test/java/org/apache/datasketches/CrossCheckQuantiles.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches; + +public class CrossCheckQuantiles { + + private final static boolean enablePrinting = true; + + /** + * @param format the format + * @param args the args + */ + static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} + diff --git a/src/test/java/org/apache/datasketches/GenericInequalitySearchTest.java b/src/test/java/org/apache/datasketches/GenericInequalitySearchTest.java index fa2e6387e..2d6729f6f 100644 --- a/src/test/java/org/apache/datasketches/GenericInequalitySearchTest.java +++ b/src/test/java/org/apache/datasketches/GenericInequalitySearchTest.java @@ -67,17 +67,6 @@ private static void checkBuildRandArr() { } } - private static String listFltArray(final Float[] arr, final int low, final int high) { - final StringBuilder sb = new StringBuilder(); - sb.append(LS); - sb.append("arr: "); - for (int i = 0; i < arr.length; i++) { - if (i == low || i == high) { sb.append(String.format("(%.0f) ", arr[i])); } - else { sb.append(String.format("%.0f ", arr[i])); } - } - return sb.toString(); - } - @Test public void checkBinSearchFltLimits() { for (int len = 10; len <= 13; len++) { @@ -89,6 +78,19 @@ public void checkBinSearchFltLimits() { } } + private static String listFltArray(final Float[] arr, final int low, final int high) { + final StringBuilder sb = new StringBuilder(); + sb.append(LS); + sb.append("The values in parentheses are the low and high values of the sub-array to search"); + sb.append(LS); + sb.append("arr: "); + for (int i = 0; i < arr.length; i++) { + if (i == low || i == high) { sb.append(String.format("(%.0f) ", arr[i])); } + else { sb.append(String.format("%.0f ", arr[i])); } + } + return sb.toString(); + } + private void checkBinarySearchFloatLimits(final Float[] 
arr, final int low, final int high) { final Float lowV = arr[low]; final Float highV = arr[high]; @@ -287,27 +289,28 @@ public static String desc(final T[] arr, final int low, final int high, fina return ""; } + private final static boolean enablePrinting = false; /** * @param format the format * @param args the args */ static final void printf(final String format, final Object ...args) { - //System.out.printf(format, args); + if (enablePrinting) { System.out.printf(format, args); } } /** * @param o the Object to println */ static final void println(final Object o) { - //System.out.println(o.toString()); + if (enablePrinting) { System.out.println(o.toString()); } } /** * @param o the Object to print */ static final void print(final Object o) { - //System.out.print(o.toString()); + if (enablePrinting) { System.out.print(o.toString()); } } } diff --git a/src/test/java/org/apache/datasketches/req/ReqAuxiliaryTest.java b/src/test/java/org/apache/datasketches/req/ReqAuxiliaryTest.java deleted file mode 100644 index 66e90eb62..000000000 --- a/src/test/java/org/apache/datasketches/req/ReqAuxiliaryTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.req; - -import static org.testng.Assert.assertTrue; - -import org.apache.datasketches.req.ReqAuxiliary.Row; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ - -public class ReqAuxiliaryTest { - - @Test - public void checkMergeSortIn() { - checkMergeSortInImpl(true); - checkMergeSortInImpl(false); - } - - private static void checkMergeSortInImpl(final boolean hra) { - final FloatBuffer buf1 = new FloatBuffer(25, 0, hra); - for (int i = 1; i < 12; i += 2) { buf1.append(i); } //6 items - final FloatBuffer buf2 = new FloatBuffer(25, 0, hra); - for (int i = 2; i <= 12; i += 2) { buf2.append(i); } //6 items - final long N = 12; - - final float[] items = new float[25]; - final long[] weights = new long[25]; - - final ReqAuxiliary aux = new ReqAuxiliary(items, weights, hra, N); - aux.mergeSortIn(buf1, 1, 0); - aux.mergeSortIn(buf2, 2, 6); - println(aux.toString(3, 12)); - Row row = aux.getRow(0); - for (int i = 1; i < 12; i++) { - final Row rowi = aux.getRow(i); - assertTrue(rowi.item >= row.item); - row = rowi; - } - } - - /** - * output - * @param o object - */ - static final void println(final Object o) { - //System.out.println(o.toString()); - } -} diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchOtherTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchOtherTest.java index f4313d0ce..3f9499afd 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchOtherTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchOtherTest.java @@ -104,7 +104,7 @@ public void checkEstimationMode() { assertEquals(maxNomSize, 240); final float v = sk.getQuantile(1.0); assertEquals(v, 120.0f); - final ReqAuxiliary aux = sk.getAux(); + final ReqSketchSortedView aux = sk.getSortedView(); assertNotNull(aux); assertTrue(sk.getRSE(sk.getK(), .5, false, 120) > 0); assertTrue(sk.getSerializationBytes() > 0); diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java 
b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java new file mode 100644 index 000000000..8d340ead4 --- /dev/null +++ b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.req; + +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.req.ReqSketchSortedView.Row; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ + +public class ReqSketchSortedViewTest { + + /** + * just tests the mergeSortIn. It does NOT test anything else. 
+ */ + @Test + public void checkMergeSortIn() { + checkMergeSortInImpl(true); + checkMergeSortInImpl(false); + } + + private static void checkMergeSortInImpl(final boolean hra) { + final FloatBuffer buf1 = new FloatBuffer(25, 0, hra); + for (int i = 1; i < 12; i += 2) { buf1.append(i); } //6 odd values + final FloatBuffer buf2 = new FloatBuffer(25, 0, hra); + for (int i = 2; i <= 12; i += 2) { buf2.append(i); } //6 even values + final long N = 18; + + final float[] values = new float[25]; + final long[] valueWeights = new long[25]; //not used + + final ReqSketchSortedView rssv = new ReqSketchSortedView(values, valueWeights, hra, N); + rssv.mergeSortIn(buf1, 1, 0); + rssv.mergeSortIn(buf2, 2, 6); //at weight of 2 + println(rssv.toString(3, 12)); + Row row = rssv.getRow(0); + for (int i = 1; i < 12; i++) { + final Row rowi = rssv.getRow(i); + assertTrue(rowi.value >= row.value); + row = rowi; + } + } + + @Test + public void checkRssvVsSketch() { + int k = 4; + boolean hra = false; + boolean inclusive; + boolean useSketch; + int numV = 3; + int dup = 2; + inclusive = false; + useSketch = true; + checkRSSV(k, hra, inclusive, useSketch, numV, dup); + println("-------------------"); + inclusive = false; + useSketch = false; + checkRSSV(k, hra, inclusive, useSketch, numV, dup); + println("###################"); + inclusive = true; + useSketch = true; + checkRSSV(k, hra, inclusive, useSketch, numV, dup); + println("-------------------"); + inclusive = true; + useSketch = false; + checkRSSV(k, hra, inclusive, useSketch, numV, dup); + println(""); + println("###################"); + println(""); + } + + private void checkRSSV(final int k, final boolean hra, final boolean inclusive, + final boolean useSketch, final int numV, final int dup) { + println(""); + println("CHECK ReqSketchSortedView"); + println(" k: " + k + ", hra: " + hra + ", inclusive: " + inclusive + ", useSketch: " + useSketch); + ReqSketchBuilder bldr = ReqSketch.builder(); + 
bldr.setK(4).setHighRankAccuracy(hra).setLessThanOrEqual(inclusive); + ReqSketch sk = bldr.build(); + int n = numV * dup; //Total values including duplicates + println(" numV: " + numV + ", dup: " + dup); + + float[] arr = new float[n]; + + int h = 0; + for (int i = 0; i < numV; i++) { + float flt = (i + 1) * 10; + for (int j = 1; j <= dup; j++) { arr[h++] = flt; } + } + println(""); + println("Example Sketch Input with illustrated weights and ranks:"); + println(" Sketch only keeps individual value weights per level"); + println(" Cumulative Weights are computed in RSSV."); + println(" Normalized Ranks are computed on the fly."); + println(""); + printf("%16s%16s%16s\n", "Value", "CumWeight", "NormalizedRank"); + for (int i = 0; i < n; i++) { + printf("%16.1f%16d%16.3f\n", arr[i], i + 1, (i + 1.0)/n); + sk.update(arr[i]); + } + + println(""); + + //Sorted View Data: + ReqSketchSortedView rssv = new ReqSketchSortedView(sk); + println(rssv.toString(1, 16)); + + println("GetQuantile(NormalizedRank):"); + println(" CumWeight is for illustration"); + println(" Convert NormalizedRank to CumWeight (CW)."); + println(" Search RSSV CumWeights[] array:"); + println(" Non Inclusive (uses GT): arr[A] <= CW < arr[B], return B"); + println(" Inclusive (uses GE): arr[A] < CW <= arr[B], return B"); + println(" Return Values[B]"); + println(""); + printf("%16s%16s%16s\n", "NormalizedRank", "CumWeight", "Quantile"); + int m = 2 * n; + for (int i = 0; i <= m; i++) { + double fract = (double) i / m; + float q = useSketch + ? 
sk.getQuantile(fract, inclusive) + : rssv.getQuantile(fract, inclusive); //until aux iterator is created + printf("%16.3f%16.3f%16.1f\n", fract, fract * n, q); + } + + println(""); + println("GetRank(Value):"); + println(" Search RSSV Values[] array:"); + println(" Non Inclusive (uses LT): arr[A] < V <= arr[B], return A"); + println(" Inclusive (uses LE): arr[A] <= V < arr[B], return A"); + println(" Convert CumWeights[A] to NormRank,"); + println(" Return NormRank"); + printf("%16s%16s\n", "ValueIn", "NormalizedRank"); + float q = 5.0F; + for (int i = 1; i <= numV * 2 + 1; i++) { + double r = useSketch + ? sk.getRank(q, inclusive) + : rssv.getRank(q, inclusive); //until aux iterator is created + printf("%16.1f%16.3f\n", q, r); + q += 5.0F; + } + } + + @Test + public void checkIterator() { + int k = 4; + boolean hra = false; + int numV = 3; + int dup = 2; + println(""); + println("CHECK ReqSketchSortedViewIterator"); + println(" k: " + k + ", hra: " + hra); + ReqSketchBuilder bldr = ReqSketch.builder(); + ReqSketch sketch = bldr.build(); + int n = numV * dup; //Total values including duplicates + println(" numV: " + numV + ", dup: " + dup); + + float[] arr = new float[n]; + int h = 0; + for (int i = 0; i < numV; i++) { + float flt = (i + 1) * 10; + for (int j = 1; j <= dup; j++) { arr[h++] = flt; } + } + for (int i = 0; i < n; i++) { sketch.update(arr[i]); } + + ReqSketchSortedViewIterator itr = sketch.getSortedView().iterator(); + println(""); + String[] header = {"Value", "Wt", "CumWtNotInc", "nRankNotInc", "CumWtInc", "nRankInc"}; + String hfmt = "%12s%12s%12s%12s%12s%12s\n"; + String fmt = "%12.1f%12d%12d%12.3f%12d%12.3f\n"; + printf(hfmt, (Object[]) header); + while (itr.next()) { + float v = itr.getValue(); + long wt = itr.getWeight(); + long cumWtNotInc = itr.getCumulativeWeight(false); + double nRankNotInc = itr.getNormalizedRank(false); + long cumWtInc = itr.getCumulativeWeight(true); + double nRankInc = itr.getNormalizedRank(true); + printf(fmt, v, wt, 
cumWtNotInc, nRankNotInc, cumWtInc, nRankInc); + } + } + + private final static boolean enablePrinting = true; + + /** + * @param format the format + * @param args the args + */ + static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java index 643629248..ebff6672d 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java @@ -28,7 +28,7 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; //import static org.apache.datasketches.req.FloatBuffer.TAB; -import org.apache.datasketches.req.ReqAuxiliary.Row; +import org.apache.datasketches.req.ReqSketchSortedView.Row; import org.testng.annotations.Test; /** @@ -154,22 +154,22 @@ private static void checkGetRanks(final ReqSketch sk, final int max, final int i } private static void checkAux(final ReqSketch sk, final int iDebug) { - final ReqAuxiliary aux = new ReqAuxiliary(sk); + final ReqSketchSortedView aux = new ReqSketchSortedView(sk); if (iDebug > 0) { println(aux.toString(3,12)); } final int totalCount = sk.getRetainedItems(); - float item = 0; + float value = 0; long wt = 0; for (int i = 0; i < totalCount; i++) { final Row row = aux.getRow(i); if (i == 0) { - item = row.item; - wt = row.weight; + value = row.value; + wt = row.cumWeight; } else { - assertTrue(row.item >= item); - assertTrue(row.weight >= wt); - item = row.item; - wt = row.weight; + assertTrue(row.value >= value); + assertTrue(row.cumWeight >= wt); + value = row.value; + wt = row.cumWeight; } } } @@ -303,7 +303,7 @@ private static void 
checkSerDeImpl(final int k, final boolean hra, final int cou assertEquals(sk2.getHighRankAccuracy(),sk1.getHighRankAccuracy()); assertEquals(sk2.getK(), sk1.getK()); assertEquals(sk2.getMaxNomSize(), sk1.getMaxNomSize()); - assertEquals(sk2.getLtEq(), sk1.getLtEq()); + //assertEquals(sk2.getLtEq(), sk1.getLtEq()); assertEquals(sk2.getNumLevels(), sk1.getNumLevels()); assertEquals(sk2.getSerializationBytes(), sk1.getSerializationBytes()); } @@ -332,7 +332,7 @@ public void checkAuxDeDup() { } @Test - public void tenItems() { + public void tenValues() { final ReqSketch sketch = ReqSketch.builder().build(); for (int i = 1; i <= 10; i++) { sketch.update(i); } assertFalse(sketch.isEmpty());