Skip to content

Commit

Permalink
Moved public statics from KllPreambleUtil to KllSketch, added reset() method.
Browse files Browse the repository at this point in the history
  • Loading branch information
leerho committed Mar 31, 2022
1 parent 06af8ba commit 0e5dde1
Show file tree
Hide file tree
Showing 13 changed files with 206 additions and 109 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.KllPreambleUtil.DOUBLES_SKETCH_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
Expand Down Expand Up @@ -87,7 +86,7 @@ public static KllDirectDoublesSketch writableWrap(final WritableMemory srcMem, f
*/
public static KllDirectDoublesSketch newInstance(final int k, final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
return newInstance(k, DEFAULT_M, dstMem, memReqSvr);
return newInstance(k, KllSketch.DEFAULT_M, dstMem, memReqSvr);
}

/**
Expand Down Expand Up @@ -308,16 +307,6 @@ public void merge(final KllSketch other) {
mergeDoubleImpl(other);
}

@Override
public byte[] toByteArray() {
return toCompactByteArrayImpl();
}

@Override
public String toString(final boolean withLevels, final boolean withData) {
return toStringImpl(withLevels, withData);
}

/**
* Updates this sketch with the given data item.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FLOAT;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
Expand Down Expand Up @@ -87,7 +86,7 @@ public static KllDirectFloatsSketch writableWrap(final WritableMemory srcMem, fi
*/
public static KllDirectFloatsSketch newInstance(final int k, final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
return newInstance(k, DEFAULT_M, dstMem, memReqSvr);
return newInstance(k, KllSketch.DEFAULT_M, dstMem, memReqSvr);
}

/**
Expand Down Expand Up @@ -308,16 +307,6 @@ public void merge(final KllSketch other) {
mergeFloatImpl(other);
}

@Override
public byte[] toByteArray() {
return toCompactByteArrayImpl();
}

@Override
public String toString(final boolean withLevels, final boolean withData) {
return toStringImpl(withLevels, withData);
}

/**
* Updates this sketch with the given data item.
*
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/org/apache/datasketches/kll/KllDirectSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,27 @@ public long getN() {
return extractN(wmem);
}

/**
 * Resets this Memory-backed sketch to the empty state while keeping its current <i>k</i>.
 *
 * <p>The scalar fields are rewritten first (N = 0, minK = k, one level, level zero marked
 * unsorted), then {@code KllSketch.memorySpaceMgmt} is called with the new levels-array and
 * items-array lengths, and finally the levels, min/max and items regions are overwritten with
 * their empty values: {k, k} for the levels, NaN for min and max, and zeros for the k items.</p>
 */
@Override
public void reset() {
final int k = getK();
setN(0);
setMinK(k);
setNumLevels(1);
setLevelsArray(new int[] {k, k});
setLevelZeroSorted(false);
// NOTE(review): this value is a byte count (2 * Integer.BYTES), whereas newItemsArrLen below
// is an item count -- confirm which unit memorySpaceMgmt expects for the levels array length.
final int newLevelsArrLen = 2 * Integer.BYTES;
final int newItemsArrLen = k;
// NOTE(review): memorySpaceMgmt returns a WritableMemory that is discarded here -- confirm
// that the call itself updates this sketch's memory regions.
KllSketch.memorySpaceMgmt(this, newLevelsArrLen, newItemsArrLen);
// Presumably repeats the setLevelsArray write above now that the regions have been
// re-sized -- TODO confirm whether both writes are needed.
levelsArrUpdatable.putIntArray(0L, new int[] {k, k}, 0, 2);
// The min/max and items regions are typed, so the empty values are written per sketch type.
if (sketchType == SketchType.DOUBLES_SKETCH) {
minMaxArrUpdatable.putDoubleArray(0L, new double[] {Double.NaN, Double.NaN}, 0, 2);
itemsArrUpdatable.putDoubleArray(0L, new double[k], 0, k);
} else {
minMaxArrUpdatable.putFloatArray(0L, new float[] {Float.NaN, Float.NaN}, 0, 2);
itemsArrUpdatable.putFloatArray(0L, new float[k], 0, k);
}
}

@Override
public byte[] toUpdatableByteArray() {
final int bytes = (int) wmem.getCapacity();
Expand Down
19 changes: 15 additions & 4 deletions src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_DOUBLE;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT;
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL;
Expand Down Expand Up @@ -58,7 +56,7 @@ private KllDoublesSketch(final Memory mem, final KllMemoryValidate memVal) {
* This will have a rank error of about 1.65%.
*/
public KllDoublesSketch() {
this(DEFAULT_K);
this(KllSketch.DEFAULT_K);
}

/**
Expand All @@ -69,7 +67,7 @@ public KllDoublesSketch() {
* @param k parameter that controls size of the sketch and accuracy of estimates
*/
public KllDoublesSketch(final int k) {
this(k, DEFAULT_M);
this(k, KllSketch.DEFAULT_M);
}

/**
Expand Down Expand Up @@ -290,6 +288,19 @@ public void merge(final KllSketch other) {
mergeDoubleImpl(other);
}

/**
 * Returns this sketch to its empty state, keeping the configured <i>k</i>.
 *
 * <p>N becomes zero, minK is restored to <i>k</i>, the sketch drops back to a single level
 * whose levels array is {k, k}, level zero is flagged as unsorted, the items array is replaced
 * by a fresh array of <i>k</i> doubles, and the min and max values become NaN.</p>
 */
@Override
public void reset() {
  final int sketchK = getK();

  // Counters and level bookkeeping.
  setN(0);
  setMinK(sketchK);
  setNumLevels(1);
  final int[] emptyLevels = {sketchK, sketchK};
  setLevelsArray(emptyLevels);
  setLevelZeroSorted(false);

  // Item storage and extreme values.
  final double[] freshItems = new double[sketchK];
  doubleItems_ = freshItems;
  maxDoubleValue_ = minDoubleValue_ = Double.NaN;
}

/**
* Updates this sketch with the given data item.
*
Expand Down
19 changes: 15 additions & 4 deletions src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_K;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_IS_NOT_FLOAT;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_CANNOT_BE_DIRECT;
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_CALL;
Expand Down Expand Up @@ -58,7 +56,7 @@ private KllFloatsSketch(final Memory mem, final KllMemoryValidate memVal) {
* This will have a rank error of about 1.65%.
*/
public KllFloatsSketch() {
this(DEFAULT_K);
this(KllSketch.DEFAULT_K);
}

/**
Expand All @@ -69,7 +67,7 @@ public KllFloatsSketch() {
* @param k parameter that controls size of the sketch and accuracy of estimates
*/
public KllFloatsSketch(final int k) {
this(k, DEFAULT_M);
this(k, KllSketch.DEFAULT_M);
}

/**
Expand Down Expand Up @@ -290,6 +288,19 @@ public void merge(final KllFloatsSketch other) {
mergeFloatImpl(other);
}

/**
 * Returns this sketch to its empty state, keeping the configured <i>k</i>.
 *
 * <p>N becomes zero, minK is restored to <i>k</i>, the sketch drops back to a single level
 * whose levels array is {k, k}, level zero is flagged as unsorted, the items array is replaced
 * by a fresh array of <i>k</i> floats, and the min and max values become NaN.</p>
 */
@Override
public void reset() {
  final int sketchK = getK();

  // Counters and level bookkeeping.
  setN(0);
  setMinK(sketchK);
  setNumLevels(1);
  final int[] emptyLevels = {sketchK, sketchK};
  setLevelsArray(emptyLevels);
  setLevelZeroSorted(false);

  // Item storage and extreme values.
  final float[] freshItems = new float[sketchK];
  floatItems_ = freshItems;
  maxFloatValue_ = minFloatValue_ = Float.NaN;
}

/**
* Updates this sketch with the given data item.
*
Expand Down
9 changes: 3 additions & 6 deletions src/main/java/org/apache/datasketches/kll/KllHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
import static org.apache.datasketches.Util.floorPowerOf2;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT;
import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K;
import static org.apache.datasketches.kll.KllPreambleUtil.MAX_M;
import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M;
import static org.apache.datasketches.kll.KllSketch.CDF_COEF;
import static org.apache.datasketches.kll.KllSketch.CDF_EXP;
import static org.apache.datasketches.kll.KllSketch.PMF_COEF;
Expand Down Expand Up @@ -194,14 +191,14 @@ public static LevelStats getLevelCapacityItems(
* @param k must be greater than or equal to <i>m</i> and less than 65536.
*/
static void checkK(final int k, final int m) {
if (k < m || k > MAX_K) {
if (k < m || k > KllSketch.MAX_K) {
throw new SketchesArgumentException(
"K must be >= " + m + " and <= " + MAX_K + ": " + k);
"K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k);
}
}

static void checkM(final int m) {
if (m < MIN_M || m > MAX_M || ((m & 1) == 1)) {
if (m < KllSketch.MIN_M || m > KllSketch.MAX_M || ((m & 1) == 1)) {
throw new SketchesArgumentException(
"M must be >= 2, <= 8 and even: " + m);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,6 @@ private KllPreambleUtil() {}

static final String LS = System.getProperty("line.separator");

/**
* The default value of K
*/
public static final int DEFAULT_K = 200;
public static final int DEFAULT_M = 8;
public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short
public static final int MAX_M = 8;
public static final int MIN_M = 2;

// Preamble byte addresses
static final int PREAMBLE_INTS_BYTE_ADR = 0;
static final int SER_VER_BYTE_ADR = 1;
Expand Down
52 changes: 41 additions & 11 deletions src/main/java/org/apache/datasketches/kll/KllSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_DOUBLE;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_FLOAT;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM;
import static org.apache.datasketches.kll.KllPreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.KllPreambleUtil.MAX_K;
import static org.apache.datasketches.kll.KllPreambleUtil.MIN_M;
import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_DOUBLE;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE;
Expand Down Expand Up @@ -110,6 +107,31 @@ public abstract class KllSketch {
MemoryRequestServer memReqSvr;
boolean direct;

/**
 * The default value of K.
 * The no-argument sketch constructors pass this value as <i>k</i>.
 */
public static final int DEFAULT_K = 200;

/**
 * The default value of M.
 * Used as <i>m</i> by the constructors and factory methods that accept only <i>k</i>,
 * and by the getMaxSerializedSizeBytes methods.
 */
static final int DEFAULT_M = 8;

/**
 * The maximum value of K, which is 65535.
 * KllHelper.checkK rejects any <i>k</i> larger than this.
 */
public static final int MAX_K = (1 << 16) - 1; // serialized as an unsigned short

/**
 * The maximum value of M.
 * KllHelper.checkM rejects any <i>m</i> larger than this.
 */
static final int MAX_M = 8;

/**
 * The minimum value of M.
 * KllHelper.checkM rejects any <i>m</i> smaller than this, as well as any odd <i>m</i>.
 */
static final int MIN_M = 2;

/**
*
* @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH
Expand Down Expand Up @@ -155,7 +177,7 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
final double krnd = round(kdbl);
final double del = abs(krnd - kdbl);
final int k = (int) (del < EPS_DELTA_THRESHOLD ? krnd : ceil(kdbl));
return max(MIN_M, min(MAX_K, k));
return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k));
}

/**
Expand All @@ -166,27 +188,28 @@ public static int getKFromEpsilon(final double epsilon, final boolean pmf) {
* @param k parameter that controls size of the sketch and accuracy of estimates
* @param n stream length
* @return upper bound on the compact serialized size
* @deprecated use {@link #getMaxSerializedSizeBytes(int, int, long, SketchType, boolean)} instead.
* @deprecated use {@link #getMaxSerializedSizeBytes(int, long, SketchType, boolean)} instead.
*/
@Deprecated
public static int getMaxSerializedSizeBytes(final int k, final long n) {
final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, DEFAULT_M, n, FLOATS_SKETCH, false);
final KllHelper.GrowthStats gStats =
KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, FLOATS_SKETCH, false);
return gStats.compactBytes;
}

/**
* Returns upper bound on the serialized size of a KllSketch given the following parameters.
* It assumes the default value of <i>m</i>, which is 8.
* @param k parameter that controls size of the sketch and accuracy of estimates
* @param m parameter that controls the smallest value of k, and the smallest level width.
* If in doubt, use the default value of 8.
* @param n stream length
* @param sketchType either DOUBLES_SKETCH or FLOATS_SKETCH
* @param updatable true if updatable form, otherwise the standard compact form.
* @return upper bound on the serialized size of a KllSketch.
*/
public static int getMaxSerializedSizeBytes(final int k, final int m, final long n,
public static int getMaxSerializedSizeBytes(final int k, final long n,
final SketchType sketchType, final boolean updatable) {
final KllHelper.GrowthStats gStats = KllHelper.getGrowthSchemeForGivenN(k, m, n, sketchType, false);
final KllHelper.GrowthStats gStats =
KllHelper.getGrowthSchemeForGivenN(k, KllSketch.DEFAULT_M, n, sketchType, false);
return updatable ? gStats.updatableBytes : gStats.compactBytes;
}

Expand Down Expand Up @@ -343,6 +366,13 @@ public final boolean isEstimationMode() {
return getNumLevels() > 1;
}

/**
 * This resets the current sketch back to zero entries.
 * It retains key parameters such as <i>k</i>, <i>m</i>, and
 * <i>SketchType (double or float)</i>.
 *
 * <p>Implementations set N to zero, restore minK to <i>k</i>, return to a single level,
 * mark level zero as unsorted, and set the min and max values to NaN.</p>
 */
public abstract void reset();

/**
* Returns serialized sketch in a compact byte array form.
* @return serialized sketch in a compact byte array form.
Expand Down Expand Up @@ -693,7 +723,7 @@ static WritableMemory memorySpaceMgmt(
newWmem = sketch.memReqSvr.request(oldWmem, requiredSketchBytes);
oldWmem.copyTo(0, newWmem, 0, startAdr); //copy preamble
}
else { //Expand in current memory
else { //Expand or contract in current memory
newWmem = oldWmem;
}

Expand Down
Loading

0 comments on commit 0e5dde1

Please sign in to comment.