Skip to content
Permalink
Browse files
Interim 6
  • Loading branch information
leerho committed Mar 5, 2022
1 parent b662bb0 commit 185bb384931d11b03548dde9e268873fd284df91
Showing 10 changed files with 275 additions and 282 deletions.
@@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.kll;

/**
 * Base class that keeps the heap-resident bookkeeping state of a KLL sketch: the dynamic
 * minimum k, the count of input items, the number of levels, the levels offset array and
 * the level-zero-sorted flag. The item storage itself is not declared in this class.
 */
abstract class HeapKllSketch extends KllSketch {

  /*
   * Layout of the retained data (the items array is not a member of this class):
   *  - Level i occupies positions levels_[i] through levels_[i + 1] - 1, inclusive,
   *    which is why levels_ has to hold (numLevels_ + 1) indices.
   *  - The valid part of the items array is completely packed, with the exception of
   *    level 0, which is filled from the top down.
   *
   * Invariants:
   * 1) After a compaction, an update, or a merge, every level other than level zero is sorted.
   * 2) After a compaction, (sum of capacities) - (sum of items) >= 1,
   *    so there is room for at least 1 more item in level zero.
   * 3) Gaps exist only at the bottom, so levels_[0] == 0 means the sketch is
   *    exactly filled to capacity and must be compacted.
   * 4) Sum of weights of all retained items == N.
   * 5) curTotalCap = items array length = levels_[numLevels_].
   */

  /** Dynamic minK, used for error estimation after merging with a different k. */
  private int dyMinK_;

  /** Number of items input into this sketch. */
  private long n_;

  /** One-based number of current levels. */
  private int numLevels_;

  /** Index offsets into the items array. Size = numLevels + 1. */
  private int[] levels_;

  /** Whether level zero is currently flagged as sorted. */
  private boolean isLevelZeroSorted_;

  /**
   * Heap constructor. Starts with one level whose begin and end offsets are both k,
   * dyMinK equal to k, and level zero flagged as unsorted.
   * @param k configured size of sketch. Range [m, 2^16]
   * @param sketchType handed unchanged to the KllSketch constructor
   */
  HeapKllSketch(final int k, final SketchType sketchType) {
    super(k, sketchType);
    KllHelper.checkK(k);
    final int[] initialLevels = new int[2];
    initialLevels[0] = k;
    initialLevels[1] = k;
    levels_ = initialLevels;
    numLevels_ = 1;
    dyMinK_ = k;
    isLevelZeroSorted_ = false;
  }

  // dynamic minK

  /** @return the stored dynamic minK */
  @Override
  int getDyMinK() {
    return this.dyMinK_;
  }

  /** @param dyMinK the new dynamic minK to store */
  @Override
  void setDyMinK(final int dyMinK) {
    this.dyMinK_ = dyMinK;
  }

  // number of levels

  /** @return the stored number of levels */
  @Override
  int getNumLevels() {
    return this.numLevels_;
  }

  /** @param numLevels the new number of levels to store */
  @Override
  void setNumLevels(final int numLevels) {
    this.numLevels_ = numLevels;
  }

  /** Adds one to the stored number of levels. */
  @Override
  void incNumLevels() {
    this.numLevels_ += 1;
  }

  // levels array

  /** @return the internal levels array itself (not a copy) */
  @Override
  int[] getLevelsArray() {
    return this.levels_;
  }

  /**
   * @param index position within the levels array
   * @return the offset stored at that position
   */
  @Override
  int getLevelsArrayAt(final int index) {
    return this.levels_[index];
  }

  /** @param levels the array that becomes the internal levels array (stored by reference) */
  @Override
  void setLevelsArray(final int[] levels) {
    levels_ = levels;
  }

  /**
   * Overwrites one entry of the levels array.
   * @param index position within the levels array
   * @param value the offset to store at that position
   */
  @Override
  void setLevelsArrayAt(final int index, final int value) {
    levels_[index] = value;
  }

  /**
   * Increases one entry of the levels array.
   * @param index position within the levels array
   * @param plusEq amount added to the entry
   */
  @Override
  void setLevelsArrayAtPlusEq(final int index, final int plusEq) {
    levels_[index] = levels_[index] + plusEq;
  }

  /**
   * Decreases one entry of the levels array.
   * @param index position within the levels array
   * @param minusEq amount subtracted from the entry
   */
  @Override
  void setLevelsArrayAtMinusEq(final int index, final int minusEq) {
    levels_[index] = levels_[index] - minusEq;
  }

  // level zero sorted flag

  /** @return the stored level-zero-sorted flag */
  @Override
  boolean isLevelZeroSorted() {
    return this.isLevelZeroSorted_;
  }

  /** @param sorted the new value of the level-zero-sorted flag */
  @Override
  void setLevelZeroSorted(final boolean sorted) {
    isLevelZeroSorted_ = sorted;
  }

  // item count

  /** @param n the new count of input items to store */
  @Override
  void setN(final long n) {
    this.n_ = n;
  }

  /** Adds one to the stored count of input items. */
  @Override
  void incN() {
    this.n_ += 1L;
  }

  // public functions

  /** @return the stored count of items input into this sketch */
  @Override
  public long getN() {
    return this.n_;
  }

}
@@ -22,11 +22,9 @@
import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.Util.isOdd;
import static org.apache.datasketches.kll.KllHelper.getAllLevelStatsGivenN;
import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_DOUBLE;
import static org.apache.datasketches.kll.PreambleUtil.DATA_START_ADR_SINGLE_ITEM;
import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_K;
import static org.apache.datasketches.kll.PreambleUtil.DEFAULT_M;
import static org.apache.datasketches.kll.PreambleUtil.DOUBLES_SKETCH_BIT_MASK;
import static org.apache.datasketches.kll.PreambleUtil.DY_MIN_K_SHORT_ADR;
import static org.apache.datasketches.kll.PreambleUtil.EMPTY_BIT_MASK;
@@ -51,7 +49,6 @@
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
import org.apache.datasketches.kll.KllHelper.LevelStats;
import org.apache.datasketches.kll.PreambleUtil.MemoryCheck;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
@@ -60,13 +57,12 @@
* Please refer to the documentation in the package-info:<br>
* {@link org.apache.datasketches.kll}
*/
public class KllDoublesSketch extends BaseKllSketch {
public class KllDoublesSketch extends HeapKllSketch {

// Specific to the doubles sketch
private double[] items_; // the continuous array of double items
private double minValue_;
private double maxValue_;
private static final boolean IS_DOUBLE = true;

/**
* Heap constructor with the default <em>k = 200</em>, which has a rank error of about 1.65%.
@@ -82,26 +78,7 @@ public KllDoublesSketch() {
* @param k parameter that controls size of the sketch and accuracy of estimates
*/
public KllDoublesSketch(final int k) {
this(k, DEFAULT_M, true);
}

/**
* Used for testing only.
* @param k configured size of sketch. Range [m, 2^16]
* @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0.
*/
KllDoublesSketch(final int k, final boolean compatible) {
this(k, DEFAULT_M, compatible);
}

/**
* Heap constructor.
* @param k configured size of sketch. Range [m, 2^16]
* @param m minimum level size. Default is 8.
* @param compatible if true, compatible with quantiles sketch treatment of rank 0.0 and 1.0.
*/
private KllDoublesSketch(final int k, final int m, final boolean compatible) {
super(k, m, compatible);
super(k, SketchType.DOUBLE_SKETCH);
items_ = new double[k];
minValue_ = Double.NaN;
maxValue_ = Double.NaN;
@@ -113,9 +90,8 @@ private KllDoublesSketch(final int k, final int m, final boolean compatible) {
* @param memChk the MemoryCheck object
*/
private KllDoublesSketch(final Memory mem, final MemoryCheck memChk) {
super(memChk.k, memChk.m, true);
super(memChk.k, SketchType.DOUBLE_SKETCH);
setLevelZeroSorted(memChk.level0Sorted);

final int k = getK();
if (memChk.empty) {
setNumLevels(1);
@@ -227,46 +203,6 @@ public double getMinValue() {
return minValue_;
}

//Size related

/**
 * Returns an upper bound on the compact serialized size of a sketch for the given
 * parameter <em>k</em> and stream length. Useful when storage has to be allocated
 * before the sketch is built.
 * @param k parameter that controls size of the sketch and accuracy of estimates
 * @param n stream length
 * @return upper bound on the compact serialized size
 */
public static int getMaxSerializedSizeBytes(final int k, final long n) {
  // The level statistics are computed with the default M; only the compact byte count is needed.
  return getAllLevelStatsGivenN(k, DEFAULT_M, n, false, false, IS_DOUBLE).getCompactBytes();
}

/**
 * Returns the number of bytes this sketch would currently require when stored in
 * compact form.
 * @return the current compact number of bytes this sketch would require to store.
 */
public int getCurrentCompactSerializedSizeBytes() {
  final int levelCount = getNumLevels();
  final int retainedCount = getNumRetained();
  return KllHelper.getSerializedSizeBytes(levelCount, retainedCount, IS_DOUBLE, false);
}

/**
 * Returns the number of bytes this sketch would currently require when stored in
 * updatable form.
 * @return the current updatable number of bytes this sketch would require to store.
 */
public int getCurrentUpdatableSerializedSizeBytes() {
  final int levelCount = getNumLevels();
  final int retainedCount = getNumRetained();
  return KllHelper.getSerializedSizeBytes(levelCount, retainedCount, IS_DOUBLE, true);
}

/**
 * Returns the number of bytes this sketch would require to store. This simply
 * forwards to the compact-size query.
 * @return the number of bytes this sketch would require to store.
 * @deprecated use {@link #getCurrentCompactSerializedSizeBytes() }
 */
@Deprecated
public int getSerializedSizeBytes() {
  final int compactBytes = getCurrentCompactSerializedSizeBytes();
  return compactBytes;
}

/**
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
* given a set of splitPoints (values).
@@ -511,8 +447,7 @@ public byte[] toByteArray() {
@Override
public byte[] toUpdatableByteArray() {
final int k = getK();
final int itemCap = KllHelper.computeTotalItemCapacity(k, M, getNumLevels());
final int numBytes = KllHelper.getSerializedSizeBytes(getNumLevels(), itemCap, IS_DOUBLE, true);
final int numBytes = getCurrentUpdatableSerializedSizeBytes();
final byte[] bytes = new byte[numBytes];
final WritableMemory wmem = WritableMemory.writableWrap(bytes);
//load the preamble
@@ -562,7 +497,7 @@ public String toString(final boolean withLevels, final boolean withData) {
sb.append(" Level 0 Sorted : ").append(isLevelZeroSorted()).append(Util.LS);
sb.append(" Capacity Items : ").append(items_.length).append(Util.LS);
sb.append(" Retained Items : ").append(getNumRetained()).append(Util.LS);
sb.append(" Storage Bytes : ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS);
sb.append(" Compact Storage Bytes: ").append(getCurrentCompactSerializedSizeBytes()).append(Util.LS);
sb.append(" Min Value : ").append(minValue_).append(Util.LS);
sb.append(" Max Value : ").append(maxValue_).append(Util.LS);
sb.append("### End sketch summary").append(Util.LS);
@@ -604,7 +539,6 @@ public String toString(final boolean withLevels, final boolean withData) {
sb.append(Util.LS);
sb.append("### End sketch data").append(Util.LS);
}

return sb.toString();
}

@@ -744,7 +678,6 @@ private void compressWhileUpdating() {

if (oddPop) {
setLevelsArrayAt(level, getLevelsArrayAt(level + 1) - 1); // the current level now contains one item

items_[getLevelsArrayAt(level)] = items_[rawBeg]; // namely this leftover guy
} else {
setLevelsArrayAt(level, getLevelsArrayAt(level + 1)); // the current level is now empty
@@ -831,7 +764,8 @@ private void mergeHigherLevels(final KllDoublesSketch other, final long finalN)
final int theShift = freeSpaceAtBottom - outlevels[0];

if (getLevelsArray().length < finalNumLevels + 1) {
setLevelsArray(new int[finalNumLevels + 1]);

;
}

for (int lvl = 0; lvl < finalNumLevels + 1; lvl++) { // includes the "extra" index
@@ -867,7 +801,7 @@ private void populateWorkArrays(final KllDoublesSketch other, final double[] wor
}
}

// only for testing
// for testing

double[] getItems() {
return items_;

0 comments on commit 185bb38

Please sign in to comment.