Skip to content
Permalink
Browse files
Add two methods to Union:
getCurrentBytes() and getMaxUnionBytes()

Requested by Druid.
  • Loading branch information
leerho committed Feb 16, 2022
1 parent 0f0f075 commit 1a247da9ce8646c8b11ecaab67a32e3fb1ebca32
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 8 deletions.
@@ -26,6 +26,7 @@
import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
import static org.apache.datasketches.Util.TAB;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import static org.apache.datasketches.Util.checkNomLongs;

import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
@@ -66,8 +67,8 @@ public SetOperationBuilder() {

/**
* Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a
* Set Operation can be less than max K, but never greater.
* The minimum value is 16 and the maximum value is 67,108,864, which is 2^26.
* Set Operation can be less than max K, but never greater.
* The minimum value is 16 and the maximum value is 67,108,864, which is 2^26.
* @param nomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
* This will become the ceiling power of 2 if it is not a power of 2.
* @return this SetOperationBuilder
@@ -81,6 +82,20 @@ public SetOperationBuilder setNominalEntries(final int nomEntries) {
return this;
}

/**
* Alternative method of setting the Nominal Entries for this set operation from the log_base2 value.
* The minimum value is 4 and the maximum value is 26.
* Be aware that set operations as large as this maximum value may not have been
* thoroughly characterized for performance.
*
* @param lgNomEntries the log_base2 Nominal Entries.
* @return this SetOperationBuilder
* @throws IllegalArgumentException if lgNomEntries is negative or greater than 30, i.e. when
* {@code 1 << lgNomEntries} is not a positive int.
*/
public SetOperationBuilder setLogNominalEntries(final int lgNomEntries) {
// Java uses only the low 5 bits of an int shift distance, so a value such as 36 or -28
// would silently behave like 4 and slip past the range check below. Reject it up front.
if ((lgNomEntries < 0) || (lgNomEntries > 30)) {
throw new IllegalArgumentException(
"lgNomEntries must be in the range [4, 26]: " + lgNomEntries);
}
// The remaining range validation is delegated to checkNomLongs, as before.
bLgNomLongs = checkNomLongs(1 << lgNomEntries);
return this;
}

/**
* Returns Log-base 2 Nominal Entries
* @return Log-base 2 Nominal Entries
@@ -301,7 +301,7 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) {
/**
* Returns the maximum number of storage bytes required for an UpdateSketch with the given
* number of nominal entries (power of 2).
* @param nomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entres</a>
* @param nomEntries <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
* This will become the ceiling power of 2 if it is not a power of 2.
* @return the maximum number of storage bytes required for an UpdateSketch with the given
* nomEntries
@@ -31,11 +31,25 @@
*/
public abstract class Union extends SetOperation {


/**
* Returns the number of storage bytes required for this union in its current state.
* This method is abstract; concrete subclasses supply the implementation.
*
* @return the number of storage bytes required for this union in its current state.
*/
public abstract int getCurrentBytes();

/**
* Returns the family of this set operation, which for a union is always {@link Family#UNION}.
*
* @return {@link Family#UNION}
*/
@Override
public Family getFamily() {
return Family.UNION;
}

/**
* Returns the maximum required storage bytes for this union.
* This method is abstract; concrete subclasses supply the implementation.
*
* @return the maximum required storage bytes for this union.
*/
public abstract int getMaxUnionBytes();

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
* This does not disturb the underlying data structure of the union.
@@ -216,9 +216,14 @@ static UnionImpl wrapInstance(final WritableMemory srcMem, final long expectedSe
}

@Override
public boolean isSameResource(final Memory that) {
return gadget_ instanceof DirectQuickSelectSketchR
? gadget_.getMemory().isSameResource(that) : false;
public int getCurrentBytes() {
// The union keeps no separate size accounting here; it reports whatever the gadget reports.
return gadget_.getCurrentBytes();
}

/**
* Computes the maximum storage bytes for this union from the gadget's log-base2 nominal longs
* plus the maximum preamble size of the UNION family.
*
* @return {@code (16 << lgNomLongs) + (Family.UNION.getMaxPreLongs() << 3)}
*/
@Override
public int getMaxUnionBytes() {
final int lgNomLongs = gadget_.getLgNomLongs();
// 16 * 2^lgNomLongs bytes for the data portion.
final int dataBytes = 16 << lgNomLongs;
// Preamble count times 8; presumably converts preamble longs to bytes.
final int preambleBytes = Family.UNION.getMaxPreLongs() << 3;
return dataBytes + preambleBytes;
}

@Override
@@ -256,6 +261,12 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds
minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut);
}

/**
* Reports whether the given Memory is the same resource as the Memory held by this union's gadget.
*
* @param that the Memory to compare against
* @return false when the gadget is not a DirectQuickSelectSketchR; otherwise the result of
* asking the gadget's Memory whether it is the same resource as {@code that}.
*/
@Override
public boolean isSameResource(final Memory that) {
// Only a DirectQuickSelectSketchR gadget is asked for its Memory; anything else answers false.
if (!(gadget_ instanceof DirectQuickSelectSketchR)) {
return false;
}
return gadget_.getMemory().isSameResource(that);
}

@Override
public void reset() {
gadget_.reset();
@@ -113,7 +113,7 @@ public UpdateSketchBuilder setNominalEntries(final int nomEntries) {
* This value is also used for building a shared concurrent sketch.
* The minimum value is 4 and the maximum value is 26.
* Be aware that sketches as large as this maximum value may not have been
* thoroughly tested or characterized for performance.
* thoroughly characterized for performance.
*
* @param lgNomEntries the Log Nominal Entries. Also for the concurrent shared sketch
* @return this UpdateSketchBuilder
@@ -36,6 +36,14 @@
@SuppressWarnings("javadoc")
public class UnionImplTest {

@Test
public void checkGetCurrentAndMaxBytes() {
// Build a union with lgK = 10 and verify both byte-size accessors on the freshly built union.
final int lgNomEntries = 10;
final int expectedCurrentBytes = 288;
final int expectedMaxBytes = 16416; // (16 << 10) + 32
final Union union =
Sketches.setOperationBuilder().setLogNominalEntries(lgNomEntries).buildUnion();
assertEquals(union.getCurrentBytes(), expectedCurrentBytes);
assertEquals(union.getMaxUnionBytes(), expectedMaxBytes);
}

@Test
public void checkUpdateWithSketch() {
final int k = 16;
@@ -270,6 +278,9 @@ public void checkDirectUnionSingleItem() {
//println(csk.toString(true, true, 1, true));
}




@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
@@ -279,7 +290,7 @@ public void printlnTest() {
* @param o value to print
*/
static void println(final Object o) {
// println(Object) prints "null" for a null argument, whereas o.toString() would throw a
// NullPointerException; output for non-null arguments is unchanged.
System.out.println(o); //disable here
}

}

0 comments on commit 1a247da

Please sign in to comment.