Skip to content
Permalink
Browse files
This adds tests, see SketchTest2.
Fixed a couple of bugs.
  • Loading branch information
leerho committed Dec 24, 2021
1 parent 3aebc54 commit bd638adb2b9e7b9d04c325236e8397d63caa6c31
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 10 deletions.
@@ -20,6 +20,7 @@
package org.apache.datasketches.theta;

import static org.apache.datasketches.Family.idToFamily;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
@@ -50,7 +51,7 @@
* @author Lee Rhodes
*/
public abstract class CompactSketch extends Sketch {

private static final short defaultSeedHash = Util.computeSeedHash(DEFAULT_UPDATE_SEED);

/**
* Heapify takes a CompactSketch image in Memory and instantiates an on-heap CompactSketch.
@@ -77,11 +78,11 @@ public static CompactSketch heapify(final Memory srcMem) {
return CompactOperations.memoryToCompact(srcMem, srcOrdered, null);
}
//not SerVer 3, assume compact stored form
final short srcSeedHash = (short) extractSeedHash(srcMem);
if (serVer == 1) {
return ForwardCompatibility.heapify1to3(srcMem, srcSeedHash);
return ForwardCompatibility.heapify1to3(srcMem, defaultSeedHash);
}
if (serVer == 2) {
final short srcSeedHash = (short) extractSeedHash(srcMem);
return ForwardCompatibility.heapify2to3(srcMem, srcSeedHash);
}
throw new SketchesArgumentException("Unknown Serialization Version: " + serVer);
@@ -64,7 +64,7 @@ static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash
final long thetaLong = extractThetaLong(srcMem);
final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);

if (empty || (memCap <= 24)) { //return empty
if (empty || (memCap <= 24)) { //return empty //TODO is this right?
return EmptyCompactSketch.getInstance();
}

@@ -157,7 +157,7 @@ public static CompactSketch heapifyCompactSketch(final Memory srcMem) {
* @return {@link CompactSketch CompactSketch}
*/
public static CompactSketch heapifyCompactSketch(final Memory srcMem, final long seed) {
return CompactSketch.heapify(srcMem);
return CompactSketch.heapify(srcMem, seed);
}

/**
@@ -166,7 +166,7 @@ public static CompactSketch heapifyCompactSketch(final Memory srcMem, final long
* @return {@link CompactSketch CompactSketch}
*/
public static CompactSketch wrapCompactSketch(final Memory srcMem) {
return CompactSketch.heapify(srcMem);
return CompactSketch.wrap(srcMem);
}

/**
@@ -176,7 +176,7 @@ public static CompactSketch wrapCompactSketch(final Memory srcMem) {
* @return {@link CompactSketch CompactSketch}
*/
public static CompactSketch wrapCompactSketch(final Memory srcMem, final long seed) {
return CompactSketch.heapify(srcMem);
return CompactSketch.wrap(srcMem, seed);
}

/**
@@ -41,7 +41,7 @@ public class BackwardConversions {
* It also had code for the even earlier CountUniqueSketch (for backward compatibility),
* which was the bucket sketch based on Giroire.
*
* <p><b>Serialization:</b></p>
* <p><b>Serialization Version 1:</b></p>
* <pre>
* Long || Start Byte Adr:
* Adr:
@@ -63,7 +63,7 @@ public class BackwardConversions {
* <li><i>MD_LONGS</i> (Metadata Longs, now Preamble Longs) was always 3.</li>
* <li><i>SerVer</i> is always 1.</li>
* <li>The <i>SkType</i> had three values: 1,2,3 for Alpha, QuickSelect, and SetSketch,
* repectively.</li>
* respectively.</li>
* <li>Bytes <i>lgNom</i> and <i>lgArr</i> were only used by the QS and Alpha sketches.</li>
* <li>V1 <i>LgResize</i> (2 bits) was only relevant to the Alpha and QS sketches.</li>
* <li>The flags byte is in byte 6 (moved to 5 in V2).</li>
@@ -113,7 +113,7 @@ public static Memory convertSerVer3toSerVer1(final CompactSketch skV3) {
* the Alpha sketch, and the early HLL sketch. It also had an early adaptor for Pig.
*
*
* <p><b>Serialization:</b></p>
* <p><b>Serialization Version 2:</b></p>
* <pre>
* Long || Start Byte Adr:
* Adr:
@@ -0,0 +1,165 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.theta;

import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import static org.testng.Assert.assertEquals;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.Util;
import org.testng.annotations.Test;

/**
 * Checks {@code Sketches.heapifyCompactSketch}, called with and without an explicit seed,
 * against compact sketch images in serialization versions 3, 2 and 1.
 *
 * <p>Every test runs the same sequence: build an update sketch with a chosen seed, compact it,
 * produce the SerVer 3 / 2 / 1 images, heapify each image, and compare the estimate and the
 * seed hash of the result with the expected values.</p>
 */
public class SketchTest2 {
  private static final short defaultSeedHash = Util.computeSeedHash(DEFAULT_UPDATE_SEED);

  @Test
  public void checkHeapifyCompactSketchAssumedDefaultSeed() {
    runHeapifyChecks(DEFAULT_UPDATE_SEED, false);
  }

  @Test
  public void checkHeapifyCompactSketchAssumedDifferentSeed() {
    runHeapifyChecks(128L, false);
  }

  @Test
  public void checkHeapifyCompactSketchGivenDefaultSeed() {
    runHeapifyChecks(DEFAULT_UPDATE_SEED, true);
  }

  @Test
  public void checkHeapifyCompactSketchGivenDifferentSeed() {
    runHeapifyChecks(128L, true);
  }

  @Test
  public void printlnTest() {
    println("PRINTING: " + getClass().getName());
  }

  /**
   * Builds a sketch with the given seed, then heapifies its SerVer 3, 2 and 1 images and
   * verifies the estimate and seed hash of each result.
   *
   * @param seed the update seed used to build the source sketch and its SerVer 2 image
   * @param seedGiven if true the seed is passed to {@code heapifyCompactSketch}; if false the
   * single-argument overload is used
   */
  private static void runHeapifyChecks(final long seed, final boolean seedGiven) {
    final int nomEntries = 64;
    final short expectedSeedHash = Util.computeSeedHash(seed);

    final UpdateSketch source =
        UpdateSketch.builder().setNominalEntries(nomEntries).setSeed(seed).build();
    for (int item = 0; item < nomEntries; item++) {
      source.update(item);
    }
    final CompactSketch compact = source.compact();

    //SV3 test
    final Memory sv3Mem = Memory.wrap(compact.toByteArray());
    assertMatches(heapify(sv3Mem, seed, seedGiven), source, expectedSeedHash);

    //SV2 test
    final Memory sv2Mem = BackwardConversions.convertSerVer3toSerVer2(compact, seed);
    assertMatches(heapify(sv2Mem, seed, seedGiven), source, expectedSeedHash);

    //SV1 test
    //Without a supplied seed, the SerVer 1 result is expected to carry the default seed hash.
    final short sv1SeedHash = seedGiven ? expectedSeedHash : defaultSeedHash;
    final Memory sv1Mem = BackwardConversions.convertSerVer3toSerVer1(compact);
    assertMatches(heapify(sv1Mem, seed, seedGiven), source, sv1SeedHash);
  }

  /**
   * Heapifies the image through the overload selected by {@code seedGiven}.
   *
   * @param mem the serialized compact sketch image
   * @param seed the seed to pass when {@code seedGiven} is true
   * @param seedGiven selects the two-argument overload when true
   * @return the heapified compact sketch
   */
  private static CompactSketch heapify(final Memory mem, final long seed,
      final boolean seedGiven) {
    if (seedGiven) {
      return Sketches.heapifyCompactSketch(mem, seed);
    }
    return Sketches.heapifyCompactSketch(mem);
  }

  /**
   * Asserts that the heapified result has the source sketch's estimate and the expected
   * seed hash.
   *
   * @param result the heapified compact sketch
   * @param source the update sketch the image was derived from
   * @param seedHash the seed hash the result must report
   */
  private static void assertMatches(final CompactSketch result, final UpdateSketch source,
      final short seedHash) {
    assertEquals(result.getEstimate(), source.getEstimate());
    assertEquals(result.getSeedHash(), seedHash);
  }

  /**
   * Print hook for this test class; output is currently disabled.
   *
   * @param s value to print
   */
  static void println(String s) {
    //System.out.println(s); //disable here
  }
}

0 comments on commit bd638ad

Please sign in to comment.