Skip to content
Permalink
Browse files
Interim changes, all harmless at this point:
Mostly renamed variables for clarity, added code documentation.
  • Loading branch information
leerho committed Dec 16, 2021
1 parent 388f11d commit 3c0381e129f097d4cc3636799a3ccc36a6ca4a38
Show file tree
Hide file tree
Showing 22 changed files with 1,154 additions and 416 deletions.
@@ -306,20 +306,20 @@ static final long[] compactCache(final long[] srcCache, final int curCount,
/*
* The truth table for empty, curCount and theta when compacting is as follows:
* <pre>
* Num Theta CurCount Empty State Comments
* 0 1.0 0 T OK The Normal Empty State
* 1 1.0 0 F Internal This can result from an intersection of two exact, disjoint sets,
* or AnotB of two exact, identical sets. There is no probability
* distribution, so change to empty. Return {Th = 1.0, 0, T}.
* Num Theta CurCount Empty State Name, Comments
* 0 1.0 0 T OK EMPTY: The Normal Empty State
* 1 1.0 0 F Internal This can occur internally as the result of an intersection of two exact,
* disjoint sets, or AnotB of two exact, identical sets. There is no probability
* distribution, so this is converted internally to EMPTY {1.0, 0, T}.
* This is handled in SetOperation.createCompactSketch().
* 2 1.0 !0 T Error Empty=T and curCount !0 should never co-exist.
* 2 1.0 !0 T Error Empty=T and curCount !0 should never coexist.
* This is checked in all compacting operations.
* 3 1.0 !0 F OK This corresponds to a sketch in exact mode
* 3 1.0 !0 F OK EXACT: This corresponds to a sketch in exact mode
* 4 <1.0 0 T Internal This can be an initial UpdateSketch state if p < 1.0,
* so change theta to 1.0. Return {Th = 1.0, 0, T}.
* This is handled in UpdateSketch.compact() and toByteArray().
* 5 <1.0 0 F OK This can result from set operations
* 6 <1.0 !0 T Error Empty=T and curCount !0 should never co-exist.
* 6 <1.0 !0 T Error Empty=T and curCount !0 should never coexist.
* This is checked in all compacting operations.
* 7 <1.0 !0 F OK This corresponds to a sketch in estimation mode
* </pre>
@@ -509,7 +509,7 @@ private static <S extends Summary> DataArrays<S> getCopyOfResultArraysTuple(
final long[] hashArrA,
final S[] summaryArrA,
final Sketch<S> skB) {
final DataArrays<S> daB = new DataArrays<>();
final DataArrays<S> daR = new DataArrays<>();

//Rebuild/get hashtable of skB
final long[] hashTableB;
@@ -541,9 +541,9 @@ private static <S extends Summary> DataArrays<S> getCopyOfResultArraysTuple(
}
}
}
daB.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
daB.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
return daB;
daR.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
daR.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches);
return daR;
}

@SuppressWarnings("unchecked")
@@ -0,0 +1,238 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.tuple.arrayofdoubles;

import static java.lang.Math.min;
import static org.apache.datasketches.HashOperations.continueCondition;
import static org.apache.datasketches.HashOperations.convertToHashTable;
import static org.apache.datasketches.HashOperations.count;
import static org.apache.datasketches.HashOperations.hashSearch;
import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.Util.simpleLog2OfLong;

import java.util.Arrays;

import org.apache.datasketches.SetOperationCornerCases;
import org.apache.datasketches.SetOperationCornerCases.AnotbAction;
import org.apache.datasketches.SetOperationCornerCases.CornerCase;
//import org.apache.datasketches.tuple.AnotB.DataArrays;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.tuple.Util;


/**
 * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches.
 *
 * <p>An instance holds the result of the most recent call to
 * {@link #update(ArrayOfDoublesSketch, ArrayOfDoublesSketch)}; the result is then obtained with
 * one of the <i>getResult</i> methods:</p>
 *
 * <pre><code>
 * anotb.update(skA, skB);
 * ArrayOfDoublesCompactSketch csk = anotb.getResult();
 * </code></pre>
 *
 * @author Lee Rhodes
 */
public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB {
  private int numValues_;
  private short seedHash_;

  // State of the most recent result. Initialized to the empty state.
  private long thetaLong_ = Long.MAX_VALUE;
  private boolean empty_ = true;
  private long[] keys_;
  private double[] values_;
  private int count_;

  /**
   * Creates an instance whose result is initially empty.
   *
   * @param numValues number of double values associated with each key
   * @param seed the seed from which the seed hash is computed
   */
  ArrayOfDoublesAnotBImpl(final int numValues, final long seed) {
    numValues_ = numValues;
    seedHash_ = Util.computeSeedHash(seed);
  }

  /**
   * Computes A-AND-NOT-B of the two given sketches and stores the result in this object,
   * replacing any previous result. The numValues and seedHash of this object are taken from
   * <i>skA</i>.
   *
   * <p>All argument checks are performed before any state of this object is modified, so a
   * rejected call leaves the previous result intact.</p>
   *
   * @param skA the sketch A; must not be null
   * @param skB the sketch B; must not be null and must have the same numValues and seedHash
   * as <i>skA</i>
   * @throws SketchesArgumentException if either argument is null, or if the two sketches differ
   * in numValues or seedHash
   */
  @Override
  public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) {
    if (skA == null || skB == null) {
      throw new SketchesArgumentException("Neither argument may be null.");
    }
    // Validate with locals first; commit to the fields only after both checks pass.
    final int numValues = skA.getNumValues();
    final short seedHash = skA.getSeedHash();
    if (numValues != skB.getNumValues()) {
      throw new SketchesArgumentException("Inputs cannot have different numValues");
    }
    if (seedHash != skB.getSeedHash()) {
      throw new SketchesArgumentException("Inputs cannot have different seedHashes");
    }
    numValues_ = numValues;
    seedHash_ = seedHash;

    final long thetaLongA = skA.getThetaLong();
    final int countA = skA.getRetainedEntries();
    final boolean emptyA = skA.isEmpty();

    final long thetaLongB = skB.getThetaLong();
    final int countB = skB.getRetainedEntries();
    final boolean emptyB = skB.isEmpty();

    // Classify the (theta, count, empty) state of both inputs into a single action.
    final int id =
        SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
    final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
    final AnotbAction anotbAction = cCase.getAnotbAction();

    final long minThetaLong = min(thetaLongA, thetaLongB);

    switch (anotbAction) {
      case EMPTY_1_0_T: {
        reset();
        break;
      }
      case DEGEN_MIN_0_F: { //no entries, not empty, theta = min(thetaA, thetaB)
        keys_ = null;
        values_ = null;
        thetaLong_ = minThetaLong;
        empty_ = false;
        count_ = 0;
        break;
      }
      case DEGEN_THA_0_F: { //no entries, not empty, theta = thetaA
        keys_ = null;
        values_ = null;
        thetaLong_ = thetaLongA;
        empty_ = false;
        count_ = 0;
        break;
      }
      case TRIM_A: { //keep only the entries of A that are below min(thetaA, thetaB)
        final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA);
        final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_);
        keys_ = da.hashArr;
        values_ = da.valuesArr;
        thetaLong_ = minThetaLong;
        empty_ = skA.isEmpty();
        count_ = da.count;
        break;
      }
      case SKETCH_A: { //the result is the compact form of A
        final ArrayOfDoublesCompactSketch csk = skA.compact();
        keys_ = csk.getKeys();
        values_ = csk.getValuesAsOneDimension();
        thetaLong_ = csk.theta_;
        empty_ = csk.isEmpty();
        count_ = csk.getRetainedEntries();
        break;
      }
      case FULL_ANOTB: { //both A and B should have valid entries.
        final long[] keysA = skA.getKeys();
        final double[] valuesA = skA.getValuesAsOneDimension();
        final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB);
        count_ = daR.count;
        keys_ = (count_ == 0) ? null : daR.hashArr;
        values_ = (count_ == 0) ? null : daR.valuesArr;
        thetaLong_ = minThetaLong;
        empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0);
        break;
      }
      //default: not possible
    }
  }

  /**
   * Returns the stored result as a compact sketch built from the current keys, values, theta,
   * empty flag, numValues and seedHash of this object.
   *
   * @return the result of the most recent update as an ArrayOfDoublesCompactSketch
   */
  @Override
  public ArrayOfDoublesCompactSketch getResult() {
    return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_);
  }

  /**
   * Returns the stored result as a compact sketch constructed with the given destination memory.
   *
   * @param dstMem the destination memory passed to the resulting sketch
   * @return the result of the most recent update as an ArrayOfDoublesCompactSketch
   */
  @Override
  public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) {
    return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstMem);
  }

  /**
   * Collects the entries of A that are not rejected by
   * <i>continueCondition(minThetaLong, hash)</i> and whose hash is not found in B.
   *
   * @param minThetaLong the minimum of the thetaLong values of A and B
   * @param countA the number of leading entries of <i>hashArrA</i> to examine
   * @param hashArrA the hashes of A
   * @param valuesArrA the values of A as one flat array, numValues consecutive values per hash
   * @param skB the sketch B
   * @return the surviving hashes and values, each trimmed to the number of survivors
   */
  private static DataArrays getResultArrays(
      final long minThetaLong,
      final int countA,
      final long[] hashArrA,
      final double[] valuesArrA,
      final ArrayOfDoublesSketch skB) {
    final int numValues = skB.numValues_;

    //create hashtable of skB
    final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong,
        REBUILD_THRESHOLD);

    //build temporary arrays of skA, sized for the case where every entry of A survives
    final long[] tmpHashArrA = new long[countA];
    final double[] tmpValuesArrA = new double[countA * numValues];

    //search for non matches and build temp arrays
    final int lgHTBLen = simpleLog2OfLong(hashTableB.length);
    int nonMatches = 0;
    for (int i = 0; i < countA; i++) {
      final long hash = hashArrA[i];
      if (continueCondition(minThetaLong, hash)) { continue; }
      final int index = hashSearch(hashTableB, lgHTBLen, hash);
      if (index == -1) { //not found in B, so keep it
        tmpHashArrA[nonMatches] = hash;
        System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues);
        nonMatches++;
      }
    }
    return new DataArrays(
        Arrays.copyOf(tmpHashArrA, nonMatches),
        Arrays.copyOf(tmpValuesArrA, nonMatches * numValues),
        nonMatches);
  }

  /**
   * Simple holder for a hash array, its associated flat values array and an entry count.
   */
  private static class DataArrays {
    long[] hashArr;
    double[] valuesArr;
    int count;

    DataArrays(final long[] hashArr, final double[] valuesArr, final int count) {
      this.hashArr = hashArr;
      this.valuesArr = valuesArr;
      this.count = count;
    }
  }

  /**
   * Returns a copy of the given data arrays containing only the entries that are not rejected by
   * <i>continueCondition(thetaLong, hash)</i>.
   *
   * <p>The whole input hash array is scanned. The output is sized by
   * <i>count(hashArrIn, thetaLong)</i>, which is assumed to count exactly the entries that
   * <i>continueCondition</i> does not reject. Using that count as the bound of the input scan
   * would skip accepted entries that follow a rejected entry.</p>
   *
   * @param da the input hashes and values
   * @param thetaLong the theta used to filter the hashes
   * @param numValues the number of consecutive values associated with each hash
   * @return the retained hashes and values along with their count
   */
  private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) {
    final long[] hashArrIn = da.hashArr;
    final double[] valuesArrIn = da.valuesArr;
    final int countOut = count(hashArrIn, thetaLong);
    final long[] hashArrOut = new long[countOut];
    final double[] valuesArrOut = new double[countOut * numValues];
    int vaInIdx = 0;
    int haOutIdx = 0;
    int vaOutIdx = 0;
    for (int haInIdx = 0; haInIdx < hashArrIn.length; haInIdx++, vaInIdx += numValues) {
      final long hash = hashArrIn[haInIdx];
      if (continueCondition(thetaLong, hash)) { continue; }
      hashArrOut[haOutIdx] = hash;
      System.arraycopy(valuesArrIn, vaInIdx, valuesArrOut, vaOutIdx, numValues);
      haOutIdx++;
      vaOutIdx += numValues;
    }
    return new DataArrays(hashArrOut, valuesArrOut, countOut);
  }

  /**
   * Restores the stored result to the empty state.
   */
  private void reset() {
    empty_ = true;
    thetaLong_ = Long.MAX_VALUE;
    keys_ = null;
    values_ = null;
    count_ = 0;
  }
}

@@ -97,7 +97,7 @@ public void trim() {
}

/**
* @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
* @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to
* given value.
* @param numValues Number of double values to keep for each key
* @return maximum required storage bytes given nomEntries and numValues
@@ -93,7 +93,7 @@ public ArrayOfDoublesUnion buildUnion() {

/**
* Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder
* and the given memory.
* and the given destination memory.
* @param dstMem destination memory to be used by the sketch
* @return an instance of ArrayOfDoublesUnion
*/
@@ -131,7 +131,7 @@ public ArrayOfDoublesIntersection buildIntersection(final WritableMemory dstMem)
* @return an instance of ArrayOfDoublesAnotB
*/
public ArrayOfDoublesAnotB buildAnotB() {
return new HeapArrayOfDoublesAnotB(numValues_, seed_);
return new ArrayOfDoublesAnotBImpl(numValues_, seed_);
}

}
@@ -44,7 +44,7 @@ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES }
static final int SIZE_OF_KEY_BYTES = Long.BYTES;
static final int SIZE_OF_VALUE_BYTES = Double.BYTES;

// Common Layout of first 16 bytes:
// Common Layout of first 16 bytes and Empty AoDCompactSketch:
// Long || Start Byte Adr:
// Adr:
// || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
@@ -151,6 +151,18 @@ public double getLowerBound(final int numStdDev) {
return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
}

/**
* Returns true if this sketch's data structure is backed by Memory or WritableMemory.
* @return true if this sketch's data structure is backed by Memory or WritableMemory.
*/
public abstract boolean hasMemory();

/**
* Returns the Memory object if it exists, otherwise null.
* @return the Memory object if it exists, otherwise null.
*/
abstract Memory getMemory();

/**
* <a href="{@docRoot}/resources/dictionary.html#empty">See Empty</a>
* @return true if empty.

0 comments on commit 3c0381e

Please sign in to comment.