Skip to content
Permalink
Browse files
Interim changes toward AoD set operations compatibility
  • Loading branch information
leerho committed Dec 13, 2021
1 parent c7b99ef commit 388f11d013a7ff871ed347ea14747bcfb8b4cbfc
Show file tree
Hide file tree
Showing 19 changed files with 722 additions and 161 deletions.
@@ -474,6 +474,8 @@ public static <S extends Summary> CompactSketch<S> aNotB(
//restricted

/**
 * Plain holder for a {@code long[]} of hashes and an {@code S[]} of summaries.
 * Both fields are package-private and are left unassigned (null) by the constructor.
 *
 * <p>NOTE(review): presumably {@code hashArr[i]} and {@code summaryArr[i]} describe the same
 * entry; this class does not enforce that, so confirm against the code that fills it.
 *
 * @param <S> the Summary type stored in {@code summaryArr}
 */
static class DataArrays<S extends Summary> {
// Explicit no-arg constructor; performs no initialization.
DataArrays() {}

// Hash values; null until assigned by the caller.
long[] hashArr;
// Summaries; null until assigned by the caller.
S[] summaryArr;
}
@@ -31,63 +31,63 @@

@SuppressWarnings("unchecked")
class HashTables<S extends Summary> {
long[] hashTable_ = null;
S[] summaryTable_ = null;
int lgTableSize_ = 0;
int count_ = 0;
long[] hashTable = null;
S[] summaryTable = null;
int lgTableSize = 0;
int numKeys = 0;

HashTables() { }

//must have valid entries
/**
 * Loads this instance from a tuple sketch. The retained-entry count of the sketch is recorded,
 * the log2 table size is derived from that count via {@code getLgTableSize}, a new hash table
 * of {@code 1 << lgTableSize} slots is allocated, and every hash produced by the sketch
 * iterator is inserted with {@code hashInsertOnly}. A copy of the entry's summary is stored in
 * the summary table at the index returned by the insert.
 *
 * <p>NOTE(review): this view appears to interleave the pre-rename statements (fields with a
 * trailing underscore: {@code count_}, {@code lgTableSize_}, {@code hashTable_},
 * {@code summaryTable_}) with their renamed replacements ({@code numKeys},
 * {@code lgTableSize}, {@code hashTable}, {@code summaryTable}); each adjacent pair is the
 * old and new form of one statement.
 *
 * <p>NOTE(review): unlike {@code fromArrays}, this method does not reset the summary table to
 * null first, so a non-null table left from earlier use would be reused at its old size.
 * Confirm callers only invoke this on a fresh or cleared instance.
 *
 * @param sketch the source tuple sketch
 */
void fromSketch(final Sketch<S> sketch) {
count_ = sketch.getRetainedEntries();
lgTableSize_ = getLgTableSize(count_);
numKeys = sketch.getRetainedEntries();
lgTableSize = getLgTableSize(numKeys);

hashTable_ = new long[1 << lgTableSize_];
hashTable = new long[1 << lgTableSize];
final SketchIterator<S> it = sketch.iterator();
while (it.next()) {
final long hash = it.getHash();
final int index = hashInsertOnly(hashTable_, lgTableSize_, hash);
final int index = hashInsertOnly(hashTable, lgTableSize, hash);
// Store a copy so the table does not share the summary object held by the input sketch.
final S mySummary = (S)it.getSummary().copy();
// The summary table is allocated lazily, typed by the runtime class of the first summary.
if (summaryTable_ == null) {
summaryTable_ = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize_);
if (summaryTable == null) {
summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize);
}
summaryTable_[index] = mySummary;
summaryTable[index] = mySummary;
}
}

//must have valid entries
/**
 * Loads this instance from a theta sketch plus a single summary. The count is taken from
 * {@code sketch.getRetainedEntries(true)}, the log2 table size is derived from that count via
 * {@code getLgTableSize}, a new hash table of {@code 1 << lgTableSize} slots is allocated, and
 * every hash produced by the sketch's hash iterator is inserted with {@code hashInsertOnly}.
 * Each inserted hash gets its own copy of {@code summary}, stored in the summary table at the
 * index returned by the insert.
 *
 * <p>NOTE(review): this view appears to interleave the pre-rename statements (fields with a
 * trailing underscore) with their renamed replacements; each adjacent pair is the old and new
 * form of one statement.
 *
 * <p>NOTE(review): the summary table is not reset to null here, so a non-null table left from
 * earlier use would be reused at its old size. Confirm callers only invoke this on a fresh or
 * cleared instance.
 *
 * @param sketch the source theta sketch
 * @param summary the summary that is copied once per retained hash
 */
void fromSketch(final org.apache.datasketches.theta.Sketch sketch, final S summary) {
count_ = sketch.getRetainedEntries(true);
lgTableSize_ = getLgTableSize(count_);
numKeys = sketch.getRetainedEntries(true);
lgTableSize = getLgTableSize(numKeys);

hashTable_ = new long[1 << lgTableSize_];
hashTable = new long[1 << lgTableSize];
final org.apache.datasketches.theta.HashIterator it = sketch.iterator();
while (it.next()) {
final long hash = it.get();
final int index = hashInsertOnly(hashTable_, lgTableSize_, hash);
final int index = hashInsertOnly(hashTable, lgTableSize, hash);
// A fresh copy per hash, so table entries do not share one summary object.
final S mySummary = (S)summary.copy();
// The summary table is allocated lazily, typed by the runtime class of the copied summary.
if (summaryTable_ == null) {
summaryTable_ = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize_);
if (summaryTable == null) {
summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize);
}
summaryTable_[index] = mySummary;
summaryTable[index] = mySummary;
}
}

/**
 * Rebuilds this instance from the first {@code count} elements of the given hash and summary
 * arrays. The count is recorded, the log2 table size is derived from it via
 * {@code getLgTableSize}, the summary table is reset to null, a new hash table of
 * {@code 1 << lgTableSize} slots is allocated, and each hash is inserted with
 * {@code hashInsertOnly}. The matching summary is stored by reference (no copy is made) at
 * the index returned by the insert.
 *
 * <p>If {@code count} is zero the loop does not run and the summary table stays null.
 *
 * <p>NOTE(review): this view appears to interleave the pre-rename statements (fields with a
 * trailing underscore) with their renamed replacements; each adjacent pair is the old and new
 * form of one statement.
 *
 * @param hashArr source hashes; elements {@code [0, count)} are read
 * @param summaryArr source summaries; elements {@code [0, count)} are read
 * @param count the number of leading elements to load
 */
private void fromArrays(final long[] hashArr, final S[] summaryArr, final int count) {
count_ = count;
lgTableSize_ = getLgTableSize(count);
numKeys = count;
lgTableSize = getLgTableSize(count);

summaryTable_ = null;
hashTable_ = new long[1 << lgTableSize_];
summaryTable = null;
hashTable = new long[1 << lgTableSize];
for (int i = 0; i < count; i++) {
final long hash = hashArr[i];
final int index = hashInsertOnly(hashTable_, lgTableSize_, hash);
final int index = hashInsertOnly(hashTable, lgTableSize, hash);
// mySummary is only used to obtain the runtime class for the lazy allocation below.
final S mySummary = summaryArr[i];
if (summaryTable_ == null) {
summaryTable_ = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize_);
if (summaryTable == null) {
summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize);
}
// Stored by reference: the table shares the summary objects with summaryArr.
summaryTable_[index] = summaryArr[i];
summaryTable[index] = summaryArr[i];
}
}

@@ -98,21 +98,21 @@ HashTables<S> getIntersectHashTables(
final SummarySetOperations<S> summarySetOps) {

//Match nextSketch data with local instance data, filtering by theta
final int maxMatchSize = min(count_, nextTupleSketch.getRetainedEntries());
final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries());
final long[] matchHashArr = new long[maxMatchSize];
final S[] matchSummariesArr = Util.newSummaryArray(summaryTable_, maxMatchSize);
final S[] matchSummariesArr = Util.newSummaryArray(summaryTable, maxMatchSize);
int matchCount = 0;
final SketchIterator<S> it = nextTupleSketch.iterator();

while (it.next()) {
final long hash = it.getHash();
if (hash >= thetaLong) { continue; }
final int index = hashSearch(hashTable_, lgTableSize_, hash);
final int index = hashSearch(hashTable, lgTableSize, hash);
if (index < 0) { continue; }
//Copy each matching hash, and the intersection of its two summaries,
// sequentially into the local matchHashArr and matchSummariesArr
matchHashArr[matchCount] = hash;
matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable_[index], it.getSummary());
matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], it.getSummary());
matchCount++;
}
final HashTables<S> resultHT = new HashTables<>();
@@ -130,7 +130,7 @@ HashTables<S> getIntersectHashTables(
final Class<S> summaryType = (Class<S>) summary.getClass();

//Match nextSketch data with local instance data, filtering by theta
final int maxMatchSize = min(count_, nextThetaSketch.getRetainedEntries());
final int maxMatchSize = min(numKeys, nextThetaSketch.getRetainedEntries());
final long[] matchHashArr = new long[maxMatchSize];
final S[] matchSummariesArr = (S[]) Array.newInstance(summaryType, maxMatchSize);
int matchCount = 0;
@@ -140,12 +140,12 @@ HashTables<S> getIntersectHashTables(
while (it.next()) {
final long hash = it.get();
if (hash >= thetaLong) { continue; }
final int index = hashSearch(hashTable_, lgTableSize_, hash);
final int index = hashSearch(hashTable, lgTableSize, hash);
if (index < 0) { continue; }
//Copy each matching hash, and the intersection of its stored summary with the given summary,
// sequentially into the local matchHashArr and matchSummariesArr
matchHashArr[matchCount] = hash;
matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable_[index], summary);
matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], summary);
matchCount++;
}
final HashTables<S> resultHT = new HashTables<>();
@@ -154,10 +154,10 @@ HashTables<S> getIntersectHashTables(
}

/**
 * Resets this instance to its initial state: both table references are set to null and both
 * the log2 table size and the key count are set to zero.
 *
 * <p>NOTE(review): this view appears to interleave the pre-rename statements (fields with a
 * trailing underscore) with their renamed replacements; the first four assignments are the
 * old form and the last four are the new form.
 */
void clear() {
hashTable_ = null;
summaryTable_ = null;
lgTableSize_ = 0;
count_ = 0;
hashTable = null;
summaryTable = null;
lgTableSize = 0;
numKeys = 0;
}

static int getLgTableSize(final int count) {
@@ -105,13 +105,14 @@ public CompactSketch<S> intersect(
*/
public void intersect(final Sketch<S> tupleSketch) {
if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); }

final boolean firstCall = firstCall_;
firstCall_ = false;

// input sketch could be first or next call

final boolean emptyIn = tupleSketch.isEmpty();
if (empty_ || emptyIn) { //empty rule
//Because of the definition of null above and the Empty Rule (which is OR), empty_ must be true.
//Whatever the current internal state, we make our local empty.
resetToEmpty();
return;
@@ -133,9 +134,9 @@ public void intersect(final Sketch<S> tupleSketch) {

//Next Call
else {
if (hashTables_.count_ == 0) { return; }
if (hashTables_.numKeys == 0) { return; }
//process intersect with current hashTables
hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLongIn, summarySetOps_);
hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, summarySetOps_);
}
}

@@ -156,14 +157,14 @@ public void intersect(final org.apache.datasketches.theta.Sketch thetaSketch, fi

final boolean emptyIn = thetaSketch.isEmpty();
if (empty_ || emptyIn) { //empty rule
//Because of the definition of null above and the Empty Rule (which is OR), empty_ must be true.
//Whatever the current internal state, we make our local empty.
resetToEmpty();
return;
}

final long thetaLongIn = thetaSketch.getThetaLong();
thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule

final int countIn = thetaSketch.getRetainedEntries();
if (countIn == 0) {
hashTables_.clear();
@@ -179,7 +180,7 @@ public void intersect(final org.apache.datasketches.theta.Sketch thetaSketch, fi

//Next Call
else {
if (hashTables_.count_ == 0) { return; }
if (hashTables_.numKeys == 0) { return; }
hashTables_ = hashTables_.getIntersectHashTables(thetaSketch, thetaLongIn, summarySetOps_, summary);
}
}
@@ -193,23 +194,23 @@ public CompactSketch<S> getResult() {
throw new SketchesStateException(
"getResult() with no intervening intersections is not a legal result.");
}
final int countIn = hashTables_.count_;
final int countIn = hashTables_.numKeys;
if (countIn == 0) {
return new CompactSketch<>(null, null, thetaLong_, empty_);
}

final int tableSize = hashTables_.hashTable_.length;
final int tableSize = hashTables_.hashTable.length;

final long[] hashArr = new long[countIn];
final S[] summaryArr = Util.newSummaryArray(hashTables_.summaryTable_, countIn);
final S[] summaryArr = Util.newSummaryArray(hashTables_.summaryTable, countIn);

//compact the arrays
int cnt = 0;
for (int i = 0; i < tableSize; i++) {
final long hash = hashTables_.hashTable_[i];
final long hash = hashTables_.hashTable[i];
if (hash == 0 || hash > thetaLong_) { continue; }
hashArr[cnt] = hash;
summaryArr[cnt] = (S) hashTables_.summaryTable_[i].copy();
summaryArr[cnt] = (S) hashTables_.summaryTable[i].copy();
cnt++;
}
assert cnt == countIn;

0 comments on commit 388f11d

Please sign in to comment.