Skip to content

Commit

Permalink
Hopefully the final fixes and code cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
leerho committed Dec 17, 2021
1 parent 3c0381e commit 968f37b
Show file tree
Hide file tree
Showing 14 changed files with 66 additions and 64 deletions.
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/theta/Union.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO
/**
* Perform a Union operation with <i>this</i> union and the given Memory image of any sketch of the
* Theta Family. The input image may be from earlier versions of the Theta Compact Sketch,
* called the SetSketch (circa 2012), which was prior to Open Source and are compact and ordered.
* called the SetSketch (circa 2014), which predates Open Source; these images are compact and ordered.
*
* <p>This method can be repeatedly called.
*
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/theta/UnionImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ final class UnionImpl extends Union {
* be meaningless. It is private for very good reasons.
*/
private final UpdateSketch gadget_;
private final short seedHash_; //eliminates having to compute the seedHash on every update.
private final short seedHash_; //eliminates having to compute the seedHash on every union.
private long unionThetaLong_; //when on-heap, this is the only copy
private boolean unionEmpty_; //when on-heap, this is the only copy

Expand Down
6 changes: 2 additions & 4 deletions src/main/java/org/apache/datasketches/tuple/Union.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ public Union(final SummarySetOperations<S> summarySetOps) {
}

/**
* Creates new Union instance with instructions on how to process two summaries that
* overlap.
* Creates new Union instance.
* @param nomEntries nominal entries (K). Forced to the nearest power of 2 greater than
* the given value.
* @param summarySetOps instance of SummarySetOperations
Expand Down Expand Up @@ -117,8 +116,7 @@ public void union(final Sketch<S> tupleSketch) {

/**
* Performs a stateful union of the internal set with the given thetaSketch by combining entries
* using the hashes from the theta sketch and summary values from the given summary and rules
* from the summarySetOps defined by the Union constructor.
* using the hashes from the theta sketch and summary values from the given summary.
* @param thetaSketch the given theta sketch input. If null or empty, it is ignored.
* @param summary the given proxy summary for the theta sketch, which doesn't have one. This may
* not be null.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch sk
final ArrayOfDoublesCompactSketch csk = skA.compact();
keys_ = csk.getKeys();
values_ = csk.getValuesAsOneDimension();
thetaLong_ = csk.theta_;
thetaLong_ = csk.thetaLong_;
empty_ = csk.isEmpty();
count_ = csk.getRetainedEntries();
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk

abstract void incrementCount();

abstract void setThetaLong(long theta);
abstract void setThetaLong(long thetaLong);

abstract int insertKey(long key);

Expand All @@ -91,7 +91,7 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk
@Override
public void trim() {
if (getRetainedEntries() > getNominalEntries()) {
setThetaLong(getNewTheta());
setThetaLong(getNewThetaLong());
rebuild();
}
}
Expand All @@ -113,7 +113,7 @@ static int getMaxBytes(final int nomEntries, final int numValues) {
// not sufficient by itself without keeping track of theta of another sketch
void merge(final long key, final double[] values) {
setNotEmpty();
if (key < theta_) {
if (key < thetaLong_) {
final int index = findOrInsertKey(key);
if (index < 0) {
incrementCount();
Expand All @@ -128,7 +128,7 @@ void merge(final long key, final double[] values) {
void rebuildIfNeeded() {
if (getRetainedEntries() <= rebuildThreshold_) { return; }
if (getCurrentCapacity() > getNominalEntries()) {
setThetaLong(getNewTheta());
setThetaLong(getNewThetaLong());
rebuild();
} else {
rebuild(getCurrentCapacity() * getResizeFactor().getValue());
Expand Down Expand Up @@ -160,7 +160,7 @@ void insertOrIgnore(final long key, final double[] values) {
+ " elements, but has " + values.length);
}
setNotEmpty();
if ((key == 0) || (key >= theta_)) { return; }
if ((key == 0) || (key >= thetaLong_)) { return; }
final int index = findOrInsertKey(key);
if (index < 0) {
incrementCount();
Expand All @@ -171,7 +171,7 @@ void insertOrIgnore(final long key, final double[] values) {
rebuildIfNeeded();
}

long getNewTheta() {
long getNewThetaLong() {
final long[] keys = new long[getRetainedEntries()];
int i = 0;
for (int j = 0; j < getCurrentCapacity(); j++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES }

final int numValues_;

long theta_;
long thetaLong_;
boolean isEmpty_ = true;

ArrayOfDoublesSketch(final int numValues) {
Expand Down Expand Up @@ -184,7 +184,7 @@ public int getNumValues() {
* @return true if the sketch is in estimation mode.
*/
public boolean isEstimationMode() {
return ((theta_ < Long.MAX_VALUE) && !isEmpty());
return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty());
}

/**
Expand Down Expand Up @@ -218,7 +218,7 @@ public double getTheta() {
* @return the value of theta as a long
*/
long getThetaLong() {
return isEmpty() ? Long.MAX_VALUE : theta_;
return isEmpty() ? Long.MAX_VALUE : thetaLong_;
}

abstract short getSeedHash();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.datasketches.tuple.arrayofdoubles;

import static java.lang.Math.min;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;

import org.apache.datasketches.Family;
Expand Down Expand Up @@ -128,10 +129,13 @@ public void union(final ArrayOfDoublesSketch tupleSketch) {
throw new SketchesArgumentException("Incompatible sketches: number of values mismatch "
+ gadget_.getNumValues() + " and " + tupleSketch.getNumValues());
}

if (tupleSketch.isEmpty()) { return; }
if (tupleSketch.getThetaLong() < unionThetaLong_) {
setUnionThetaLong(tupleSketch.getThetaLong());
}
else { gadget_.setNotEmpty(); }

setUnionThetaLong(min(min(unionThetaLong_, tupleSketch.getThetaLong()), gadget_.getThetaLong()));

if (tupleSketch.getRetainedEntries() == 0) { return; }
final ArrayOfDoublesSketchIterator it = tupleSketch.iterator();
while (it.next()) {
if (it.getKey() < unionThetaLong_) {
Expand All @@ -152,7 +156,7 @@ public void union(final ArrayOfDoublesSketch tupleSketch) {
public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) {
long unionThetaLong = unionThetaLong_;
if (gadget_.getRetainedEntries() > gadget_.getNominalEntries()) {
unionThetaLong = Math.min(unionThetaLong, gadget_.getNewTheta());
unionThetaLong = Math.min(unionThetaLong, gadget_.getNewThetaLong());
}
if (dstMem == null) {
return new HeapArrayOfDoublesCompactSketch(gadget_, unionThetaLong);
Expand Down Expand Up @@ -211,8 +215,8 @@ public static int getMaxBytes(final int nomEntries, final int numValues) {
return ArrayOfDoublesQuickSelectSketch.getMaxBytes(nomEntries, numValues) + PREAMBLE_SIZE_BYTES;
}

void setUnionThetaLong(final long theta) {
unionThetaLong_ = theta;
void setUnionThetaLong(final long thetaLong) {
unionThetaLong_ = thetaLong;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ short getSeedHash() {
}

/**
* Insert if key is less than theta and not a duplicate, otherwise ignore.
* Insert if key is less than thetaLong and not a duplicate, otherwise ignore.
* @param key the hash value of the input value
* @param values array of values to update the summary
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
* Converts the given UpdatableArrayOfDoublesSketch to this compact form
* trimming if necessary according to given thetaLong
* @param sketch the given UpdatableArrayOfDoublesSketch
* @param theta new value of theta
* @param thetaLong new value of thetaLong
* @param dstMem the given destination Memory.
*/
DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch,
final long theta, final WritableMemory dstMem) {
final long thetaLong, final WritableMemory dstMem) {
super(sketch.getNumValues());
checkIfEnoughMemory(dstMem, sketch.getRetainedEntries(), sketch.getNumValues());
mem_ = dstMem;
Expand All @@ -76,15 +76,15 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
));
dstMem.putByte(NUM_VALUES_BYTE, (byte) numValues_);
dstMem.putShort(SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed()));
theta_ = Math.min(sketch.getThetaLong(), theta);
dstMem.putLong(THETA_LONG, theta_);
thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong);
dstMem.putLong(THETA_LONG, thetaLong_);
if (count > 0) {
int keyOffset = ENTRIES_START;
int valuesOffset = keyOffset + (SIZE_OF_KEY_BYTES * sketch.getRetainedEntries());
final ArrayOfDoublesSketchIterator it = sketch.iterator();
int actualCount = 0;
while (it.next()) {
if (it.getKey() < theta_) {
if (it.getKey() < thetaLong_) {
dstMem.putLong(keyOffset, it.getKey());
dstMem.putDoubleArray(valuesOffset, it.getValues(), 0, numValues_);
keyOffset += SIZE_OF_KEY_BYTES;
Expand All @@ -99,7 +99,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
/*
* Creates an instance from components
*/
DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long theta,
DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong,
final boolean isEmpty, final int numValues, final short seedHash, final WritableMemory dstMem) {
super(numValues);
checkIfEnoughMemory(dstMem, values.length, numValues);
Expand All @@ -119,8 +119,8 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
));
dstMem.putByte(NUM_VALUES_BYTE, (byte) numValues_);
dstMem.putShort(SEED_HASH_SHORT, seedHash);
theta_ = theta;
dstMem.putLong(THETA_LONG, theta_);
thetaLong_ = thetaLong;
dstMem.putLong(THETA_LONG, thetaLong_);
if (count > 0) {
dstMem.putInt(RETAINED_ENTRIES_INT, count);
dstMem.putLongArray(ENTRIES_START, keys, 0, count);
Expand Down Expand Up @@ -152,7 +152,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
}

isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
theta_ = mem_.getLong(THETA_LONG);
thetaLong_ = mem_.getLong(THETA_LONG);
}

/**
Expand All @@ -179,14 +179,14 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc
}
Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed));
isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
theta_ = mem_.getLong(THETA_LONG);
thetaLong_ = mem_.getLong(THETA_LONG);
}

@Override
public ArrayOfDoublesCompactSketch compact(final WritableMemory dstMem) {
if (dstMem == null) {
return new
HeapArrayOfDoublesCompactSketch(getKeys(), getValuesAsOneDimension(), theta_, isEmpty_, numValues_,
HeapArrayOfDoublesCompactSketch(getKeys(), getValuesAsOneDimension(), thetaLong_, isEmpty_, numValues_,
getSeedHash());
} else {
mem_.copyTo(0, dstMem, 0, mem_.getCapacity());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke
));
mem_.putByte(NUM_VALUES_BYTE, (byte) numValues);
mem_.putShort(SEED_HASH_SHORT, Util.computeSeedHash(seed));
theta_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
mem_.putLong(THETA_LONG, theta_);
thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
mem_.putLong(THETA_LONG, thetaLong_);
mem_.putByte(LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries));
mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity));
mem_.putByte(LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor);
Expand Down Expand Up @@ -121,7 +121,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke
valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity());
// to do: make parent take care of its own parts
lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity());
theta_ = mem_.getLong(THETA_LONG);
thetaLong_ = mem_.getLong(THETA_LONG);
isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
setRebuildThreshold();
}
Expand Down Expand Up @@ -251,8 +251,8 @@ public void reset() {
final int lgResizeFactor = mem_.getByte(LG_RESIZE_FACTOR_BYTE);
final float samplingProbability = mem_.getFloat(SAMPLING_P_FLOAT);
final int startingCapacity = Util.getStartingCapacity(getNominalEntries(), lgResizeFactor);
theta_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
mem_.putLong(THETA_LONG, theta_);
thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
mem_.putLong(THETA_LONG, thetaLong_);
mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity));
mem_.putInt(RETAINED_ENTRIES_INT, 0);
keysOffset_ = ENTRIES_START;
Expand Down Expand Up @@ -282,9 +282,9 @@ protected int getCurrentCapacity() {
}

@Override
protected void setThetaLong(final long theta) {
theta_ = theta;
mem_.putLong(THETA_LONG, theta_);
protected void setThetaLong(final long thetaLong) {
thetaLong_ = thetaLong;
mem_.putLong(THETA_LONG, thetaLong_);
}

@Override
Expand Down Expand Up @@ -335,7 +335,7 @@ protected void rebuild(final int newCapacity) {
valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * newCapacity);
lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity);
for (int i = 0; i < keys.length; i++) {
if ((keys[i] != 0) && (keys[i] < theta_)) {
if ((keys[i] != 0) && (keys[i] < thetaLong_)) {
insert(keys[i], Arrays.copyOfRange(values, i * numValues, (i + 1) * numValues));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion {
}

@Override
void setUnionThetaLong(final long theta) {
super.setUnionThetaLong(theta);
mem_.putLong(THETA_LONG, theta);
void setUnionThetaLong(final long thetaLong) {
super.setUnionThetaLong(thetaLong);
mem_.putLong(THETA_LONG, thetaLong);
}

static ArrayOfDoublesUnion wrapUnion(final WritableMemory mem, final long seed, final boolean isWritable) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch

/**
* Converts the given UpdatableArrayOfDoublesSketch to this compact form
* trimming if necessary according to given theta
* trimming if necessary according to given thetaLong
* @param sketch the given UpdatableArrayOfDoublesSketch
* @param theta new value of theta
* @param thetaLong new value of thetaLong
*/
HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long theta) {
HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long thetaLong) {
super(sketch.getNumValues());
isEmpty_ = sketch.isEmpty();
theta_ = Math.min(sketch.getThetaLong(), theta);
thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong);
seedHash_ = Util.computeSeedHash(sketch.getSeed());
final int count = sketch.getRetainedEntries();
if (count > 0) {
Expand All @@ -67,7 +67,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch
int i = 0;
while (it.next()) {
final long key = it.getKey();
if (key < theta_) {
if (key < thetaLong_) {
keys_[i] = key;
System.arraycopy(it.getValues(), 0, values_, i * numValues_, numValues_);
i++;
Expand All @@ -89,12 +89,12 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch
/*
* Creates an instance from components
*/
HeapArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long theta,
HeapArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong,
final boolean isEmpty, final int numValues, final short seedHash) {
super(numValues);
keys_ = keys;
values_ = values;
theta_ = theta;
thetaLong_ = thetaLong;
isEmpty_ = isEmpty;
seedHash_ = seedHash;
}
Expand Down Expand Up @@ -131,7 +131,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch
}
Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed));
isEmpty_ = (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0;
theta_ = mem.getLong(THETA_LONG);
thetaLong_ = mem.getLong(THETA_LONG);
final boolean hasEntries =
(mem.getByte(FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0;
if (hasEntries) {
Expand All @@ -147,7 +147,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch
public ArrayOfDoublesCompactSketch compact(final WritableMemory dstMem) {
if (dstMem == null) {
return new
HeapArrayOfDoublesCompactSketch(keys_.clone(), values_.clone(), theta_, isEmpty_, numValues_, seedHash_);
HeapArrayOfDoublesCompactSketch(keys_.clone(), values_.clone(), thetaLong_, isEmpty_, numValues_, seedHash_);
} else {
final byte[] byteArr = this.toByteArray();
dstMem.putByteArray(0, byteArr, 0, byteArr.length);
Expand Down Expand Up @@ -183,7 +183,7 @@ public byte[] toByteArray() {
));
mem.putByte(NUM_VALUES_BYTE, (byte) numValues_);
mem.putShort(SEED_HASH_SHORT, seedHash_);
mem.putLong(THETA_LONG, theta_);
mem.putLong(THETA_LONG, thetaLong_);
if (count > 0) {
mem.putInt(RETAINED_ENTRIES_INT, count);
mem.putLongArray(ENTRIES_START, keys_, 0, count);
Expand Down
Loading

0 comments on commit 968f37b

Please sign in to comment.