Skip to content
Permalink
Browse files
Merge branch 'master' into FixMikhailsBug
  • Loading branch information
leerho committed Oct 23, 2021
2 parents 08b590a + c076377 commit c801e9783777e3f32e06b436fd3fbd5a47870c32
Show file tree
Hide file tree
Showing 25 changed files with 404 additions and 84 deletions.
@@ -100,7 +100,7 @@ public enum Family {
* The Tuple family of sketches is a large family of sketches that are extensions of the
* Theta Sketch Framework.
*/
TUPLE(9, "TUPLE", 1, 1),
TUPLE(9, "TUPLE", 1, 3),

/**
* The Frequency family of sketches. (Not part of TSF.)
@@ -61,7 +61,11 @@ public FdtSketch(final int lgK) {
/**
* Used by deserialization: constructs this sketch from a Memory image by handing
* the image to the superclass constructor.
* @param mem the image of a FdtSketch
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
FdtSketch(final Memory mem) {
super(mem);
}
@@ -41,11 +41,15 @@
*/
public class CompactSketch<S extends Summary> extends Sketch<S> {
private static final byte serialVersionWithSummaryClassNameUID = 1;
private static final byte serialVersionUID = 2;
private static final byte serialVersionUIDLegacy = 2;
private static final byte serialVersionUID = 3;
private static final short defaultSeedHash = (short) 37836; // for compatibility with C++
private long[] hashArr_;
private S[] summaryArr_;

private enum Flags { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }

private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED }

/**
* Create a CompactSketch from correct components
@@ -80,49 +84,83 @@ private enum Flags { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
}
SerializerDeserializer
.validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch);
final byte flags = mem.getByte(offset++);
final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
throw new SketchesArgumentException("Byte order mismatch");
}
empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
if (isThetaIncluded) {
thetaLong_ = mem.getLong(offset);
offset += Long.BYTES;
} else {
thetaLong_ = Long.MAX_VALUE;
}
final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0;
if (hasEntries) {
int classNameLength = 0;
if (version == serialVersionWithSummaryClassNameUID) { //Obsolete?
classNameLength = mem.getByte(offset++);
if (version <= serialVersionUIDLegacy) { // legacy serial format
final byte flags = mem.getByte(offset++);
final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0;
if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
throw new SketchesArgumentException("Byte order mismatch");
}
empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0;
final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0;
if (isThetaIncluded) {
thetaLong_ = mem.getLong(offset);
offset += Long.BYTES;
} else {
thetaLong_ = Long.MAX_VALUE;
}
final boolean hasEntries = (flags & 1 << FlagsLegacy.HAS_ENTRIES.ordinal()) > 0;
if (hasEntries) {
int classNameLength = 0;
if (version == serialVersionWithSummaryClassNameUID) {
classNameLength = mem.getByte(offset++);
}
final int count = mem.getInt(offset);
offset += Integer.BYTES;
if (version == serialVersionWithSummaryClassNameUID) {
offset += classNameLength;
}
hashArr_ = new long[count];
for (int i = 0; i < count; i++) {
hashArr_[i] = mem.getLong(offset);
offset += Long.BYTES;
}
for (int i = 0; i < count; i++) {
offset += readSummary(mem, offset, i, count, deserializer);
}
}
final int count = mem.getInt(offset);
offset += Integer.BYTES;
if (version == serialVersionWithSummaryClassNameUID) {
offset += classNameLength;
} else { // current serial format
offset++; // unused
final byte flags = mem.getByte(offset++);
offset += 2; // unused
empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
thetaLong_ = Long.MAX_VALUE;
int count = 0;
if (!empty_) {
if (preambleLongs == 1) {
count = 1;
} else {
count = mem.getInt(offset);
offset += Integer.BYTES;
offset += 4; // unused
if (preambleLongs > 2) {
thetaLong_ = mem.getLong(offset);
offset += Long.BYTES;
}
}
}
hashArr_ = new long[count];
for (int i = 0; i < count; i++) {
hashArr_[i] = mem.getLong(offset);
offset += Long.BYTES;
}
for (int i = 0; i < count; i++) {
final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
final DeserializeResult<S> result = deserializer.heapifySummary(memRegion);
final S summary = result.getObject();
final Class<S> summaryType = (Class<S>) result.getObject().getClass();
offset += result.getSize();
if (summaryArr_ == null) {
summaryArr_ = (S[]) Array.newInstance(summaryType, count);
}
summaryArr_[i] = summary;
offset += readSummary(mem, offset, i, count, deserializer);
}
}
}

/**
 * Heapifies a single summary from the given Memory and stores it at index {@code i}
 * of the summary array.
 *
 * <p>The deserializer is handed a region of {@code mem} that starts at {@code offset} and
 * extends to the end of {@code mem}. If the summary array has not been allocated yet, it is
 * created here with length {@code count}, using the runtime class of the deserialized
 * summary as its component type.</p>
 *
 * @param mem the Memory to read from
 * @param offset the offset within mem at which the summary starts
 * @param i the index of the summary array slot to fill
 * @param count the length to use if the summary array has to be allocated
 * @param deserializer the SummaryDeserializer used to heapify the summary
 * @return the size reported by the deserialization result; callers add it to their offset
 */
@SuppressWarnings({"unchecked"})
private int readSummary(final Memory mem, final int offset, final int i, final int count,
    final SummaryDeserializer<S> deserializer) {
  final DeserializeResult<S> parsed =
      deserializer.heapifySummary(mem.region(offset, mem.getCapacity() - offset));
  final S value = parsed.getObject();
  final Class<S> componentType = (Class<S>) parsed.getObject().getClass();
  // The array is created lazily because its component type is only known once
  // a summary has actually been deserialized.
  if (summaryArr_ == null) {
    summaryArr_ = (S[]) Array.newInstance(componentType, count);
  }
  summaryArr_[i] = value;
  return parsed.getSize();
}

@Override
public CompactSketch<S> compact() {
return this;
@@ -150,65 +188,56 @@ public int getCountLessThanThetaLong(final long thetaLong) {
// Long || Start Byte Adr:
// Adr:
// || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// 0 || | Flags | SkType | FamID | SerVer | Preamble_Longs |
// 0 || seed hash | Flags | unused | SkType | FamID | SerVer | Preamble_Longs |
@SuppressWarnings("null")
@Override
public byte[] toByteArray() {
int summariesBytesLength = 0;
final int count = getRetainedEntries();
final boolean isSingleItem = count == 1 && !isEstimationMode();
final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 3 : 2;

int summariesSizeBytes = 0;
byte[][] summariesBytes = null;
final int count = getRetainedEntries();
if (count > 0) {
summariesBytes = new byte[count][];
for (int i = 0; i < count; i++) {
summariesBytes[i] = summaryArr_[i].toByteArray();
summariesBytesLength += summariesBytes[i].length;
summariesSizeBytes += summariesBytes[i].length;
}
}

int sizeBytes =
Byte.BYTES // preamble longs
+ Byte.BYTES // serial version
+ Byte.BYTES // family id
+ Byte.BYTES // sketch type
+ Byte.BYTES; // flags
final boolean isThetaIncluded = thetaLong_ < Long.MAX_VALUE;
if (isThetaIncluded) {
sizeBytes += Long.BYTES; // theta
}
if (count > 0) {
sizeBytes +=
+ Integer.BYTES // count
+ Long.BYTES * count + summariesBytesLength;
}
final int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes;
final byte[] bytes = new byte[sizeBytes];
int offset = 0;
bytes[offset++] = PREAMBLE_LONGS;
bytes[offset++] = (byte) preambleLongs;
bytes[offset++] = serialVersionUID;
bytes[offset++] = (byte) Family.TUPLE.getID();
bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
offset++; // unused
bytes[offset++] = (byte) (
(isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
| (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
| (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
| (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0)
(1 << Flags.IS_COMPACT.ordinal())
| (1 << Flags.IS_READ_ONLY.ordinal())
| (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0)
);
if (isThetaIncluded) {
ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
offset += Long.BYTES;
}
if (count > 0) {
ByteArrayUtil.putIntLE(bytes, offset, getRetainedEntries());
offset += Integer.BYTES;
for (int i = 0; i < count; i++) {
ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
offset += Long.BYTES;
}
for (int i = 0; i < count; i++) {
System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
offset += summariesBytes[i].length;
ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash);
offset += Short.BYTES;
if (!isEmpty()) {
if (!isSingleItem) {
ByteArrayUtil.putIntLE(bytes, offset, count);
offset += Integer.BYTES;
offset += 4; // unused
if (isEstimationMode()) {
ByteArrayUtil.putLongLE(bytes, offset, thetaLong_);
offset += Long.BYTES;
}
}
}
for (int i = 0; i < count; i++) {
ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]);
offset += Long.BYTES;
System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length);
offset += summariesBytes[i].length;
}
return bytes;
}

@@ -140,7 +140,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES,
* @param mem Memory object with serialized QuickSelectSketch
* @param deserializer the SummaryDeserializer
* @param summaryFactory the SummaryFactory
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
QuickSelectSketch(
final Memory mem,
final SummaryDeserializer<S> deserializer,
@@ -314,6 +318,14 @@ public CompactSketch<S> compact() {
// Adr:
// || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// 0 || RF | lgArr | lgNom | Flags | SkType | FamID | SerVer | Preamble_Longs |
/**
* This serializes an UpdatableSketch (QuickSelectSketch).
* @return serialized representation of an UpdatableSketch (QuickSelectSketch).
* @deprecated As of 3.0.0, serializing an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Serializing a CompactSketch is not deprecated.
*/
@Deprecated
@SuppressWarnings("null")
@Override
public byte[] toByteArray() {
@@ -45,7 +45,7 @@ public static enum SketchType { QuickSelectSketch, CompactSketch, ArrayOfDoubles
public static void validateFamily(final byte familyId, final byte preambleLongs) {
final Family family = Family.idToFamily(familyId);
if (family.equals(Family.TUPLE)) {
if (preambleLongs != Family.TUPLE.getMinPreLongs()) {
if (preambleLongs < Family.TUPLE.getMinPreLongs() || preambleLongs > Family.TUPLE.getMaxPreLongs()) {
throw new SketchesArgumentException(
"Possible corruption: Invalid PreambleLongs value for family TUPLE: " + preambleLongs);
}
@@ -166,7 +166,11 @@ public double getTheta() {
}

/**
* This is to serialize an instance to a byte array.
* This is to serialize a sketch instance to a byte array.
*
* <p>As of 3.0.0, serializing an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Serializing a CompactSketch is not deprecated.</p>
* @return serialized representation of the sketch
*/
public abstract byte[] toByteArray();
@@ -35,12 +35,17 @@ public static <S extends Summary> Sketch<S> createEmptySketch() {
}

/**
* Instantiate Sketch from a given Memory
* Instantiate a Sketch from a given Memory.
*
* <p>As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.</p>
* @param <S> Type of Summary
* @param mem Memory object representing a Sketch
* @param deserializer instance of SummaryDeserializer
* @return Sketch created from its Memory representation
*/
@SuppressWarnings("deprecation")
public static <S extends Summary> Sketch<S> heapifySketch(final Memory mem,
final SummaryDeserializer<S> deserializer) {
final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem);
@@ -58,10 +63,13 @@ public static <S extends Summary> Sketch<S> heapifySketch(final Memory mem,
* @param deserializer instance of SummaryDeserializer
* @param summaryFactory instance of SummaryFactory
* @return Sketch created from its Memory representation
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
public static <U, S extends
UpdatableSummary<U>> UpdatableSketch<U, S> heapifyUpdatableSketch(final Memory mem,
final SummaryDeserializer<S> deserializer, final SummaryFactory<S> summaryFactory) {
@Deprecated
public static <U, S extends UpdatableSummary<U>> UpdatableSketch<U, S> heapifyUpdatableSketch(
final Memory mem, final SummaryDeserializer<S> deserializer, final SummaryFactory<S> summaryFactory) {
return new UpdatableSketch<U, S>(mem, deserializer, summaryFactory);
}

@@ -65,7 +65,11 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor,
* @param srcMem Memory object with data of a serialized UpdatableSketch
* @param deserializer instance of SummaryDeserializer
* @param summaryFactory instance of SummaryFactory
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
public UpdatableSketch(final Memory srcMem, final SummaryDeserializer<S> deserializer,
final SummaryFactory<S> summaryFactory) {
super(srcMem, deserializer, summaryFactory);
@@ -62,7 +62,11 @@ public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplin
* usually with data.
* @param mem the given Memory
* @param mode The DoubleSummary mode to be used
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) {
// Delegate to the superclass constructor, supplying a DoubleSummaryDeserializer and a
// DoubleSummaryFactory configured with the given mode.
super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
}
@@ -62,7 +62,11 @@ public IntegerSketch(final int lgK, final int lgResizeFactor, final float sampli
* usually with data.
* @param mem the given Memory
* @param mode The IntegerSummary mode to be used
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
public IntegerSketch(final Memory mem, final IntegerSummary.Mode mode) {
// Delegate to the superclass constructor, supplying an IntegerSummaryDeserializer and an
// IntegerSummaryFactory configured with the given mode.
super(mem, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode));
}
@@ -64,7 +64,11 @@ public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p)
* Constructs this sketch from a Memory image, which must be from an ArrayOfStringsSketch, and
* usually with data.
* @param mem the given Memory
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
public ArrayOfStringsSketch(final Memory mem) {
// Delegate to the superclass constructor, supplying an ArrayOfStringsSummaryDeserializer
// and an ArrayOfStringsSummaryFactory.
super(mem, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory());
}

0 comments on commit c801e97

Please sign in to comment.