diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index 6dfc76bb1..c50e67757 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -262,8 +262,8 @@ UpdateReturnState hashUpdate(final long hash) { * @return the hash table threshold */ static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { - //FindBugs may complain if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, but this allows us - // to tune these constants for different sketches. + //FindBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, + //but this allows us to tune these constants for different sketches. final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : REBUILD_THRESHOLD; return (int) Math.floor(fraction * (1 << lgArrLongs)); } diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 7cf48d950..dcb5ccdfa 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -369,7 +369,8 @@ private void processVer3(final Memory skMem) { // OR the above and the SI bit is set if (SingleItemSketch.testPre0SeedHash(skMem.getLong(0), seedHash_)) { final long hash = skMem.getLong(8); - update(hash); //a hash < 1 will be rejected later + //backdoor update, hash function is bypassed. 
A hash < 1 will be rejected later + gadget_.hashUpdate(hash); return; } return; //empty diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java index 9e4d11f14..5e777c952 100644 --- a/src/main/java/org/apache/datasketches/tuple/Sketch.java +++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java @@ -116,10 +116,14 @@ public double getTheta() { * @return a SketchIterator */ public SketchIterator iterator() { - return new SketchIterator(keys_, summaries_); + return new SketchIterator<>(keys_, summaries_); } - long getThetaLong() { + /** + * Returns Theta as a long + * @return Theta as a long + */ + public long getThetaLong() { return theta_; } diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java index c66b42553..ac5ce9333 100644 --- a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java @@ -26,7 +26,8 @@ public interface UpdatableSummary extends Summary { /** - * This is to provide a method of updating summaries + * This is to provide a method of updating summaries. + * This should not be called by the user. * @param value update value */ public void update(U value); diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java new file mode 100644 index 000000000..57cc8e6d8 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple.adouble; + +import org.apache.datasketches.ResizeFactor; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.UpdatableSketch; + +/** + * @author Lee Rhodes + */ +public class DoubleSketch extends UpdatableSketch { + + /** + * Constructs this sketch with given lgK. + * @param lgK Log_base2 of Nominal Entries. + * See Nominal Entries + * @param mode The DoubleSummary mode to be used + */ + public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) { + super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new DoubleSummaryFactory(mode)); + } + + /** + * Constructs this sketch from a Memory image, which must be from a DoubleSketch, and + * usually with data. 
+ * @param mem the given Memory + * @param mode The DoubleSummary mode to be used + */ + public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) { + super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); + } + + @Override + public void update(final String key, final Double value) { + super.update(key, value); + } + + @Override + public void update(final long key, final Double value) { + super.update(key, value); + } +} diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index 017f3a681..ae6b7c09b 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -31,6 +31,8 @@ * Supported modes: Sum, Min, Max, AlwaysOne, Increment. The default mode is Sum. */ public final class DoubleSummary implements UpdatableSummary { + private double value_; + private final Mode mode_; /** * The aggregation modes for this Summary @@ -48,6 +50,7 @@ public static enum Mode { *

New retained value = min(previous retained value, incoming value)

*/ Min, + /** * The aggregation mode is the maximum function. *

New retained value = max(previous retained value, incoming value)

@@ -58,27 +61,21 @@ public static enum Mode { * The aggregation mode is always one. *

New retained value = 1.0

*/ - AlwaysOne, - - /** - * The aggregation mode is increment by one. - *

New retained value = previous retained value + 1.0

- */ - Increment + AlwaysOne } - private double value_; - private final Mode mode_; - /** - * Creates an instance of DoubleSummary with zero starting value and default mode (Sum) + * Creates an instance of DoubleSummary with a given starting value and mode + * @param value starting value + * @param mode update mode */ - public DoubleSummary() { - this(0, Mode.Sum); + private DoubleSummary(final double value, final Mode mode) { + value_ = value; + mode_ = mode; } /** - * Creates an instance of DoubleSummary with a starting value and a given mode (Sum) + * Creates an instance of DoubleSummary with a given mode. * @param mode update mode */ public DoubleSummary(final Mode mode) { @@ -96,21 +93,9 @@ public DoubleSummary(final Mode mode) { case AlwaysOne: value_ = 1.0; break; - case Increment: - value_ = 0; } } - /** - * Creates an instance of DoubleSummary with a given starting value and mode - * @param value starting value - * @param mode update mode - */ - public DoubleSummary(final double value, final Mode mode) { - value_ = value; - mode_ = mode; - } - @Override public void update(final Double value) { switch (mode_) { @@ -126,8 +111,6 @@ public void update(final Double value) { case AlwaysOne: value_ = 1.0; break; - case Increment: - value_++; } } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java index 1856d1519..8dd7ea702 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java @@ -31,6 +31,7 @@ public final class DoubleSummaryFactory implements SummaryFactory /** * Creates an instance of DoubleSummaryFactory with default mode */ + @Deprecated public DoubleSummaryFactory() { summaryMode_ = DoubleSummary.Mode.Sum; } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java 
b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java index 93e3c35bf..d859883f5 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java @@ -30,9 +30,12 @@ public final class DoubleSummarySetOperations implements SummarySetOperations { + + /** + * Constructs this sketch with given lgK. + * @param lgK Log_base2 of Nominal Entries. + * See Nominal Entries + * @param mode The IntegerSummary mode to be used + */ + public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) { + super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new IntegerSummaryFactory(mode)); + } + + /** + * Constructs this sketch from a Memory image, which must be from an IntegerSketch, and + * usually with data. + * @param mem the given Memory + * @param mode The IntegerSummary mode to be used + */ + public IntegerSketch(final Memory mem, final IntegerSummary.Mode mode) { + super(mem, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode)); + } + + @Override + public void update(final String key, final Integer value) { + super.update(key, value); + } + + @Override + public void update(final long key, final Integer value) { + super.update(key, value); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java new file mode 100644 index 000000000..a0e3e2982 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple.aninteger; + +import org.apache.datasketches.ByteArrayUtil; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.DeserializeResult; +import org.apache.datasketches.tuple.UpdatableSummary; + +/** + * Summary for generic tuple sketches of type Integer. + * This summary keeps an Integer value. On update a predefined operation is performed depending on + * the mode. + * Supported modes: Sum, Min, Max, AlwaysOne. + */ +public class IntegerSummary implements UpdatableSummary { + private int value_; + private final Mode mode_; + + /** + * The aggregation modes for this Summary + */ + public static enum Mode { + + /** + * The aggregation mode is the summation function. + *

New retained value = previous retained value + incoming value

+ */ + Sum, + + /** + * The aggregation mode is the minimum function. + *

New retained value = min(previous retained value, incoming value)

+ */ + Min, + + /** + * The aggregation mode is the maximum function. + *

New retained value = max(previous retained value, incoming value)

+ */ + Max, + + /** + * The aggregation mode is always one. + *

New retained value = 1

+ */ + AlwaysOne + } + + /** + * Creates an instance of IntegerSummary with a given starting value and mode. + * @param value starting value + * @param mode update mode + */ + private IntegerSummary(final int value, final Mode mode) { + value_ = value; + mode_ = mode; + } + + /** + * Creates an instance of IntegerSummary with a given mode. This should not be called by a user. + * @param mode update mode + */ + public IntegerSummary(final Mode mode) { + mode_ = mode; + switch (mode) { + case Sum: + value_ = 0; + break; + case Min: + value_ = Integer.MAX_VALUE; + break; + case Max: + value_ = Integer.MIN_VALUE; + break; + case AlwaysOne: + value_ = 1; + break; + } + } + + /** + * Updates an instance of IntegerSummary with the given value. + * This should not be called by the user. + * @param value The given value. + */ + @Override + public void update(final Integer value) { + switch (mode_) { + case Sum: + value_ += value; + break; + case Min: + if (value < value_) { value_ = value; } + break; + case Max: + if (value > value_) { value_ = value; } + break; + case AlwaysOne: + value_ = 1; + break; + } + } + + @Override + public IntegerSummary copy() { + return new IntegerSummary(value_, mode_); + } + + /** + * @return current value of the IntegerSummary + */ + public int getValue() { + return value_; + } + + private static final int SERIALIZED_SIZE_BYTES = 5; + private static final int VALUE_INDEX = 0; + private static final int MODE_BYTE_INDEX = 4; + + @Override + public byte[] toByteArray() { + final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; + ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); + bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal(); + return bytes; + } + + /** + * Creates an instance of the IntegerSummary given a serialized representation + * @param mem Memory object with serialized IntegerSummary + * @return DeserializeResult object, which contains an IntegerSummary object and number of bytes + * read from the Memory + */ + public static 
DeserializeResult fromMemory(final Memory mem) { + return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX), + Mode.values()[mem.getByte(MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java new file mode 100644 index 000000000..0a4529e07 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple.aninteger; + +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.DeserializeResult; +import org.apache.datasketches.tuple.SummaryDeserializer; + +/** + * @author Lee Rhodes + */ +public class IntegerSummaryDeserializer implements SummaryDeserializer { + + @Override + public DeserializeResult heapifySummary(final Memory mem) { + return IntegerSummary.fromMemory(mem); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java new file mode 100644 index 000000000..0ebb729d5 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple.aninteger; + +import org.apache.datasketches.tuple.SummaryFactory; + +/** + * Factory for IntegerSummary. 
+ * + * @author Lee Rhodes + */ +public class IntegerSummaryFactory implements SummaryFactory { + + private final IntegerSummary.Mode summaryMode_; + + /** + * Creates an instance of IntegerSummaryFactory with a given mode + * @param summaryMode summary mode + */ + public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) { + summaryMode_ = summaryMode; + } + + @Override + public IntegerSummary newSummary() { + return new IntegerSummary(summaryMode_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java new file mode 100644 index 000000000..2cfa118ba --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple.aninteger; + +import static org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode; + +import org.apache.datasketches.tuple.SummarySetOperations; + +/** + * Methods for defining how unions and intersections of two objects of type IntegerSummary + * are performed. 
These methods are not called directly by a user. + * + * @author Lee Rhodes + */ +public class IntegerSummarySetOperations implements SummarySetOperations { + + private final Mode unionSummaryMode_; + + /** + * Intersection is not well defined or even meaningful between numeric values. + * Nevertheless, this can be defined to be a different type of aggregation for intersecting keys. + */ + private final Mode intersectionSummaryMode_; + + /** + * Creates a new instance with two modes + * @param unionSummaryMode for unions + * @param intersectionSummaryMode for intersections + */ + public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { + unionSummaryMode_ = unionSummaryMode; + intersectionSummaryMode_ = intersectionSummaryMode; + } + + @Override + public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) { + final IntegerSummary result = new IntegerSummary(unionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } + + @Override + public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) { + final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java b/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java new file mode 100644 index 000000000..9ca51c6b2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * @author Lee Rhodes + */ + +package org.apache.datasketches.tuple.aninteger; diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java index 48395d043..7c8667b02 100644 --- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java @@ -26,13 +26,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.testng.annotations.Test; - +import org.apache.datasketches.SketchesArgumentException; +import org.apache.datasketches.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableDirectHandle; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.SketchesArgumentException; -import org.apache.datasketches.Util; +import org.testng.annotations.Test; @SuppressWarnings("javadoc") public class UnionImplTest { @@ -224,16 +223,52 @@ public void checkCompactFlagCorruption() { println(""+csk1.getEstimate()); } + @Test + public void checkDirectUnionSingleItem() { + int num = 2; + UpdateSketch[] skArr = new UpdateSketch[num]; + for (int i = 0; i < num; i++) { + skArr[i] = new UpdateSketchBuilder().build(); + } + for (int i = 0; i < (num/2); i++) { + skArr[i].update(i); + skArr[i + (num/2)].update(i); + 
skArr[i].update(i + num); + } + + Union union = new SetOperationBuilder().buildUnion(); + for (int i = 0; i < num; i++) { + union.update(skArr[i]); + } + + CompactSketch csk = union.getResult(); + assertEquals(csk.getEstimate(), 2.0); + //println(csk.toString(true, true, 1, true)); + + Memory[] memArr = new Memory[num]; + for (int i = 0; i < num; i++) { + memArr[i] = Memory.wrap(skArr[i].compact().toByteArray()); + } + union = new SetOperationBuilder().buildUnion(); + for (int i = 0; i < num; i++) { + union.update(memArr[i]); + } + + csk = union.getResult(); + assertEquals(csk.getEstimate(), 2.0); + //println(csk.toString(true, true, 1, true)); + } + @Test public void printlnTest() { println("PRINTING: "+this.getClass().getName()); } /** - * @param s value to print + * @param o value to print */ - static void println(String s) { - //System.out.println(s); //disable here + static void println(Object o) { + //System.out.println(o.toString()); //disable here } } diff --git a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java index 547986dd3..ec2222bde 100644 --- a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java +++ b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java @@ -24,6 +24,7 @@ import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.testng.Assert; @@ -31,9 +32,12 @@ @SuppressWarnings("javadoc") public class CompactSketchWithDoubleSummaryTest { + private final DoubleSummary.Mode mode = Mode.Sum; + @Test public void emptyFromNonPublicConstructorNullArray() { - 
CompactSketch sketch = new CompactSketch<>(null, null, Long.MAX_VALUE, true); + CompactSketch sketch = + new CompactSketch<>(null, null, Long.MAX_VALUE, true); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); @@ -50,8 +54,10 @@ public void emptyFromNonPublicConstructorNullArray() { @Test public void emptyFromNonPublicConstructor() { long[] keys = new long[0]; - DoubleSummary[] summaries = (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0); - CompactSketch sketch = new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true); + DoubleSummary[] summaries = + (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0); + CompactSketch sketch = + new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); @@ -67,7 +73,8 @@ public void emptyFromNonPublicConstructor() { @Test public void emptyFromQuickSelectSketch() { - UpdatableSketch us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); CompactSketch sketch = us.compact(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); @@ -84,7 +91,8 @@ public void emptyFromQuickSelectSketch() { @Test public void exactModeFromQuickSelectSketch() { - UpdatableSketch us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); us.update(1, 1.0); us.update(2, 1.0); us.update(3, 1.0); @@ -111,13 +119,15 @@ public void exactModeFromQuickSelectSketch() { @Test public void serializeDeserializeSmallExact() { - UpdatableSketch us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + UpdatableSketch us = + new 
UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); us.update("a", 1.0); us.update("b", 1.0); us.update("c", 1.0); CompactSketch sketch1 = us.compact(); Sketch sketch2 = - Sketches.heapifySketch(Memory.wrap(sketch1.toByteArray()), new DoubleSummaryDeserializer()); + Sketches.heapifySketch(Memory.wrap(sketch1.toByteArray()), + new DoubleSummaryDeserializer()); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 3.0); @@ -137,7 +147,8 @@ public void serializeDeserializeSmallExact() { @Test public void serializeDeserializeEstimation() throws Exception { - UpdatableSketch us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { us.update(i, 1.0); } @@ -165,19 +176,22 @@ public void serializeDeserializeEstimation() throws Exception { @Test(expectedExceptions = SketchesArgumentException.class) public void deserializeWrongType() { - UpdatableSketch us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { us.update(i, 1.0); } CompactSketch sketch1 = us.compact(); - Sketches.heapifyUpdatableSketch(Memory.wrap(sketch1.toByteArray()), new DoubleSummaryDeserializer(), - new DoubleSummaryFactory()); + Sketches.heapifyUpdatableSketch(Memory.wrap(sketch1.toByteArray()), + new DoubleSummaryDeserializer(), + new DoubleSummaryFactory(mode)); } @Test public void serialVersion1Compatibility() throws Exception { byte[] bytes = getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.bin"); - Sketch sketch = Sketches.heapifySketch(Memory.wrap(bytes), new DoubleSummaryDeserializer()); + Sketch sketch = Sketches.heapifySketch(Memory.wrap(bytes), + new DoubleSummaryDeserializer()); 
Assert.assertTrue(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch.getRetainedEntries(), 4096); diff --git a/src/test/java/org/apache/datasketches/tuple/FilterTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java similarity index 91% rename from src/test/java/org/apache/datasketches/tuple/FilterTest.java rename to src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java index 1fbf09105..461576812 100644 --- a/src/test/java/org/apache/datasketches/tuple/FilterTest.java +++ b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java @@ -17,21 +17,26 @@ * under the License. */ -package org.apache.datasketches.tuple; +package org.apache.datasketches.tuple.adouble; import java.util.Random; -import org.testng.Assert; -import org.testng.annotations.Test; - +import org.apache.datasketches.tuple.Filter; +import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Sketches; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; +import org.testng.Assert; +import org.testng.annotations.Test; @SuppressWarnings("javadoc") public class FilterTest { - private static final int numberOfElements = 100; private static final Random random = new Random(1);//deterministic for this class + private final DoubleSummary.Mode mode = Mode.Sum; @Test public void emptySketch() { @@ -64,7 +69,7 @@ public void nullSketch() { @Test public void filledSketchShouldBehaveTheSame() { UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); fillSketch(sketch, numberOfElements, 0.0); @@ -82,7 +87,7 @@ public void 
filledSketchShouldBehaveTheSame() { @Test public void filledSketchShouldFilterOutElements() { UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); fillSketch(sketch, numberOfElements, 0.0); fillSketch(sketch, 2 * numberOfElements, 1.0); @@ -101,7 +106,7 @@ public void filledSketchShouldFilterOutElements() { @Test public void filteringInEstimationMode() { UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); int n = 10000; fillSketch(sketch, n, 0.0); @@ -122,7 +127,7 @@ public void filteringInEstimationMode() { public void nonEmptySketchWithNoEntries() { UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).setSamplingProbability(0.0001f).build(); + new DoubleSummaryFactory(mode)).setSamplingProbability(0.0001f).build(); sketch.update(0, 0.0); Assert.assertFalse(sketch.isEmpty()); diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java index ea18d4f2c..7f40d31f1 100644 --- a/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java +++ b/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java @@ -19,6 +19,8 @@ package org.apache.datasketches.tuple.adouble; +import static org.testng.Assert.assertEquals; + import org.apache.datasketches.ResizeFactor; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.memory.Memory; @@ -37,11 +39,14 @@ @SuppressWarnings("javadoc") public class UpdatableSketchWithDoubleSummaryTest { + private final DoubleSummary.Mode mode = Mode.Sum; @Test public void isEmpty() { - UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new 
DoubleSummaryFactory()).build(); + int lgK = 12; + DoubleSketch sketch = new DoubleSketch(lgK, mode); +// UpdatableSketch sketch = +// new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); @@ -55,11 +60,42 @@ public void isEmpty() { Assert.assertFalse(it.next()); } + @Test + public void serDeTest() { + int lgK = 12; + int K = 1 << lgK; + DoubleSummary.Mode a1Mode = DoubleSummary.Mode.AlwaysOne; + DoubleSketch a1Sk = new DoubleSketch(lgK, a1Mode); + int m = 2 * K; + for (int key = 0; key < m; key++) { + a1Sk.update(key, 1.0); + } + double est1 = a1Sk.getEstimate(); + Memory mem = Memory.wrap(a1Sk.toByteArray()); + DoubleSketch a1Sk2 = new DoubleSketch(mem, a1Mode); + double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void checkStringKey() { + int lgK = 12; + int K = 1 << lgK; + DoubleSummary.Mode a1Mode = DoubleSummary.Mode.AlwaysOne; + DoubleSketch a1Sk1 = new DoubleSketch(lgK, a1Mode); + int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(Integer.toHexString(key), 1.0); + } + assertEquals(a1Sk1.getEstimate(), K / 2.0); + } + + @Test public void isEmptyWithSampling() { float samplingProbability = 0.1f; UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()) + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) .setSamplingProbability(samplingProbability).build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); @@ -75,7 +111,7 @@ public void sampling() { float samplingProbability = 0.001f; UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).setSamplingProbability(samplingProbability).build(); + new DoubleSummaryFactory(mode)).setSamplingProbability(samplingProbability).build(); sketch.update("a", 1.0); Assert.assertFalse(sketch.isEmpty()); 
Assert.assertTrue(sketch.isEstimationMode()); @@ -90,7 +126,7 @@ public void sampling() { public void exactMode() { UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).build(); + new DoubleSummaryFactory(mode)).build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 1; i <= 4096; i++) { @@ -129,7 +165,7 @@ public void exactMode() { public void estimationMode() { UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).build(); + new DoubleSummaryFactory(mode)).build(); Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 1; i <= 8192; i++) { sketch.update(i, 1.0); @@ -161,7 +197,7 @@ public void estimationMode() { public void estimationModeWithSamplingNoResizing() { UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()) + new DoubleSummaryFactory(mode)) .setSamplingProbability(0.5f) .setResizeFactor(ResizeFactor.X1).build(); for (int i = 0; i < 16384; i++) { @@ -176,7 +212,7 @@ public void estimationModeWithSamplingNoResizing() { @Test public void updatesOfAllKeyTypes() { UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch.update(1L, 1.0); sketch.update(2.0, 1.0); byte[] bytes = { 3 }; @@ -189,18 +225,18 @@ public void updatesOfAllKeyTypes() { Assert.assertEquals(sketch.getEstimate(), 6.0); } - @Test - public void updateDoubleSummary() { - DoubleSummary ds = new DoubleSummary(); - ds.update(1.0); - Assert.assertEquals(ds.getValue(), 1.0); - } +// @Test +// public void updateDoubleSummary() { +// DoubleSummary ds = new DoubleSummary(); +// ds.update(1.0); +// Assert.assertEquals(ds.getValue(), 1.0); +// } @Test public void doubleSummaryDefaultSumMode() { UpdatableSketch sketch = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).build(); + new DoubleSummaryFactory(mode)).build(); { 
sketch.update(1, 1.0); Assert.assertEquals(sketch.getRetainedEntries(), 1); @@ -292,12 +328,12 @@ public void doubleSummaryMaxMode() { @Test public void serializeDeserializeExact() throws Exception { UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch1.update(1, 1.0); UpdatableSketch sketch2 = Sketches.heapifyUpdatableSketch( Memory.wrap(sketch1.toByteArray()), - new DoubleSummaryDeserializer(), new DoubleSummaryFactory()); + new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); Assert.assertEquals(sketch2.getEstimate(), 1.0); SketchIterator it = sketch2.iterator(); @@ -317,7 +353,7 @@ public void serializeDeserializeExact() throws Exception { public void serializeDeserializeEstimationNoResizing() throws Exception { UpdatableSketch sketch1 = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory()).setResizeFactor(ResizeFactor.X1).build(); + new DoubleSummaryFactory(mode)).setResizeFactor(ResizeFactor.X1).build(); for (int j = 0; j < 10; j++) { for (int i = 0; i < 8192; i++) { sketch1.update(i, 1.0); @@ -348,7 +384,7 @@ public void serializeDeserializeSampling() throws Exception { int sketchSize = 16384; int numberOfUniques = sketchSize; UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()) + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) .setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); for (int i = 0; i < numberOfUniques; i++) { sketch1.update(i, 1.0); @@ -364,21 +400,21 @@ public void serializeDeserializeSampling() throws Exception { @Test public void unionExactMode() { UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch1.update(1, 1.0); sketch1.update(1, 1.0); sketch1.update(1, 1.0); sketch1.update(2, 1.0); UpdatableSketch sketch2 = - new 
UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch2.update(2, 1.0); sketch2.update(2, 1.0); sketch2.update(3, 1.0); sketch2.update(3, 1.0); sketch2.update(3, 1.0); - Union union = new Union<>(new DoubleSummarySetOperations()); + Union union = new Union<>(new DoubleSummarySetOperations(mode)); union.update(sketch1); union.update(sketch2); CompactSketch result = union.getResult(); @@ -408,19 +444,19 @@ public void unionExactMode() { public void unionEstimationMode() { int key = 0; UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, 1.0); } key -= 4096; // overlap half of the entries UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, 1.0); } - Union union = new Union<>(4096, new DoubleSummarySetOperations()); + Union union = new Union<>(4096, new DoubleSummarySetOperations(mode)); union.update(sketch1); union.update(sketch2); CompactSketch result = union.getResult(); @@ -433,7 +469,7 @@ public void unionEstimationMode() { public void unionMixedMode() { int key = 0; UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 1000; i++) { sketch1.update(key++, 1.0); //System.out.println("theta1=" + sketch1.getTheta() + " " + sketch1.getThetaLong()); @@ -442,13 +478,13 @@ public void unionMixedMode() { key -= 500; // overlap half of the entries UpdatableSketch sketch2 = new UpdatableSketchBuilder<> - (new DoubleSummaryFactory()).setSamplingProbability(0.2f).build(); + (new 
DoubleSummaryFactory(mode)).setSamplingProbability(0.2f).build(); for (int i = 0; i < 20000; i++) { sketch2.update(key++, 1.0); //System.out.println("theta2=" + sketch2.getTheta() + " " + sketch2.getThetaLong()); } - Union union = new Union<>(4096, new DoubleSummarySetOperations()); + Union union = new Union<>(4096, new DoubleSummarySetOperations(mode)); union.update(sketch1); union.update(sketch2); CompactSketch result = union.getResult(); @@ -460,9 +496,9 @@ public void unionMixedMode() { @Test public void intersectionEmpty() { UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch); CompactSketch result = intersection.getResult(); Assert.assertEquals(result.getRetainedEntries(), 0); @@ -476,10 +512,10 @@ public void intersectionEmpty() { public void intersectionNotEmptyNoEntries() { UpdatableSketch sketch1 = new UpdatableSketchBuilder<> - (new DoubleSummaryFactory()).setSamplingProbability(0.01f).build(); + (new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build(); sketch1.update("a", 1.0); // this happens to get rejected because of sampling with low probability Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); CompactSketch result = intersection.getResult(); Assert.assertEquals(result.getRetainedEntries(), 0); @@ -492,13 +528,13 @@ public void intersectionNotEmptyNoEntries() { @Test public void intersectionExactWithNull() { UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch1.update(1, 1.0); sketch1.update(2, 1.0); sketch1.update(3, 
1.0); Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); intersection.update(null); CompactSketch result = intersection.getResult(); @@ -512,7 +548,7 @@ public void intersectionExactWithNull() { @Test public void intersectionExactWithEmpty() { UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch1.update(1, 1.0); sketch1.update(2, 1.0); sketch1.update(3, 1.0); @@ -520,7 +556,7 @@ public void intersectionExactWithEmpty() { Sketch sketch2 = Sketches.createEmptySketch(); Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(Mode.Sum)); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); intersection.update(sketch2); CompactSketch result = intersection.getResult(); @@ -534,21 +570,21 @@ public void intersectionExactWithEmpty() { @Test public void intersectionExactMode() { UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch1.update(1, 1.0); sketch1.update(1, 1.0); sketch1.update(2, 1.0); sketch1.update(2, 1.0); UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketch2.update(2, 1.0); sketch2.update(2, 1.0); sketch2.update(3, 1.0); sketch2.update(3, 1.0); Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); intersection.update(sketch2); CompactSketch result = intersection.getResult(); @@ -577,19 +613,19 @@ public void intersectionExactMode() { public void intersectionDisjointEstimationMode() { int key = 0; 
UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, 1.0); } UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, 1.0); } Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); intersection.update(sketch2); CompactSketch result = intersection.getResult(); @@ -613,20 +649,20 @@ public void intersectionDisjointEstimationMode() { public void intersectionEstimationMode() { int key = 0; UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, 1.0); } key -= 4096; // overlap half of the entries UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, 1.0); } Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations()); + new Intersection<>(new DoubleSummarySetOperations(mode)); intersection.update(sketch1); intersection.update(sketch2); CompactSketch result = intersection.getResult(); @@ -661,7 +697,7 @@ public void aNotBEmpty() { Assert.assertEquals(result.getUpperBound(1), 0.0); UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); aNotB.update(sketch, sketch); result = aNotB.getResult(); Assert.assertEquals(result.getRetainedEntries(), 0); @@ -674,10 +710,10 @@ public 
void aNotBEmpty() { @Test public void aNotBEmptyA() { UpdatableSketch sketchA = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); UpdatableSketch sketchB = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketchB.update(1, 1.0); sketchB.update(2, 1.0); @@ -694,12 +730,12 @@ public void aNotBEmptyA() { @Test public void aNotBEmptyB() { UpdatableSketch sketchA = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketchA.update(1, 1.0); sketchA.update(2, 1.0); UpdatableSketch sketchB = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); AnotB aNotB = new AnotB<>(); aNotB.update(sketchA, sketchB); @@ -723,14 +759,14 @@ public void aNotBEmptyB() { @Test public void aNotBExactMode() { UpdatableSketch sketchA = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketchA.update(1, 1.0); sketchA.update(1, 1.0); sketchA.update(2, 1.0); sketchA.update(2, 1.0); UpdatableSketch sketchB = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); sketchB.update(2, 1.0); sketchB.update(2, 1.0); sketchB.update(3, 1.0); @@ -754,14 +790,14 @@ public void aNotBExactMode() { public void aNotBEstimationMode() { int key = 0; UpdatableSketch sketchA = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketchA.update(key++, 1.0); } key -= 4096; // overlap half of the entries UpdatableSketch sketchB = - new UpdatableSketchBuilder<>(new 
DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 8192; i++) { sketchB.update(key++, 1.0); } @@ -797,14 +833,14 @@ public void aNotBEstimationMode() { public void aNotBEstimationModeLargeB() { int key = 0; UpdatableSketch sketchA = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 10000; i++) { sketchA.update(key++, 1.0); } key -= 2000; // overlap UpdatableSketch sketchB = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build(); + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); for (int i = 0; i < 100000; i++) { sketchB.update(key++, 1.0); } @@ -832,7 +868,7 @@ public void aNotBEstimationModeLargeB() { @Test(expectedExceptions = SketchesArgumentException.class) public void invalidSamplingProbability() { new UpdatableSketchBuilder<> - (new DoubleSummaryFactory()).setSamplingProbability(2f).build(); + (new DoubleSummaryFactory(mode)).setSamplingProbability(2f).build(); } } diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java new file mode 100644 index 000000000..d11b1c3fd --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple.aninteger; + +import static java.lang.Math.exp; +import static java.lang.Math.log; +import static java.lang.Math.round; +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.tuple.CompactSketch; +import org.apache.datasketches.tuple.SketchIterator; +import org.apache.datasketches.tuple.Union; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +@SuppressWarnings("javadoc") +public class EngagementTest { + + @Test + public void computeEngagementHistogram() { + int lgK = 12; + int K = 1 << lgK; // = 4096 + int days = 30; + IntegerSummary.Mode sumMode = IntegerSummary.Mode.Sum; + int v = 0; + int daysPerMonth = 30; + IntegerSketch[] skArr = new IntegerSketch[30]; + for (int i = 0; i < 30; i++) { + skArr[i] = new IntegerSketch(lgK, sumMode); + } + for (int i = 0; i <= days; i++) { //31 generating indices + int numIds = numIDs(daysPerMonth, i); + int numDays = numDays(daysPerMonth, i); + int myV = v++; + for (int d = 0; d < numDays; d++) { + for (int id = 0; id < numIds; id++) { + skArr[d].update(myV + id, 1); + } + } + v += numIds; + } + + int numVisits = unionOps(K, sumMode, skArr); + assertEquals(numVisits, 897); + } + + @Test + public void simpleCheckAlwaysOneIntegerSketch() { + int lgK = 12; + int K = 1 << lgK; // = 4096 + IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + + IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + IntegerSketch a1Sk2 = new IntegerSketch(lgK, a1Mode); + + int m = 2 * K; + for (int key = 0; key < m; key++) { + 
a1Sk1.update(key, 1); + a1Sk2.update(key + (m/2), 1); //overlap by 1/2 = 1.5m = 12288. + } + int numVisits = unionOps(K, a1Mode, a1Sk1, a1Sk2); + assertEquals(numVisits, K); + } + + private static int unionOps(int K, IntegerSummary.Mode mode, IntegerSketch ... sketches) { + IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(mode, mode); + Union union = new Union<>(K, setOps); + int len = sketches.length; + + for (IntegerSketch isk : sketches) { + union.update(isk); + } + CompactSketch result = union.getResult(); + SketchIterator itr = result.iterator(); + + int[] freqArr = new int[len +1]; + + while (itr.next()) { + int value = itr.getSummary().getValue(); + freqArr[value]++; + } + println("Engagement Histogram:"); + printf("%12s,%12s\n","Days Visited", "Visitors"); + int sumVisitors = 0; + int sumVisits = 0; + for (int i = 0; i < freqArr.length; i++) { + int visits = freqArr[i]; + if (visits == 0) { continue; } + sumVisitors += visits; + sumVisits += (visits * i); + printf("%12d,%12d\n", i, visits); + } + println("Total Visitors: " + sumVisitors); + println("Total Visits : " + sumVisits); + return sumVisits; + } + + @Test + public void checkPwrLaw() { + int dpm = 30; + for (int i = 0; i <= dpm; i++) { + int numIds = numIDs(dpm, i); + int numDays = numDays(dpm, i); + printf("%6d%6d%6d\n", i, numIds, numDays); + } + } + + private static int numIDs(int daysPerMonth, int index) { + double d = daysPerMonth; + double i = index; + return (int)(round(exp((i * log(d)) / d))); + } + + private static int numDays(int daysPerMonth, int index) { + double d = daysPerMonth; + double i = index; + return (int)(round(exp(((d - i) * log(d)) / d))); + + } + + /** + * @param o object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); //Disable + } + + /** + * @param fmt format + * @param args arguments + */ + static void printf(String fmt, Object ... 
args) { + //System.out.printf(fmt, args); //Disable + } +} diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java new file mode 100644 index 000000000..34b586fb7 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple.aninteger; + +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.CompactSketch; +import org.apache.datasketches.tuple.Intersection; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +@SuppressWarnings("javadoc") +public class IntegerSketchTest { + + @Test + public void serDeTest() { + int lgK = 12; + int K = 1 << lgK; + IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + IntegerSketch a1Sk = new IntegerSketch(lgK, a1Mode); + int m = 2 * K; + for (int key = 0; key < m; key++) { + a1Sk.update(key, 1); + } + double est1 = a1Sk.getEstimate(); + Memory mem = Memory.wrap(a1Sk.toByteArray()); + IntegerSketch a1Sk2 = new IntegerSketch(mem, a1Mode); + double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void intersectTest() { + int lgK = 12; + int K = 1 << lgK; + IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + IntegerSketch a1Sk2 = new IntegerSketch(lgK, a1Mode); + int m = 2 * K; + for (int key = 0; key < m; key++) { + a1Sk1.update(key, 1); + a1Sk2.update(key + (m/2), 1); + } + Intersection inter = + new Intersection<>(new IntegerSummarySetOperations(a1Mode, a1Mode)); + inter.update(a1Sk1); + inter.update(a1Sk2); + CompactSketch csk = inter.getResult(); + assertEquals(csk.getEstimate(), K * 1.0, K * .03); + } + + @Test + public void checkMinMaxMode() { + int lgK = 12; + int K = 1 << lgK; + IntegerSummary.Mode minMode = IntegerSummary.Mode.Min; + IntegerSummary.Mode maxMode = IntegerSummary.Mode.Max; + IntegerSketch a1Sk1 = new IntegerSketch(lgK, minMode); + IntegerSketch a1Sk2 = new IntegerSketch(lgK, maxMode); + int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(key, 1); + a1Sk1.update(key, 0); + a1Sk1.update(key, 2); + a1Sk2.update(key + (m/2), 1); + a1Sk2.update(key + (m/2), 0); + 
a1Sk2.update(key + (m/2), 2); + } + double est1 = a1Sk1.getEstimate(); + double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void checkStringKey() { + int lgK = 12; + int K = 1 << lgK; + IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(Integer.toHexString(key), 1); + } + assertEquals(a1Sk1.getEstimate(), K / 2.0); + } + + /** + * @param o object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); //Disable + } + + /** + * @param fmt format + * @param args arguments + */ + static void printf(String fmt, Object ... args) { + //System.out.printf(fmt, args); //Disable + } +}