} pair.
+ *
+ * @param index the bit index
+ * @param count the count at the specified bit index
+ */
+ void accept(int index, int count);
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java
new file mode 100644
index 0000000000..a16cb0c917
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMap.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+
+/**
+ * Contains functions to convert {@code int} indices into Bloom filter bit positions and vice versa.
+ *
+ * The functions view an array of longs as a collection of bitmaps each containing 64 bits. The bits are arranged
+ * in memory as a little-endian long value. This matches the requirements of the BitMapProducer interface.
+ *
+ * @since 4.5
+ */
+public class BitMap {
+ /** A bit shift to apply to an integer to divide by 64 (2^6). */
+ private static final int DIVIDE_BY_64 = 6;
+
+ /** Do not instantiate. */
+ private BitMap() {
+ }
+
+ /**
+ * Calculates the number of bitmaps (longs) required for the numberOfBits parameter.
+ *
+ * If the input is negative the behavior is not defined.
+ *
+ * @param numberOfBits the number of bits to store in the array of bitmaps.
+ * @return the number of bitmaps necessary; {@code 0} when {@code numberOfBits} is {@code 0}.
+ */
+ public static int numberOfBitMaps(int numberOfBits) {
+ return numberOfBits == 0 ? 0 : ((numberOfBits - 1) >> DIVIDE_BY_64) + 1;
+ }
+
+ /**
+ * Checks if the specified index bit is enabled in the array of bitmaps.
+ *
+ * If idx is negative or addresses a bitmap beyond the end of the array, {@code false} is returned.
+ *
+ * @param bitMaps The array of bit maps.
+ * @param idx the index of the bit to locate.
+ * @return {@code true} if the bit is enabled, {@code false} otherwise.
+ */
+ public static boolean contains(long[] bitMaps, int idx) {
+ return (idx >= 0 && getLongIndex(idx) < bitMaps.length && (bitMaps[getLongIndex(idx)] & getLongBit(idx)) != 0);
+ }
+
+ /**
+ * Sets the bit in the bitmaps.
+ * The index is range checked using {@link #checkRange(int, int)} before the bit is set.
+ *
+ * @param bitMaps The array of bit maps.
+ * @param idx the index of the bit to set.
+ * @throws IndexOutOfBoundsException if idx specifies a bit not in the range being tracked.
+ */
+ public static void set(long[] bitMaps, int idx) {
+ bitMaps[checkRange(bitMaps.length, idx)] |= getLongBit(idx);
+ }
+
+ /**
+ * Checks that the index is not negative (zero is accepted).
+ *
+ * @param bitIndex the bit index
+ * @throws IndexOutOfBoundsException if the index is negative
+ */
+ public static void checkPositive(final int bitIndex) {
+ if (bitIndex < 0) {
+ throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex);
+ }
+ }
+
+ /**
+ * Checks that the bitIndex produces a value in the range of a collection.
+ *
+ * @param limit the number of bitmaps in a collection.
+ * @param bitIndex the bit index
+ * @return the index for the bitmap in the array.
+ * @throws IndexOutOfBoundsException if the bit index is negative or its bitmap index is not less than limit
+ * @see #getLongIndex(int)
+ */
+ public static int checkRange(final int limit, final int bitIndex) {
+ checkPositive(bitIndex);
+ int idx = getLongIndex(bitIndex);
+ if (limit <= idx) {
+ throw new IndexOutOfBoundsException("bitIndex to large: " + bitIndex);
+ }
+ return idx;
+ }
+
+ /**
+ * Gets the filter index for the specified bit index assuming the filter is using 64-bit longs
+ * to store bits starting at index 0.
+ *
+ * The index is assumed to be positive. For a positive index the result will match
+ * {@code bitIndex / 64}.
+ *
+ * The divide is performed using bit shifts. If the input is negative the behavior
+ * is not defined.
+ *
+ * @param bitIndex the bit index (assumed to be positive)
+ * @return the index of the BitMap in an array of BitMaps.
+ * @see #checkPositive(int)
+ */
+ public static int getLongIndex(final int bitIndex) {
+ // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is
+ // positive.
+ // We do not explicitly check for a negative here. Instead we use
+ // a signed shift. Any negative index will produce a negative value
+ // by sign-extension and if used as an index into an array it will throw an
+ // exception.
+ return bitIndex >> DIVIDE_BY_64;
+ }
+
+ /**
+ * Gets the filter bit mask for the specified bit index assuming the filter is using 64-bit
+ * longs to store bits starting at index 0. The returned value is a {@code long} with only
+ * 1 bit set.
+ *
+ * The index is assumed to be positive. For a positive index the result will match
+ * {@code 1L << (bitIndex % 64)}.
+ *
+ * If the input is negative the behavior is not defined.
+ *
+ * @param bitIndex the bit index (assumed to be positive)
+ * @return the filter bit
+ * @see #checkPositive(int)
+ */
+ public static long getLongBit(final int bitIndex) {
+ // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
+ // using 0x3f (63) or compute bitIndex % 64.
+ // Note: If the index is negative the shift will be (bitIndex & 0x3f), the low 6 bits of
+ // its two's-complement form, and this will identify an incorrect bit.
+ return 1L << bitIndex;
+ }
+
+ /**
+ * Determines if a cardinality is sparse based on the shape.
+ * This method assumes that BitMaps are 64 bits and indexes are 32 bits. If the memory
+ * necessary to store the cardinality as indexes is no greater than the estimated memory for BitMaps,
+ * the cardinality is determined to be {@code sparse}. A {@code null} shape raises NullPointerException.
+ * @param cardinality the cardinality to check.
+ * @param shape the Shape to check against
+ * @return true if the cardinality is sparse within the shape.
+ */
+ public static boolean isSparse(int cardinality, Shape shape) {
+ /*
+ * Since the size of a BitMap is a long and the size of an index is an int,
+ * there can be 2 indexes for each bitmap. In Bloom filters indexes are evenly
+ * distributed across the range of possible values. Thus if the cardinality
+ * (number of indexes) is less than or equal to 2*number of BitMaps the
+ * cardinality is sparse within the shape.
+ */
+
+ Objects.requireNonNull(shape, "shape");
+ return cardinality <= (numberOfBitMaps(shape.getNumberOfBits()) * 2);
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java
new file mode 100644
index 0000000000..478cce0928
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BitMapProducer.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
+
+/**
+ * Produces BitMap longs for a Bloom filter.
+ *
+ * Each bit map is a little-endian long value representing a block of bits of this filter.
+ *
+ * A complete sequence of bit maps has length {@code ceil(m / 64)} where {@code m} is the
+ * number of bits in the filter and {@code ceil} is the ceiling function.
+ * Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
+ * index is enabled.
+ *
+ * The producer may stop at the last non zero BitMap or may produce zero value bit maps to the limit determined by
+ * a shape.
+ *
+ * @since 4.5
+ */
+public interface BitMapProducer {
+
+ /**
+ * Each BitMap is passed to the consumer in order.
+ * Any exceptions thrown by the action are relayed to the caller.
+ *
+ * @param consumer the consumer of the BitMaps.
+ * @throws NullPointerException if the specified consumer is null
+ */
+ void forEachBitMap(LongConsumer consumer);
+
+ /**
+ * Creates a BitMapProducer from an array of longs. The array is not copied; it is read on each call.
+ * @param bitMaps the bitMaps to return.
+ * @return a BitMapProducer.
+ */
+ static BitMapProducer fromLongArray(long[] bitMaps) {
+ return new BitMapProducer() {
+
+ @Override
+ public void forEachBitMap(LongConsumer consumer) {
+ for (long word : bitMaps) {
+ consumer.accept(word);
+ }
+ }
+
+ };
+ }
+
+ /**
+ * Creates a BitMapProducer from an IndexProducer.
+ * @param producer the IndexProducer that specifies the indexes of the bits to enable.
+ * @param shape the desired shape; its number of bits sizes the array the indexes are collected into.
+ * @return A BitMapProducer that produces the BitMap equivalent of the Indices from the producer.
+ */
+ static BitMapProducer fromIndexProducer(IndexProducer producer, Shape shape) {
+ Objects.requireNonNull(producer, "producer");
+ Objects.requireNonNull(shape, "shape");
+
+ return new BitMapProducer() {
+ private int maxBucket = -1;
+ private long[] result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+
+ @Override
+ public void forEachBitMap(LongConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ /*
+ * We cannot assume the ints arrive in order or without repeats (HasherCollection makes no
+ * such guarantee), so every index is OR-ed into result, a field that persists across calls.
+ */
+ // process all the ints into an array of BitMaps
+ IntConsumer builder = new IntConsumer() {
+ @Override
+ public void accept(int i) {
+ int bucketIdx = BitMap.getLongIndex(i);
+ maxBucket = maxBucket < bucketIdx ? bucketIdx : maxBucket;
+ result[bucketIdx] |= BitMap.getLongBit(i);
+ }
+ };
+ producer.forEachIndex(builder);
+ // send the bitmaps to the consumer, stopping at the highest bucket seen so far.
+ for (int bucket = 0; bucket <= maxBucket; bucket++) {
+ consumer.accept(result[bucket]);
+ }
+ }
+ };
+ }
+
+ /**
+ * A LongConsumer that builds an Array of BitMaps as produced by a BitMapProducer.
+ * Each accepted BitMap is OR-ed into successive positions of the array, starting at the first.
+ */
+ class ArrayBuilder implements LongConsumer {
+ private long[] result;
+ private int idx = 0;
+ private int bucketCount = 0;
+
+ /**
+ * Constructor that creates an empty ArrayBuilder.
+ * @param shape The shape used to generate the BitMaps.
+ */
+ public ArrayBuilder(Shape shape) {
+ this(shape, null);
+ }
+
+ /**
+ * Constructor that creates an array builder with an initial value.
+ * @param shape The shape used to generate the BitMaps.
+ * @param initialValue an array of BitMap values to initialize the builder with. May be {@code null}.
+ * @throws IllegalArgumentException if the length of initialValue is greater than the number of
+ * buckets as specified by the number of bits in the Shape.
+ */
+ public ArrayBuilder(Shape shape, long[] initialValue) {
+ Objects.requireNonNull(shape, "shape");
+ result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
+ if (initialValue != null) {
+ if (initialValue.length > result.length) {
+ throw new IllegalArgumentException(
+ String.format("initialValue length (%s) is longer than shape length (%s)",
+ initialValue.length, result.length));
+ }
+ bucketCount = initialValue.length;
+ System.arraycopy(initialValue, 0, result, 0, initialValue.length);
+ }
+ }
+
+ @Override
+ public void accept(long bitmap) {
+ result[idx++] |= bitmap;
+ bucketCount = bucketCount >= idx ? bucketCount : idx;
+ }
+
+ /**
+ * Returns a copy of the array, cut to the larger of the initial value length and the count of accepted BitMaps.
+ * @return the Array of BitMaps.
+ */
+ public long[] getArray() {
+ return Arrays.copyOf(result, bucketCount);
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
deleted file mode 100644
index de55cbe93d..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilter.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.BitSet;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-
-/**
- * A bloom filter using a Java BitSet to track enabled bits. This is a standard
- * implementation and should work well for most Bloom filters.
- * @since 4.5
- */
-public class BitSetBloomFilter extends AbstractBloomFilter {
-
- /**
- * The bitSet that defines this BloomFilter.
- */
- private final BitSet bitSet;
-
- /**
- * Constructs an empty BitSetBloomFilter.
- *
- * @param shape the desired shape of the filter.
- */
- public BitSetBloomFilter(final Shape shape) {
- super(shape);
- this.bitSet = new BitSet();
- }
-
- @Override
- public int andCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.and(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.andCardinality(other);
- }
-
- @Override
- public int cardinality() {
- return bitSet.cardinality();
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final OfInt iter = hasher.iterator(getShape());
- while (iter.hasNext()) {
- if (!bitSet.get(iter.nextInt())) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public long[] getBits() {
- return bitSet.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bitSet.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- verifyShape(other);
- if (other instanceof BitSetBloomFilter) {
- bitSet.or(((BitSetBloomFilter) other).bitSet);
- } else {
- bitSet.or(BitSet.valueOf(other.getBits()));
- }
- return true;
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) bitSet::set);
- return true;
- }
-
- @Override
- public int orCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.or(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.orCardinality(other);
- }
-
- @Override
- public int xorCardinality(final BloomFilter other) {
- if (other instanceof BitSetBloomFilter) {
- verifyShape(other);
- final BitSet result = (BitSet) bitSet.clone();
- result.xor(((BitSetBloomFilter) other).bitSet);
- return result.cardinality();
- }
- return super.xorCardinality(other);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
index af43ddd51e..1e8680a239 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -16,138 +16,258 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
/**
* The interface that describes a Bloom filter.
* @since 4.5
*/
-public interface BloomFilter {
+public interface BloomFilter extends IndexProducer, BitMapProducer {
- // Query Operations
+ /**
+ * Return the Bloom filter data as a BitMap array.
+ * @param filter the filter to get the data from.
+ * @return An array of BitMap long.
+ */
+ static long[] asBitMapArray(BloomFilter filter) {
+ BitMapProducer.ArrayBuilder builder = new BitMapProducer.ArrayBuilder(filter.getShape());
+ filter.forEachBitMap(builder); // the builder receives each BitMap the filter produces
+ return builder.getArray();
+ }
/**
- * Gets the shape of this filter.
- *
- * @return the shape of this filter
+ * Return the Bloom filter data as an array of indices for the enabled bits.
+ * @param filter the Filter to get the data from.
+ * @return An array of indices for enabled bits in the Bloom filter.
*/
- Shape getShape();
+ static int[] asIndexArray(BloomFilter filter) {
+ List<Integer> lst = new ArrayList<>(); // typed list: a raw List would not let Integer::intValue resolve below
+ filter.forEachIndex(lst::add);
+ return lst.stream().mapToInt(Integer::intValue).toArray();
+ }
+
+ // Query Operations
/**
- * Gets an array of little-endian long values representing the bits of this filter.
+ * This method is used to determine the best method for matching.
*
- * The returned array will have length {@code ceil(m / 64)} where {@code m} is the
- * number of bits in the filter and {@code ceil} is the ceiling function.
- * Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit
- * index is enabled.
+ * <p>For {@code sparse} implementations
+ * the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non {@code sparse} implementations
+ * the {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier
+ * for the implementation to produce indexes or BitMap blocks.
*
- * @return the {@code long[]} representation of this filter
+ * @return {@code true} if the implementation is sparse {@code false} otherwise.
+ * @see BitMap
*/
- long[] getBits();
+ boolean isSparse();
/**
- * Creates a StaticHasher that contains the indexes of the bits that are on in this
- * filter.
- *
- * @return a StaticHasher for that produces this Bloom filter
+ * Gets the shape that was used when the filter was built.
+ * @return The shape the filter was built with.
*/
- StaticHasher getHasher();
+ Shape getShape();
/**
- * Returns {@code true} if this filter contains the specified filter. Specifically this
+ * Returns {@code true} if this filter contains the specified filter.
+ *
+ * Specifically this
* returns {@code true} if this filter is enabled for all bits that are enabled in the
* {@code other} filter. Using the bit representations this is
- * effectively {@code (this AND other) == other}.
+ * effectively {@code (this AND other) == other}.
*
* @param other the other Bloom filter
- * @return true if this filter is enabled for all enabled bits in the other filter
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
+ * @return true if all enabled bits in the other filter are enabled in this filter.
*/
- boolean contains(BloomFilter other);
+ default boolean contains(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return isSparse() ? contains((IndexProducer) other) : contains((BitMapProducer) other); // casts pick the overload
+ }
/**
- * Returns {@code true} if this filter contains the specified decomposed Bloom filter.
- * Specifically this returns {@code true} if this filter is enabled for all bit indexes
- * identified by the {@code hasher}. Using the bit representations this is
- * effectively {@code (this AND hasher) == hasher}.
+ * Returns {@code true} if this filter contains the bits specified in the hasher.
+ *
+ * Specifically this returns {@code true} if this filter is enabled for all bit indexes
+ * identified by the {@code hasher}. Using the BitMap representations this is
+ * effectively {@code (this AND hasher) == hasher}.
*
* @param hasher the hasher to provide the indexes
* @return true if this filter is enabled for all bits specified by the hasher
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
*/
- boolean contains(Hasher hasher);
+ default boolean contains(Hasher hasher) {
+ Objects.requireNonNull(hasher, "Hasher");
+ Shape shape = getShape();
+ return contains(hasher.indices(shape)); // checks the indices the hasher generates for this filter's shape
+ }
- // Modification Operations
+ /**
+ * Returns {@code true} if this filter contains the indices specified by the IndexProducer.
+ *
+ * Specifically this returns {@code true} if this filter is enabled for all bit indexes
+ * identified by the {@code IndexProducer}.
+ *
+ * @param indexProducer the IndexProducer to provide the indexes
+ * @return {@code true} if this filter is enabled for all bits specified by the IndexProducer
+ */
+ boolean contains(IndexProducer indexProducer);
+
+ /**
+ * Returns {@code true} if this filter contains the bits specified in the BitMaps produced by the
+ * bitMapProducer.
+ *
+ * @param bitMapProducer the {@code BitMapProducer} to provide the BitMaps.
+ * @return {@code true} if this filter is enabled for all bits specified by the BitMaps
+ */
+ boolean contains(BitMapProducer bitMapProducer);
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically all bit indexes
- * that are enabled in the {@code other} filter will be enabled in this filter.
+ * Merges the specified Bloom filter with this Bloom filter creating a new Bloom filter.
*
- * Note: This method should return {@code true} even if no additional bit indexes were
- * enabled. A {@code false} result indicates that this filter is not ensured to contain
- * the {@code other} Bloom filter.
+ * <p>Specifically all bit indexes that are enabled in the {@code other} and in {@code this} filter will be
+ * enabled in the resulting filter.
*
* @param other the other Bloom filter
+ * @return The new Bloom filter.
+ */
+ default BloomFilter merge(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ Shape shape = getShape();
+ BloomFilter result = BitMap.isSparse((cardinality() + other.cardinality()), getShape()) // sum of both cardinalities
+ ? new SparseBloomFilter(shape)
+ : new SimpleBloomFilter(shape);
+
+ result.mergeInPlace(this); // the boolean results of mergeInPlace are not checked here
+ result.mergeInPlace(other);
+ return result;
+ }
+
+ /**
+ * Merges the specified Hasher with this Bloom filter and returns a new Bloom filter.
+ *
+ * Specifically all bit indexes that are identified by the {@code hasher} and in {@code this} Bloom filter
+ * will be enabled in the resulting filter.
+ *
+ * @param hasher the hasher to provide the indices
+ * @return the new Bloom filter.
+ */
+ default BloomFilter merge(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ BloomFilter result = BitMap.isSparse((hasher.size() * shape.getNumberOfHashFunctions()) + cardinality(), shape)
+ ? new SparseBloomFilter(shape, hasher) // the new filter is constructed from the hasher
+ : new SimpleBloomFilter(shape, hasher);
+ result.mergeInPlace(this); // then this filter is merged in; the boolean result is not checked
+ return result;
+ }
+
+ /**
+ * Merges the specified Bloom filter into this Bloom filter.
+ *
+ * Specifically all
+ * bit indexes that are identified by the {@code other} will be enabled in this filter.
+ *
+ * Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain
+ * the {@code other} Bloom filter. This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.
+ *
+ * @param other The bloom filter to merge into this one.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
*/
- boolean merge(BloomFilter other);
+ boolean mergeInPlace(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all
+ * Merges the specified hasher into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code hasher} will be enabled in this filter.
*
- * Note: This method should return {@code true} even if no additional bit indexes were
- * enabled. A {@code false} result indicates that this filter is not ensured to contain
- * the specified decomposed Bloom filter.
+ * <p>Note: This method should return {@code true} even if no additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may not contain
+ * the indexes identified by the {@code hasher}. This state may occur in complex Bloom filter implementations like
+ * counting Bloom filters.
*
- * @param hasher the hasher to provide the indexes
+ * @param hasher The hasher to merge.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
*/
- boolean merge(Hasher hasher);
+ default boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ BloomFilter result = BitMap.isSparse((hasher.size() * shape.getNumberOfHashFunctions()) + cardinality(), shape)
+ ? new SparseBloomFilter(getShape(), hasher) // temporary filter constructed from the hasher
+ : new SimpleBloomFilter(getShape(), hasher);
+ return mergeInPlace(result); // delegates to the BloomFilter overload
+ }
+
+ /**
+ * Determines if the bloom filter is "full".
+ *
+ * Full is defined as having no unset bits.
+ *
+ * @return {@code true} if the filter is full, {@code false} otherwise.
+ */
+ default boolean isFull() {
+ return cardinality() == getShape().getNumberOfBits(); // enabled-bit count equals the shape's bit count
+ }
// Counting Operations
/**
* Gets the cardinality (number of enabled bits) of this Bloom filter.
*
- * This is also known as the Hamming value.
+ * This is also known as the Hamming value or Hamming number.
*
* @return the cardinality of this filter
*/
int cardinality();
/**
- * Performs a logical "AND" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the Bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this AND other)}
+ * By default this is the rounding of the {@code Shape.estimateN(cardinality)} calculation for the
+ * shape and cardinality of this filter.
+ *
+ * An item is roughly equivalent to the number of Hashers that have been merged. As the Bloom filter
+ * is a probabilistic structure this value is an estimate.
+ *
+ * @return an estimate of the number of items in the bloom filter.
+ * @see Shape#estimateN(int)
*/
- int andCardinality(BloomFilter other);
+ default int estimateN() {
+ return (int) Math.round(getShape().estimateN(cardinality())); // rounded, then narrowed to int
+ }
/**
- * Performs a logical "OR" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the union of this Bloom filter with the other bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this OR other)}
+ * By default this is the {@code estimateN()} of the merging of this filter with the {@code other} filter.
+ *
+ * An item is roughly equivalent to the number of Hashers that have been merged. As the Bloom filter
+ * is a probabilistic structure this value is an estimate.
+ *
+ * @param other The other Bloom filter
+ * @return an estimate of the number of items in the union.
+ * @see #estimateN()
*/
- int orCardinality(BloomFilter other);
+ default int estimateUnion(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return this.merge(other).estimateN(); // estimates on the new filter returned by merge(other)
+ }
/**
- * Performs a logical "XOR" with the other Bloom filter and returns the cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the intersection of this Bloom filter with the other bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this XOR other)}
+ * By default this is the {@code estimateN() + other.estimateN() - estimateUnion(other)}
+ *
+ * An item is roughly equivalent to the number of Hashers that have been merged. As the Bloom filter
+ * is a probabilistic structure this value is an estimate.
+ *
+ * @param other The other Bloom filter
+ * @return an estimate of the number of items in the intersection.
*/
- int xorCardinality(BloomFilter other);
+ default int estimateIntersection(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return estimateN() + other.estimateN() - estimateUnion(other); // sum of both estimates minus the union estimate
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
deleted file mode 100644
index fe9b1161a9..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexer.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-/**
- * Contains functions to convert {@code int} indices into Bloom filter bit positions.
- */
-public final class BloomFilterIndexer {
- /** A bit shift to apply to an integer to divided by 64 (2^6). */
- private static final int DIVIDE_BY_64 = 6;
-
- /** Do not instantiate. */
- private BloomFilterIndexer() {}
-
- /**
- * Check the index is positive.
- *
- * @param bitIndex the bit index
- * @throws IndexOutOfBoundsException if the index is not positive
- */
- public static void checkPositive(final int bitIndex) {
- if (bitIndex < 0) {
- throw new IndexOutOfBoundsException("Negative bitIndex: " + bitIndex);
- }
- }
-
- /**
- * Gets the filter index for the specified bit index assuming the filter is using 64-bit longs
- * to store bits starting at index 0.
- *
- * The index is assumed to be positive. For a positive index the result will match
- * {@code bitIndex / 64}.
- *
- *
The divide is performed using bit shifts. If the input is negative the behavior
- * is not defined.
- *
- * @param bitIndex the bit index (assumed to be positive)
- * @return the filter index
- * @see #checkPositive(int)
- */
- public static int getLongIndex(final int bitIndex) {
- // An integer divide by 64 is equivalent to a shift of 6 bits if the integer is positive.
- // We do not explicitly check for a negative here. Instead we use a
- // a signed shift. Any negative index will produce a negative value
- // by sign-extension and if used as an index into an array it will throw an exception.
- return bitIndex >> DIVIDE_BY_64;
- }
-
- /**
- * Gets the filter bit mask for the specified bit index assuming the filter is using 64-bit
- * longs to store bits starting at index 0. The returned value is a {@code long} with only
- * 1 bit set.
- *
- *
The index is assumed to be positive. For a positive index the result will match
- * {@code 1L << (bitIndex % 64)}.
- *
- *
If the input is negative the behavior is not defined.
- *
- * @param bitIndex the bit index (assumed to be positive)
- * @return the filter bit
- * @see #checkPositive(int)
- */
- public static long getLongBit(final int bitIndex) {
- // Bit shifts only use the first 6 bits. Thus it is not necessary to mask this
- // using 0x3f (63) or compute bitIndex % 64.
- // Note: If the index is negative the shift will be (64 - (bitIndex & 0x3f)) and
- // this will identify an incorrect bit.
- return 1L << bitIndex;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index 0c414ebe93..e8e8a7b242 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -27,7 +27,7 @@
* to and not later subtracted from the counting Bloom filter. The functional
* state of a CountingBloomFilter at the start and end of a series of merge and
* subsequent remove operations of the same Bloom filters, irrespective of
- * remove order, is expected to be the same.
+ * remove order, is expected to be the same.
*
* Removal of a filter that has not previously been merged results in an
* invalid state where the counts no longer represent a sum of merged Bloom
@@ -36,166 +36,148 @@
* undetected. The CountingBloomFilter maintains a state flag that is used as a
* warning that an operation was performed that resulted in invalid counts and
* thus an invalid state. For example this may occur if a count for an index was
- * set to negative following a remove operation.
+ * set to negative following a remove operation.
*
* Implementations should document the expected state of the filter after an
* operation that generates invalid counts, and any potential recovery options.
* An implementation may support a reversal of the operation to restore the
* state to that prior to the operation. In the event that invalid counts are
* adjusted to a valid range then it should be documented if there has been
- * irreversible information loss.
+ * irreversible information loss.
*
* Implementations may choose to throw an exception during an operation that
* generates invalid counts. Implementations should document the expected state
* of the filter after such an operation. For example are the counts not updated,
- * partially updated or updated entirely before the exception is raised.
+ * partially updated or updated entirely before the exception is raised.
*
* @since 4.5
*/
-public interface CountingBloomFilter extends BloomFilter {
-
- /**
- * Represents an operation that accepts an {@code } pair representing
- * the count for a bit index in a counting Bloom filter and returns no result.
- *
- * Note: This is a functional interface as a primitive type specialization of
- * {@link java.util.function.BiConsumer} for {@code int}.
- */
- @FunctionalInterface
- interface BitCountConsumer {
- /**
- * Performs this operation on the given {@code } pair.
- *
- * @param index the bit index
- * @param count the count at the specified bit index
- */
- void accept(int index, int count);
- }
+public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
// Query Operations
/**
- * Returns true if the internal state is valid. This flag is a warning that an addition or
+ * Returns {@code true} if the internal state is valid.
+ *
+ * This flag is a warning that an addition or
* subtraction of counts from this filter resulted in an invalid count for one or more
* indexes. For example this may occur if a count for an index was
* set to negative following a subtraction operation, or overflows an {@code int} following an
- * addition operation.
+ * addition operation.
*
* A counting Bloom filter that has an invalid state is no longer ensured to function
* identically to a standard Bloom filter instance that is the merge of all the Bloom filters
- * that have been added to and not later subtracted from this counting Bloom filter.
+ * that have been added to and not later subtracted from this counting Bloom filter.
*
* Note: The change to an invalid state may or may not be reversible. Implementations
* are expected to document their policy on recovery from an addition or removal operation
- * that generated an invalid state.
+ * that generated an invalid state.
*
- * @return true if the state is valid
+ * @return {@code true} if the state is valid
*/
boolean isValid();
- /**
- * Performs the given action for each {@code } pair where the count is non-zero.
- * Any exceptions thrown by the action are relayed to the caller.
- *
- * @param action the action to be performed for each non-zero bit count
- * @throws NullPointerException if the specified action is null
- */
- void forEachCount(BitCountConsumer action);
-
// Modification Operations
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically all counts for
- * indexes that are enabled in the {@code other} filter will be incremented by 1.
+ * Removes the specified Bloom filter from this Bloom filter.
+ *
+ * Specifically: all counts for the indexes identified by the {@code other} filter will be decremented by 1.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored; only
- * the enabled indexes are used.
+ * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
+ * IndexProducer.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>This method will return {@code true} if the filter is valid after the operation.
*
- * @param other {@inheritDoc}
- * @return true if the merge was successful and the state is valid
- * @throws IllegalArgumentException {@inheritDoc}
+ * @param other the other Bloom filter
+ * @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
+ * @see #subtract(BitCountProducer)
*/
- @Override
- boolean merge(BloomFilter other);
+ boolean remove(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all
- * counts for the distinct indexes that are identified by the {@code hasher} will
- * be incremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored.
+ * Removes the specified hasher from this Bloom filter.
+ *
+ * Specifically all counts for the indices produced by the {@code hasher} will be
+ * decremented by 1.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented
+ * from the counts separately.
*
- * @param hasher {@inheritDoc}
- * @return true if the merge was successful and the state is valid
- * @throws IllegalArgumentException {@inheritDoc}
+ * This method will return {@code true} if the filter is valid after the operation.
+ *
+ * @param hasher the hasher to provide the indexes
+ * @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
+ * @see #subtract(BitCountProducer)
*/
- @Override
- boolean merge(Hasher hasher);
+ boolean remove(Hasher hasher);
/**
- * Removes the specified Bloom filter from this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be decremented by 1.
+ * Adds the specified BitCountProducer to this Bloom filter.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored; only
- * the enabled indexes are used.
+ * <p>Specifically
+ * all counts for the indexes identified by the {@code other} will be incremented
+ * by their corresponding values in the {@code other}.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>This method will return {@code true} if the filter is valid after the operation.
*
- * @param other the other Bloom filter
- * @return true if the removal was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
+ * @param other the BitCountProducer to add.
+ * @return {@code true} if the addition was successful and the state is valid
* @see #isValid()
- * @see #subtract(CountingBloomFilter)
+ * @see #subtract(BitCountProducer)
*/
- boolean remove(BloomFilter other);
+ boolean add(BitCountProducer other);
/**
- * Removes the specified decomposed Bloom filter from this Bloom filter. Specifically
- * all counts for the distinct indexes identified by the {@code hasher} will be
- * decremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored.
+ * Subtracts the specified BitCountProducer from this Bloom filter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically
+ * all counts for the indexes identified by the {@code other} will be decremented
+ * by their corresponding values in the {@code other}.
*
- * @param hasher the hasher to provide the indexes
- * @return true if the removal was successful and the state is valid
- * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of
- * this filter
+ * This method will return {@code true} if the filter is valid after the operation.
+ *
+ * @param other the BitCountProducer to subtract.
+ * @return {@code true} if the subtraction was successful and the state is valid
* @see #isValid()
+ * @see #add(BitCountProducer)
*/
- boolean remove(Hasher hasher);
+ boolean subtract(BitCountProducer other);
/**
- * Adds the specified counting Bloom filter to this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be incremented
- * by their corresponding counts in the {@code other} filter.
+ * Merges the specified Bloom filter into this Bloom filter to produce a new CountingBloomFilter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically the new Bloom filter will contain all the counts of this filter and in addition
+ * all bit indexes that are enabled in the {@code other} filter will be incremented
+ * by one in the new filter.
*
- * @param other the other counting Bloom filter
- * @return true if the addition was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
- * @see #isValid()
+ * Note: the validity of the resulting filter is not guaranteed. When in doubt {@code isValid()}
+ * should be called on the new filter.
+ *
+ * @param other the other Bloom filter
+ * @return A new CountingBloomFilter instance.
*/
- boolean add(CountingBloomFilter other);
+ @Override
+ CountingBloomFilter merge(BloomFilter other);
/**
- * Adds the specified counting Bloom filter to this Bloom filter. Specifically
- * all counts for the indexes identified by the {@code other} filter will be decremented
- * by their corresponding counts in the {@code other} filter.
+ * Merges the specified hasher with this Bloom filter to create a new CountingBloomFilter.
*
- * This method will return true if the filter is valid after the operation.
+ * <p>Specifically the new Bloom filter will contain all the counts of this filter and in addition
+ * all bit indexes specified by the {@code hasher} will be incremented
+ * by one in the new filter.
*
- * @param other the other counting Bloom filter
- * @return true if the subtraction was successful and the state is valid
- * @throws IllegalArgumentException if the shape of the other filter does not match
- * the shape of this filter
- * @see #isValid()
+ * For HasherCollections each enclosed Hasher will be considered a single item and increment
+ * the counts separately.
+ *
+ * Note: the validity of the resulting filter is not guaranteed. When in doubt {@code isValid()}
+ * should be called on the new filter.
+ *
+ * @param hasher the hasher to provide the indexes
+ * @return A new CountingBloomFilter instance.
*/
- boolean subtract(CountingBloomFilter other);
+ @Override
+ CountingBloomFilter merge(Hasher hasher);
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
deleted file mode 100644
index 71272e65c4..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilter.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.Arrays;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-import org.apache.commons.collections4.iterators.EmptyIterator;
-import org.apache.commons.collections4.iterators.IteratorChain;
-
-/**
- * A Bloom filter built on a single hasher. This filter type should only be used for small
- * filters (few on bits). While this implementation correctly supports the merge() methods
- * it is recommended that if merges are expected that one of the other Bloom filter
- * implementations be used.
- * @since 4.5
- */
-public class HasherBloomFilter extends AbstractBloomFilter {
- /** The bit representation for an empty Bloom filter. */
- private static final long[] EMPTY = new long[0];
-
- /**
- * The internal hasher representation.
- */
- private StaticHasher hasher;
-
- /**
- * Constructs a HasherBloomFilter from a hasher and a shape.
- *
- * @param hasher the hasher to use.
- * @param shape the shape of the Bloom filter.
- */
- public HasherBloomFilter(final Hasher hasher, final Shape shape) {
- super(shape);
- verifyHasher(hasher);
- if (hasher instanceof StaticHasher) {
- this.hasher = (StaticHasher) hasher;
- verifyShape(this.hasher.getShape());
- } else {
- this.hasher = new StaticHasher(hasher, shape);
- }
- }
-
- /**
- * Constructs an empty HasherBloomFilter from a shape.
- *
- * @param shape the shape of the Bloom filter.
- */
- public HasherBloomFilter(final Shape shape) {
- super(shape);
- this.hasher = new StaticHasher(EmptyIterator.emptyIterator(), shape);
- }
-
- @Override
- public int cardinality() {
- return hasher.size();
- }
-
- @Override
- public boolean contains(final Hasher hasher) {
- verifyHasher(hasher);
- final Set set = new TreeSet<>();
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) idx -> {
- set.add(idx);
- });
- final OfInt iter = this.hasher.iterator(getShape());
- while (iter.hasNext()) {
- final int idx = iter.nextInt();
- set.remove(idx);
- if (set.isEmpty()) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public long[] getBits() {
- if (hasher.isEmpty()) {
- return EMPTY;
- }
-
- // Note: This can be simplified if the StaticHasher exposed a getMaxIndex()
- // method. Since it maintains an ordered list of unique indices the maximum
- // is the last value in the iterator. Knowing this value would allow
- // exact allocation of the long[].
- // For now we assume that the long[] will have a positive length and at least
- // 1 bit set in the entire array.
-
- final int n = (int) Math.ceil(hasher.getShape().getNumberOfBits() * (1.0 / Long.SIZE));
- final long[] result = new long[n];
- final OfInt iter = hasher.iterator(hasher.getShape());
- iter.forEachRemaining((IntConsumer) idx -> {
- BloomFilterIndexer.checkPositive(idx);
- final int buffIdx = BloomFilterIndexer.getLongIndex(idx);
- final long buffOffset = BloomFilterIndexer.getLongBit(idx);
- result[buffIdx] |= buffOffset;
- });
-
- int limit = result.length;
-
- // Assume the array has a non-zero length and at least 1 bit set.
- // This is tested using assertions.
- assert limit > 0 : "Number of bits in Shape is 0";
- while (result[limit - 1] == 0) {
- limit--;
- // If the hasher was not empty it is not possible to return
- // an array of length zero.
- assert limit > 0 : "Hasher reported a non-zero size but has no indices";
- }
- if (limit < result.length) {
- return Arrays.copyOf(result, limit);
- }
- return result;
- }
-
- @Override
- public StaticHasher getHasher() {
- return hasher;
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- return merge(other.getHasher());
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- final IteratorChain iter = new IteratorChain<>(this.hasher.iterator(getShape()),
- hasher.iterator(getShape()));
- this.hasher = new StaticHasher(iter, getShape());
- return true;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java
deleted file mode 100644
index e4adb4fc66..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilters.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-
-import java.util.Objects;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.function.Consumer;
-import java.util.function.IntConsumer;
-
-/**
- * Contains functions to filter indexes.
- */
-final class IndexFilters {
- /** Do not instantiate. */
- private IndexFilters() {
- }
-
- /**
- * Transfer all distinct indexes in the specified {@code hasher} generated for the
- * specified {@code shape} to the specified {@code consumer}. For example this
- * can be used to merge a {@link Hasher} representation of a Bloom filter into a
- * {@link BloomFilter} instance that does not naturally handle duplicate indexes.
- *
- * This method is functionally equivalent to:
- *
- *
- * final Set<Integer> distinct = new TreeSet<>();
- * hasher.iterator(shape).forEachRemaining((Consumer<Integer>) i -> {
- * if (distinct.add(i)) {
- * consumer.accept(i);
- * }
- * });
- *
- *
- * @param hasher the hasher
- * @param shape the shape
- * @param consumer the consumer to receive distinct indexes
- * @throws NullPointerException if the hasher, shape or action are null
- * @see Hasher#iterator(Shape)
- */
- static void distinctIndexes(final Hasher hasher, final Shape shape, final IntConsumer consumer) {
- Objects.requireNonNull(hasher, "hasher");
- Objects.requireNonNull(shape, "shape");
- Objects.requireNonNull(consumer, "consumer");
-
- // TODO
- // This function can be optimised based on the expected size
- // (number of indexes) of the hasher and the number of bits in the shape.
- //
- // A large size would benefit from a pre-allocated BitSet-type filter.
- // A very small size may be more efficient as a simple array of values
- // that have already been seen that is scanned for each new index.
- //
- // A default is to use a Set to filter distinct values. The choice of set
- // should be evaluated. A HashSet would be optimal if size is known.
- // A TreeSet has lower memory consumption and performance is not as
- // sensitive to knowing the size in advance.
-
- final Set distinct = new TreeSet<>();
- hasher.iterator(shape).forEachRemaining((Consumer) i -> {
- if (distinct.add(i)) {
- consumer.accept(i);
- }
- });
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
new file mode 100644
index 0000000000..a0caace3e4
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
+
+/**
+ * An object that produces indices of a Bloom filter.
+ *
+ * @since 4.5
+ */
+public interface IndexProducer {
+
+    /**
+     * Passes each index to the consumer.
+     * Any exceptions thrown by the consumer are relayed to the caller.
+     *
+     * The order in which the indices are supplied is not guaranteed.
+     *
+     * @param consumer the action to be performed for each index.
+     * @throws NullPointerException if the specified consumer is null
+     */
+    void forEachIndex(IntConsumer consumer);
+
+    /**
+     * Creates an IndexProducer from a {@code BitMapProducer}. Each enabled bit of the n-th
+     * bitmap supplied by the producer (counting from 0) is reported as index {@code n * 64 + bit}.
+     * @param producer the {@code BitMapProducer}
+     * @return a new {@code IndexProducer}.
+     * @throws NullPointerException if the producer is null
+     */
+    static IndexProducer fromBitMapProducer(BitMapProducer producer) {
+        Objects.requireNonNull(producer, "producer");
+        return new IndexProducer() {
+            @Override
+            public void forEachIndex(IntConsumer consumer) {
+                // Fail fast; otherwise a null consumer only fails at the first enabled bit, if any.
+                Objects.requireNonNull(consumer, "consumer");
+                producer.forEachBitMap(new LongConsumer() {
+                    /** Index represented by bit 0 of the bitmap currently being processed. */
+                    private int offset;
+                    @Override
+                    public void accept(long word) {
+                        // Visit only the enabled bits, lowest first, clearing each once reported.
+                        for (long bits = word; bits != 0; bits &= bits - 1) {
+                            consumer.accept(offset + Long.numberOfTrailingZeros(bits));
+                        }
+                        offset += Long.SIZE;
+                    }
+                });
+            }
+        };
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
index 48c43620ad..d82548cd99 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
@@ -16,14 +16,123 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
+import java.util.function.LongBinaryOperator;
+import java.util.function.LongConsumer;
+import java.util.function.LongUnaryOperator;
/**
* Implementations of set operations on Bloom filters.
*
+ * @since 4.5
*/
public final class SetOperations {
+    /**
+     * A helper class that calculates the cardinality of the result of an operation on two BitMap arrays.
+     *
+     * The first array is built in the constructor. The second array is processed as a LongConsumer. Whenever both
+     * arrays have a BitMap at the same position the op2 operation is used. Whenever one array is longer than the
+     * other the op1 operation is used on the BitMaps that do not have matching entries.
+     *
+     * The bit counts of the operation results are summed to give the cardinality of the operation.
+     */
+    private static class CardCounter implements LongConsumer {
+        /** The accumulated cardinality. */
+        private int cardinality;
+        /** The index of the next entry of {@code bitMaps} that has not yet been processed. */
+        private int idx;
+        /** The BitMaps collected from the first producer. */
+        private final long[] bitMaps;
+        /** The operator to execute for 2 BitMaps. */
+        private final LongBinaryOperator op2;
+        /** The operator to execute for a single BitMap. */
+        private final LongUnaryOperator op1;
+
+        /**
+         * Constructor.
+         * @param producer The producer of the first array of BitMaps.
+         * @param shape The shape given to the array builder that collects the BitMaps of the producer.
+         * @param op2 The operation to execute when there are two BitMaps to compare.
+         * @param op1 The operation to execute when there is only one BitMap to compare.
+         */
+        CardCounter(BitMapProducer producer, Shape shape, LongBinaryOperator op2, LongUnaryOperator op1) {
+            // Collect the first producer's BitMaps up front so they can be matched by position.
+            BitMapProducer.ArrayBuilder builder = new BitMapProducer.ArrayBuilder(shape);
+            producer.forEachBitMap(builder);
+            this.bitMaps = builder.getArray();
+            this.op2 = op2;
+            this.op1 = op1;
+        }
+
+        /**
+         * Processes the next BitMap of the second array.
+         * @param bitMap The BitMap at the current position of the second array.
+         */
+        @Override
+        public void accept(long bitMap) {
+            if (idx < bitMaps.length) {
+                // Both arrays have a BitMap at this position.
+                cardinality += Long.bitCount(op2.applyAsLong(bitMaps[idx++], bitMap));
+            } else {
+                // The first array is exhausted; the BitMap has no matching entry.
+                cardinality += Long.bitCount(op1.applyAsLong(bitMap));
+            }
+        }
+
+        /**
+         * Gets the cardinality value. Entries of the first array that have not been matched by a call to
+         * {@link #accept(long)} are first counted using op1, so this should only be called after the
+         * second array has been fully processed.
+         * @return The accumulated cardinality.
+         */
+        int getCardinality() {
+            // The first array is longer than the second; count its unmatched tail.
+            for (; idx < bitMaps.length; idx++) {
+                cardinality += Long.bitCount(op1.applyAsLong(bitMaps[idx]));
+            }
+            return cardinality;
+        }
+    }
+
+    /**
+     * Counts the bits that are enabled in the logical {@code AND} of the BitMaps of the two producers.
+     * @param shape the shape used to collect the BitMaps of the first producer
+     * @param first the first BitMapProducer
+     * @param second the second BitMapProducer
+     * @return the cardinality of the {@code AND} of the BitMaps; unmatched BitMaps contribute 0.
+     */
+    public static int andCardinality(final Shape shape, final BitMapProducer first, final BitMapProducer second) {
+        final CardCounter counter = new CardCounter(first, shape, (a, b) -> a & b, a -> 0L);
+        second.forEachBitMap(counter);
+        return counter.getCardinality();
+    }
+
+    /**
+     * Counts the bits that are enabled in the logical {@code OR} of the BitMaps of the two producers.
+     * @param shape the shape used to collect the BitMaps of the first producer
+     * @param first the first BitMapProducer
+     * @param second the second BitMapProducer
+     * @return the cardinality of the {@code OR} of the BitMaps; unmatched BitMaps are counted unchanged.
+     */
+    public static int orCardinality(final Shape shape, final BitMapProducer first, final BitMapProducer second) {
+        final CardCounter counter = new CardCounter(first, shape, (a, b) -> a | b, LongUnaryOperator.identity());
+        second.forEachBitMap(counter);
+        return counter.getCardinality();
+    }
+
+    /**
+     * Counts the bits that are enabled in the logical {@code XOR} of the BitMaps of the two producers.
+     * @param shape the shape used to collect the BitMaps of the first producer
+     * @param first the first BitMapProducer
+     * @param second the second BitMapProducer
+     * @return the cardinality of the {@code XOR} of the BitMaps; unmatched BitMaps are counted unchanged.
+     */
+    public static int xorCardinality(final Shape shape, final BitMapProducer first, final BitMapProducer second) {
+        final CardCounter counter = new CardCounter(first, shape, (a, b) -> a ^ b, LongUnaryOperator.identity());
+        second.forEachBitMap(counter);
+        return counter.getCardinality();
+    }
+
/**
* Calculates the Cosine distance between two Bloom filters.
*
@@ -49,57 +158,10 @@ public static double cosineDistance(final BloomFilter first, final BloomFilter s
* @return the Cosine similarity.
*/
public static double cosineSimilarity(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final int numerator = first.andCardinality(second);
+ final int numerator = andCardinality(first.getShape(), first, second);
return numerator == 0 ? 0 : numerator / (Math.sqrt(first.cardinality()) * Math.sqrt(second.cardinality()));
}
- /**
- * Estimates the number of items in the intersection of the sets represented by two
- * Bloom filters.
- *
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
- * @return an estimate of the size of the intersection between the two filters.
- */
- public static long estimateIntersectionSize(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- // do subtraction early to avoid Long overflow.
- return estimateSize(first) - estimateUnionSize(first, second) + estimateSize(second);
- }
-
- /**
- * Estimates the number of items in the Bloom filter based on the shape and the number
- * of bits that are enabled.
- *
- * @param filter the Bloom filter to estimate size for.
- * @return an estimate of the number of items that were placed in the Bloom filter.
- */
- public static long estimateSize(final BloomFilter filter) {
- final Shape shape = filter.getShape();
- final double estimate = -(shape.getNumberOfBits() *
- Math.log(1.0 - filter.cardinality() * 1.0 / shape.getNumberOfBits())) /
- shape.getNumberOfHashFunctions();
- return Math.round(estimate);
- }
-
- /**
- * Estimates the number of items in the union of the sets represented by two
- * Bloom filters.
- *
- * @param first the first Bloom filter.
- * @param second the second Bloom filter.
- * @return an estimate of the size of the union between the two filters.
- */
- public static long estimateUnionSize(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final Shape shape = first.getShape();
- final double estimate = -(shape.getNumberOfBits() *
- Math.log(1.0 - first.orCardinality(second) * 1.0 / shape.getNumberOfBits())) /
- shape.getNumberOfHashFunctions();
- return Math.round(estimate);
- }
-
/**
* Calculates the Hamming distance between two Bloom filters.
*
@@ -108,8 +170,7 @@ public static long estimateUnionSize(final BloomFilter first, final BloomFilter
* @return the Hamming distance.
*/
public static int hammingDistance(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- return first.xorCardinality(second);
+ return xorCardinality(first.getShape(), first, second);
}
/**
@@ -135,28 +196,14 @@ public static double jaccardDistance(final BloomFilter first, final BloomFilter
* @return the Jaccard similarity.
*/
public static double jaccardSimilarity(final BloomFilter first, final BloomFilter second) {
- verifyShape(first, second);
- final int orCard = first.orCardinality(second);
+ final int orCard = orCardinality(first.getShape(), first, second);
// if the orCard is zero then the hamming distance will also be zero.
return orCard == 0 ? 0 : hammingDistance(first, second) / (double) orCard;
}
- /**
- * Verifies the Bloom filters have the same shape.
- *
- * @param first the first filter to check.
- * @param second the second filter to check.
- * @throws IllegalArgumentException if the shapes are not the same.
- */
- private static void verifyShape(final BloomFilter first, final BloomFilter second) {
- if (!first.getShape().equals(second.getShape())) {
- throw new IllegalArgumentException(String.format("Shape %s is not the same as %s",
- first.getShape(), second.getShape()));
- }
- }
-
/**
* Do not instantiate.
*/
- private SetOperations() {}
+ private SetOperations() {
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java
new file mode 100644
index 0000000000..fcaa971da8
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java
@@ -0,0 +1,467 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+
+/**
+ * The definition of a Bloom filter shape.
+ *
+ * This class contains the values for the filter configuration and is used to
+ * convert a Hasher into a BloomFilter as well as verify that two Bloom filters are
+ * compatible. (i.e. can be compared or merged)
+ *
+ * <h2>Interrelatedness of values</h2>
+ *
+ * <dl>
+ * <dt>Number of Items ({@code n})</dt>
+ * <dd>{@code n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))}</dd>
+ * <dt>Probability of False Positives ({@code p})</dt>
+ * <dd>{@code p = pow(1 - exp(-k / (m / n)), k)}</dd>
+ * <dt>Number of Bits ({@code m})</dt>
+ * <dd>{@code m = ceil((n * ln(p)) / ln(1 / pow(2, ln(2))))}</dd>
+ * <dt>Number of Functions ({@code k})</dt>
+ * <dd>{@code k = round((m / n) * ln(2))}</dd>
+ * </dl>
+ *
+ * @see Bloom Filter calculator
+ * @see Bloom filter
+ * [Wikipedia]
+ * @since 4.5
+ */
+public final class Shape implements Comparable {
+
+ /**
+ * Number of hash functions to create a filter ({@code k}).
+ */
+ private final int numberOfHashFunctions;
+
+ /**
+ * Number of bits in the filter ({@code m}).
+ */
+ private final int numberOfBits;
+
+ /**
+ * Constructs a filter configuration with the specified number of hash functions
+ * ({@code k}) and bits ({@code m}).
+ *
+ * <p>Both values are taken as given: nothing is computed from a number of items and no
+ * false-positive probability is checked. The only validation is that each argument is
+ * strictly positive.
+ *
+ * @param numberOfHashFunctions Number of hash functions to use for each item placed in the filter.
+ * @param numberOfBits The number of bits in the filter
+ * @throws IllegalArgumentException if {@code numberOfHashFunctions < 1} or {@code numberOfBits < 1}
+ */
+ public Shape(final int numberOfHashFunctions, final int numberOfBits) {
+ // Bits are validated before hash functions, so when both arguments are invalid
+ // the exception that surfaces is the one about the number of bits.
+ this.numberOfBits = checkNumberOfBits(numberOfBits);
+ this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
+ }
+
+ /**
+ * Check number of bits is strictly positive.
+ *
+ * @param numberOfBits the number of bits
+ * @return the number of bits
+ * @throws IllegalArgumentException if the number of bits is {@code < 1}
+ */
+ private static int checkNumberOfBits(final int numberOfBits) {
+     // Accept the strictly positive case first; anything else is rejected below.
+     if (numberOfBits >= 1) {
+         return numberOfBits;
+     }
+     throw new IllegalArgumentException("Number of bits must be greater than 0: " + numberOfBits);
+ }
+
+ /**
+ * Check number of hash functions is strictly positive
+ *
+ * @param numberOfHashFunctions the number of hash functions
+ * @return the number of hash functions
+ * @throws IllegalArgumentException if the number of hash functions is {@code < 1}
+ */
+ private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
+     // Accept the strictly positive case first; anything else is rejected below.
+     if (numberOfHashFunctions >= 1) {
+         return numberOfHashFunctions;
+     }
+     throw new IllegalArgumentException(
+         "Number of hash functions must be greater than 0: " + numberOfHashFunctions);
+ }
+
+ @Override
+ public int compareTo(Shape other) {
+     // Order by number of bits first; the hash-function count only breaks ties.
+     final int byBits = Integer.compare(numberOfBits, other.numberOfBits);
+     if (byBits != 0) {
+         return byBits;
+     }
+     return Integer.compare(numberOfHashFunctions, other.numberOfHashFunctions);
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+     // Anything that is not a Shape (including null) is never equal.
+     if (!(o instanceof Shape)) {
+         return false;
+     }
+     // Equality is defined as "compares equal", which keeps equals consistent with compareTo.
+     return compareTo((Shape) o) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ // Combines exactly the two fields that compareTo (and therefore equals) inspects.
+ return Objects.hash(numberOfBits, numberOfHashFunctions);
+ }
+
+ /**
+ * Gets the number of bits in the Bloom filter.
+ * This is also known as {@code m}.
+ *
+ * @return the number of bits in the Bloom filter ({@code m}).
+ */
+ public int getNumberOfBits() {
+     // m: fixed at construction time
+     return this.numberOfBits;
+ }
+
+ /**
+ * Gets the number of hash functions used to construct the filter.
+ * This is also known as {@code k}.
+ *
+ * @return the number of hash functions used to construct the filter ({@code k}).
+ */
+ public int getNumberOfHashFunctions() {
+     // k: fixed at construction time
+     return this.numberOfHashFunctions;
+ }
+
+ /**
+ * Calculates the probability of false positives ({@code p}) given
+ * numberOfItems ({@code n}), numberOfBits ({@code m}) and numberOfHashFunctions ({@code k}).
+ *
+ * <pre>p = pow(1 - exp(-k / (m / n)), k)</pre>
+ *
+ * This is the probability that a Bloom filter will return true for the presence of an item
+ * when it does not contain the item.
+ *
+ * The probability assumes that the Bloom filter is filled with the expected number of
+ * items. If the filter contains fewer items then the actual probability will be lower.
+ * Thus, this returns the worst-case false positive probability for a filter that has not
+ * exceeded its expected number of items.
+ *
+ * @param numberOfItems the number of items hashed into the Bloom filter.
+ * @return the probability of false positives.
+ */
+ public double getProbability(int numberOfItems) {
+     if (numberOfItems < 0) {
+         throw new IllegalArgumentException("Number of items must be greater than or equal to 0: " + numberOfItems);
+     }
+     if (numberOfItems > 0) {
+         // p = (1 - e^(-k * n / m))^k, evaluated entirely in double arithmetic
+         final double exponent = -1.0 * numberOfHashFunctions * numberOfItems / numberOfBits;
+         return Math.pow(1.0 - Math.exp(exponent), numberOfHashFunctions);
+     }
+     // A filter holding no items reports a probability of zero.
+     return 0;
+ }
+
+ @Override
+ public String toString() {
+ // m is the number of bits and k the number of hash functions.
+ return String.format("Shape[ m=%s k=%s ]", numberOfBits, numberOfHashFunctions);
+ }
+
+ /**
+ * Estimate the number of items in a Bloom filter with this shape and the specified number of bits enabled.
+ *
+ * Note:
+ *
+ * - if hammingValue == numberOfBits, then result is infinity.
+ * - if hammingValue > numberOfBits, then result is NaN.
+ *
+ *
+ * @param hammingValue the number of enabled bits.
+ * @return An estimate of the number of items in the Bloom filter.
+ */
+ public double estimateN(int hammingValue) {
+     // n ~ -(m / k) * ln(1 - c / m), where c is the count of enabled bits
+     final double bits = numberOfBits;
+     final double fillRatio = hammingValue / bits;
+     return -(bits / numberOfHashFunctions) * Math.log(1.0 - fillRatio);
+ }
+
+ /**
+ * The factory to assist in the creation of proper Shapes.
+ *
+ * In the methods of this factory the `from` names are appended with the standard variable
+ * names in the order expected:
+ *
+ * <dl>
+ * <dt>{@code N}</dt><dd>The number of items to be placed in the Bloom filter</dd>
+ * <dt>{@code M}</dt><dd>The number of bits in the Bloom filter</dd>
+ * <dt>{@code K}</dt><dd>The number of hash functions for each item placed in the Bloom filter</dd>
+ * <dt>{@code P}</dt><dd>The probability of a collision once N items have been placed in the Bloom filter</dd>
+ * </dl>
+ */
+ public static class Factory {
+
+ /**
+ * The natural logarithm of 2. Used in several calculations. Approximately 0.693147180559945.
+ */
+ private static final double LN_2 = Math.log(2.0);
+
+ /**
+ * ln(1 / 2^ln(2)). Used in calculating the number of bits. Approximately -0.480453013918201.
+ *
+ * ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)
+ */
+ private static final double DENOMINATOR = -LN_2 * LN_2;
+
+ /**
+ * Do not instantiate.
+ */
+ private Factory() {
+
+ }
+
+ /**
+ * Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
+ * specified number of bits ({@code m}) and hash functions ({@code k}).
+ *
+ *
+ * <p>The number of items ({@code n}) to be stored in the filter is computed.
+ * <pre>n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))</pre>
+ *
+ * The actual probability will be approximately equal to the
+ * desired probability but will be dependent upon the calculated Bloom filter capacity
+ * (number of items). An exception is raised if this is greater than or equal to 1 (i.e. the
+ * shape is invalid for use as a Bloom filter).
+ *
+ * @param probability The desired false-positive probability in the range {@code (0, 1)}
+ * @param numberOfBits The number of bits in the filter
+ * @param numberOfHashFunctions The number of hash functions in the filter
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)},
+ * {@code numberOfBits < 1}, {@code numberOfHashFunctions < 1}, or the actual
+ * probability is {@code >= 1.0}
+ */
+ public static Shape fromPMK(final double probability, final int numberOfBits, final int numberOfHashFunctions) {
+ checkProbability(probability);
+ checkNumberOfBits(numberOfBits);
+ checkNumberOfHashFunctions(numberOfHashFunctions);
+
+ // Number of items (n):
+ // n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
+ final double n = Math.ceil(numberOfBits
+ / (-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
+
+ // Range analysis for p in (0, 1), k >= 1, m >= 1:
+ // ln(p) < 0, so exp(ln(p) / k) lies in [0, 1] after rounding
+ // ln(1 - exp(...)) is therefore <= 0; it is -Infinity when the exp rounds to 1.0
+ // and 0 when 1 - exp(...) rounds to 1.0 (very small p)
+ // m / (-k / ln(...)) is consequently never below zero (it may be zero or +Infinity)
+ // so n is never negative and no check for a negative value is made.
+ //
+ // NOTE(review): n is not bounded by numberOfBits (k = 1, p = 0.9 gives roughly
+ // 2.3 * m). The (int) cast below is still safe because a narrowing conversion
+ // from double saturates at Integer.MAX_VALUE instead of wrapping.
+
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
+ // check that the probability for the computed n is below 1
+ checkCalculatedProbability(shape.getProbability((int) n));
+ return shape;
+ }
+
+ /**
+ * Constructs a filter configuration with the specified number of items ({@code n}) and
+ * desired false-positive probability ({@code p}).
+ *
+ *
+ * <p>The number of bits ({@code m}) for the filter is computed.
+ * <pre>m = ceil(n * ln(p) / ln(1 / 2^ln(2)))</pre>
+ *
+ * <p>The optimal number of hash functions ({@code k}) is computed.
+ * <pre>k = round((m / n) * ln(2))</pre>
+ *
+ * The actual probability will be approximately equal to the
+ * desired probability but will be dependent upon the calculated number of bits and hash
+ * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
+ * shape is invalid for use as a Bloom filter).
+ *
+ * @param numberOfItems Number of items to be placed in the filter
+ * @param probability The desired false-positive probability in the range {@code (0, 1)}
+ * @return a valid Shape
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, if the desired probability
+ * is not in the range {@code (0, 1)} or if the actual probability is {@code >= 1.0}.
+ */
+ public static Shape fromNP(final int numberOfItems, final double probability) {
+     checkNumberOfItems(numberOfItems);
+     checkProbability(probability);
+
+     // m = ceil(n * ln(p) / ln(1 / 2^ln(2))), kept as a double so that a result
+     // beyond the int range can be detected before narrowing
+     final double bits = Math.ceil(numberOfItems * Math.log(probability) / DENOMINATOR);
+     if (bits > Integer.MAX_VALUE) {
+         throw new IllegalArgumentException(
+             "Resulting filter has more than " + Integer.MAX_VALUE + " bits: " + bits);
+     }
+
+     final int m = (int) bits;
+     final Shape result = new Shape(calculateNumberOfHashFunctions(numberOfItems, m), m);
+     // reject a shape whose false-positive probability reaches 1
+     checkCalculatedProbability(result.getProbability(numberOfItems));
+     return result;
+ }
+
+ /**
+ * Constructs a filter configuration with the specified number of items ({@code n}) and
+ * bits ({@code m}).
+ *
+ *
+ * <p>The optimal number of hash functions ({@code k}) is computed.
+ * <pre>k = round((m / n) * ln(2))</pre>
+ *
+ * The false-positive probability is computed using the number of items, bits and hash
+ * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
+ * shape is invalid for use as a Bloom filter).
+ *
+ * @param numberOfItems Number of items to be placed in the filter
+ * @param numberOfBits The number of bits in the filter
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, {@code numberOfBits < 1},
+ * the calculated number of hash function is {@code < 1}, or if the actual probability is {@code >= 1.0}
+ */
+ public static Shape fromNM(final int numberOfItems, final int numberOfBits) {
+     checkNumberOfItems(numberOfItems);
+     checkNumberOfBits(numberOfBits);
+     // k is derived from n and m; the helper throws when no k >= 1 fits the filter
+     final int k = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
+     final Shape candidate = new Shape(k, numberOfBits);
+     // reject a shape whose false-positive probability reaches 1
+     checkCalculatedProbability(candidate.getProbability(numberOfItems));
+     return candidate;
+ }
+
+ /**
+ * Constructs a filter configuration with the specified number of items, bits
+ * and hash functions.
+ *
+ *
+ * <p>The false-positive probability is computed using the number of items, bits and hash
+ * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
+ * shape is invalid for use as a Bloom filter).
+ *
+ * @param numberOfItems Number of items to be placed in the filter
+ * @param numberOfBits The number of bits in the filter.
+ * @param numberOfHashFunctions The number of hash functions in the filter
+ * @return a valid Shape.
+ * @throws IllegalArgumentException if {@code numberOfItems < 1}, {@code numberOfBits < 1},
+ * {@code numberOfHashFunctions < 1}, or if the actual probability is {@code >= 1.0}.
+ */
+ public static Shape fromNMK(final int numberOfItems, final int numberOfBits, final int numberOfHashFunctions) {
+ // validate every argument before a Shape is built
+ checkNumberOfItems(numberOfItems);
+ checkNumberOfBits(numberOfBits);
+ checkNumberOfHashFunctions(numberOfHashFunctions);
+ Shape shape = new Shape(numberOfHashFunctions, numberOfBits);
+ // check that probability is within range
+ checkCalculatedProbability(shape.getProbability(numberOfItems));
+ return shape;
+ }
+
+ /**
+ * Check number of items is strictly positive.
+ *
+ * @param numberOfItems the number of items
+ * @return the number of items
+ * @throws IllegalArgumentException if the number of items is {@code < 1}.
+ */
+ private static int checkNumberOfItems(final int numberOfItems) {
+     // Accept the strictly positive case first; anything else is rejected below.
+     if (numberOfItems >= 1) {
+         return numberOfItems;
+     }
+     throw new IllegalArgumentException("Number of items must be greater than 0: " + numberOfItems);
+ }
+
+ /**
+ * Check number of bits is strictly positive.
+ *
+ * @param numberOfBits the number of bits
+ * @return the number of bits
+ * @throws IllegalArgumentException if the number of bits is {@code < 1}.
+ */
+ private static int checkNumberOfBits(final int numberOfBits) {
+     // Delegate to the enclosing class so the rule and its message exist in one place.
+     // The qualifier is required: an unqualified call would resolve to this method.
+     return Shape.checkNumberOfBits(numberOfBits);
+ }
+
+ /**
+ * Check number of hash functions is strictly positive
+ *
+ * @param numberOfHashFunctions the number of hash functions
+ * @return the number of hash functions
+ * @throws IllegalArgumentException if the number of hash functions is {@code < 1}.
+ */
+ private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
+     // Delegate to the enclosing class so the rule and its message exist in one place.
+     // The qualifier is required: an unqualified call would resolve to this method.
+     return Shape.checkNumberOfHashFunctions(numberOfHashFunctions);
+ }
+
+ /**
+ * Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
+ *
+ * @param probability the probability
+ * @throws IllegalArgumentException if the probability is not in the range {@code (0, 1)}
+ */
+ private static void checkProbability(final double probability) {
+     // Test for "inside the range" and negate it: every comparison involving NaN is
+     // false, so NaN is rejected along with out-of-range values.
+     final boolean insideOpenUnitInterval = probability > 0.0 && probability < 1.0;
+     if (!insideOpenUnitInterval) {
+         throw new IllegalArgumentException(
+             "Probability must be greater than 0 and less than 1: " + probability);
+     }
+ }
+
+ /**
+ * Check the calculated probability is {@code < 1.0}.
+ *
+ *
+ * <p>This function is used to verify that the dynamically calculated probability for the
+ * Shape is in the valid range 0 to 1 exclusive. This need only be performed once upon
+ * construction.
+ *
+ * @param probability the probability
+ * @throws IllegalArgumentException if the probability is {@code >= 1.0}.
+ */
+ private static void checkCalculatedProbability(final double probability) {
+     // No check for p <= 0.0 is made: only positive parameters are allowed, and the
+     // closest exp(-kn/m) can come to 1 is exp(-1/Integer.MAX_VALUE), approximately
+     // 0.9999999995343387, so the base handed to Math.pow is strictly positive.
+     if (probability >= 1.0) {
+         // Plain concatenation: the message carries no format specifiers, so passing it
+         // through String.format only treated the finished text as a pattern.
+         throw new IllegalArgumentException(
+             "Calculated probability is greater than or equal to 1: " + probability);
+     }
+ }
+
+ /**
+ * Calculates the number of hash functions given numberOfItems and numberofBits.
+ * This is a method so that the calculation is consistent across all constructors.
+ *
+ * @param numberOfItems the number of items in the filter.
+ * @param numberOfBits the number of bits in the filter.
+ * @return the optimal number of hash functions.
+ * @throws IllegalArgumentException if the calculated number of hash function is {@code < 1}
+ */
+ private static int calculateNumberOfHashFunctions(final int numberOfItems, final int numberOfBits) {
+ // k = round((m / n) * ln(2)) We change order so that we use real math rather
+ // than integer math.
+ final long k = Math.round(LN_2 * numberOfBits / numberOfItems);
+ if (k < 1) {
+ throw new IllegalArgumentException(
+ String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
+ }
+ // No upper-bound check is made before narrowing to int: the callers in this class
+ // validate numberOfItems >= 1, and numberOfBits is at most Integer.MAX_VALUE, so
+ // k is at most ln(2) * Integer.MAX_VALUE (about 1.49e9), which is below
+ // Integer.MAX_VALUE.
+ return (int) k;
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
new file mode 100644
index 0000000000..0cb733e0bf
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Objects;
+import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
+
+import org.apache.commons.collections4.bloomfilter.exceptions.NoMatchException;
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
+
+/**
+ * A bloom filter using an array of BitMaps to track enabled bits. This is a standard
+ * implementation and should work well for most Bloom filters.
+ * @since 4.5
+ */
+public class SimpleBloomFilter implements BloomFilter {
+
+ /**
+ * The array of BitMap longs that defines this Bloom filter.
+ */
+ private long[] bitMap;
+
+ /**
+ * The Shape of this Bloom filter
+ */
+ private final Shape shape;
+
+ /**
+ * The cardinality of this Bloom filter.
+ */
+ private int cardinality;
+
+ /**
+ * Constructs an empty SimpleBloomFilter.
+ *
+ * @param shape The shape for the filter.
+ */
+ public SimpleBloomFilter(Shape shape) {
+     this.shape = Objects.requireNonNull(shape, "shape");
+     // no storage is allocated up front; an empty filter has zero longs and zero bits set
+     this.bitMap = new long[0];
+     this.cardinality = 0;
+ }
+
+ /**
+ * Constructor.
+ * @param shape The shape for the filter.
+ * @param hasher the Hasher to initialize the filter with.
+ */
+ public SimpleBloomFilter(final Shape shape, Hasher hasher) {
+ Objects.requireNonNull(shape, "shape");
+ Objects.requireNonNull(hasher, "hasher");
+ this.shape = shape;
+ // start with no storage; mergeInPlace grows the array as indices arrive
+ this.bitMap = new long[0];
+ // mergeInPlace leaves cardinality at -1, so the count is computed on first request
+ // NOTE(review): mergeInPlace is overridable and is invoked from a constructor here.
+ mergeInPlace(hasher);
+ }
+
+ /**
+ * Constructor.
+ * @param shape The shape for the filter.
+ * @param producer the BitMap Producer to initialize the filter with.
+ * @throws IllegalArgumentException if the producer returns too many bit maps.
+ */
+ public SimpleBloomFilter(final Shape shape, BitMapProducer producer) {
+ Objects.requireNonNull(shape, "shape");
+ Objects.requireNonNull(producer, "producer");
+ this.shape = shape;
+
+ // collect the producer's longs into an array through the builder
+ BitMapProducer.ArrayBuilder builder = new BitMapProducer.ArrayBuilder(shape);
+ try {
+ producer.forEachBitMap(builder);
+ this.bitMap = builder.getArray();
+ } catch (IndexOutOfBoundsException e) {
+ // presumably raised by the builder when more longs arrive than the shape allows -- TODO confirm
+ throw new IllegalArgumentException( String.format("BitMapProducer should only send %s maps",
+ BitMap.numberOfBitMaps( shape.getNumberOfBits())), e);
+ }
+ // -1 marks the count as not yet computed; cardinality() fills it in on demand
+ this.cardinality = -1;
+ }
+
+ @Override
+ public boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+
+ // enable every index the hasher yields for this shape
+ hasher.indices(shape).forEachIndex(idx -> {
+ int lidx = BitMap.getLongIndex(idx);
+ // grow the backing array just far enough to reach the long that holds idx
+ if (bitMap.length <= lidx) {
+ long[] newMap = new long[lidx + 1];
+ System.arraycopy(bitMap, 0, newMap, 0, bitMap.length);
+ bitMap = newMap;
+ }
+ BitMap.set(bitMap, idx);
+ });
+ // invalidate the cached count; cardinality() recomputes it when next asked
+ this.cardinality = -1;
+ return true;
+ }
+
+ @Override
+ public boolean mergeInPlace(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ // seed the builder with the current longs, then feed it the other filter's longs
+ // presumably the builder ORs each incoming long into the seeded array -- TODO confirm
+ BitMapProducer.ArrayBuilder builder = new BitMapProducer.ArrayBuilder(shape, this.bitMap);
+ other.forEachBitMap(builder);
+ this.bitMap = builder.getArray();
+ // invalidate the cached count; cardinality() recomputes it when next asked
+ this.cardinality = -1;
+ return true;
+ }
+
+ @Override
+ public Shape getShape() {
+     // the shape is fixed at construction time
+     return this.shape;
+ }
+
+ @Override
+ public boolean isSparse() {
+ // bits are held in an array of longs rather than as a collection of indices
+ return false;
+ }
+
+ @Override
+ public int cardinality() {
+     // -1 marks the cached count as stale; the merge methods and the BitMapProducer
+     // constructor set it.
+     if (this.cardinality == -1) {
+         synchronized (this) {
+             if (this.cardinality == -1) {
+                 // Sum into a local holder and publish the result in one write.
+                 // Resetting the field to 0 and accumulating into it let a caller on the
+                 // unsynchronized fast path above observe a partial count.
+                 final int[] total = new int[1];
+                 forEachBitMap(w -> total[0] += Long.bitCount(w));
+                 this.cardinality = total[0];
+             }
+         }
+     }
+     // NOTE(review): the field is not volatile and the merge methods are not
+     // synchronized, so this class is still not safe for concurrent mutation.
+     return this.cardinality;
+ }
+
+ @Override
+ public void forEachIndex(IntConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ // delegate to an adapter built on this filter's forEachBitMap output
+ // presumably it reports the position of every set bit -- TODO confirm
+ IndexProducer.fromBitMapProducer(this).forEachIndex(consumer);
+ }
+
+ @Override
+ public void forEachBitMap(LongConsumer consumer) {
+     Objects.requireNonNull(consumer, "consumer");
+     // Snapshot the array reference once, then emit the longs allocated so far in
+     // ascending position order.
+     final long[] words = bitMap;
+     for (int position = 0; position < words.length; position++) {
+         consumer.accept(words[position]);
+     }
+ }
+
+ @Override
+ public boolean contains(IndexProducer indexProducer) {
+ // NoMatchException is thrown from inside the callback to stop the iteration at the
+ // first index that is not enabled in this filter.
+ try {
+ indexProducer.forEachIndex(idx -> {
+ // NOTE(review): bitMap is grown lazily; confirm BitMap.contains tolerates an
+ // idx whose long lies beyond the current array length.
+ if (!BitMap.contains(bitMap, idx)) {
+ throw new NoMatchException();
+ }
+ });
+ return true;
+ } catch (NoMatchException e) {
+ return false;
+ }
+ }
+
+ @Override
+ public boolean contains(BitMapProducer bitMapProducer) {
+     LongConsumer consumer = new LongConsumer() {
+         // position of the next long expected from the producer
+         int i = 0;
+
+         @Override
+         public void accept(long w) {
+             // bitMap is grown lazily and may be shorter than the producer's output (it
+             // is empty for a new filter). Longs past its end hold no bits, so read them
+             // as zero instead of indexing out of bounds.
+             final long mine = i < bitMap.length ? bitMap[i] : 0L;
+             i++;
+             if ((mine & w) != w) {
+                 // a bit set in w is missing here: stop the iteration early
+                 throw new NoMatchException();
+             }
+         }
+     };
+     try {
+         bitMapProducer.forEachBitMap(consumer);
+         return true;
+     } catch (NoMatchException e) {
+         return false;
+     }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
new file mode 100644
index 0000000000..92ea2a2be2
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.TreeSet;
+import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
+
+import org.apache.commons.collections4.bloomfilter.exceptions.NoMatchException;
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
+
+/**
+ * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
+ * implementation and should work well for most low cardinality Bloom filters.
+ * @since 4.5
+ */
+public class SparseBloomFilter implements BloomFilter {
+
+ /**
+ * The sorted set of enabled bit indices that defines this BloomFilter.
+ */
+ private final TreeSet indices;
+
+ /**
+ * The shape of this BloomFilter
+ */
+ private final Shape shape;
+
+ /**
+ * Constructs an empty SparseBloomFilter.
+ *
+ * @param shape The shape of the filter.
+ */
+ public SparseBloomFilter(Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ this.shape = shape;
+ // a TreeSet keeps the enabled indices sorted and free of duplicates
+ this.indices = new TreeSet();
+ }
+
+ /**
+ * Constructs a populated Bloom filter.
+ * @param shape the shape for the bloom filter.
+ * @param hasher the hasher to provide the initial data.
+ */
+ public SparseBloomFilter(final Shape shape, Hasher hasher) {
+ // the delegated constructor rejects a null shape and creates the empty index set
+ this(shape);
+ Objects.requireNonNull(hasher, "hasher");
+ // record every index the hasher yields for this shape
+ hasher.indices(shape).forEachIndex(this.indices::add);
+ }
+
+ /**
+ * Constructs a populated Bloom filter.
+ * @param shape the shape of the filter.
+ * @param indices a list of indices to enable.
+ * @throws IllegalArgumentException if indices contains a value that is negative, or
+ * greater than or equal to the number of bits in the shape.
+ */
+ public SparseBloomFilter(Shape shape, List indices) {
+     this(shape);
+     Objects.requireNonNull(indices, "indices");
+     this.indices.addAll(indices);
+     // The set is sorted, so only its two extremes need a range check.
+     if (!this.indices.isEmpty()) {
+         if (this.indices.last() >= shape.getNumberOfBits()) {
+             // String.format substitutes %s, not the {} placeholders used by logging
+             // frameworks; with {} the offending values never reached the message.
+             // The largest valid index is numberOfBits - 1.
+             throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
+                 this.indices.last(), shape.getNumberOfBits() - 1));
+         }
+         if (this.indices.first() < 0) {
+             throw new IllegalArgumentException(String.format("Value in list %s is less than 0",
+                 this.indices.first()));
+         }
+     }
+ }
+
+ @Override
+ public boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ // add every index the hasher yields for this shape; the set ignores duplicates
+ hasher.indices(shape).forEachIndex(this.indices::add);
+ return true;
+ }
+
+ @Override
+ public boolean mergeInPlace(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ // copy every enabled index of the other filter into this one
+ // NOTE(review): the incoming indices are not checked against this filter's shape.
+ other.forEachIndex(indices::add);
+ return true;
+ }
+
+ @Override
+ public Shape getShape() {
+     // the shape is fixed at construction time
+     return this.shape;
+ }
+
+ @Override
+ public boolean isSparse() {
+ // bits are held as a set of indices rather than as an array of longs
+ return true;
+ }
+
+ @Override
+ public int cardinality() {
+ // each entry in the set is one enabled bit, so the set size is the cardinality
+ return indices.size();
+ }
+
+ @Override
+ public void forEachIndex(IntConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ // the TreeSet iterates in ascending order, so indices are reported smallest first
+ for (int value : indices) {
+ consumer.accept(value);
+ }
+ }
+
+ @Override
+ public void forEachBitMap(LongConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ // an empty filter emits no longs at all
+ if (cardinality() == 0) {
+ return;
+ }
+ // because our indices are always in order we can
+ // shorten the time necessary to create the longs for the
+ // consumer
+ long bitMap = 0;
+ int idx = 0;
+ for (int i : indices) {
+ // emit the long being built (and an all-zero long for each gap) until idx
+ // reaches the long that holds index i
+ while (BitMap.getLongIndex(i) != idx) {
+ consumer.accept(bitMap);
+ bitMap = 0;
+ idx++;
+ }
+ bitMap |= BitMap.getLongBit(i);
+ }
+ // emit the last long under construction; no longs are emitted past the one
+ // holding the highest index
+ if (bitMap != 0) {
+ consumer.accept(bitMap);
+ }
+ }
+
+ @Override
+ public boolean contains(IndexProducer indexProducer) {
+ // NoMatchException is thrown from inside the callback to stop the iteration at the
+ // first index that is not present in this filter.
+ try {
+ indexProducer.forEachIndex(idx -> {
+ if (!indices.contains(idx)) {
+ throw new NoMatchException();
+ }
+ });
+ return true;
+ } catch (NoMatchException e) {
+ return false;
+ }
+ }
+
+ @Override
+ public boolean contains(BitMapProducer bitMapProducer) {
+ // adapt the longs to an index producer and reuse the index-based check
+ return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
+ }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java b/src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/NoMatchException.java
similarity index 55%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
rename to src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/NoMatchException.java
index 9a2078d80c..b0efff37f4 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitSetBloomFilterTest.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/NoMatchException.java
@@ -14,24 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
+package org.apache.commons.collections4.bloomfilter.exceptions;
/**
- * Tests for the {@link BitSetBloomFilter}.
+ * An exception to short circuit Bloom filter match functionality using producers.
+ *
+ * @since 4.5
*/
-public class BitSetBloomFilterTest extends AbstractBloomFilterTest {
- @Override
- protected BitSetBloomFilter createEmptyFilter(final Shape shape) {
- return new BitSetBloomFilter(shape);
- }
+public class NoMatchException extends RuntimeException {
+
+ /**
+ * Serialization version identifier.
+ */
+ private static final long serialVersionUID = 1L;
- @Override
- protected BitSetBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- final BitSetBloomFilter testFilter = new BitSetBloomFilter(shape);
- testFilter.merge( hasher );
- return testFilter;
+ /**
+ * Constructor.
+ */
+ public NoMatchException() {
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/package-info.java
similarity index 77%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java
rename to src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/package-info.java
index 95951ad7fe..4c00ea13e4 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/exceptions/package-info.java
@@ -14,11 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
/**
- * Provides implementations of the Bloom filter
- * {@link org.apache.commons.collections4.bloomfilter.hasher.HashFunction HashFunction} interface.
- *
- * @since 4.5
+ * Exceptions specific to Bloom filter processing.
*/
-package org.apache.commons.collections4.bloomfilter.hasher.function;
+package org.apache.commons.collections4.bloomfilter.exceptions;
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
deleted file mode 100644
index ab6b773d6c..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator;
-
-/**
- * The class that performs hashing on demand.
- * @since 4.5
- */
-public class DynamicHasher implements Hasher {
-
- /**
- * The builder for DynamicHashers.
- * @since 4.5
- */
- public static class Builder implements Hasher.Builder {
-
- /**
- * The list of items (each as a byte[]) that are to be hashed.
- */
- private final List buffers;
-
- /**
- * The function that the resulting DynamicHasher will use.
- */
- private final HashFunction function;
-
- /**
- * Constructs a DynamicHasher builder.
- *
- * @param function the function implementation.
- */
- public Builder(final HashFunction function) {
- this.function = function;
- this.buffers = new ArrayList<>();
- }
-
- @Override
- public DynamicHasher build() throws IllegalArgumentException {
- // Assumes the hasher will create a copy of the buffers
- final DynamicHasher hasher = new DynamicHasher(function, buffers);
- // Reset for further use
- buffers.clear();
- return hasher;
- }
-
- @Override
- public final DynamicHasher.Builder with(final byte[] property) {
- buffers.add(property);
- return this;
- }
-
- @Override
- public DynamicHasher.Builder with(final CharSequence item, final Charset charset) {
- Hasher.Builder.super.with(item, charset);
- return this;
- }
-
- @Override
- public DynamicHasher.Builder withUnencoded(final CharSequence item) {
- Hasher.Builder.super.withUnencoded(item);
- return this;
- }
- }
-
- /**
- * The iterator of integers.
- *
- * This assumes that the list of buffers is not empty.
- */
- private class Iterator implements PrimitiveIterator.OfInt {
- /** The number of hash functions per item. */
- private final int k;
- /** The number of bits in the shape. */
- private final int m;
- /** The current item. */
- private byte[] item;
- /** The index of the next item. */
- private int nextItem;
- /** The count of hash functions for the current item. */
- private int functionCount;
-
- /**
- * Constructs iterator with the specified shape.
- *
- * @param shape
- */
- private Iterator(final Shape shape) {
- // Assumes that shape returns non-zero positive values for hash functions and bits
- k = shape.getNumberOfHashFunctions();
- m = shape.getNumberOfBits();
- // Assume non-empty
- item = buffers.get(0);
- nextItem = 1;
- }
-
- @Override
- public boolean hasNext() {
- if (functionCount != k) {
- return true;
- }
- // Reached the number of hash functions for the current item.
- // Try and advance to the next item.
- if (nextItem != buffers.size()) {
- item = buffers.get(nextItem++);
- functionCount = 0;
- return true;
- }
- // Finished.
- // functionCount == shape.getNumberOfHashFunctions()
- // nextItem == buffers.size()
- return false;
- }
-
- @SuppressWarnings("cast") // Cast to long to workaround a bug in animal-sniffer.
- @Override
- public int nextInt() {
- if (hasNext()) {
- return (int) Math.floorMod(function.apply(item, functionCount++),
- // Cast to long to workaround a bug in animal-sniffer.
- (long) m);
- }
- throw new NoSuchElementException();
- }
- }
-
- /**
- * An iterator of integers to use when there are no values.
- */
- private static class NoValuesIterator implements PrimitiveIterator.OfInt {
- /** The singleton instance. */
- private static final NoValuesIterator INSTANCE = new NoValuesIterator();
-
- /**
- * Empty constructor.
- */
- private NoValuesIterator() {}
-
- @Override
- public boolean hasNext() {
- return false;
- }
-
- @Override
- public int nextInt() {
- throw new NoSuchElementException();
- }
- }
-
- /**
- * The list of byte arrays that are to be hashed.
- * Package private for access by the iterator.
- */
- final List buffers;
-
- /**
- * The function to hash the buffers.
- * Package private for access by the iterator.
- */
- final HashFunction function;
-
- /**
- * Constructs a DynamicHasher.
- *
- * @param function the function to use.
- * @param buffers the byte buffers that will be hashed.
- */
- public DynamicHasher(final HashFunction function, final List buffers) {
- this.buffers = new ArrayList<>(buffers);
- this.function = function;
- }
-
- @Override
- public PrimitiveIterator.OfInt iterator(final Shape shape) {
- HashFunctionValidator.checkAreEqual(getHashFunctionIdentity(),
- shape.getHashFunctionIdentity());
- // Use optimised iterator for no values
- return buffers.isEmpty() ? NoValuesIterator.INSTANCE : new Iterator(shape);
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return function;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
deleted file mode 100644
index 0ff2edb8d4..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentity.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Locale;
-
-/**
- * Defines the hash function used by a {@link Hasher}.
- *
- * @since 4.5
- */
-public interface HashFunctionIdentity {
-
- /**
- * Identifies the process type of this function.
- *
- *
- * - Iterative processes
- * - Call the underlying hash algorithm for each (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)}.
- * - Cyclic processes
- * - Call the underlying hash algorithm using a (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)} to initialize the state. Subsequent
- * calls can generate hash values without calling the underlying algorithm.
- *
- */
- enum ProcessType {
- /**
- * Call the underlying hash algorithm for a (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)} when the state is uninitialized or
- * the seed is zero. This initializes the state. Subsequent calls with a non-zero
- * seed use the state to generate a new value.
- */
- CYCLIC,
- /**
- * Call the underlying hash algorithm for each (buffer, seed) pair passed to
- * {@link HashFunction#apply(byte[], int)}.
- */
- ITERATIVE
- }
-
- /**
- * Identifies the signedness of the calculations for this function.
- *
- * When the hash function executes it typically returns an array of bytes.
- * That array is converted into one or more numerical values which will be provided
- * as a {@code long} primitive type.
- * The signedness identifies if those {@code long} values are signed or unsigned.
- * For example a hash function that outputs only 32-bits can be unsigned if converted
- * using {@link Integer#toUnsignedLong(int)}. A hash function that outputs more than
- * 64-bits is typically signed.
- *
- */
- enum Signedness {
- /**
- * The result of {@link HashFunction#apply(byte[], int)} is signed,
- * thus the sign bit may be set.
- *
- *
- * The result can be used with {@code Math.floorMod(x, y)} to generate a positive
- * value if y is positive.
- *
- *
- * @see Math#floorMod(int, int)
- */
- SIGNED,
- /**
- * The result of {@link HashFunction#apply(byte[], int)} is unsigned,
- * thus the sign bit is never set.
- *
- *
- * The result can be used with {@code x % y} to generate a positive
- * value if y is positive.
- *
- */
- UNSIGNED
- }
-
- /**
- * Gets a common formatted string for general display.
- *
- * @param identity the identity to format.
- * @return the String representing the identity.
- */
- static String asCommonString(final HashFunctionIdentity identity) {
- return String.format("%s-%s-%s", identity.getName(), identity.getSignedness(), identity.getProcessType());
- }
-
- /**
- * Gets a {@code byte[]} buffer for a HashFunctionIdentity to create a signature. The
- * {@code byte[]} is composed using properties of the hash function as:
- *
- *
- * String.format("%s-%s-%s",
- * getName().toUpperCase(Locale.ROOT), getSignedness(), getProcess())
- * .getBytes("UTF-8");
- *
- *
- * @param identity The HashFunctionIdentity to create the buffer for.
- * @return the signature buffer for the identity
- * @see #getSignature()
- */
- static byte[] prepareSignatureBuffer(final HashFunctionIdentity identity) {
- return String.format("%s-%s-%s",
- identity.getName().toUpperCase(Locale.ROOT), identity.getSignedness(),
- identity.getProcessType()).getBytes(StandardCharsets.UTF_8);
- }
-
- /**
- * Gets the name of this hash function.
- *
- * Hash function should be the common name
- * for the hash. This may include indications as to hash length
- *
- *
- * Names are not case specific. Thus, "MD5" and "md5" should be considered as the same.
- *
- * @return the Hash name
- */
- String getName();
-
- /**
- * Gets the process type of this function.
- *
- * @return process type of this function.
- */
- ProcessType getProcessType();
-
- /**
- * Gets the name of the provider of this hash function implementation.
- *
- * Provider names are not case specific. Thus, "Apache Commons Collection" and
- * "apache commons collection" should be considered as the same.
- *
- * @return the name of the provider of this hash implementation.
- */
- String getProvider();
-
- /**
- * Gets the signature of this function. The signature is the output of the hash function
- * when applied to a set of bytes composed using properties of the hash function.
- *
- *
- * Implementations should define the method used to generate the signature.
- *
- *
- * @return the signature of this function.
- * @see #prepareSignatureBuffer(HashFunctionIdentity)
- */
- long getSignature();
-
- /**
- * Gets the signedness of this function.
- *
- * @return signedness of this function.
- */
- Signedness getSignedness();
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
deleted file mode 100644
index c75973a376..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImpl.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-/**
- * An instance of HashFunctionIdentity that is suitable for deserializing
- * HashFunctionIdentity data from a stream or any other situation where the
- * hash function is not available but the identify of the function is required.
- *
- * @since 4.5
- */
-public final class HashFunctionIdentityImpl implements HashFunctionIdentity {
- private final String name;
- private final String provider;
- private final Signedness signedness;
- private final ProcessType process;
- private final long signature;
-
- /**
- * Creates a copy of the HashFunctionIdentity.
- * @param identity the identity to copy.
- */
- public HashFunctionIdentityImpl(final HashFunctionIdentity identity) {
- this.name = identity.getName();
- this.provider = identity.getProvider();
- this.signedness = identity.getSignedness();
- this.process = identity.getProcessType();
- this.signature = identity.getSignature();
- }
-
- /**
- * Creates a HashFunctionIdentity from component values.
- * @param provider the name of the provider.
- * @param name the name of the hash function.
- * @param signedness the signedness of the hash function.
- * @param process the processes of the hash function.
- * @param signature the signature for the hash function.
- */
- public HashFunctionIdentityImpl(final String provider, final String name, final Signedness signedness, final ProcessType process,
- final long signature) {
- this.name = name;
- this.provider = provider;
- this.signedness = signedness;
- this.process = process;
- this.signature = signature;
- }
-
- @Override
- public String getName() {
- return name;
- }
-
- @Override
- public ProcessType getProcessType() {
- return process;
- }
-
- @Override
- public String getProvider() {
- return provider;
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return signedness;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java
deleted file mode 100644
index 3ec0753e4a..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidator.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.util.Locale;
-import java.util.Objects;
-
-/**
- * Contains validation for hash functions.
- */
-public final class HashFunctionValidator {
- /** Do not instantiate. */
- private HashFunctionValidator() {}
-
- /**
- * Generates a hash code for the identity of the hash function. The hash code is
- * generated using the same properties as those tested in
- * {@link #areEqual(HashFunctionIdentity, HashFunctionIdentity)}, that is the
- * signedness, process type and name. The name is not case specific and is converted
- * to lower-case using the {@link Locale#ROOT root locale}.
- *
- * The generated value is suitable for use in generation of a hash code that satisfies
- * the contract of {@link Object#hashCode()} if the {@link Object#equals(Object)} method
- * is implemented using {@link #areEqual(HashFunctionIdentity, HashFunctionIdentity)}. That
- * is two objects considered equal will have the same hash code.
- *
- *
If the hash function identity is a field within a larger object the generated hash code
- * should be incorporated into the entire hash, for example using
- * {@link Objects#hash(Object...)}.
- *
- * @param a hash function.
- * @return hash code
- * @see String#toLowerCase(Locale)
- * @see Locale#ROOT
- */
- static int hash(final HashFunctionIdentity a) {
- return Objects.hash(a.getSignedness(),
- a.getProcessType(),
- a.getName().toLowerCase(Locale.ROOT));
- }
-
- /**
- * Compares the identity of the two hash functions. The functions are considered
- * equal if the signedness, process type and name are equal. The name is not
- * case specific.
- *
- *
A pair of functions that are equal would be expected to produce the same
- * hash output from the same input.
- *
- * @param a First hash function.
- * @param b Second hash function.
- * @return true, if successful
- * @see String#equalsIgnoreCase(String)
- */
- public static boolean areEqual(final HashFunctionIdentity a, final HashFunctionIdentity b) {
- return (a.getSignedness() == b.getSignedness() &&
- a.getProcessType() == b.getProcessType() &&
- a.getName().equalsIgnoreCase(b.getName()));
- }
-
- /**
- * Compares the identity of the two hash functions and throws an exception if they
- * are not equal.
- *
- * @param a First hash function.
- * @param b Second hash function.
- * @see #areEqual(HashFunctionIdentity, HashFunctionIdentity)
- * @throws IllegalArgumentException if the hash functions are not equal
- */
- public static void checkAreEqual(final HashFunctionIdentity a, final HashFunctionIdentity b) {
- if (!areEqual(a, b)) {
- throw new IllegalArgumentException(String.format("Hash functions are not equal: (%s) != (%s)",
- HashFunctionIdentity.asCommonString(a), HashFunctionIdentity.asCommonString(b)));
- }
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
index 3700567f1a..58b78d61ae 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
@@ -16,117 +16,127 @@
*/
package org.apache.commons.collections4.bloomfilter.hasher;
-import java.nio.charset.Charset;
-import java.util.PrimitiveIterator;
+import org.apache.commons.collections4.bloomfilter.Shape;
+
+import java.util.function.IntConsumer;
+
+import org.apache.commons.collections4.bloomfilter.BitMap;
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
/**
- * A Hasher represents items of arbitrary byte size as a byte representation of
- * fixed size (a hash). The hash representations can be used to create indexes
- * for a Bloom filter.
- *
- *
The hash for each item is created using a hash function; use of different
- * seeds allows generation of different hashes for the same item. The hashes can
- * be dynamically converted into the bit index representation used by a Bloom
- * filter. The shape of the Bloom filter defines the number of indexes per item
- * and the range of the indexes. The hasher can generate the correct number of
- * indexes in the range required by the Bloom filter for each item it
- * represents.
- *
- *
Note that the process of generating hashes and mapping them to a Bloom
- * filter shape may create duplicate indexes. The hasher may generate fewer than
- * the required number of hash functions per item if duplicates have been
- * removed. Implementations of {@code iterator()} may return duplicate values
- * and may return values in a random order. See implementation javadoc notes as
- * to the guarantees provided by the specific implementation.
- *
- *
Hashers have an identity based on the hashing algorithm used.
+ * A Hasher creates IndexProducer based on the hash implementation and the
+ * provided Shape.
*
* @since 4.5
*/
public interface Hasher {
/**
- * A builder to build a hasher.
+ * Creates an IndexProducer for this hasher based on the Shape.
*
- *
A hasher represents one or more items of arbitrary byte size. The builder
- * contains methods to collect byte representations of items. Each method to add
- * to the builder will add an entire item to the final hasher created by the
- * {@link #build()} method.
+ *
The {@code IndexProducer} will create indices within the range defined by the number of bits in
+ * the shape. The total number of indices will respect the number of hash functions per item
+ * defined by the shape. However the count of indices may not be a multiple of the number of
+ * hash functions if the implementation has removed duplicates.
*
- * @since 4.5
+ * This IndexProducer must be deterministic in that it must return the same indices for the
+ * same Shape.
+ *
+ * No guarantee is made as to order of indices.
+ * Duplicate indices for a single item must be removed.
+ *
+ * @param shape the shape of the desired Bloom filter.
+ * @return the {@code IndexProducer} for the specified shape
*/
- interface Builder {
+ IndexProducer indices(Shape shape);
- /**
- * Builds the hasher from all the items.
- *
- * This method will clear the builder for future use.
- *
- * @return the fully constructed hasher
- */
- Hasher build();
+ /**
+ * Gets the number of items that will be hashed by the {@code IndexProducer}.
+ * @return The number of items that will be hashed by the {@code IndexProducer}.
+ */
+ int size();
- /**
- * Adds a byte array item to the hasher.
- *
- * @param item the item to add
- * @return a reference to this object
- */
- Builder with(byte[] item);
+ /**
+ * Returns true if there are no items to be hashed.
+ * @return {@code true} if there are no items to be hashed.
+ */
+ default boolean isEmpty() {
+ return size() == 0;
+ }
+
+ /**
+ * A convenience class for Hasher implementations to filter out duplicate indices.
+ *
+ *
If the index is negative the behavior is not defined.
+ *
+ * This is conceptually a unique filter implemented as a {@code Predicate}.
+ * @since 4.5
+ */
+ class Filter {
+ private long[] bits;
+ private int size;
/**
- * Adds a character sequence item to the hasher using the specified {@code charset}
- * encoding.
+ * Constructor.
*
- * @param item the item to add
- * @param charset the character set
- * @return a reference to this object
+ * @param size The number of numbers to track. Values from 0 to size-1 will be tracked.
*/
- default Builder with(final CharSequence item, final Charset charset) {
- return with(item.toString().getBytes(charset));
+ public Filter(int size) {
+ bits = new long[BitMap.numberOfBitMaps(size)];
+ this.size = size;
}
/**
- * Adds a character sequence item to the hasher. Each 16-bit character is
- * converted to 2 bytes using little-endian order.
+ * Test if the number has not been seen.
+ *
+ * The first time a number is tested the method returns {@code true} and returns
+ * {@code false} for every time after that.
*
- * @param item the item to add
- * @return a reference to this object
+ * If the input is not in the range [0,size) an IndexOutOfBoundsException is thrown.
+ *
+ * @param number the number to check.
+ * @return {@code true} if the number has not been seen, {@code false} otherwise.
+ * @see Hasher.Filter#Filter(int)
*/
- default Builder withUnencoded(final CharSequence item) {
- final int length = item.length();
- final byte[] bytes = new byte[length * 2];
- for (int i = 0; i < length; i++) {
- final char ch = item.charAt(i);
- bytes[i * 2] = (byte) ch;
- bytes[i * 2 + 1] = (byte) (ch >>> 8);
+ public boolean test(int number) {
+ BitMap.checkPositive(number);
+ if (number >= size) {
+ throw new IndexOutOfBoundsException(String.format("number to large %d >= %d", number, size));
}
- return with(bytes);
+ boolean retval = !BitMap.contains(bits, number);
+ BitMap.set(bits, number);
+ return retval;
}
}
/**
- * Gets an iterator of integers that are the bits to enable in the Bloom
- * filter based on the shape.
- *
- * The iterator will create indexes within the range defined by the number of bits in
- * the shape. The total number of indexes will respect the number of hash functions per item
- * defined by the shape. However the count of indexes may not be a multiple of the number of
- * hash functions if the implementation has removed duplicates.
+ * Wraps an {@code IntConsumer} so that it only receives each integer value once.
*
- *
No guarantee is made as to order of values.
+ *
If the index is negative the behavior is not defined.
*
- * @param shape the shape of the desired Bloom filter
- * @return the iterator of integers
- * @throws IllegalArgumentException if the hasher cannot generate indexes for
- * the specified @{@code shape}
+ * @since 4.5
*/
- PrimitiveIterator.OfInt iterator(Shape shape);
+ class FilteredIntConsumer implements IntConsumer {
+ private Hasher.Filter filter;
+ private IntConsumer consumer;
+
+ /**
+ * Constructor.
+ * Integers outside the range [0,size) will throw an IndexOutOfBoundsException.
+ * @param size The number of integers to track. Values in the range [0,size) will be tracked.
+ * @param consumer to wrap.
+ */
+ public FilteredIntConsumer(int size, IntConsumer consumer) {
+ this.filter = new Hasher.Filter(size);
+ this.consumer = consumer;
+ }
+
+ @Override
+ public void accept(int value) {
+ if (filter.test(value)) {
+ consumer.accept(value);
+ }
+ }
+ }
- /**
- * Gets the identify of the hash function used by the the hasher.
- *
- * @return the identity of the hash function
- */
- HashFunctionIdentity getHashFunctionIdentity();
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollection.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollection.java
new file mode 100644
index 0000000000..bc3ab940bd
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollection.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.IntConsumer;
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+
+/**
+ * A collection of Hashers. Useful when the generation of a Bloom filter depends upon
+ * multiple items.
+ *
+ * Hashers for each item are added to the HasherCollection and then
+ * the collection is used wherever a Hasher can be used in the API.
+ *
+ * @since 4.5
+ */
+public class HasherCollection implements Hasher {
+
+ /**
+ * The list of hashers to be used to generate the indices.
+ */
+ private final List hashers;
+
+ /**
+ * Constructs an empty HasherCollection.
+ */
+ public HasherCollection() {
+ this.hashers = new ArrayList<>();
+ }
+
+ /**
+ * Constructs a HasherCollection from a collection of Hasher objects.
+ *
+ * @param hashers A collection of Hashers to build the indices with.
+ */
+ public HasherCollection(final Collection hashers) {
+ Objects.requireNonNull(hashers, "hashers");
+ this.hashers = new ArrayList<>(hashers);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param hashers A list of Hashers to initialize the collection with.
+ */
+ public HasherCollection(Hasher... hashers) {
+ this(Arrays.asList(hashers));
+ }
+
+ /**
+ * Adds a hasher to the collection.
+ * @param hasher The hasher to add.
+ */
+ public void add(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ hashers.add(hasher);
+ }
+
+ /**
+ * Add all the Hashers in a collection to this HasherCollection.
+ * @param hashers The hashers to add.
+ */
+ public void add(Collection hashers) {
+ Objects.requireNonNull(hashers, "hashers");
+ this.hashers.addAll(hashers);
+ }
+
+ @Override
+ public IndexProducer indices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ return new IndexProducer() {
+ @Override
+ public void forEachIndex(IntConsumer consumer) {
+ for (Hasher hasher : hashers) {
+ hasher.indices(shape).forEachIndex(consumer);
+ }
+ }
+ };
+ }
+
+ /**
+ * Allow child classes access to the hashers.
+ * @return hashers
+ */
+ protected List getHashers() {
+ return hashers;
+ }
+
+ @Override
+ public int size() {
+ int i = 0;
+ for (Hasher h : hashers) {
+ i += h.size();
+ }
+ return i;
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasher.java
similarity index 52%
rename from src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java
rename to src/main/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasher.java
index d14fd3d830..0349b22c6d 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunction.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasher.java
@@ -16,34 +16,43 @@
*/
package org.apache.commons.collections4.bloomfilter.hasher;
+import java.util.Objects;
+import java.util.function.IntConsumer;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+
/**
- * Defines a hash function used by a {@link Hasher} .
+ * A Hasher that returns no values.
+ *
* @since 4.5
*/
-public interface HashFunction extends HashFunctionIdentity {
+public final class NullHasher implements Hasher {
/**
- * Applies the hash function to the buffer.
- *
- * @param buffer the buffer to apply the hash function to.
- * @param seed the seed for the hashing.
- * @return the long value of the hash.
+ * The single shared instance of the Null Hasher; the constructor is private.
*/
- long apply(byte[] buffer, int seed);
+ public static final NullHasher INSTANCE = new NullHasher();
+
+ /** Shared producer returned by indices(Shape); its forEachIndex never calls the consumer. */
+ private static final IndexProducer PRODUCER = new IndexProducer() {
+ @Override
+ public void forEachIndex(IntConsumer consumer) {
+ // intentionally empty: this hasher supplies no indices to the consumer
+ }
+ };
+
+ private NullHasher() {
+ }
+
+ @Override
+ public IndexProducer indices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ return PRODUCER;
+ }
- /**
- * Gets the signature of this function.
- *
- * The signature of this function is calculated as:
- *
- * int seed = 0;
- * apply(String.format("%s-%s-%s",
- * getName().toUpperCase(Locale.ROOT), getSignedness(), getProcess())
- * .getBytes("UTF-8"), seed);
- *
- *
- * @see HashFunctionIdentity#prepareSignatureBuffer(HashFunctionIdentity)
- */
@Override
- long getSignature();
+ public int size() {
+ return 0;
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
deleted file mode 100644
index a82586fe4e..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.util.Objects;
-
-/**
- * The definition of a Bloom filter shape.
- *
- * This class contains the values for the filter configuration and is used to
- * convert a Hasher into a BloomFilter as well as verify that two Bloom filters are
- * compatible. (i.e. can be compared or merged)
- *
- * Interrelatedness of values
- *
- * - Number of Items ({@code n})
- * - {@code n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))}
- Probability of
- * False Positives ({@code p})
- {@code p = pow(1 - exp(-k / (m / n)), k)}
- Number
- * of Bits ({@code m})
- * - {@code m = ceil((n * ln(p)) / ln(1 / pow(2, ln(2))))}
- Number of
- * Functions ({@code k})
- {@code k = round((m / n) * ln(2))}
- *
- * Comparisons
For purposes of equality checking and hashCode
- * calculations a {@code Shape} is defined by the hashing function identity, the number of
- * bits ({@code m}), and the number of functions ({@code k}).
- *
- * @see Bloom Filter calculator
- * @see Bloom filter
- * [Wikipedia]
- * @since 4.5
- */
-public final class Shape {
-
- /**
- * The natural logarithm of 2. Used in several calculations. Approximately 0.693147180559945.
- */
- private static final double LN_2 = Math.log(2.0);
-
- /**
- * ln(1 / 2^ln(2)). Used in calculating the number of bits. Approximately -0.480453013918201.
- *
- * ln(1 / 2^ln(2)) = ln(1) - ln(2^ln(2)) = -ln(2) * ln(2)
- */
- private static final double DENOMINATOR = -LN_2 * LN_2;
-
- /**
- * Number of items in the filter ({@code n}).
- */
- private final int numberOfItems;
-
- /**
- * Number of bits in the filter ({@code m}).
- */
- private final int numberOfBits;
-
- /**
- * Number of hash functions ({@code k}).
- */
- private final int numberOfHashFunctions;
-
- /**
- * The hash code for this filter.
- */
- private final int hashCode;
-
- /**
- * The identity of the hasher function.
- */
- private final HashFunctionIdentity hashFunctionIdentity;
-
- /**
- * Constructs a filter configuration with a desired false-positive probability ({@code p}) and the
- * specified number of bits ({@code m}) and hash functions ({@code k}).
- *
- *
The number of items ({@code n}) to be stored in the filter is computed.
- *
n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
- *
- * The actual probability will be approximately equal to the
- * desired probability but will be dependent upon the calculated Bloom filter capacity
- * (number of items). An exception is raised if this is greater than or equal to 1 (i.e. the
- * shape is invalid for use as a Bloom filter).
- *
- * @param hashFunctionIdentity The identity of the hash function this shape uses
- * @param probability The desired false-positive probability in the range {@code (0, 1)}
- * @param numberOfBits The number of bits in the filter
- * @param numberOfHashFunctions The number of hash functions in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if the desired probability is not in the range {@code (0, 1)};
- * if {@code numberOfBits < 1}; if {@code numberOfHashFunctions < 1}; or if the actual
- * probability is {@code >= 1.0}
- * @see #getProbability()
- */
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final double probability, final int numberOfBits,
- final int numberOfHashFunctions) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- checkProbability(probability);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
-
- // Number of items (n):
- // n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
- final double n = Math.ceil(numberOfBits /
- (-numberOfHashFunctions / Math.log(1 - Math.exp(Math.log(probability) / numberOfHashFunctions))));
-
- // log of probability is always < 0
- // number of hash functions is >= 1
- // e^x where x < 0 = [0,1)
- // log 1-e^x = [log1, log0) = <0 with an effective lower limit of -53
- // numberOfBits/ (-numberOfHashFunctions / [-53,0) ) >0
- // ceil( >0 ) >= 1
- // so we can not produce a negative value thus we don't check for it.
- //
- // similarly we can not produce a number greater than numberOfBits so we
- // do not have to check for Integer.MAX_VALUE either.
- this.numberOfItems = (int) n;
- // check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
- }
-
- /**
- * Constructs a filter configuration with the specified number of items ({@code n}) and
- * desired false-positive probability ({@code p}).
- *
- *
The number of bits ({@code m}) for the filter is computed.
- *
m = ceil(n * ln(p) / ln(1 / 2^ln(2)))
- *
- * The optimal number of hash functions ({@code k}) is computed.
- *
k = round((m / n) * ln(2))
- *
- * The actual probability will be approximately equal to the
- * desired probability but will be dependent upon the calculated number of bits and hash
- * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
- * shape is invalid for use as a Bloom filter).
- *
- * @param hashFunctionIdentity The identity of the hash function this shape uses
- * @param numberOfItems Number of items to be placed in the filter
- * @param probability The desired false-positive probability in the range {@code (0, 1)}
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if the desired probability
- * is not in the range {@code (0, 1)}; or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
- */
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final double probability) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
- checkProbability(probability);
-
- // Number of bits (m)
- final double m = Math.ceil(numberOfItems * Math.log(probability) / DENOMINATOR);
- if (m > Integer.MAX_VALUE) {
- throw new IllegalArgumentException("Resulting filter has more than " + Integer.MAX_VALUE + " bits: " + m);
- }
- this.numberOfBits = (int) m;
-
- this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
- // check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
- }
-
- /**
- * Constructs a filter configuration with the specified number of items ({@code n}) and
- * bits ({@code m}).
- *
- *
The optimal number of hash functions ({@code k}) is computed.
- *
k = round((m / n) * ln(2))
- *
- * The false-positive probability is computed using the number of items, bits and hash
- * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
- * shape is invalid for use as a Bloom filter).
- *
- * @param hashFunctionIdentity The identity of the hash function this shape uses
- * @param numberOfItems Number of items to be placed in the filter
- * @param numberOfBits The number of bits in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if {@code numberOfBits < 1};
- * if the calculated number of hash function is {@code < 1};
- * or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
- */
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
- // check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
- }
-
- /**
- * Constructs a filter configuration with the specified number of items, bits
- * and hash functions.
- *
- *
The false-positive probability is computed using the number of items, bits and hash
- * functions. An exception is raised if this is greater than or equal to 1 (i.e. the
- * shape is invalid for use as a Bloom filter).
- *
- * @param hashFunctionIdentity The identity of the hash function this shape uses
- * @param numberOfItems Number of items to be placed in the filter
- * @param numberOfBits The number of bits in the filter.
- * @param numberOfHashFunctions The number of hash functions in the filter
- * @throws NullPointerException if the hash function identity is null
- * @throws IllegalArgumentException if {@code numberOfItems < 1}; if {@code numberOfBits < 1};
- * if {@code numberOfHashFunctions < 1}; or if the actual probability is {@code >= 1.0}
- * @see #getProbability()
- */
- public Shape(final HashFunctionIdentity hashFunctionIdentity, final int numberOfItems, final int numberOfBits,
- final int numberOfHashFunctions) {
- this.hashFunctionIdentity = Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
- this.numberOfItems = checkNumberOfItems(numberOfItems);
- this.numberOfBits = checkNumberOfBits(numberOfBits);
- this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions);
- // check that probability is within range
- checkCalculatedProbability(getProbability());
- this.hashCode = generateHashCode();
- }
-
- /**
- * Check number of items is strictly positive.
- *
- * @param numberOfItems the number of items
- * @return the number of items
- * @throws IllegalArgumentException if the number of items is {@code < 1}
- */
- private static int checkNumberOfItems(final int numberOfItems) {
- if (numberOfItems < 1) {
- throw new IllegalArgumentException("Number of items must be greater than 0: " + numberOfItems);
- }
- return numberOfItems;
- }
-
- /**
- * Check number of bits is strictly positive.
- *
- * @param numberOfBits the number of bits
- * @return the number of bits
- * @throws IllegalArgumentException if the number of bits is {@code < 1}
- */
- private static int checkNumberOfBits(final int numberOfBits) {
- if (numberOfBits < 1) {
- throw new IllegalArgumentException("Number of bits must be greater than 0: " + numberOfBits);
- }
- return numberOfBits;
- }
-
- /**
- * Check number of hash functions is strictly positive
- *
- * @param numberOfHashFunctions the number of hash functions
- * @return the number of hash functions
- * @throws IllegalArgumentException if the number of hash functions is {@code < 1}
- */
- private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
- if (numberOfHashFunctions < 1) {
- throw new IllegalArgumentException("Number of hash functions must be greater than 0: " + numberOfHashFunctions);
- }
- return numberOfHashFunctions;
- }
-
- /**
- * Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
- *
- * @param probability the probability
- * @throws IllegalArgumentException if the probability is not in the range {@code (0, 1)}
- */
- private static void checkProbability(final double probability) {
- // Using the negation of within the desired range will catch NaN
- if (!(probability > 0.0 && probability < 1.0)) {
- throw new IllegalArgumentException("Probability must be greater than 0 and less than 1: " + probability);
- }
- }
-
- /**
- * Check the calculated probability is {@code < 1.0}.
- *
- *
This function is used to verify that the dynamically calculated probability for the
- * Shape is in the valid range 0 to 1 exclusive. This need only be performed once upon
- * construction.
- *
- * @param probability the probability
- * @throws IllegalArgumentException if the probability is {@code >= 1.0}
- */
- private static void checkCalculatedProbability(final double probability) {
- // We do not need to check for p <= 0.0 since we only allow positive values for
- // parameters and the closest we can come to exp(-kn/m) == 1 is
- // exp(-1/Integer.MAX_INT) approx 0.9999999995343387 so Math.pow( x, y ) will
- // always be 00
- if (probability >= 1.0) {
- throw new IllegalArgumentException(
- String.format("Calculated probability is greater than or equal to 1: " + probability));
- }
- }
-
- /**
- * Calculates the number of hash functions given numberOfItems and numberofBits.
- * This is a method so that the calculation is consistent across all constructors.
- *
- * @param numberOfItems the number of items in the filter.
- * @param numberOfBits the number of bits in the filter.
- * @return the optimal number of hash functions.
- * @throws IllegalArgumentException if the calculated number of hash function is {@code < 1}
- */
- private static int calculateNumberOfHashFunctions(final int numberOfItems, final int numberOfBits) {
- // k = round((m / n) * ln(2)) We change order so that we use real math rather
- // than integer math.
- final long k = Math.round(LN_2 * numberOfBits / numberOfItems);
- if (k < 1) {
- throw new IllegalArgumentException(
- String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
- }
- // Normally we would check that numberofHashFunctions <= Integer.MAX_VALUE but
- // since numberOfBits is at most Integer.MAX_VALUE the numerator of
- // numberofHashFunctions is ln(2) * Integer.MAX_VALUE = 646456992.9449 the
- // value of k can not be above Integer.MAX_VALUE.
- return (int) k;
- }
-
- @Override
- public boolean equals(final Object o) {
- if (o instanceof Shape) {
- final Shape other = (Shape) o;
- return numberOfBits == other.numberOfBits &&
- numberOfHashFunctions == other.numberOfHashFunctions &&
- HashFunctionValidator.areEqual(hashFunctionIdentity,
- other.hashFunctionIdentity);
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- return hashCode;
- }
-
- private int generateHashCode() {
- return Objects.hash(numberOfBits, numberOfHashFunctions, HashFunctionValidator.hash(hashFunctionIdentity));
- }
-
- /**
- * Gets the HashFunctionIdentity of the hash function this shape uses.
- * @return the HashFunctionIdentity of the hash function this shape uses.
- */
- public HashFunctionIdentity getHashFunctionIdentity() {
- return hashFunctionIdentity;
- }
-
- /**
- * Gets the number of bits in the Bloom filter.
- * This is also known as {@code m}.
- *
- * @return the number of bits in the Bloom filter ({@code m}).
- */
- public int getNumberOfBits() {
- return numberOfBits;
- }
-
- /**
- * Gets the number of hash functions used to construct the filter.
- * This is also known as {@code k}.
- *
- * @return the number of hash functions used to construct the filter ({@code k}).
- */
- public int getNumberOfHashFunctions() {
- return numberOfHashFunctions;
- }
-
- /**
- * Gets the number of items that are expected in the filter.
- * This is also known as {@code n}.
- *
- * @return the number of items ({@code n}).
- */
- public int getNumberOfItems() {
- return numberOfItems;
- }
-
- /**
- * Calculates the probability of false positives ({@code p}) given
- * numberOfItems ({@code n}), numberOfBits ({@code m}) and numberOfHashFunctions ({@code k}).
- * p = pow(1 - exp(-k / (m / n)), k)
- *
- * This is the probability that a Bloom filter will return true for the presence of an item
- * when it does not contain the item.
- *
- *
The probability assumes that the Bloom filter is filled with the expected number of
- * items. If the filter contains fewer items then the actual probability will be lower.
- * Thus this returns the worst-case false positive probability for a filter that has not
- * exceeded its expected number of items.
- *
- * @return the probability of false positives.
- * @see #getNumberOfItems()
- */
- public double getProbability() {
- return Math.pow(1.0 - Math.exp(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
- numberOfHashFunctions);
- }
-
- @Override
- public String toString() {
- return String.format("Shape[ %s n=%s m=%s k=%s ]",
- HashFunctionIdentity.asCommonString(hashFunctionIdentity),
- numberOfItems, numberOfBits, numberOfHashFunctions);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasher.java
new file mode 100644
index 0000000000..64adb12c3b
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasher.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import java.util.Objects;
+import java.util.function.IntConsumer;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+
+/**
+ * A Hasher that implements combinatorial hashing as described by
+ * Kirsch and Mitzenmacher.
+ *
+ * Common use for this hasher is to generate a byte array as the output of a hashing
+ * or MessageDigest algorithm.
+ *
+ * @since 4.5
+ */
+public final class SimpleHasher implements Hasher {
+
+    /**
+     * The initial hash value.
+     */
+    private final long initial;
+
+    /**
+     * The value to increment the hash value by.
+     */
+    private final long increment;
+
+    /**
+     * Converts up to {@link Long#BYTES} bytes to a long, with the first byte read as the most significant.
+     * @param byteArray the byte array to extract the values from.
+     * @param offset the offset to start extraction from.
+     * @param len the length of the extraction, may be longer than 8; only the first 8 bytes are used.
+     * @return the long value assembled from the extracted bytes.
+     */
+    private static long toLong(byte[] byteArray, int offset, int len) {
+        long val = 0;
+        len = Math.min(len, Long.BYTES);
+        for (int i = 0; i < len; i++) {
+            val <<= 8;
+            val |= (byteArray[offset + i] & 0x00FF);
+        }
+        return val;
+    }
+
+    /**
+     * Constructs the SimpleHasher from a byte array.
+     * The byte array is split in 2 and each half is interpreted as a long value.
+     * Excess bytes are ignored. This simplifies the conversion from a Digest or hasher algorithm output
+     * to the two values used by the SimpleHasher.
+     * @param buffer the buffer to extract the longs from.
+     * @throws IllegalArgumentException if buffer length is zero.
+     */
+    public SimpleHasher(byte[] buffer) {
+        if (buffer.length == 0) {
+            throw new IllegalArgumentException("buffer length must be greater than 0");
+        }
+        int segment = buffer.length / 2;
+        this.initial = toLong(buffer, 0, segment);
+        this.increment = toLong(buffer, segment, buffer.length - segment);
+    }
+
+    /**
+     * Constructs the SimpleHasher from 2 longs. The long values will be interpreted as unsigned values.
+     * @param initial The initial value for the hasher.
+     * @param increment The value to increment the hash by on each iteration.
+     */
+    public SimpleHasher(long initial, long increment) {
+        this.initial = initial;
+        this.increment = increment;
+    }
+
+    /**
+     * Gets an IndexProducer that produces indices based on the shape.
+     * Each traversal restarts at {@code initial} and adds {@code increment} per hash function; every value
+     * is reduced by unsigned remainder on the number of bits, so the indices are not in ascending order.
+     *
+     * @param shape {@inheritDoc}
+     * @return {@inheritDoc}
+     * @throws IllegalArgumentException {@inheritDoc}
+     */
+    @Override
+    public IndexProducer indices(final Shape shape) {
+        Objects.requireNonNull(shape, "shape");
+
+        return new IndexProducer() {
+
+            @Override
+            public void forEachIndex(IntConsumer consumer) {
+                Objects.requireNonNull(consumer, "consumer");
+                FilteredIntConsumer filtered = new FilteredIntConsumer(shape.getNumberOfBits(), consumer);
+                // Keep the running value local and restart it from the initial value on every call;
+                // held as a field it made a second traversal of this producer yield different indices.
+                long next = SimpleHasher.this.initial;
+                for (int functionalCount = 0; functionalCount < shape.getNumberOfHashFunctions(); functionalCount++) {
+                    int value = (int) Long.remainderUnsigned(next, shape.getNumberOfBits());
+                    filtered.accept(value);
+                    next += SimpleHasher.this.increment;
+                }
+            }
+        };
+    }
+
+    @Override
+    public int size() {
+        return 1;
+    }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollection.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollection.java
new file mode 100644
index 0000000000..7de34276c4
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollection.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import java.util.Collection;
+import java.util.Objects;
+import java.util.function.IntConsumer;
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+
+/**
+ * A collection of Hashers that are combined to be a single item. This differs from
+ * the HasherCollection in that the HasherCollection counts each Hasher in the collection as
+ * a different item, or in the case of an enclosed HasherCollection multiple items. This collection
+ * assumes that all hashers are combined to make a single item.
+ *
+ * @since 4.5
+ */
+public class SingleItemHasherCollection extends HasherCollection {
+
+ /**
+ * Constructs an empty SingleItemHasherCollection.
+ */
+ public SingleItemHasherCollection() {
+ super();
+ }
+
+ /**
+ * Constructs a SingleItemHasherCollection from a collection of Hasher objects.
+ *
+ * @param hashers A collection of Hashers to build the indices with.
+ */
+ public SingleItemHasherCollection(Collection hashers) {
+ super(hashers);
+ }
+
+ /**
+ * Constructs a SingleItemHasherCollection from an array of Hasher objects.
+ *
+ * @param hashers A list of Hashers to initialize the collection with.
+ */
+ public SingleItemHasherCollection(Hasher... hashers) {
+ super(hashers);
+ }
+
+ /**
+ * Produces unique indices.
+ *
+ * Specifically, this method creates an IndexProducer that will not return duplicate indices. The effect is
+ * to make the entire collection appear as one item. This is useful when working with complex Bloom filters like the
+ * CountingBloomFilter.
+ *
+ * @param shape The shape of the desired Bloom filter.
+ * @return an IndexProducer that only produces unique values.
+ */
+ @Override
+ public IndexProducer indices(final Shape shape) {
+ Objects.requireNonNull(shape, "shape");
+ IndexProducer baseProducer = super.indices(shape);
+ // NOTE(review): filter bound is getNumberOfBits() - 1 here but getNumberOfBits() in SimpleHasher; confirm which is intended.
+ return new IndexProducer() {
+ @Override
+ public void forEachIndex(IntConsumer consumer) {
+ Objects.requireNonNull(consumer, "consumer");
+ FilteredIntConsumer filtered = new FilteredIntConsumer(shape.getNumberOfBits() - 1, consumer);
+ baseProducer.forEachIndex(filtered);
+ }
+ };
+ }
+
+ @Override
+ public int size() {
+ for (Hasher hasher : getHashers()) {
+ if (hasher.size() > 0) {
+ return 1;
+ }
+ }
+ return 0;
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
deleted file mode 100644
index 430f99b565..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasher.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.Set;
-import java.util.TreeSet;
-
-/**
- * A Hasher implementation that contains the index for all enabled bits for a specific
- * Shape.
- * @since 4.5
- */
-public final class StaticHasher implements Hasher {
-
- /**
- * The shape of this hasher
- */
- private final Shape shape;
-
- /**
- * The ordered set of values that this hasher will return.
- */
- private final int[] values;
-
- /**
- * Constructs the StaticHasher from a Hasher and a Shape.
- * @param hasher the Hasher to read.
- * @param shape the Shape for the resulting values.
- * @throws IllegalArgumentException if the hasher function and the shape function are not the same.
- */
- public StaticHasher(final Hasher hasher, final Shape shape) {
- this(hasher.iterator(shape), shape);
- HashFunctionValidator.checkAreEqual(hasher.getHashFunctionIdentity(),
- shape.getHashFunctionIdentity());
- }
-
- /**
- * Constructs a StaticHasher from an Iterator of Integers and a Shape.
- * @param iter the Iterator of Integers.
- * @param shape the Shape that the integers were generated for.
- * @throws IllegalArgumentException if any Integer is outside the range [0,shape.getNumberOfBits())
- */
- public StaticHasher(final Iterator iter, final Shape shape) {
- this.shape = shape;
- final Set workingValues = new TreeSet<>();
- iter.forEachRemaining(idx -> {
- if (idx >= this.shape.getNumberOfBits()) {
- throw new IllegalArgumentException(String.format("Bit index (%s) is too big for %s", idx, shape));
- }
- if (idx < 0) {
- throw new IllegalArgumentException(String.format("Bit index (%s) may not be less than zero", idx));
- }
- workingValues.add(idx);
- });
- this.values = new int[workingValues.size()];
- int i = 0;
- for (final Integer value : workingValues) {
- values[i++] = value.intValue();
- }
- }
-
- /**
- * Constructs the StaticHasher from a StaticHasher and a Shape.
- * @param hasher the StaticHasher to read.
- * @param shape the Shape for the resulting values.
- * @throws IllegalArgumentException if the shape of the hasher and the shape parameter are not the same.
- */
- public StaticHasher(final StaticHasher hasher, final Shape shape) {
- if (!hasher.shape.equals(shape)) {
- throw new IllegalArgumentException(String.format("Hasher shape (%s) is not the same as shape (%s)",
- hasher.getShape().toString(), shape.toString()));
- }
- this.shape = shape;
- this.values = hasher.values;
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
-
- /**
- * Gets the shape this static hasher was created with.
- *
- * @return the Shape of this hasher.
- */
- public Shape getShape() {
- return shape;
- }
-
- /**
- * Tests emptiness (size == 0).
- *
- * @return Whether or not this is empty.
- */
- public boolean isEmpty() {
- return size() == 0;
- }
-
- /**
- * Gets an iterator of integers that are the bits to enable in the Bloom
- * filter based on the shape. The iterator will not return the same value multiple
- * times. Values will be returned in ascending order.
- *
- * @param shape {@inheritDoc}
- * @return {@inheritDoc}
- * @throws IllegalArgumentException {@inheritDoc}
- */
- @Override
- public OfInt iterator(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(
- String.format("shape (%s) does not match internal shape (%s)", shape, this.shape));
- }
- return Arrays.stream(values).iterator();
- }
-
- /**
- * Gets the the number of unique values in this hasher.
- * @return the number of unique values.
- */
- public int size() {
- return values.length;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
deleted file mode 100644
index 8e07793b7f..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5Cyclic.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import java.nio.ByteBuffer;
-
-import java.nio.LongBuffer;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * performs MD5 hashing using a signed cyclic method.
- * @since 4.5
- */
-public final class MD5Cyclic implements HashFunction {
-
- /**
- * The name of this hash function.
- */
- public static final String NAME = "MD5";
-
- /**
- * The MD5 digest implementation.
- */
- private final MessageDigest messageDigest;
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * The result from the digest 0
- */
- private final long[] result = new long[2];
-
- /**
- * Constructs the MD5 hashing function.
- */
- public MD5Cyclic() {
- try {
- messageDigest = MessageDigest.getInstance(NAME);
- } catch (final NoSuchAlgorithmException e) {
- // This should not happen
- throw new IllegalStateException("Missing the standard MD5 message digest algorithm", e);
- }
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
-
- if (seed == 0) {
- final byte[] hash;
- synchronized (messageDigest) {
- messageDigest.update(buffer);
- hash = messageDigest.digest();
- messageDigest.reset();
- }
-
- final LongBuffer lb = ByteBuffer.wrap(hash).asLongBuffer();
- result[0] = lb.get(0);
- result[1] = lb.get(1);
- } else {
- result[0] += result[1];
- }
- return result[0];
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java
deleted file mode 100644
index 99c27c8819..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64Cyclic.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.codec.digest.MurmurHash3;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * uses an underlying Murmur3 128-bit hash with a signed cyclic method.
- *
- *
Requires the optional Apache Commons Codec
- * library which contains a Java port of the 128-bit hash function
- * {@code MurmurHash3_x64_128} from Austin Applyby's original {@code c++}
- * code in SMHasher.
- *
- * @see SMHasher
- * @since 4.5
- */
-public final class Murmur128x64Cyclic implements HashFunction {
-
- /**
- * The name of this hash method.
- */
- public static final String NAME = "Murmur3_x64_128";
-
- /**
- * The result of the hash 0 call.
- */
- private long[] parts;
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * Constructs a Murmur3 x64 128 hash.
- */
- public Murmur128x64Cyclic() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- if (parts == null || seed == 0) {
- parts = MurmurHash3.hash128x64(buffer, 0, buffer.length, 0);
- } else {
- parts[0] += parts[1];
- }
- return parts[0];
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
deleted file mode 100644
index 982ef5c869..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86Iterative.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.codec.digest.MurmurHash3;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * uses an underlying Murmur3 32-bit hash with a signed iterative method.
- *
- *
Requires the optional Apache Commons Codec
- * library which contains a Java port of the 32-bit hash function
- * {@code MurmurHash3_x86_32} from Austin Applyby's original {@code c++}
- * code in SMHasher.
- *
- * @see Apache Commons Codec
- * @see SMHasher
- * @since 4.5
- */
-public final class Murmur32x86Iterative implements HashFunction {
-
- /**
- * The name of this hash function.
- */
- public static final String NAME = "Murmur3_x86_32";
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * Constructs a Murmur3 x86 32 hash
- */
- public Murmur32x86Iterative() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- return MurmurHash3.hash32x86(buffer, 0, buffer.length, seed);
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
deleted file mode 100644
index da0fc2c2db..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterative.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-
-/**
- * An implementation of HashFunction that
- * performs {@code Objects.hash} hashing using a signed iterative method.
- *
- * Except in the case of seed 0, the value of the previous hash is
- * used as a seed for the next hash. Hashes are seeded by calling
- * {@code Arrays.deepHashCode( new Object[]{seed, buffer} )}.
- *
- * @since 4.5
- */
-public final class ObjectsHashIterative implements HashFunction {
-
- /**
- * The name of the hash function.
- */
- public static final String NAME = "Objects32";
-
- /**
- * The signature for this hash function.
- *
- * TODO: Make static akin to a serialVersionUID?
- */
- private final long signature;
-
- /**
- * The value of the last hash.
- */
- private long last;
-
- /**
- * Constructs a hash that uses the Objects.hash method to has values.
- */
- public ObjectsHashIterative() {
- signature = Signatures.getSignature(this);
- }
-
- @Override
- public long apply(final byte[] buffer, final int seed) {
- if (seed == 0) {
- last = 0;
- }
- // Effectively:
- // result = Arrays.deepHashCode(new Object[] { last, buffer });
- // The method loops over items starting with result=1
- // for i in items:
- // result = 31 * result + hashCode(i)
- // Here we unroll the computation to 2 iterations.
- // The computation is done using 32-bit integers then cast to a long
- final long result = 31 * (31 + Long.hashCode(last)) + Arrays.hashCode(buffer);
- last += result;
- return result;
- }
-
- @Override
- public String getName() {
- return NAME;
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.ITERATIVE;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collections";
- }
-
- @Override
- public long getSignature() {
- return signature;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java
deleted file mode 100644
index b7f35ac051..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/function/Signatures.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-
-/**
- * Allow computation of HashFunction signatures.
- * @since 4.5
- */
-final class Signatures {
-
- /** No instances. */
- private Signatures() {}
-
- /**
- * Gets the standard signature for the hash function. The signature is prepared as:
- *
- * int seed = 0;
- * return hashFunction.apply(HashFunctionIdentity.prepareSignatureBuffer(hashFunction), seed);
- *
- *
- * @param hashFunction the hash function
- * @return the signature
- * @see HashFunctionIdentity#prepareSignatureBuffer(HashFunctionIdentity)
- * @see HashFunction#apply(byte[], int)
- */
- static long getSignature(final HashFunction hashFunction) {
- return hashFunction.apply(HashFunctionIdentity.prepareSignatureBuffer(hashFunction), 0);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java
index b73675ed28..2922477edc 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/package-info.java
@@ -16,10 +16,64 @@
*/
/**
- * Provides classes and interfaces to define the shape of a Bloom filter and the conversion
- * of generic bytes to a hash of bit indexes to be used with a Bloom filter.
+ * Hasher implementations and tools.
*
+ *
+ * Hasher
+ *
+ * A Hasher converts one or more items into an {@code IndexProducer} series of integers based on a {@code Shape}.
+ *
+ *
+ * The base Hasher implementations
+ * are as follows:
+ *
+ * SimpleHasher
+ *
+ * The SimpleHasher represents one item being added to the Bloom filter. It utilizes the combinatorial strategy
+ * as described by Kirsch and Mitzenmacher.
+ * Generally, a hash value is created by hashing together multiple properties of the item being added. The hash value is
+ * then used to create a SimpleHasher.
+ *
+ * This hasher represents a single item and thus does not return duplicate indices.
+ *
+ * HasherCollection
+ *
+ * The HasherCollection is a collection of Hashers that implements the Hasher interface. Each hasher within the collection
+ * represents a single item or, in the case of a HasherCollection, multiple items.
+ *
+ * This hasher represents multiple items and thus may return duplicate indices.
+ *
+ * SingleItemHasherCollection
+ *
+ * A collection of Hashers that are combined to represent a single item. Like the HasherCollection this Hasher is composed
+ * of multiple Hashers. Unlike the HasherCollection, this hasher reports that it is only one item.
+ *
+ *
+ * This hasher represents a single item and thus does not return duplicate indices.
+ *
+ * Other Implementations
+ *
+ * Other implementations of the Hasher are easy to implement. Hashers that represent single items should make use of the
+ * {@code Hasher.Filter} and/or {@code Hasher.FilteredIntConsumer} classes to filter out duplicate indices.
+ *
+ * With the exception of the HasherCollection, a Hasher represents an item of arbitrary
+ * byte size as multiple byte representations of fixed size (multiple hashes). The hashers
+ * are used to create indices for a Bloom filter.
+ *
+ * Hashers create {@code IndexProducer} instances for hashed items based
+ * on a {@code Shape}.
+ *
+ * The method used to generate the multiple hashes is dependent upon the Hasher
+ * implementation. The SimpleHasher uses a combinatorial strategy to create the
+ * multiple hashes from a single starting hash.
+ *
+ * Note that the process of generating hashes and mapping them to a Bloom
+ * filter shape may create duplicate indexes. The Hasher implementation is required to
+ * remove all duplicate values for a single item. Thus the hasher may generate fewer
+ * than the required number of hash values per item after duplicates have been
+ * removed.
+ *
+ * @see org.apache.commons.collections4.bloomfilter.IndexProducer
* @since 4.5
*/
package org.apache.commons.collections4.bloomfilter.hasher;
-
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
index bfc3d67abe..50a8f723b7 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
@@ -18,101 +18,78 @@
/**
* A collection of extensible Bloom filter classes and interfaces.
*
- *
- * Background:
- *
- * A Bloom filter is conceptually a bit vector. It is used to
- * tell you where things are not. Basically, you create a Bloom filter by creating hashes
- * and converting those to enabled bits in a vector. You can merge the Bloom filters
- * together with logical "or" (call this filter "B"). You can then check to see if filter
- * "A" was "or"ed into "B" by testing A & B == A. if the statement is false then "A" was
- * not merged into "B", otherwise it _might_ have. They are generally used where hash
- * tables would be too large or as a filter front end for longer processes. For example
+ *
Background:
+ *
+ * The Bloom filter is a probabilistic data structure that indicates where things are not.
+ * Conceptually it is a bit vector. You create a Bloom filter by creating hashes
+ * and converting those to enabled bits in the vector. Multiple Bloom filters may be merged
+ * together into one Bloom filter. It is possible to test if a filter {@code B} was merged into
+ * another filter {@code A} by verifying that {@code (A & B) == B}.
+ *
+ * Bloom filters are generally used where hash
+ * tables would be too large, or as a filter front end for longer processes. For example
* most browsers have a Bloom filter that is built from all known bad URLs (ones that
* serve up malware). When you enter a URL the browser builds a Bloom filter and checks to
* see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the
* expensive lookup on a remote system is made to see if it actually is in the list. There
* are lots of other uses, and in most cases the reason is to perform a fast check as a
* gateway for a longer operation.
- *
- * BloomFilter
- *
- * The bloom filter code is
- * an abstract class that requires implementation of 4 methods:
- * -
- * getBits() which
- * returns the set bits as a buffer encoded into an array of long.
- * -
- * getHasher()
- * which returns a list of integers that are indexes of the bits that are enabled. These
- * are returned in a Hasher construct.
- * -
- * merge( BloomFilter ) to merge another
- * Bloom filter into this one.
- * -
- * merge( Hasher ) to merge the values in a hasher
- * into this Bloom filter.
- *
- * There are 3 implementations of Bloom filter
- * provided:
- * -
- * BitSetBloomFilter - based on the Java BitSet class.
- * -
- *
- * CountingBloomFilter - uses a sparse array of integers (Map) to implement a counting
- * Bloom filter. This filter also implements remove() methods as that is the great
- * advantage of a counting Bloom filter.
- * -
- * HasherBloomFilter - implements bloom
- * filter on a Hasher. A rather slow implementation but convenient in some
- * situations.
- *
- *
- *
- * Shape
- *
- * Describes the Bloom filter using the
- * standard number of bits, number of hash functions and number of items along with a
- * description of the HashFunction. It is this description that has caused the most issues
- * of late.
- *
- * Hasher
- *
- * converts byte buffers into an iterator if int based
- * on a Shape. There are 2 implementations of Hasher provided
- * -
- * Dynamic - calls
- * the HashFunction for each value required in the Bloom filter.
- * -
- * Static - based
- * on a pre-calculated list of Bloom filter index values. It is also limited to generating
- * values for a specific Shape.
- *
- *
- *
- * Hash Functions
- *
- * Hash
- * functions generate individual index values for the filter from a byte buffer. There are
- * four implementations provided.
- *
- * HashFunctionIdentity
- *
- * The
- * HashFunctionIdentity is the base interface for the HashFunction. It tracks three (3)
- * properties:
- * -
- * The Hashing algorithm
- * -
- * Whether the contents of the
- * resulting hash buffer are read as signed or unsigned values.
- * -
- * Whether the hash
- * function uses an iterative or cyclic method. In traditional iterative methods this is
- * done by calling the selected hash function with a different seed for each hash
- * required. The second method described by Adam Kirsch and Micheal Mitzenmacher[1] has
- * become more common and is used in applications like Cassandra[2].
- *
+ *
+ * BloomFilter
+ *
+ * The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted.
+ * Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a
+ * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in
+ * manipulation of the representations.
+ *
+ * The bloom filter code is an interface that requires implementation of 6 methods:
+ *
+ * - {@code cardinality()}
+ * returns the number of bits enabled in the Bloom filter.
+ *
+ * - {@code contains(BitMapProducer)} which
+ * returns true if the bits specified by the BitMaps generated by the BitMapProducer are enabled in the Bloom filter.
+ *
+ * - {@code contains(IndexProducer)} which
+ * returns true if the bits specified by the Indices generated by IndexProducer are enabled in the Bloom filter.
+ *
+ * - {@code getShape()} which
+ * returns the shape the Bloom filter was created with.
+
+ * - {@code isSparse()} which
+ * returns true if the implementation tracks indices natively, false if BitMaps are used. In cases where
+ * neither are used the {@code isSparse} return value should reflect which is faster to produce.
+ *
+ * - {@code mergeInPlace(BloomFilter)} which
+ * utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits
+ * in the internal representation of the Bloom filter.
+ *
+ *
+ * Other methods should be implemented where they can be done so more efficiently than the default implementations.
+ *
+ *
+ * CountingBloomFilter
+ *
+ * The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been
+ * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional
+ * overhead.
+ *
+ * Shape
+ *
+ * The Shape describes the Bloom filter using the number of bits and the number of hash functions.
+ *
+ * Hasher
+ *
+ * A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollection,
+ * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the
+ * number of items as the sum of the number of items represented by the Hashers in the collection.
+ *
+ * The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily
+ * initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and
+ * then splitting the hash bytes in half and considering each as a long value.
+ *
+ * Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter}
+ * and/or {@code Hasher.FilteredIntConsumer} classes to filter out duplicate indices.
*
* References
*
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
index 2a1faa18ea..b5d26c6e15 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
@@ -16,214 +16,173 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.util.List;
-import java.util.PrimitiveIterator.OfInt;
-import java.util.function.BiFunction;
-import java.util.function.IntConsumer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.BitSet;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+import org.apache.commons.collections4.bloomfilter.hasher.HasherCollection;
+import org.apache.commons.collections4.bloomfilter.hasher.SimpleHasher;
import org.junit.jupiter.api.Test;
/**
* Test standard methods in the {@link BloomFilter} interface.
*/
-public abstract class AbstractBloomFilterTest {
+public abstract class AbstractBloomFilterTest {
+
+ protected final SimpleHasher from1 = new SimpleHasher(1, 1);
+ protected final long from1Value = 0x3FFFEL;
+ protected final SimpleHasher from11 = new SimpleHasher(11, 1);
+ protected final long from11Value = 0xFFFF800L;
+ protected final HasherCollection bigHasher = new HasherCollection(from1, from11);
+ protected final long bigHashValue = 0xFFFFFFEL;
+ protected final HasherCollection fullHasher = new HasherCollection(new SimpleHasher(0, 1)/* 0-16 */,
+ new SimpleHasher(17, 1)/* 17-33 */, new SimpleHasher(33, 1)/* 33-49 */, new SimpleHasher(50, 1)/* 50-66 */,
+ new SimpleHasher(67, 1)/* 67-83 */
+ );
+ protected final long[] fullHashValue = { 0xFFFFFFFFFFFFFFFFL, 0xFFFFFL };
/**
- * An implementation of BloomFilter that is used to test merge and cardinality
- * operations with a filter type that does not match the type of the filter
- * being tested.
+ * The shape of the Bloom filters for testing
*/
- private static class TestBloomFilter extends AbstractBloomFilter {
- /** The bits. */
- final BitSet bits;
-
- protected TestBloomFilter(final Shape shape, final BitSet bits) {
- super(shape);
- this.bits = bits;
- }
-
- @Override
- public long[] getBits() {
- return bits.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bits.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- throw new UnsupportedOperationException();
- }
- }
+ protected Shape shape = new Shape(17, 72);
/**
- * A HashFunctionIdentity for testing.
+ * Create an empty version of the BloomFilter implementation we are testing.
+ *
+ * @param shape the shape of the filter.
+ * @return a BloomFilter implementation.
*/
- protected HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
+ protected abstract T createEmptyFilter(Shape shape);
/**
- * A second HashFunctionIdentity for testing.
+ * Create the BloomFilter implementation we are testing.
+ *
+ * @param hasher the hasher to use to create the filter.
+ * @param shape the shape of the filter.
+ * @return a BloomFilter implementation.
*/
- protected HashFunctionIdentity testFunctionX = new HashFunctionIdentity() {
+ protected abstract T createFilter(Shape shape, Hasher hasher);
- @Override
- public String getName() {
- return "Test FunctionX";
+ @Test
+ public void asIndexArrayTest() {
+ final BloomFilter bf = createFilter( shape, from1 );
+ int[] ary = BloomFilter.asIndexArray( bf );
+ assertEquals( 17, ary.length );
+ for (int i=0; i filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ @Test
+ public void estimateIntersectionTest() {
- final BloomFilter bf = createFilter(hasher, shape);
+ final BloomFilter bf = createFilter(shape, from1);
+ final BloomFilter bf2 = createFilter(shape, bigHasher);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ assertEquals(1, bf.estimateIntersection(bf2));
+ assertEquals(1, bf2.estimateIntersection(bf));
+ }
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
+ @Test
+ public void estimateIntersectionTest_empty() {
+ final BloomFilter bf = createFilter(shape, from1);
+ final BloomFilter bf2 = createEmptyFilter(shape);
- assertEquals(7, bf.andCardinality(bf2));
+ assertEquals(0, bf.estimateIntersection(bf2));
+ assertEquals(0, bf2.estimateIntersection(bf));
}
/**
- * Tests that the andCardinality calculations are correct when there are more than Long.LENGTH bits.
+ * Tests that the union size estimate is correctly calculated.
+ *
+ * The estimate is checked with each filter as the receiver and as the argument.
*/
@Test
- public final void andCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public void estimateUnionTest() {
+ final BloomFilter bf = createFilter(shape, from1);
- final BloomFilter bf = createFilter(hasher, shape);
+ final BloomFilter bf2 = createFilter(shape, from11);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final BloomFilter bf2 = createFilter(hasher2, shape);
-
- assertEquals(7, bf.andCardinality(bf2));
- assertEquals(7, bf2.andCardinality(bf));
+ assertEquals(2, bf.estimateUnion(bf2));
+ assertEquals(2, bf2.estimateUnion(bf));
}
- /**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.iterator(shape);
- final OfInt iter2 = hasher2.iterator(shape);
+ @Test
+ public void estimateUnionTest_empty() {
+ final BloomFilter bf = createFilter(shape, from1);
+ final BloomFilter bf2 = createEmptyFilter(shape);
- while (iter1.hasNext()) {
- assertTrue(iter2.hasNext(), "Not enough data in second hasher");
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse(iter2.hasNext(), "Too much data in second hasher");
+ assertEquals(1, bf.estimateUnion(bf2));
+ assertEquals(1, bf2.estimateUnion(bf));
}
/**
- * Tests that cardinality is correct.
+ * Tests that the size estimate is correctly calculated.
*/
@Test
- public final void cardinalityTest() {
+ public void estimateNTest() {
+ // build a filter
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ assertEquals(1, filter1.estimateN());
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ // the data provided above do not generate an estimate that is equivalent to the
+ // actual.
+ filter1.mergeInPlace(new SimpleHasher(4, 1));
- final BloomFilter bf = createFilter(hasher, shape);
- assertEquals(17, bf.cardinality());
+ assertEquals(1, filter1.estimateN());
+
+ filter1.mergeInPlace(new SimpleHasher(17, 1));
+
+ assertEquals(3, filter1.estimateN());
}
/**
@@ -233,7 +192,7 @@ public final void cardinalityTest() {
public final void constructorTest_Empty() {
final BloomFilter bf = createEmptyFilter(shape);
- final long[] lb = bf.getBits();
+ final long[] lb = BloomFilter.asBitMapArray(bf);
assertEquals(0, lb.length);
}
@@ -242,171 +201,28 @@ public final void constructorTest_Empty() {
*/
@Test
public final void constructorTest_Hasher() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ Hasher hasher = new SimpleHasher(0, 1);
- final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
+ final BloomFilter bf = createFilter(shape, hasher);
+ final long[] lb = BloomFilter.asBitMapArray(bf);
assertEquals(0x1FFFF, lb[0]);
assertEquals(1, lb.length);
}
- /**
- * Tests that creating a Bloom filter with a Static hasher that has one shape and a
- * different specified shape fails.
- */
- @Test
- public final void constructorTest_WrongShape() {
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
-
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), anotherShape);
- try {
- createFilter(hasher, shape);
- fail("Should throw IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests that contains() with a Bloom filter argument returns the proper results.
- */
- @Test
- public final void containsTest_BloomFilter() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
- assertTrue(bf.contains(bf2));
- assertFalse(bf2.contains(bf));
- }
-
- /**
- * Tests that contains() fails properly if the other Bloom filter is not of the proper shape.
- */
- @Test
- public final void containsTest_BloomFilter_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), anotherShape);
- final BloomFilter bf2 = createFilter(hasher2, anotherShape);
- try {
- bf.contains(bf2);
- fail("Should throw IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests that contains() with a Hasher argument returns the proper results.
- */
- @Test
- public final void containsTest_Hasher() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertTrue(bf.contains(hasher2));
-
- lst2 = Arrays.asList(17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
-
- lst2 = Arrays.asList(10, 11, 12, 17, 18, 19, 20);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- assertFalse(bf.contains(hasher2));
- }
-
- /**
- * Tests that contains() fails properly if the hasher is not of the proper shape.
- */
- @Test
- public final void containsTest_Hasher_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
-
- final List lst2 = Arrays.asList(4, 5, 6, 7, 8, 9, 10);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
- try {
- bf.contains(hasher2);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-
- /**
- * Create an empty version of the BloomFilter implementation we are testing.
- *
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- protected abstract AbstractBloomFilter createEmptyFilter(Shape shape);
-
- /**
- * Create the BloomFilter implementation we are testing.
- *
- * @param hasher the hasher to use to create the filter.
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- protected abstract AbstractBloomFilter createFilter(Hasher hasher, Shape shape);
-
- /**
- * Create a generic BloomFilter implementation.
- *
- * @param hasher the hasher to use to create the filter.
- * @param shape the shape of the filter.
- * @return a BloomFilter implementation.
- */
- private AbstractBloomFilter createGenericFilter(final Hasher hasher, final Shape shape) {
- final BitSet bits = new BitSet();
- hasher.iterator(shape).forEachRemaining((IntConsumer) bits::set);
- return new TestBloomFilter(shape, bits);
- }
-
/**
* Tests that getBits() works correctly when multiple long values are returned.
*/
@Test
public final void getBitsTest_SpanLong() {
- final List lst = Arrays.asList(63, 64);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
+
+ final SimpleHasher hasher = new SimpleHasher(63, 1);
+ final BloomFilter bf = createFilter(new Shape(2, 72), hasher);
+ final long[] lb = BloomFilter.asBitMapArray(bf);
assertEquals(2, lb.length);
assertEquals(0x8000000000000000L, lb[0]);
assertEquals(0x1, lb[1]);
}
- /**
- * Tests that the the hasher returned from getHasher() works correctly.
- */
- @Test
- public final void getHasherTest() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter bf = createFilter(hasher, shape);
-
- final StaticHasher hasher2 = bf.getHasher();
-
- assertEquals(shape, hasher2.getShape());
- assertSameBits(hasher, hasher2);
- }
-
/**
* Tests that isFull() returns the proper values.
*/
@@ -414,228 +230,80 @@ public final void getHasherTest() {
public final void isFullTest() {
// create empty filter
- AbstractBloomFilter filter = createEmptyFilter(shape);
- assertFalse(filter.isFull());
-
- final List values = new ArrayList<>(shape.getNumberOfBits());
- for (int i = 0; i < shape.getNumberOfBits(); i++) {
- values.add(i);
- }
-
- StaticHasher hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
+ BloomFilter filter = createEmptyFilter(shape);
+ assertFalse("Should not be full", filter.isFull());
- assertTrue(filter.isFull());
+ filter = createFilter(shape, fullHasher);
+ assertTrue("Should be full", filter.isFull());
- final int mid = shape.getNumberOfBits() / 2;
- values.remove(Integer.valueOf(mid));
- hasher2 = new StaticHasher(values.iterator(), shape);
- filter = createFilter(hasher2, shape);
- assertFalse(filter.isFull());
- }
-
- /**
- * Tests that merging bloom filters works as expected.
- */
- @Test
- public final void mergeTest_BloomFilter() {
- mergeTest_BloomFilter(this::createFilter);
+ filter = createFilter(shape, new SimpleHasher(1, 3));
+ assertFalse("Should not be full", filter.isFull());
}
/**
* Tests that merging bloom filters works as expected with a generic BloomFilter.
*/
@Test
- public final void mergeTest_GenericBloomFilter() {
- mergeTest_BloomFilter(this::createGenericFilter);
- }
+ public final void mergeTest_Bloomfilter() {
- /**
- * Tests that merging bloom filters works as expected.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void mergeTest_BloomFilter(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf1 = createFilter(shape, from1);
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
+ final BloomFilter bf2 = createFilter(shape, from11);
- assertTrue(bf.merge(bf2), "Merge should not fail");
- assertEquals(27, bf.cardinality());
- }
+ final BloomFilter bf3 = bf1.merge(bf2);
+ assertTrue("Should contain", bf3.contains(bf1));
+ assertTrue("Should contain", bf3.contains(bf2));
- /**
- * Tests that merging bloom filters with different shapes fails properly
- */
- @Test
- public final void mergeTest_BloomFilter_WrongShape() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
- final BloomFilter bf2 = createFilter(hasher2, anotherShape);
-
- try {
- bf.merge(bf2);
- fail("Should throw IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
+ final BloomFilter bf4 = bf2.merge(bf1);
+ assertTrue("Should contain", bf4.contains(bf1));
+ assertTrue("Should contain", bf4.contains(bf2));
+ assertTrue("Should contain", bf4.contains(bf3));
+ assertTrue("Should contain", bf3.contains(bf4));
}
- /**
- * Tests that merging a hasher into a Bloom filter works as expected
- */
@Test
public final void mergeTest_Hasher() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf1 = createFilter(shape, from1);
+ final BloomFilter bf2 = createFilter(shape, from11);
- assertTrue(bf.merge(hasher2), "Merge should not fail");
- assertEquals(27, bf.cardinality());
+ final BloomFilter bf3 = bf1.merge(from11);
+ assertTrue("Should contain", bf3.contains(bf1));
+ assertTrue("Should contain", bf3.contains(bf2));
}
/**
- * Tests that merging a static hasher with the wrong shape into a Bloom filter fails as expected
- */
- @Test
- public final void mergeTest_Hasher_WrongShape() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final Shape anotherShape = new Shape(testFunctionX, 3, 72, 17);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), anotherShape);
-
- try {
- bf.merge(hasher2);
- fail("Should throw IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests that the orCardinality calculations are correct.
+ * Tests that merging another Bloom filter in place works as expected.
*/
@Test
- public final void orCardinalityTest() {
- orCardinalityTest(this::createFilter);
- }
+ public final void mergeInPlaceTest_Bloomfilter() {
- /**
- * Tests that the orCardinality calculations are correct with a generic BloomFilter.
- */
- @Test
- public final void orCardinalityTest_GenericBloomFilter() {
- orCardinalityTest(this::createGenericFilter);
- }
+ final BloomFilter bf1 = createFilter(shape, from1);
- /**
- * Tests that the andCardinality calculations are correct.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void orCardinalityTest(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ final BloomFilter bf2 = createFilter(shape, from11);
- final AbstractBloomFilter bf = createFilter(hasher, shape);
+ final BloomFilter bf3 = bf1.merge(bf2);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ bf1.mergeInPlace(bf2);
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
+ assertTrue("Should contain", bf1.contains(bf2));
+ assertTrue("Should contain", bf1.contains(bf3));
- assertEquals(27, bf.orCardinality(bf2));
}
- /**
- * Tests that the orCardinality calculations are correct when there are more than Long.LENGTH bits.
- */
@Test
- public final void orCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
+ public final void mergeInPlaceTest_Hasher() {
- final AbstractBloomFilter bf = createFilter(hasher, shape);
+ final BloomFilter bf1 = createFilter(shape, from1);
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
+ final BloomFilter bf2 = createFilter(shape, from11);
- final AbstractBloomFilter bf2 = createFilter(hasher2, shape);
+ final BloomFilter bf3 = bf1.merge(bf2);
- assertEquals(27, bf.orCardinality(bf2));
- assertEquals(27, bf2.orCardinality(bf));
- }
+ bf1.mergeInPlace(from11);
- /**
- * Tests that the xorCardinality calculations are correct.
- */
- @Test
- public final void xorCardinalityTest() {
- xorCardinalityTest(this::createFilter);
- }
-
- /**
- * Tests that the xorCardinality calculations are correct with a generic BloomFilter.
- */
- @Test
- public final void xorCardinalityTest_GenericBloomFilter() {
- xorCardinalityTest(this::createGenericFilter);
+ assertTrue("Should contain Bf2", bf1.contains(bf2));
+ assertTrue("Should contain Bf3", bf1.contains(bf3));
}
- /**
- * Tests that the andCardinality calculations are correct.
- *
- * @param filterFactory the factory function to create the filter
- */
- private void xorCardinalityTest(final BiFunction filterFactory) {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
-
- final BloomFilter bf2 = filterFactory.apply(hasher2, shape);
-
- assertEquals(20, bf.xorCardinality(bf2));
- }
-
- /**
- * Tests that the xorCardinality calculations are correct when there are more than Long.LENGTH bits.
- */
- @Test
- public final void xorCardinalityTest_ExtraLongs() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
-
- final BloomFilter bf = createFilter(hasher, shape);
-
- final List lst2 = Arrays.asList(11, 12, 13, 14, 15, 16, 17, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- final BloomFilter bf2 = createFilter(hasher2, shape);
-
- assertEquals(20, bf.xorCardinality(bf2));
- assertEquals(20, bf2.xorCardinality(bf));
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
new file mode 100644
index 0000000000..95b54f2c02
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.util.HashMap;
+import java.util.Map;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for implementations of {@link CountingBloomFilter}.
+ */
+public abstract class AbstractCountingBloomFilterTest
+ extends AbstractBloomFilterTest {
+ protected int[] from1Counts = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
+ protected int[] from11Counts = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
+ protected int[] bigHashCounts = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
+
+ protected final BitCountProducer maximumValueProducer = new BitCountProducer() {
+
+ @Override
+ public void forEachCount(BitCountProducer.BitCountConsumer consumer) {
+ for (int i = 1; i < 18; i++) {
+ consumer.accept(i, Integer.MAX_VALUE);
+ }
+ }
+ };
+
+ /**
+ * Assert the counts match the expected values. Values are for indices starting
+ * at 0. Assert the cardinality equals the number of non-zero counts.
+ *
+ * @param bf the bloom filter
+ * @param expected the expected counts
+ */
+ private static void assertCounts(final CountingBloomFilter bf, final int[] expected) {
+ final Map m = new HashMap<>();
+ bf.forEachCount(m::put);
+ int zeros = 0;
+ for (int i = 0; i < expected.length; i++) {
+ if (m.get(i) == null) {
+ assertEquals(expected[i], 0, "Wrong value for " + i);
+ zeros++;
+ } else {
+ assertEquals(expected[i], m.get(i).intValue(), "Wrong value for " + i);
+ }
+ }
+ assertEquals(expected.length - zeros, bf.cardinality());
+ }
+
+ /**
+ * Tests that counts are correct when a hasher with duplicates is used in the
+ * constructor.
+ */
+ @Test
+ public void constructorTest_Hasher_Duplicates() {
+ // from1 and from11 overlap at indices 11 through 17, so those counts are 2
+ final CountingBloomFilter bf = createFilter(shape, from1);
+ bf.add(BitCountProducer.from(from11.indices(shape)));
+
+ final long[] lb = BloomFilter.asBitMapArray(bf);
+ assertEquals(1, lb.length);
+ assertEquals(bigHashValue, lb[0]);
+
+ assertCounts(bf, bigHashCounts);
+ }
+
+ @Override
+ @Test
+ public void containsTest() {
+ final BloomFilter bf = new SimpleBloomFilter(shape, from1);
+ final CountingBloomFilter bf2 = createFilter(shape, bigHasher);
+
+ assertTrue("BF Should contain itself", bf.contains(bf));
+ assertTrue("BF2 Should contain itself", bf2.contains(bf2));
+ assertFalse("BF should not contain BF2", bf.contains(bf2));
+ assertTrue("BF2 should contain BF", bf2.contains(bf));
+ BitMapProducer producer = bf2;
+ assertTrue("BF2 should contain BF bitMapProducer", bf2.contains(producer) );
+
+ }
+
+
+ /**
+ * Tests that merging works in both directions between the filter under test and a SimpleBloomFilter.
+ */
+ @Test
+ public final void mergeTest_Mixed() {
+ final BloomFilter bf1 = createFilter(shape, from1);
+
+ final BloomFilter bf2 = new SimpleBloomFilter(shape, from11);
+
+ final BloomFilter bf3 = bf1.merge(bf2);
+ assertTrue("Should contain", bf3.contains(bf1));
+ assertTrue("Should contain", bf3.contains(bf2));
+
+ final BloomFilter bf4 = bf2.merge(bf1);
+ assertTrue("Should contain", bf4.contains(bf1));
+ assertTrue("Should contain", bf4.contains(bf2));
+ assertTrue("Should contain", bf4.contains(bf3));
+ assertTrue("Should contain", bf3.contains(bf4));
+ }
+
+ /**
+ * Tests that add correctly updates the counts when a CountingBloomFilter is
+ * passed.
+ */
+ @Test
+ public void addTest() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+ assertTrue("Add should work", bf1.add(createFilter(shape, from11)));
+ assertTrue("Should contain", bf1.contains(from1));
+ assertTrue("Should contain", bf1.contains(from11));
+ assertCounts(bf1, bigHashCounts);
+
+ }
+
+ @Test
+ public void addTest_overflow() {
+
+ final CountingBloomFilter bf1 = createEmptyFilter(shape);
+ assertTrue("Should add to empty", bf1.add(maximumValueProducer));
+ assertTrue("Should be valid", bf1.isValid());
+
+ assertFalse("Should not add", bf1.add(createFilter(shape, from1)));
+ assertFalse("Should not be valid", bf1.isValid());
+ }
+
+ /**
+ * Tests that subtract correctly updates the counts when a CountingBloomFilter is
+ * passed.
+ */
+ @Test
+ public void subtractTest() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+ bf1.add(BitCountProducer.from(from11.indices(shape)));
+
+ final CountingBloomFilter bf2 = createFilter(shape, from11);
+
+ assertTrue("Subtract should work", bf1.subtract(bf2));
+ assertFalse("Should not contain bitHasher", bf1.contains(bigHasher));
+ assertTrue("Should contain from1", bf1.contains(from1));
+
+ assertCounts(bf1, from1Counts);
+
+ }
+
+ /**
+ * Tests that subtract returns false and leaves the filter invalid when the
+ * filter being subtracted sets indices that this filter does not.
+ */
+ @Test
+ public void subtractTest_underflow() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+
+ final CountingBloomFilter bf2 = createFilter(shape, from11);
+
+ assertFalse("Subtract should not work", bf1.subtract(bf2));
+ assertFalse("isValid should return false", bf1.isValid());
+ assertFalse("Should not contain", bf1.contains(from1));
+ assertFalse("Should not contain", bf1.contains(bf2));
+
+ assertCounts(bf1, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 });
+
+ }
+
+ /**
+ * Tests that remove correctly updates the counts when a SimpleBloomFilter is
+ * passed.
+ */
+ @Test
+ public void removeTest() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+ bf1.add(BitCountProducer.from(from11.indices(shape)));
+
+ assertTrue("Remove should work", bf1.remove(new SimpleBloomFilter(shape, from11)));
+ assertFalse("Should not contain", bf1.contains(from11));
+ assertTrue("Should contain", bf1.contains(from1));
+
+ assertCounts(bf1, from1Counts);
+
+ }
+
+ /**
+ * Tests that remove correctly updates the counts when a hasher is
+ * passed.
+ */
+ @Test
+ public void removeTest_hasher() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+ bf1.add(BitCountProducer.from(from11.indices(shape)));
+
+ assertTrue("Remove should work", bf1.remove(from11));
+ assertFalse("Should not contain", bf1.contains(from11));
+ assertTrue("Should contain", bf1.contains(from1));
+
+ assertCounts(bf1, from1Counts);
+
+ }
+
+ /**
+ * Tests that remove returns false and leaves the filter invalid when the
+ * filter being removed sets indices that this filter does not.
+ */
+ @Test
+ public void removeTest_underflow() {
+ final CountingBloomFilter bf1 = createFilter(shape, from1);
+
+ final BloomFilter bf2 = new SimpleBloomFilter(shape, from11);
+
+ assertFalse("Subtract should not work", bf1.remove(bf2));
+ assertFalse("isValid should return false", bf1.isValid());
+ assertFalse("Should not contain", bf1.contains(from1));
+ assertFalse("Should not contain", bf1.contains(bf2));
+
+ assertCounts(bf1, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
+
+ }
+
+ @Test
+ public void mergeTest_overflow() {
+
+ final CountingBloomFilter bf1 = createEmptyFilter(shape);
+ assertTrue("Should add to empty", bf1.add(maximumValueProducer));
+ assertTrue("Should be valid", bf1.isValid());
+
+ CountingBloomFilter bf2 = bf1.merge(new SimpleBloomFilter(shape, from1));
+ assertFalse("Should not be valid", bf2.isValid());
+ }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
index a661f93fde..117194b6a1 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java
@@ -16,520 +16,23 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.function.BiPredicate;
-import java.util.function.Function;
-import java.util.function.ToIntBiFunction;
-
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.junit.jupiter.api.Test;
/**
* Tests for the {@link ArrayCountingBloomFilter}.
*/
-public class ArrayCountingBloomFilterTest extends AbstractBloomFilterTest {
-
- /**
- * Function to convert int arrays to BloomFilters for testing.
- */
- private final Function converter = counts -> {
- final BloomFilter testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge(new FixedIndexesTestHasher(shape, counts));
- return testingFilter;
- };
+public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTest {
@Override
- protected ArrayCountingBloomFilter createEmptyFilter(final Shape shape) {
+ protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) {
return new ArrayCountingBloomFilter(shape);
}
@Override
- protected ArrayCountingBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- final ArrayCountingBloomFilter result = new ArrayCountingBloomFilter(shape);
- result.merge( hasher );
- return result;
- }
-
- private ArrayCountingBloomFilter createFromCounts(final int[] counts) {
- // Use a dummy filter to add the counts to an empty filter
- final CountingBloomFilter dummy = new ArrayCountingBloomFilter(shape) {
- @Override
- public void forEachCount(final BitCountConsumer action) {
- for (int i = 0; i < counts.length; i++) {
- action.accept(i, counts[i]);
- }
- }
- };
- final ArrayCountingBloomFilter bf = new ArrayCountingBloomFilter(shape);
- bf.add(dummy);
- return bf;
- }
-
- /**
- * Assert the counts match the expected values. Values are for indices starting
- * at 0. Assert the cardinality equals the number of non-zero counts.
- *
- * @param bf the bloom filter
- * @param expected the expected counts
- */
- private static void assertCounts(final CountingBloomFilter bf, final int[] expected) {
- final Map m = new HashMap<>();
- bf.forEachCount(m::put);
- int zeros = 0;
- for (int i = 0; i < expected.length; i++) {
- if (m.get(i) == null) {
- assertEquals(expected[i], 0, "Wrong value for " + i);
- zeros++;
- } else {
- assertEquals(expected[i], m.get(i).intValue(), "Wrong value for " + i);
- }
- }
- assertEquals(expected.length - zeros, bf.cardinality());
- }
-
- /**
- * Tests that counts are correct when a hasher with duplicates is used in the
- * constructor.
- */
- @Test
- public void constructorTest_Hasher_Duplicates() {
- final int[] expected = {0, 1, 1, 0, 0, 1};
- // Some indexes with duplicates
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 2, 5);
-
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- final long[] lb = bf.getBits();
- assertEquals(1, lb.length);
- assertEquals(0b100110L, lb[0]);
-
- assertCounts(bf, expected);
- }
-
- /**
- * Test the contains function with a standard Bloom filter.
- * The contains function is tested using a counting Bloom filter in the parent test class.
- */
- @Test
- public void contains_BloomFilter() {
- // Some indexes with duplicates
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 5);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- BitSetBloomFilter testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge( new FixedIndexesTestHasher(shape, 3, 4));
- assertFalse(bf.contains(testingFilter));
- testingFilter = new BitSetBloomFilter(shape);
- testingFilter.merge( new FixedIndexesTestHasher(shape, 2, 5));
- assertTrue(bf.contains(testingFilter));
- }
-
- /**
- * Tests that merge correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void mergeTest_Counts_CountingBloomFilter() {
- assertMerge(counts -> createFilter(new FixedIndexesTestHasher(shape, counts), shape),
- BloomFilter::merge);
- }
-
- /**
- * Tests that merge correctly updates the counts when a BloomFilter is passed.
- */
- @Test
- public void mergeTest_Counts_BloomFilter() {
- assertMerge(converter, BloomFilter::merge);
- }
-
- /**
- * Test that merge correctly updates the counts when a Hasher is passed.
- */
- @Test
- public void mergeTest_Counts_Hasher() {
- assertMerge(counts -> new FixedIndexesTestHasher(shape, counts),
- BloomFilter::merge);
- }
-
- /**
- * Test that merge correctly updates the counts when a Hasher is passed with duplicates.
- */
- @Test
- public void mergeTest_Counts_Hasher_Duplicates() {
- assertMerge(counts -> new FixedIndexesTestHasher(shape, createDuplicates(counts)),
- BloomFilter::merge);
- }
-
- /**
- * Tests that remove correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void removeTest_Counts_CountingBloomFilter() {
- assertRemove(counts -> createFilter(new FixedIndexesTestHasher(shape, counts), shape),
- CountingBloomFilter::remove);
- }
-
- /**
- * Tests that remove correctly updates the counts when a BloomFilter is passed.
- */
- @Test
- public void removeTest_Counts_BloomFilter() {
- assertRemove(converter, CountingBloomFilter::remove);
- }
-
- /**
- * Test that remove correctly updates the counts when a Hasher is passed.
- */
- @Test
- public void removeTest_Counts_Hasher() {
- assertRemove(counts -> new FixedIndexesTestHasher(shape, counts),
- CountingBloomFilter::remove);
- }
-
- /**
- * Test that remove correctly updates the counts when a Hasher is passed with duplicates.
- */
- @Test
- public void removeTest_Counts_Hasher_Duplicates() {
- assertRemove(counts -> new FixedIndexesTestHasher(shape, createDuplicates(counts)),
- CountingBloomFilter::remove);
- }
-
- /**
- * Creates duplicates in the counts.
- *
- * @param counts the counts
- * @return the new counts
- */
- private static int[] createDuplicates(final int[] counts) {
- // Duplicate some values randomly
- final int length = counts.length;
- final int[] countsWithDuplicates = Arrays.copyOf(counts, 2 * length);
- for (int i = length; i < countsWithDuplicates.length; i++) {
- // Copy a random value from the counts into the end position
- countsWithDuplicates[i] = countsWithDuplicates[ThreadLocalRandom.current().nextInt(i)];
- }
- return countsWithDuplicates;
- }
-
- /**
- * Assert a merge operation. The converter should construct a suitable object
- * to remove the indices from the provided Bloom filter with the remove operation.
- *
- * @param the type of the filter
- * @param converter the converter
- * @param merge the merge operation
- */
- private void assertMerge(final Function converter,
- final BiPredicate merge) {
- final int[] indexes1 = { 1, 2, 4, 5, 6};
- final int[] indexes2 = { 3, 4, 6};
- final int[] expected = {0, 1, 1, 1, 2, 1, 2};
- assertOperation(indexes1, indexes2, converter, merge, true, expected);
- }
-
- /**
- * Assert a remove operation. The converter should construct a suitable object
- * to remove the indices from the provided Bloom filter with the remove operation.
- *
- * @param the type of the filter
- * @param converter the converter
- * @param remove the remove operation
- */
- private void assertRemove(final Function converter,
- final BiPredicate remove) {
- final int[] indexes1 = { 1, 2, 4, 5, 6};
- final int[] indexes2 = { 2, 5, 6};
- final int[] expected = {0, 1, 0, 0, 1, 0, 0};
- assertOperation(indexes1, indexes2, converter, remove, true, expected);
- }
-
- /**
- * Assert a counting operation. The first set of indexes is used to create the
- * CountingBloomFilter. The second set of indices is passed to the converter to
- * construct a suitable object to combine with the counting Bloom filter. The counts
- * of the first Bloom filter are checked using the expected counts.
- *
- * Counts are assumed to map to indexes starting from 0.
- *
- * @param the type of the filter
- * @param indexes1 the first set of indexes
- * @param indexes2 the second set of indexes
- * @param converter the converter
- * @param operation the operation
- * @param isValid the expected value for the operation result
- * @param expected the expected counts after the operation
- */
- private void assertOperation(final int[] indexes1, final int[] indexes2,
- final Function converter,
- final BiPredicate operation,
- final boolean isValid, final int[] expected) {
- final Hasher hasher = new FixedIndexesTestHasher(shape, indexes1);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
- final F filter = converter.apply(indexes2);
- final boolean result = operation.test(bf, filter);
- assertEquals(isValid, result);
- assertEquals(isValid, bf.isValid());
- assertCounts(bf, expected);
- }
-
- /**
- * Tests that merge errors when the counts overflow the maximum integer value.
- */
- @Test
- public void mergeTest_Overflow() {
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
-
- final ArrayCountingBloomFilter bf2 = createFromCounts(new int[] {0, 0, Integer.MAX_VALUE});
-
- // Small + 1 = OK
- // should not fail as the counts are ignored
- assertTrue(bf.merge(bf2));
- assertTrue(bf.isValid());
- assertCounts(bf, new int[] {0, 1, 2, 1});
-
- // Big + 1 = Overflow
- assertTrue(bf2.isValid());
- assertFalse(bf2.merge(bf));
- assertFalse(bf2.isValid(), "Merge should overflow and the filter is invalid");
-
- // The counts are not clipped to max. They have simply overflowed.
- // Note that this is a merge and the count is only incremented by 1
- // and not the actual count at each index. So it is not 2 + Integer.MAX_VALUE.
- assertCounts(bf2, new int[] {0, 1, 1 + Integer.MAX_VALUE, 1});
- }
-
- /**
- * Tests that removal errors when the counts become negative.
- */
- @Test
- public void removeTest_Negative() {
- final Hasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final ArrayCountingBloomFilter bf = createFilter(hasher, shape);
-
- final Hasher hasher2 = new FixedIndexesTestHasher(shape, 2);
- final ArrayCountingBloomFilter bf2 = createFilter(hasher2, shape);
-
- // More - Less = OK
- bf.remove(bf2);
- assertTrue(bf.isValid());
- assertCounts(bf, new int[] {0, 1, 0, 1});
-
- // Less - More = Negative
- assertTrue(bf2.isValid());
- bf2.remove(bf);
- assertFalse(bf2.isValid(), "Remove should create negative counts and the filter is invalid");
-
- // The counts are not clipped to zero. They have been left as negative.
- assertCounts(bf2, new int[] {0, -1, 1, -1});
- }
-
- /**
- * Tests that counts can be added to a new instance.
- *
- * Note: This test ensures the CountingBloomFilter
- * can be created with whatever counts are required for other tests.
- */
- @Test
- public void addTest_NewInstance() {
- for (final int[] counts : new int[][] {
- { /* empty */},
- {0, 0, 1},
- {0, 1, 2},
- {2, 3, 4},
- {66, 77, 0, 99},
- {Integer.MAX_VALUE, 42},
- }) {
- assertCounts(createFromCounts(counts), counts);
- }
- }
-
- /**
- * Test that add correctly ignores an empty CountingBloomFilter.
- */
- @Test
- public void addTest_Empty() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[0],
- CountingBloomFilter::add,
- true,
- new int[] {5, 2, 1});
- }
-
- /**
- * Test that add correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void addTest_Counts() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, 4, 1},
- CountingBloomFilter::add,
- true,
- new int[] {5, 8, 5, 1});
- }
-
- /**
- * Test that add correctly updates the isValid state when a CountingBloomFilter is
- * passed and an integer overflow occurs.
- */
- @Test
- public void addTest_Overflow() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, Integer.MAX_VALUE},
- CountingBloomFilter::add,
- false,
- new int[] {5, 8, 1 + Integer.MAX_VALUE});
- }
-
- /**
- * Test that subtract correctly ignores an empty CountingBloomFilter.
- */
- @Test
- public void subtractTest_Empty() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[0],
- CountingBloomFilter::subtract,
- true,
- new int[] {5, 2, 1});
- }
-
- /**
- * Test that subtract correctly updates the counts when a CountingBloomFilter is
- * passed.
- */
- @Test
- public void subtractTest_Counts() {
- assertCountingOperation(new int[] {5, 9, 1, 1},
- new int[] {0, 2, 1},
- CountingBloomFilter::subtract,
- true,
- new int[] {5, 7, 0, 1});
- }
-
- /**
- * Test that subtract correctly updates the isValid state when a CountingBloomFilter is
- * passed and the counts become negative.
- */
- @Test
- public void subtractTest_Negative() {
- assertCountingOperation(new int[] {5, 2, 1},
- new int[] {0, 6, 1},
- CountingBloomFilter::subtract,
- false,
- new int[] {5, -4, 0});
+ protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) {
+ ArrayCountingBloomFilter filter = createEmptyFilter(shape);
+ filter.add(BitCountProducer.from(hasher.indices(shape)));
+ return filter;
}
- /**
- * Assert a counting operation. Two CountingBloomFilters are created from the
- * two sets of counts. The operation is applied and the counts of the first
- * Bloom filter is checked using the expected counts.
- *
- * <p>Counts are assumed to map to indexes starting from 0.
- *
- * @param counts1 the first set counts
- * @param counts2 the first set counts
- * @param operation the operation
- * @param isValid the expected value for the operation result
- * @param expected the expected counts after the operation
- */
- private void assertCountingOperation(final int[] counts1, final int[] counts2,
- final BiPredicate operation,
- final boolean isValid, final int[] expected) {
- final ArrayCountingBloomFilter bf1 = createFromCounts(counts1);
- final ArrayCountingBloomFilter bf2 = createFromCounts(counts2);
- final boolean result = operation.test(bf1, bf2);
- assertEquals(isValid, result);
- assertEquals(isValid, bf1.isValid());
- assertCounts(bf1, expected);
- }
-
- /**
- * Tests that the andCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void andCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::andCardinality,
- 2);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::andCardinality,
- 2);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::andCardinality,
- 0);
- }
-
- /**
- * Tests that the orCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void orCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::orCardinality,
- 2);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::orCardinality,
- 6);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::orCardinality,
- 5);
- }
-
- /**
- * Tests that the xorCardinality calculation executes correctly when using a
- * CountingBloomFilter argument.
- */
- @Test
- public void xorCardinalityTest_CountingBloomFilter() {
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {1, 1},
- BloomFilter::xorCardinality,
- 0);
- assertCardinalityOperation(new int[] {0, 1, 0, 1, 1, 1, 0, 1, 0},
- new int[] {1, 1, 0, 0, 0, 1},
- BloomFilter::xorCardinality,
- 4);
- assertCardinalityOperation(new int[] {1, 1},
- new int[] {0, 0, 1, 1, 1},
- BloomFilter::xorCardinality,
- 5);
- }
-
- /**
- * Assert a cardinality operation. Two CountingBloomFilters are created from the
- * two sets of counts. The operation is applied and the counts of the first
- * Bloom filter is checked using the expected counts.
- *
- * Counts are assumed to map to indexes starting from 0.
- *
- * @param counts1 the first set counts
- * @param counts2 the first set counts
- * @param operation the operation
- * @param expected the expected cardinality
- */
- private void assertCardinalityOperation(final int[] counts1, final int[] counts2,
- final ToIntBiFunction operation,
- final int expected) {
- final ArrayCountingBloomFilter bf1 = createFromCounts(counts1);
- final ArrayCountingBloomFilter bf2 = createFromCounts(counts2);
- assertEquals(expected, operation.applyAsInt(bf1, bf2));
- assertEquals(expected, operation.applyAsInt(bf2, bf1));
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerTest.java
new file mode 100644
index 0000000000..e4a377b5a6
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.IntConsumer;
+
+import org.junit.Test;
+
+public class BitCountProducerTest {
+    // Exercises BitCountProducer.from(IndexProducer) and BitCountProducer.forEachIndex.
+    @Test
+    public void fromIndexProducer() {
+        IndexProducer iProducer = new IndexProducer() {
+
+            @Override
+            public void forEachIndex(IntConsumer consumer) {
+                consumer.accept(0);
+                consumer.accept(1);
+                consumer.accept(63);
+                consumer.accept(64);
+                consumer.accept(127);
+                consumer.accept(128);
+            }
+        };
+        BitCountProducer producer = BitCountProducer.from(iProducer);
+        Map<Integer, Integer> m = new HashMap<>();
+        // Record every (index, count) pair reported by the producer.
+        producer.forEachCount((i, v) -> m.put(i, v));
+        // Each of the six distinct indices is expected to be reported with a count of 1.
+        assertEquals(6, m.size());
+        assertEquals(Integer.valueOf(1), m.get(0));
+        assertEquals(Integer.valueOf(1), m.get(1));
+        assertEquals(Integer.valueOf(1), m.get(63));
+        assertEquals(Integer.valueOf(1), m.get(64));
+        assertEquals(Integer.valueOf(1), m.get(127));
+        assertEquals(Integer.valueOf(1), m.get(128));
+
+    }
+
+    @Test
+    public void forEachIndexTest() {
+        BitCountProducer producer = new BitCountProducer() {
+
+            @Override
+            public void forEachCount(BitCountConsumer consumer) {
+                consumer.accept(1, 11);
+                consumer.accept(3, 13);
+            }
+        };
+        // forEachIndex is expected to report only the indices (1 then 3), not the counts.
+        List<Integer> lst = new ArrayList<>();
+        producer.forEachIndex( lst::add );
+        assertEquals( 2, lst.size() );
+        assertEquals( Integer.valueOf(1), lst.get(0) );
+        assertEquals( Integer.valueOf(3), lst.get(1) );
+    }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerTest.java
new file mode 100644
index 0000000000..2cbff7c8b6
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.IntConsumer;
+
+import org.junit.Test;
+
+public class BitMapProducerTest {
+    // Exercises BitMapProducer.fromIndexProducer, BitMapProducer.fromLongArray and ArrayBuilder.
+    @Test
+    public void fromIndexProducer() {
+        IndexProducer iProducer = new IndexProducer() {
+
+            @Override
+            public void forEachIndex(IntConsumer consumer) {
+                consumer.accept(0);
+                consumer.accept(1);
+                consumer.accept(63);
+                consumer.accept(64);
+                consumer.accept(127);
+                consumer.accept(128);
+            }
+        };
+        BitMapProducer producer = BitMapProducer.fromIndexProducer(iProducer, new Shape(1, 200));
+        List<Long> lst = new ArrayList<>();
+        producer.forEachBitMap(lst::add);
+        long[] buckets = lst.stream().mapToLong(Long::longValue).toArray();
+        assertTrue(BitMap.contains(buckets, 0));
+        assertTrue(BitMap.contains(buckets, 1));
+        assertTrue(BitMap.contains(buckets, 63));
+        assertTrue(BitMap.contains(buckets, 64));
+        assertTrue(BitMap.contains(buckets, 127));
+        assertTrue(BitMap.contains(buckets, 128));
+    }
+
+    @Test
+    public void fromLongArrayTest() {
+        long[] ary = new long[] {1L, 2L, 3L, 4L, 5L};
+        BitMapProducer producer = BitMapProducer.fromLongArray( ary );
+        List<Long> lst = new ArrayList<>();
+        producer.forEachBitMap( lst::add );
+        assertEquals( Long.valueOf(1), lst.get(0) );
+        assertEquals( Long.valueOf(2), lst.get(1) );
+        assertEquals( Long.valueOf(3), lst.get(2) );
+        assertEquals( Long.valueOf(4), lst.get(3) );
+        assertEquals( Long.valueOf(5), lst.get(4) );
+
+    }
+
+    @Test
+    public void arrayBuilderTest() {
+        try {
+            new BitMapProducer.ArrayBuilder( new Shape( 1, 4 ), new long[] {1L, 2L, 3L, 4L, 5L });
+            fail( "Should have thrown IllegalArgumentException");
+        } catch (IllegalArgumentException expected) {
+            // do nothing
+        }
+    }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java
new file mode 100644
index 0000000000..145a28aa7f
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapTest.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Test;
+
+public class BitMapTest {
+    // Unit tests for the static helpers in BitMap.
+    @Test
+    public void checkPositiveTest() {
+        BitMap.checkPositive(0);
+        BitMap.checkPositive(0);
+        try {
+            BitMap.checkPositive(-1);
+            fail("Should have thrown IndexOutOfBoundsException");
+        } catch (IndexOutOfBoundsException expected) {
+            // do nothing
+        }
+    }
+
+    @Test
+    public void getLongBitTest() {
+        assertEquals(1, BitMap.getLongBit(0));
+        assertEquals(0x8000000000000000L, BitMap.getLongBit(63));
+        assertEquals(1, BitMap.getLongBit(64));
+        assertEquals(0x8000000000000000L, BitMap.getLongBit(127));
+        assertEquals(1, BitMap.getLongBit(128));
+    }
+
+    @Test
+    public void getLongIndexTest() {
+        assertEquals(0, BitMap.getLongIndex(0));
+        assertEquals(0, BitMap.getLongIndex(63));
+        assertEquals(1, BitMap.getLongIndex(64));
+        assertEquals(1, BitMap.getLongIndex(127));
+        assertEquals(2, BitMap.getLongIndex(128));
+    }
+
+    @Test
+    public void isSparseTest() {
+        Shape shape = new Shape(17, 64);
+        assertTrue(BitMap.isSparse(0, shape));
+        assertTrue(BitMap.isSparse(1, shape));
+        assertTrue(BitMap.isSparse(2, shape));
+        assertFalse(BitMap.isSparse(3, shape));
+
+        shape = new Shape(17, 64 * 3);
+
+        for (int i = 0; i < 7; i++) {
+            assertTrue(BitMap.isSparse(i, shape));
+        }
+        assertFalse(BitMap.isSparse(7, shape));
+    }
+
+    @Test
+    public void numberOfBitMapsTest() {
+        assertEquals("Number of bits 0", 0, BitMap.numberOfBitMaps(0));
+        for (int i = 1; i < 65; i++) {
+            assertEquals(String.format("Number of bits %d", i), 1, BitMap.numberOfBitMaps(i));
+        }
+        for (int i = 65; i < 129; i++) {
+            assertEquals(String.format("Number of bits %d", i), 2, BitMap.numberOfBitMaps(i));
+        }
+        assertEquals("Number of bits 129", 3, BitMap.numberOfBitMaps(129));
+
+    }
+
+    @Test
+    public void setTest() {
+        long[] bitMaps = new long[BitMap.numberOfBitMaps(129)];
+        for (int i = 0; i < 129; i++) {
+            BitMap.set(bitMaps, i);
+            assertTrue(String.format("Failed at index: %d", i), BitMap.contains(bitMaps, i));
+        }
+        assertEquals(0xFFFFFFFFFFFFFFFFL, bitMaps[0]);
+        assertEquals(0xFFFFFFFFFFFFFFFFL, bitMaps[1]);
+        assertEquals(1L, bitMaps[2]);
+    }
+
+    @Test
+    public void containsTest() {
+        long[] bitMaps = new long[1];
+
+        for (int i = 0; i < 64; i++) {
+            bitMaps[0] = 0L;
+            BitMap.set(bitMaps, i);
+            for (int j = 0; j < 64; j++) {
+                if (j == i) {
+                    assertTrue(String.format("Failed at index: %d for %d", i, j), BitMap.contains(bitMaps, j));
+                } else {
+                    assertFalse(String.format("Failed at index %d for %d", i, j), BitMap.contains(bitMaps, j));
+                }
+            }
+
+        }
+    }
+
+    @Test
+    public void contains_boundaryConditionTest() {
+        long[] ary = new long[1];
+
+        assertFalse(BitMap.contains(ary, 0));
+        ary[0] = 0x01;
+        assertTrue(BitMap.contains(ary, 0));
+
+        assertFalse(BitMap.contains(ary, 63));
+        ary[0] = (1L << 63);
+        assertTrue(BitMap.contains(ary, 63));
+
+        ary = new long[2];
+        assertFalse(BitMap.contains(ary, 64));
+        ary[1] = 1;
+        assertTrue(BitMap.contains(ary, 64));
+
+    }
+
+    @Test
+    public void checkRangeTest() {
+        try {
+            BitMap.checkRange( 1, Long.SIZE + 1);
+            fail( "Should have thrown IndexOutOfBoundsException" );
+        } catch (IndexOutOfBoundsException expected) {
+            // do nothing
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
deleted file mode 100644
index ffd2d0d8c5..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterIndexerTest.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.junit.jupiter.api.Test;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Random;
-import java.util.concurrent.ThreadLocalRandom;
-
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-/**
- * Tests for the {@link BloomFilterIndexer}.
- */
-public class BloomFilterIndexerTest {
-
- @Test
- public void testCheckPositiveThrows() {
- assertThrows(IndexOutOfBoundsException.class, () -> BloomFilterIndexer.checkPositive(-1));
- }
-
- @Test
- public void testGetLongIndex() {
- assertEquals(0, BloomFilterIndexer.getLongIndex(0));
-
- for (final int index : getIndexes()) {
- // getLongIndex is expected to identify a block of 64-bits (starting from zero)
- assertEquals(index / Long.SIZE, BloomFilterIndexer.getLongIndex(index));
-
- // Verify the behavior for negatives. It should produce a negative (invalid)
- // as a simple trip for incorrect usage.
- assertTrue(BloomFilterIndexer.getLongIndex(-index) < 0);
-
- // If index is not zero then when negated this is what a signed shift
- // of 6-bits actually does
- assertEquals(((1 - index) / Long.SIZE) - 1,
- BloomFilterIndexer.getLongIndex(-index));
- }
- }
-
- @Test
- public void testGetLongBit() {
- assertEquals(1L, BloomFilterIndexer.getLongBit(0));
-
- for (final int index : getIndexes()) {
- // getLongBit is expected to identify a single bit in a 64-bit block
- assertEquals(1L << (index % Long.SIZE), BloomFilterIndexer.getLongBit(index));
-
- // Verify the behavior for negatives
- assertEquals(1L << (64 - (index & 0x3f)), BloomFilterIndexer.getLongBit(-index));
- }
- }
-
- /**
- * Gets non-zero positive indexes for testing.
- *
- * @return the indices
- */
- private static int[] getIndexes() {
- final Random rng = ThreadLocalRandom.current();
- final ArrayList indexes = new ArrayList<>(40);
- for (int i = 0; i < 10; i++) {
- // random positive numbers
- indexes.add(rng.nextInt() >>> 1);
- indexes.add(rng.nextInt(23647826));
- indexes.add(rng.nextInt(245));
- }
- // Quickly remove zeros (as these cannot be negated)
- indexes.removeIf(i -> i == 0);
- // Add edge cases here
- indexes.add(1);
- indexes.add(2);
- indexes.add(63);
- indexes.add(64);
- indexes.add(Integer.MAX_VALUE);
- return indexes.stream().mapToInt(Integer::intValue).toArray();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
deleted file mode 100644
index 0d6443355c..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterMethodsTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.BitSet;
-import java.util.function.IntConsumer;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
-
-/**
- * Test all the default implementations of the BloomFilter in {@link AbstractBloomFilter}.
- */
-public class DefaultBloomFilterMethodsTest extends AbstractBloomFilterTest {
-
- /**
- * A testing class that implements only the abstract methods from BloomFilter.
- *
- */
- private static class BF extends AbstractBloomFilter {
-
- /**
- * The bits for this BloomFilter.
- */
- private final BitSet bitSet;
-
- /**
- * Constructs a BitSetBloomFilter from a hasher and a shape.
- *
- * @param hasher the Hasher to use.
- * @param shape the desired shape of the filter.
- */
- BF(final Hasher hasher, final Shape shape) {
- this(shape);
- verifyHasher(hasher);
- hasher.iterator(shape).forEachRemaining((IntConsumer) bitSet::set);
- }
-
- /**
- * Constructs an empty BitSetBloomFilter.
- *
- * @param shape the desired shape of the filter.
- */
- BF(final Shape shape) {
- super(shape);
- this.bitSet = new BitSet();
- }
-
- @Override
- public long[] getBits() {
- return bitSet.toLongArray();
- }
-
- @Override
- public StaticHasher getHasher() {
- return new StaticHasher(bitSet.stream().iterator(), getShape());
- }
-
- @Override
- public boolean merge(final BloomFilter other) {
- verifyShape(other);
- bitSet.or(BitSet.valueOf(other.getBits()));
- return true;
- }
-
- @Override
- public boolean merge(final Hasher hasher) {
- verifyHasher(hasher);
- hasher.iterator(getShape()).forEachRemaining((IntConsumer) bitSet::set);
- return true;
- }
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new BF(shape);
- }
-
- @Override
- protected AbstractBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new BF(hasher, shape);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
new file mode 100644
index 0000000000..9d615eb6fa
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.TreeSet;
+import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
+
+import org.apache.commons.collections4.bloomfilter.exceptions.NoMatchException;
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
+
+/**
+ * Runs the tests inherited from {@link AbstractBloomFilterTest} against
+ * {@link DefaultBloomFilter}, a {@link BloomFilter} implementation defined in this class.
+ */
+public class DefaultBloomFilterTest extends AbstractBloomFilterTest {
+    @Override
+    protected DefaultBloomFilter createEmptyFilter(final Shape shape) {
+        return new DefaultBloomFilter(shape);
+    }
+
+    @Override
+    protected DefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+        return new DefaultBloomFilter(shape, hasher);
+    }
+
+    /**
+     * A {@link BloomFilter} for testing that records its enabled bit indices in a sorted set.
+     */
+    public class DefaultBloomFilter implements BloomFilter {
+        /** The shape supplied at construction. */
+        private final Shape shape;
+        /** The enabled bit indices. */
+        private final TreeSet<Integer> indices;
+
+        /**
+         * Constructs an empty filter.
+         *
+         * @param shape the shape of the filter.
+         */
+        DefaultBloomFilter(Shape shape) {
+            this.shape = shape;
+            this.indices = new TreeSet<>();
+        }
+
+        /**
+         * Constructs a filter holding the indices the hasher produces for the shape.
+         *
+         * @param shape the shape of the filter.
+         * @param hasher the source of the indices to enable.
+         */
+        DefaultBloomFilter(Shape shape, Hasher hasher) {
+            this(shape);
+            hasher.indices(shape).forEachIndex(indices::add);
+        }
+
+        /** Passes each stored index to the consumer in ascending order. */
+        @Override
+        public void forEachIndex(IntConsumer consumer) {
+            indices.forEach(i -> consumer.accept(i.intValue()));
+        }
+
+        /** Derives the bit maps from this filter's indices and shape. */
+        @Override
+        public void forEachBitMap(LongConsumer consumer) {
+            BitMapProducer.fromIndexProducer(this, shape).forEachBitMap(consumer);
+        }
+
+        @Override
+        public boolean isSparse() {
+            return true;
+        }
+
+        @Override
+        public Shape getShape() {
+            return shape;
+        }
+
+        /**
+         * Checks whether every index from the producer is enabled in this filter.
+         *
+         * @param indexProducer the producer of the indices to look for.
+         * @return {@code true} if no produced index is missing, {@code false} otherwise.
+         */
+        @Override
+        public boolean contains(IndexProducer indexProducer) {
+            try {
+                indexProducer.forEachIndex(i -> {
+                    if (!indices.contains(i)) {
+                        // Abort the scan at the first missing index; caught below.
+                        throw new NoMatchException();
+                    }
+                });
+                return true;
+            } catch (NoMatchException e) {
+                return false;
+            }
+        }
+
+        /** Converts the bit maps to indices and checks those. */
+        @Override
+        public boolean contains(BitMapProducer bitMapProducer) {
+            return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
+        }
+
+        /**
+         * Enables every index of the other filter in this filter.
+         *
+         * @param other the filter to merge in; NOTE(review): its shape is not compared with this one.
+         * @return always {@code true}.
+         */
+        @Override
+        public boolean mergeInPlace(BloomFilter other) {
+            other.forEachIndex(indices::add);
+            return true;
+        }
+
+        /** Returns the number of distinct indices stored. */
+        @Override
+        public int cardinality() {
+            return indices.size();
+        }
+
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java b/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java
deleted file mode 100644
index ec4886294c..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/FixedIndexesTestHasher.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-
-import java.util.Arrays;
-import java.util.PrimitiveIterator.OfInt;
-
-/**
- * A Hasher implementation to return fixed indexes. Duplicates are allowed.
- * The shape is ignored when generating the indexes.
- *
- * This is not a real hasher and is used for testing only.
- */
-class FixedIndexesTestHasher implements Hasher {
- /** The shape. */
- private final Shape shape;
- /** The indexes. */
- private final int[] indexes;
-
- /**
- * Create an instance.
- *
- * @param shape the shape
- * @param indexes the indexes
- */
- FixedIndexesTestHasher(final Shape shape, final int... indexes) {
- this.shape = shape;
- this.indexes = indexes;
- }
-
- @Override
- public OfInt iterator(final Shape shape) {
- if (!this.shape.equals(shape)) {
- throw new IllegalArgumentException(
- String.format("shape (%s) does not match internal shape (%s)", shape, this.shape));
- }
- return Arrays.stream(indexes).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
deleted file mode 100644
index a10df81643..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.DynamicHasher;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-import org.junit.jupiter.api.Test;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.PrimitiveIterator.OfInt;
-
-/**
- * Tests the {@link HasherBloomFilter}.
- */
-public class HasherBloomFilterTest extends AbstractBloomFilterTest {
-
- /**
- * Tests that the constructor works correctly.
- */
- @Test
- public void constructorTest_NonStatic() {
- final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
- final DynamicHasher hasher = new DynamicHasher.Builder(new MD5Cyclic()).with("Hello", StandardCharsets.UTF_8).build();
- final HasherBloomFilter filter = createFilter(hasher, shape);
- final long[] lb = filter.getBits();
- assertEquals(2, lb.length);
- assertEquals(0x6203101001888c44L, lb[0]);
- assertEquals(0x60L, lb[1]);
- }
-
- @Override
- protected AbstractBloomFilter createEmptyFilter(final Shape shape) {
- return new HasherBloomFilter(shape);
- }
-
- @Override
- protected HasherBloomFilter createFilter(final Hasher hasher, final Shape shape) {
- return new HasherBloomFilter(hasher, shape);
- }
-
- /**
- * Test the edge case where the filter is empty and the getBits() function returns a
- * zero length array.
- */
- @Test
- public void getBitsTest_Empty() {
- final BloomFilter filter = createEmptyFilter(shape);
- assertArrayEquals(new long[0], filter.getBits());
- }
-
- /**
- * Test the edge case where the filter has only 1 bit in the lowest index and the getBits()
- * function returns an array of length 1.
- */
- @Test
- public void getBitsTest_LowestBitOnly() {
- final BloomFilter filter = createEmptyFilter(shape);
- // Set the lowest bit index only.
- filter.merge(new Hasher() {
- @Override
- public OfInt iterator(final Shape shape) {
- return Arrays.stream(new int[] {0}).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return shape.getHashFunctionIdentity();
- }
- });
- assertArrayEquals(new long[] {1L}, filter.getBits());
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java
deleted file mode 100644
index c6c6a03b2e..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexFilterTest.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentityImpl;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.junit.jupiter.api.Test;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-import java.util.function.IntConsumer;
-import java.util.stream.Collectors;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-/**
- * Tests for the {@link IndexFilters}.
- */
-public class IndexFilterTest {
-
- /**
- * The shape of the dummy Bloom filter.
- * This is used as an argument to a Hasher that just returns fixed indexes
- * so the parameters do not matter.
- */
- private final Shape shape = new Shape(new HashFunctionIdentityImpl(
- "Apache Commons Collections", "Dummy", Signedness.SIGNED, ProcessType.CYCLIC, 0L),
- 50, 3000, 4);
-
- @Test
- public void testApplyThrowsWithNullArguments() {
- final FixedIndexesTestHasher hasher = new FixedIndexesTestHasher(shape, 1, 2, 3);
- final Shape shape = this.shape;
- final ArrayList actual = new ArrayList<>();
- final IntConsumer consumer = actual::add;
-
- try {
- IndexFilters.distinctIndexes(null, shape, consumer);
- fail("null hasher");
- } catch (final NullPointerException expected) {
- // Ignore
- }
-
- try {
- IndexFilters.distinctIndexes(hasher, null, consumer);
- fail("null shape");
- } catch (final NullPointerException expected) {
- // Ignore
- }
-
- try {
- IndexFilters.distinctIndexes(hasher, shape, null);
- fail("null consumer");
- } catch (final NullPointerException expected) {
- // Ignore
- }
-
- // All OK together
- IndexFilters.distinctIndexes(hasher, shape, consumer);
- }
-
- @Test
- public void testApply() {
- assertFilter(1, 4, 6, 7, 9);
- }
-
- @Test
- public void testApplyWithDuplicates() {
- assertFilter(1, 4, 4, 6, 7, 7, 7, 7, 7, 9);
- }
-
- private void assertFilter(final int... indexes) {
- final FixedIndexesTestHasher hasher = new FixedIndexesTestHasher(shape, indexes);
- final Set expected = Arrays.stream(indexes).boxed().collect(Collectors.toSet());
- final ArrayList actual = new ArrayList<>();
-
- IndexFilters.distinctIndexes(hasher, shape, actual::add);
-
- assertEquals(expected.size(), actual.size());
- // Check the array has all the values.
- // We do not currently check the order of indexes from the
- // hasher.iterator() function.
- for (final Integer index : actual) {
- assertTrue(expected.contains(index));
- }
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
new file mode 100644
index 0000000000..7fd7b81512
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.LongConsumer;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for the {@link IndexProducer}.
+ */
+public class IndexProducerTest {
+
+    /**
+     * Checks that {@link IndexProducer#fromBitMapProducer} reports one index per set bit, with
+     * bit {@code b} of the {@code n}-th bit map reported as index {@code 64 * n + b}.
+     */
+    @Test
+    public void fromBitMapProducerTest() {
+        // 1L sets bit 0 of map 0, 2L sets bit 1 of map 1, 3L sets bits 0 and 1 of map 2.
+        TestingBitMapProducer producer = new TestingBitMapProducer(new long[] { 1L, 2L, 3L });
+        IndexProducer underTest = IndexProducer.fromBitMapProducer(producer);
+        List<Integer> lst = new ArrayList<>();
+
+        underTest.forEachIndex(lst::add);
+        assertEquals(4, lst.size());
+        assertEquals(Integer.valueOf(0), lst.get(0));
+        assertEquals(Integer.valueOf(1 + 64), lst.get(1));
+        assertEquals(Integer.valueOf(0 + 128), lst.get(2));
+        assertEquals(Integer.valueOf(1 + 128), lst.get(3));
+
+        // A fully populated bit map must yield every index from 0 to 63 in order.
+        producer = new TestingBitMapProducer(new long[] { 0xFFFFFFFFFFFFFFFFL });
+        underTest = IndexProducer.fromBitMapProducer(producer);
+        lst = new ArrayList<>();
+
+        underTest.forEachIndex(lst::add);
+
+        assertEquals(64, lst.size());
+        for (int i = 0; i < 64; i++) {
+            assertEquals(Integer.valueOf(i), lst.get(i));
+        }
+
+    }
+
+    /** A {@link BitMapProducer} that replays a fixed array of bit maps in array order. */
+    private static final class TestingBitMapProducer implements BitMapProducer {
+        private final long[] values;
+
+        TestingBitMapProducer(long[] values) {
+            this.values = values;
+        }
+
+        @Override
+        public void forEachBitMap(LongConsumer consumer) {
+            for (long l : values) {
+                consumer.accept(l);
+            }
+        }
+    }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
index 541428989f..5c9b7cd405 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java
@@ -17,14 +17,11 @@
package org.apache.commons.collections4.bloomfilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.fail;
-import java.util.List;
import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
+
+import org.apache.commons.collections4.bloomfilter.hasher.HasherCollection;
+import org.apache.commons.collections4.bloomfilter.hasher.SimpleHasher;
import org.junit.jupiter.api.Test;
/**
@@ -32,88 +29,38 @@
*/
public class SetOperationsTest {
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- private final Shape shape = new Shape(testFunction, 3, 72, 17);
-
- @Test
- public void testDifferentShapesThrows() {
- final List lst = Arrays.asList(1, 2);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- final Shape shape2 = new Shape(testFunction, 3, 72, 18);
- final List lst2 = Arrays.asList(2, 3);
- final Hasher hasher2 = new StaticHasher(lst2.iterator(), shape2);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape2);
-
- try {
- SetOperations.cosineDistance(filter1, filter2);
- fail("Expected an IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // Ignore
- }
- }
+ protected final SimpleHasher from1 = new SimpleHasher(1, 1);
+ protected final long from1Value = 0x3FFFEL;
+ protected final SimpleHasher from11 = new SimpleHasher(11, 1);
+ protected final long from11Value = 0xFFFF800L;
+ protected final HasherCollection bigHasher = new HasherCollection(from1, from11);
+ protected final long bigHashValue = 0xFFFFFFEL;
+ private final Shape shape = new Shape(17, 72);
/**
* Tests that the Cosine similarity is correctly calculated.
*/
@Test
public final void cosineDistanceTest() {
- List lst = Arrays.asList(1, 2);
- Hasher hasher = new StaticHasher(lst.iterator(), shape);
- BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(2, 3);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(0.5, SetOperations.cosineDistance(filter1, filter2), 0.0001);
- assertEquals(0.5, SetOperations.cosineDistance(filter2, filter1), 0.0001);
-
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher = new StaticHasher(lst.iterator(), shape);
- filter1 = new HasherBloomFilter(hasher, shape);
- lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
assertEquals(0.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
assertEquals(0.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ Shape shape2 = new Shape(2, 72);
+ filter1 = new SimpleBloomFilter(shape2, from1);
+ filter2 = new SimpleBloomFilter(shape2, new SimpleHasher(2, 1));
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter1, filter2), 0.000000000000001);
- assertEquals(0.514928749927334, SetOperations.cosineDistance(filter2, filter1), 0.000000000000001);
+ assertEquals(0.5, SetOperations.cosineDistance(filter1, filter2), 0.0001);
+ assertEquals(0.5, SetOperations.cosineDistance(filter2, filter1), 0.0001);
+
+ filter1 = new SimpleBloomFilter(shape, from1);
+ filter2 = new SimpleBloomFilter(shape, from11);
+
+ assertEquals(0.58823529, SetOperations.cosineDistance(filter1, filter2), 0.00000001);
+ assertEquals(0.58823529, SetOperations.cosineDistance(filter2, filter1), 0.00000001);
}
/**
@@ -122,17 +69,14 @@ public final void cosineDistanceTest() {
*/
@Test
public final void cosineDistanceTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape);
+ BloomFilter filter3 = new SimpleBloomFilter(shape);
assertEquals(1.0, SetOperations.cosineDistance(filter1, filter2), 0.0001);
assertEquals(1.0, SetOperations.cosineDistance(filter2, filter1), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter1, filter3), 0.0001);
- assertEquals(1.0, SetOperations.cosineDistance(filter3, filter1), 0.0001);
+ assertEquals(1.0, SetOperations.cosineDistance(filter2, filter3), 0.0001);
+ assertEquals(1.0, SetOperations.cosineDistance(filter3, filter2), 0.0001);
}
/**
@@ -140,23 +84,16 @@ public final void cosineDistanceTest_NoValues() {
*/
@Test
public final void cosineSimilarityTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
assertEquals(1.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
assertEquals(1.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ filter2 = new SimpleBloomFilter(shape, from11);
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter1, filter2), 0.000000000000001);
- assertEquals(0.485071250072666, SetOperations.cosineSimilarity(filter2, filter1), 0.000000000000001);
+ assertEquals(0.41176470, SetOperations.cosineSimilarity(filter1, filter2), 0.00000001);
+ assertEquals(0.41176470, SetOperations.cosineSimilarity(filter2, filter1), 0.00000001);
}
/**
@@ -165,12 +102,10 @@ public final void cosineSimilarityTest() {
*/
@Test
public final void cosineSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape);
+ final BloomFilter filter2 = new SimpleBloomFilter(shape);
// build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
+ final BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
assertEquals(0.0, SetOperations.cosineSimilarity(filter1, filter2), 0.0001);
assertEquals(0.0, SetOperations.cosineSimilarity(filter2, filter1), 0.0001);
@@ -178,92 +113,21 @@ public final void cosineSimilarityTest_NoValues() {
assertEquals(0.0, SetOperations.cosineSimilarity(filter3, filter1), 0.0001);
}
- /**
- * Tests that the intersection size estimate is correctly calculated.
- */
- @Test
- public final void estimateIntersectionSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- lst = Arrays.asList(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32, 33, 34, 35, 36, 37, 38, 39, 40);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- final long estimate = SetOperations.estimateIntersectionSize(filter1, filter2);
- assertEquals(1, estimate);
- }
-
- /**
- * Tests that the size estimate is correctly calculated.
- */
- @Test
- public final void estimateSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher = new StaticHasher(lst.iterator(), shape);
- BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
-
- // the data provided above do not generate an estimate that is equivalent to the
- // actual.
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
- hasher = new StaticHasher(lst.iterator(), shape);
- filter1 = new HasherBloomFilter(hasher, shape);
- assertEquals(1, SetOperations.estimateSize(filter1));
-
- lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30, 31, 32, 33);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- assertEquals(3, SetOperations.estimateSize(filter2));
- }
-
- /**
- * Tests that the union size estimate is correctly calculated.
- */
- @Test
- public final void estimateUnionSizeTest() {
- // build a filter
- List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- lst = Arrays.asList(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40);
- final Hasher hasher2 = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
-
- final long estimate = SetOperations.estimateUnionSize(filter1, filter2);
- assertEquals(3, estimate);
- }
-
/**
* Tests that the Hamming distance is correctly calculated.
*/
@Test
public final void hammingDistanceTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
assertEquals(0, SetOperations.hammingDistance(filter1, filter2));
assertEquals(0, SetOperations.hammingDistance(filter2, filter1));
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ filter2 = new SimpleBloomFilter(shape, from11);
- assertEquals(17, SetOperations.hammingDistance(filter1, filter2));
- assertEquals(17, SetOperations.hammingDistance(filter2, filter1));
+ assertEquals(20, SetOperations.hammingDistance(filter1, filter2));
+ assertEquals(20, SetOperations.hammingDistance(filter2, filter1));
}
/**
@@ -271,23 +135,16 @@ public final void hammingDistanceTest() {
*/
@Test
public final void jaccardDistanceTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter2), 0.0001);
assertEquals(1.0, SetOperations.jaccardDistance(filter2, filter1), 0.0001);
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ filter2 = new SimpleBloomFilter(shape, from11);
- assertEquals(0.32, SetOperations.jaccardDistance(filter1, filter2), 0.001);
- assertEquals(0.32, SetOperations.jaccardDistance(filter2, filter1), 0.001);
+ assertEquals(0.26, SetOperations.jaccardDistance(filter1, filter2), 0.001);
+ assertEquals(0.26, SetOperations.jaccardDistance(filter2, filter1), 0.001);
}
/**
@@ -296,12 +153,9 @@ public final void jaccardDistanceTest() {
*/
@Test
public final void jaccardDistanceTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape);
+ final BloomFilter filter2 = new SimpleBloomFilter(shape);
+ final BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
assertEquals(1.0, SetOperations.jaccardDistance(filter1, filter2), 0.0001);
assertEquals(1.0, SetOperations.jaccardDistance(filter2, filter1), 0.0001);
@@ -314,23 +168,16 @@ public final void jaccardDistanceTest_NoValues() {
*/
@Test
public final void jaccardSimilarityTest() {
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter1 = new HasherBloomFilter(hasher, shape);
-
- List lst2 = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- Hasher hasher2 = new StaticHasher(lst2.iterator(), shape);
- BloomFilter filter2 = new HasherBloomFilter(hasher2, shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape, from1);
+ BloomFilter filter2 = new SimpleBloomFilter(shape, from1);
assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
- lst2 = Arrays.asList(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
- hasher2 = new StaticHasher(lst2.iterator(), shape);
- filter2 = new HasherBloomFilter(hasher2, shape);
+ filter2 = new SimpleBloomFilter(shape, from11);
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter1, filter2), 0.001);
- assertEquals(0.68, SetOperations.jaccardSimilarity(filter2, filter1), 0.001);
+ assertEquals(0.74, SetOperations.jaccardSimilarity(filter1, filter2), 0.001);
+ assertEquals(0.74, SetOperations.jaccardSimilarity(filter2, filter1), 0.001);
}
/**
@@ -339,16 +186,72 @@ public final void jaccardSimilarityTest() {
*/
@Test
public final void jaccardSimilarityTest_NoValues() {
- final BloomFilter filter1 = new HasherBloomFilter(shape);
- final BloomFilter filter2 = new HasherBloomFilter(shape);
- // build a filter
- final List lst = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- final Hasher hasher = new StaticHasher(lst.iterator(), shape);
- final BloomFilter filter3 = new HasherBloomFilter(hasher, shape);
+ final BloomFilter filter1 = new SimpleBloomFilter(shape);
+ final BloomFilter filter2 = new SimpleBloomFilter(shape);
+ final BloomFilter filter3 = new SimpleBloomFilter(shape, from1);
assertEquals(0.0, SetOperations.jaccardSimilarity(filter1, filter2), 0.0001);
assertEquals(0.0, SetOperations.jaccardSimilarity(filter2, filter1), 0.0001);
assertEquals(1.0, SetOperations.jaccardSimilarity(filter1, filter3), 0.0001);
assertEquals(1.0, SetOperations.jaccardSimilarity(filter3, filter1), 0.0001);
}
+
+ @Test
+ public final void orCardinalityTest() {
+ Shape shape = new Shape(3, 128);
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63, 64));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(5, SetOperations.orCardinality(shape, filter1, filter2));
+ assertEquals(5, SetOperations.orCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(5, SetOperations.orCardinality(shape, filter1, filter2));
+ assertEquals(5, SetOperations.orCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(5, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(4, SetOperations.orCardinality(shape, filter1, filter2));
+ assertEquals(4, SetOperations.orCardinality(shape, filter2, filter1));
+ }
+
+ @Test
+ public final void andCardinalityTest() {
+ Shape shape = new Shape(3, 128);
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63, 64));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(1, SetOperations.andCardinality(shape, filter1, filter2));
+ assertEquals(1, SetOperations.andCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(0, SetOperations.andCardinality(shape, filter1, filter2));
+ assertEquals(0, SetOperations.andCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(5, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(1, SetOperations.andCardinality(shape, filter1, filter2));
+ assertEquals(1, SetOperations.andCardinality(shape, filter2, filter1));
+
+ }
+
+ @Test
+ public final void xorCardinalityTest() {
+ Shape shape = new Shape(3, 128);
+ SparseBloomFilter filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63, 64));
+ SparseBloomFilter filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(4, SetOperations.xorCardinality(shape, filter1, filter2));
+ assertEquals(4, SetOperations.xorCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(1, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(5, SetOperations.xorCardinality(shape, filter1, filter2));
+ assertEquals(5, SetOperations.xorCardinality(shape, filter2, filter1));
+
+ filter1 = new SparseBloomFilter(shape, Arrays.asList(5, 63));
+ filter2 = new SparseBloomFilter(shape, Arrays.asList(5, 64, 69));
+ assertEquals(3, SetOperations.xorCardinality(shape, filter1, filter2));
+ assertEquals(3, SetOperations.xorCardinality(shape, filter2, filter1));
+
+ }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeFactoryTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeFactoryTest.java
new file mode 100644
index 0000000000..5e8c6ed1d2
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeFactoryTest.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link Shape.Factory} factory methods.
+ */
+public class ShapeFactoryTest {
+
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
+ *
+ * n = 5
+ *
+ * p = 0.100375138 (1 in 10)
+ *
+ * m = 24 (3B)
+ *
+ * k = 3
+ */
+
+ /**
+ * Tests that if the number of items is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void badNumberOfItemsTest() {
+ try {
+ Shape.Factory.fromNM(0, 24);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromNMK(0, 24, 5);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromNP(0, 0.02);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ }
+
+ /**
+ * Tests that if the number of bits is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void badNumberOfBitsTest() {
+ try {
+ Shape.Factory.fromNM(5, 0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromNMK(5, 0, 7);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromPMK(0.035, 0, 7);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ }
+
+ /**
+ * Tests that if the number of hash functions is less than 1 an exception is thrown.
+ */
+ @Test
+ public void badNumberOfHashFunctionsTest() {
+ try {
+ Shape.Factory.fromNMK(5, 26, 0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromPMK(0.35, 26, 0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ }
+
+ /**
+ * Tests that if the probability is not in the exclusive range (0, 1) an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void badProbabilityTest() {
+ try {
+ Shape.Factory.fromNMK(4000, 8, 1);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ try {
+ Shape.Factory.fromNP(10, 0.0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
+ try {
+ Shape.Factory.fromNP(10, 1.0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
+ try {
+ Shape.Factory.fromNP(10, Double.NaN);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
+ }
+
+ /**
+ * Tests that when the number of items, number of bits and number of hash functions is passed the values are
+ * calculated correctly.
+ */
+ @Test
+ public void fromNMK_test() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&m=24&k=4
+ */
+ final Shape filterConfig = Shape.Factory.fromNMK(5, 24, 4);
+
+ assertEquals(24, filterConfig.getNumberOfBits());
+ assertEquals(4, filterConfig.getNumberOfHashFunctions());
+ assertEquals(0.102194782, filterConfig.getProbability(5), 0.000001);
+ }
+
+ /**
+ * Tests that when the number of items and number of bits are passed the other values are calculated correctly.
+ */
+ @Test
+ public void fromNM_Test() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&m=24
+ */
+ final Shape filterConfig = Shape.Factory.fromNM(5, 24);
+
+ assertEquals(24, filterConfig.getNumberOfBits());
+ assertEquals(3, filterConfig.getNumberOfHashFunctions());
+ assertEquals(0.100375138, filterConfig.getProbability(5), 0.000001);
+ }
+
+ /**
+ * Tests that if calculated number of bits is greater than Integer.MAX_VALUE an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void numberOfBitsOverflowTest() {
+ try {
+ Shape.Factory.fromNP(Integer.MAX_VALUE, 0.1);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // do nothing.
+ }
+ }
+
+ /**
+ * Tests that the probability is calculated correctly.
+ */
+ @Test
+ public void probabilityTest() {
+ Shape shape = Shape.Factory.fromNMK(5, 24, 3);
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(0.100375138, shape.getProbability(5), 0.000001);
+ }
+
+ /**
+ * Tests the calculated values of calling the factory method with the probability, number of bits and number of hash
+ * functions.
+ */
+ @Test
+ public void fromPMK_test() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=24&k=3
+ */
+ final Shape shape = Shape.Factory.fromPMK(0.1, 24, 3);
+
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(0.100375138, shape.getProbability(5), 0.000001);
+ }
+
+ /**
+ * Tests the calculated values of calling the factory method with the number of items and the
+ * probability.
+ */
+ @Test
+ public void fromNP_test() {
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=24&k=3
+ */
+ final double probability = 1.0/2000000;
+ final Shape shape = Shape.Factory.fromNP(10, probability );
+
+ assertEquals(302, shape.getNumberOfBits());
+ assertEquals(21, shape.getNumberOfHashFunctions());
+ }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java
new file mode 100644
index 0000000000..67c7e53cfd
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ShapeTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link Shape} class.
+ */
+public class ShapeTest {
+
+ /*
+ * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
+ *
+ * n = 5
+ *
+ * p = 0.100375138 (1 in 10)
+ *
+ * m = 24 (3B)
+ *
+ * k = 3
+ */
+
+ private final Shape shape = new Shape(3, 24);
+
+ /**
+ * Tests that if the number of bits is less than 1 an IllegalArgumentException is thrown.
+ */
+ @Test
+ public void constructor_items_bits_BadNumberOfBitsTest() {
+ try {
+ new Shape(5, 0);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ }
+
+
+ /**
+ * Tests that if the number of hash functions is less than 1 an exception is thrown.
+ */
+ @Test
+ public void constructor_items_bits_hash_BadNumberOfHashFunctionsTest() {
+ try {
+ new Shape(0, 5);
+ fail("Should have thrown IllegalArgumentException");
+ } catch (final IllegalArgumentException expected) {
+ // expected
+ }
+ }
+
+ /**
+ * Test equality of shape.
+ */
+ @Test
+ public void equalsTest() {
+
+ assertEquals(shape, shape);
+ assertEquals(3, shape.getNumberOfHashFunctions());
+ assertEquals(24, shape.getNumberOfBits());
+ assertEquals(shape.hashCode(), new Shape(3, 24).hashCode());
+ assertNotEquals(shape, null);
+ assertNotEquals(shape, new Shape(3, 25));
+ assertNotEquals(shape, new Shape(4, 24));
+ }
+
+ @Test
+ public void estimateNTest() {
+ double[] expected = { 0.0, 0.3404769153503671, 0.6960910159170385, 1.068251140996181, 1.4585724543516367,
+ 1.8689188094520417, 2.301456579614247, 2.758723890333837, 3.243720864865314, 3.7600290339658846,
+ 4.311972005861497, 4.90483578309127, 5.545177444479562, 6.2412684603966, 7.003749898831201,
+ 7.8466340240938095, 8.788898309344876, 9.85714945034106, 11.090354888959125, 12.54892734331076,
+ 14.334075753824441, 16.635532333438686, 19.879253198304, 25.424430642783573 };
+ for (int i = 0; i < 24; i++) {
+ assertEquals(expected[i], shape.estimateN(i), 0.00000000000000001);
+ }
+ }
+
+ @Test
+ public void getProbabilityTest() {
+ double[] expected = { 0.0, 0.0016223626694561954, 0.010823077182670957, 0.030579354491777785,
+ 0.06091618422799686, 0.1003751381786711, 0.14689159766038104, 0.19829601428155866, 0.25258045782764715,
+ 0.3080221532988778, 0.3632228594351169, 0.4171013016177174, 0.4688617281200601, 0.5179525036637239,
+ 0.5640228015164387, 0.6068817738972262, 0.6464623147796981, 0.6827901771310362, 0.7159584363083427,
+ 0.7461068849672469, 0.7734057607554121, 0.7980431551369204, 0.8202154721379679, 0.8401203636727712 };
+ for (int i = 0; i < 24; i++) {
+ assertEquals(expected[i], shape.getProbability(i), 0.000000000000001);
+ }
+
+ assertEquals( 0.0, shape.getProbability(0), 0.0 );
+
+ try {
+ shape.getProbability( -1 );
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException expect) {
+ // do nothing
+ }
+ }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
new file mode 100644
index 0000000000..5c0ef45082
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
+import org.junit.Test;
+
+/**
+ * Tests for the {@link SimpleBloomFilter}.
+ */
+public class SimpleBloomFilterTest extends AbstractBloomFilterTest {
+ @Override
+ protected SimpleBloomFilter createEmptyFilter(final Shape shape) {
+ return new SimpleBloomFilter(shape);
+ }
+
+ @Override
+ protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+ return new SimpleBloomFilter(shape, hasher);
+ }
+
+ @Test
+ public void constructorTest() {
+
+ SimpleBloomFilter filter = new SimpleBloomFilter( shape, BitMapProducer.fromLongArray( new long[] { 500L }) );
+ List lst = new ArrayList();
+ filter.forEachBitMap( lst::add );
+ assertEquals( 1, lst.size() );
+ assertEquals( 500L, lst.get(0).intValue() );
+
+ try {
+ filter = new SimpleBloomFilter( shape,
+ BitMapProducer.fromLongArray( new long[] { 500L, 400L, 300L }) );
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException expected) {
+ // do nothing
+ }
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
new file mode 100644
index 0000000000..e8f1845322
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
+import org.junit.Test;
+
+/**
+ * Tests for the {@link SparseBloomFilter}.
+ */
+public class SparseBloomFilterTest extends AbstractBloomFilterTest {
+ @Override
+ protected SparseBloomFilter createEmptyFilter(final Shape shape) {
+ return new SparseBloomFilter(shape);
+ }
+
+ @Override
+ protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) {
+ return new SparseBloomFilter(shape, hasher);
+ }
+
+ @Test
+ public void constructor_indexOutOfRange() {
+ Shape shape = new Shape( 1, 5 );
+ List lst = new ArrayList();
+ lst.add( 5 );
+ try {
+ new SparseBloomFilter( shape, lst );
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException expected) {
+ // do nothing;
+ }
+ lst.clear();
+ lst.add( -1 );
+ try {
+ new SparseBloomFilter( shape, lst );
+ fail( "Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException expected) {
+ // do nothing;
+ }
+ }
+
+ @Test
+ public void constructor_noValues() {
+ Shape shape = new Shape( 1, 5 );
+ List lst = new ArrayList();
+ new SparseBloomFilter( shape, lst );
+ }
+
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
deleted file mode 100644
index afbd6d8b0f..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.nio.charset.StandardCharsets;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-/**
- * {@link DynamicHasher.Builder} tests.
- */
-public class DynamicHasherBuilderTest {
-
- private DynamicHasher.Builder builder;
- private final HashFunction hf = new MD5Cyclic();
- private final Shape shape = new Shape(hf, 1, 345, 1);
- private final String testString = HasherBuilderTest.getExtendedString();
-
- /**
- * Tests that hashing a byte array works as expected.
- */
- @Test
- public void buildTest_byteArray() {
- final byte[] bytes = testString.getBytes();
- final DynamicHasher hasher = builder.with(bytes).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that an empty hasher works as expected.
- */
- @Test
- public void buildTest_Empty() {
- final DynamicHasher hasher = builder.build();
-
- final OfInt iter = hasher.iterator(shape);
-
- assertFalse(iter.hasNext());
- try {
- iter.nextInt();
- fail("Should have thrown NoSuchElementException");
- } catch (final NoSuchElementException ignore) {
- // do nothing
- }
- }
-
- /**
- * Tests that hashing a string works as expected.
- */
- @Test
- public void buildTest_String() {
- final byte[] bytes = testString.getBytes(StandardCharsets.UTF_8);
- final DynamicHasher hasher = builder.with(testString, StandardCharsets.UTF_8).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that hashing a string works as expected.
- */
- @Test
- public void buildTest_UnencodedString() {
- final byte[] bytes = testString.getBytes(StandardCharsets.UTF_16LE);
- final DynamicHasher hasher = builder.withUnencoded(testString).build();
- final int expected = (int) Math.floorMod((long) hf.apply(bytes, 0), (long) shape.getNumberOfBits());
-
- final OfInt iter = hasher.iterator(shape);
-
- assertTrue(iter.hasNext());
- assertEquals(expected, iter.nextInt());
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that build resets the builder.
- */
- @Test
- public void buildResetTest() {
- builder.with(new byte[] {123});
- final OfInt iter = builder.build().iterator(shape);
-
- assertTrue(iter.hasNext());
- iter.next();
- assertFalse(iter.hasNext());
-
- // Nothing added since last build so it should be an empty hasher
- final OfInt iter2 = builder.build().iterator(shape);
- assertFalse(iter2.hasNext());
- }
-
- /**
- * Sets up the builder for testing.
- */
- @BeforeEach
- public void setup() {
- builder = new DynamicHasher.Builder(hf);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
deleted file mode 100644
index 7b2bbba3e8..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.nio.charset.StandardCharsets;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link DynamicHasher}.
- */
-public class DynamicHasherTest {
- private DynamicHasher.Builder builder;
- private Shape shape;
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- /**
- * Sets up the DynamicHasher.
- */
- @BeforeEach
- public void setup() {
- builder = new DynamicHasher.Builder(new MD5Cyclic());
- shape = new Shape(new MD5Cyclic(), 3, 72, 17);
- }
-
- /**
- * Tests that the expected bits are returned from hashing.
- */
- @Test
- public void testGetBits() {
-
- final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62};
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
-
- final OfInt iter = hasher.iterator(shape);
-
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that bits from multiple hashes are returned correctly.
- */
- @Test
- public void testGetBits_MultipleHashes() {
- final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
- 59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).with("World", StandardCharsets.UTF_8).build();
-
- final OfInt iter = hasher.iterator(shape);
-
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- try {
- iter.next();
- fail("Should have thrown NoSuchElementException");
- } catch (final NoSuchElementException ignore) {
- // do nothing
- }
- }
-
- /**
- * Tests that retrieving bits for the wrong shape throws an exception.
- */
- @Test
- public void testGetBits_WrongShape() {
-
- final Hasher hasher = builder.with("Hello", StandardCharsets.UTF_8).build();
-
- try {
- hasher.iterator(new Shape(testFunction, 3, 72, 17));
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
deleted file mode 100644
index 479cfa5188..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionIdentityImplTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the HashFunctionIdentity implementation ({@link HashFunctionIdentityImpl})..
- */
-public class HashFunctionIdentityImplTest {
-
- /**
- * Tests a copy constructor of the HashFunctionIdentity.
- */
- @Test
- public void copyConstructorTest() {
- final HashFunctionIdentity identity = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "NAME";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Provider";
- }
-
- @Override
- public long getSignature() {
- return -1L;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
-
- };
- final HashFunctionIdentityImpl impl = new HashFunctionIdentityImpl(identity);
- assertEquals("NAME", impl.getName());
- assertEquals("Provider", impl.getProvider());
- assertEquals(Signedness.SIGNED, impl.getSignedness());
- assertEquals(ProcessType.CYCLIC, impl.getProcessType());
- assertEquals(-1L, impl.getSignature());
- }
-
- /**
- * Test the constructor from component values.
- */
- @Test
- public void valuesConstructorTest() {
- final HashFunctionIdentityImpl impl = new HashFunctionIdentityImpl("Provider", "NAME", Signedness.UNSIGNED,
- ProcessType.ITERATIVE, -2L);
- assertEquals("NAME", impl.getName());
- assertEquals("Provider", impl.getProvider());
- assertEquals(Signedness.UNSIGNED, impl.getSignedness());
- assertEquals(ProcessType.ITERATIVE, impl.getProcessType());
- assertEquals(-2L, impl.getSignature());
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java
deleted file mode 100644
index e68df55b26..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HashFunctionValidatorTest.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests of the {@link HashFunctionValidator}.
- */
-public class HashFunctionValidatorTest {
-
- /**
- * Tests that name is used in the equality check.
- */
- @Test
- public void testName() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl2", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that name is not affected by case.
- */
- @Test
- public void testNameIsCaseInsensitive() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "IMPL1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl2));
- }
-
- /**
- * Tests that process type is used in the equality check.
- */
- @Test
- public void testProcessType() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.ITERATIVE, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that provider is not used in the equality check.
- */
- @Test
- public void testProviderIsNotUsedInEqualityCheck() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite2", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertTrue(HashFunctionValidator.areEqual(impl1, impl2));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Tests that signedness is used in the equality check.
- */
- @Test
- public void testSignedness() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
- ProcessType.CYCLIC, 300L);
-
- assertTrue(HashFunctionValidator.areEqual(impl1, impl1));
- assertTrue(HashFunctionValidator.areEqual(impl2, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl1, impl2));
- assertFalse(HashFunctionValidator.areEqual(impl2, impl1));
- }
-
- /**
- * Test the check method throws when the two hash functions are not equal.
- */
- @Test
- public void testCheckThrows() {
- final HashFunctionIdentityImpl impl1 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.SIGNED,
- ProcessType.CYCLIC, 300L);
- final HashFunctionIdentityImpl impl2 = new HashFunctionIdentityImpl("Testing Suite", "impl1", Signedness.UNSIGNED,
- ProcessType.CYCLIC, 300L);
- assertThrows(IllegalArgumentException.class, () -> HashFunctionValidator.checkAreEqual(impl1, impl2));
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
deleted file mode 100644
index 303034053a..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder;
-import org.apache.commons.lang3.NotImplementedException;
-import org.junit.jupiter.api.Test;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.CharBuffer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-/**
- * Tests the
- * {@link org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder Hasher.Builder}.
- */
-public class HasherBuilderTest {
-
- /**
- * Simple class to collect byte[] items added to the builder.
- */
- private static class TestBuilder implements Hasher.Builder {
- ArrayList items = new ArrayList<>();
-
- @Override
- public Hasher build() {
- throw new NotImplementedException("Not required");
- }
-
- @Override
- public Builder with(final byte[] item) {
- items.add(item);
- return this;
- }
- }
-
- /**
- * Tests that adding CharSequence items works correctly.
- */
- @Test
- public void withCharSequenceTest() {
- final String ascii = "plain";
- final String extended = getExtendedString();
- for (final String s : new String[] {ascii, extended}) {
- for (final Charset cs : new Charset[] {
- StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, StandardCharsets.UTF_16
- }) {
- final TestBuilder builder = new TestBuilder();
- builder.with(s, cs);
- assertArrayEquals(s.getBytes(cs), builder.items.get(0));
- }
- }
- }
-
- /**
- * Tests that adding unencoded CharSequence items works correctly.
- */
- @Test
- public void withUnencodedCharSequenceTest() {
- final String ascii = "plain";
- final String extended = getExtendedString();
- for (final String s : new String[] {ascii, extended}) {
- final TestBuilder builder = new TestBuilder();
- builder.withUnencoded(s);
- final byte[] encoded = builder.items.get(0);
- final char[] original = s.toCharArray();
- // Should be twice the length
- assertEquals(original.length * 2, encoded.length);
- // Should be little endian (lower bits first)
- final CharBuffer buffer = ByteBuffer.wrap(encoded)
- .order(ByteOrder.LITTLE_ENDIAN).asCharBuffer();
- for (int i = 0; i < original.length; i++) {
- assertEquals(original[i], buffer.get(i));
- }
- }
- }
-
- /**
- * Gets a string with non-standard characters.
- *
- * @return the extended string
- */
- static String getExtendedString() {
- final char[] data = {'e', 'x', 't', 'e', 'n', 'd', 'e', 'd', ' ',
- // Add some characters that are non standard
- // non-ascii
- 0xCA98,
- // UTF-16 surrogate pair
- 0xD803, 0xDE6D
- // Add other cases here ...
- };
- return String.valueOf(data);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollectionTest.java
new file mode 100644
index 0000000000..bbcc91a359
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherCollectionTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link HasherCollection}.
+ */
+public class HasherCollectionTest {
+
+    /** First fixture hasher, built as {@code new SimpleHasher(1, 1)}. */
+    private final SimpleHasher hasher1 = new SimpleHasher(1, 1);
+
+    /** Second fixture hasher, built as {@code new SimpleHasher(2, 2)}. */
+    private final SimpleHasher hasher2 = new SimpleHasher(2, 2);
+
+    /**
+     * Tests that {@code size()} is 2 for a collection of two hashers and 3 when that
+     * collection is combined with one more hasher.
+     */
+    @Test
+    public void sizeTest() {
+        final HasherCollection hasher = new HasherCollection(hasher1, hasher2);
+        assertEquals(2, hasher.size());
+        final HasherCollection hasher3 = new HasherCollection(hasher, new SimpleHasher(3, 3));
+        assertEquals(3, hasher3.size());
+    }
+
+    /**
+     * Tests that a new collection is empty and is no longer empty once a hasher is added.
+     */
+    @Test
+    public void isEmptyTest() {
+        final HasherCollection hasher = new HasherCollection();
+        assertTrue(hasher.isEmpty());
+        hasher.add(hasher1);
+        assertFalse(hasher.isEmpty());
+    }
+
+    /**
+     * Tests that {@code indices(Shape)} yields the expected index sequence, in order, for a
+     * collection of two hashers.
+     */
+    @Test
+    public void testIndices() {
+        final HasherCollection hasher = new HasherCollection(hasher1, hasher2);
+        assertEquals(2, hasher.size());
+        final Shape shape = new Shape(5, 10);
+        final Integer[] expected = {1, 2, 3, 4, 5, 2, 4, 6, 8, 0};
+        final List<Integer> lst = new ArrayList<>();
+        final IndexProducer producer = hasher.indices(shape);
+        producer.forEachIndex(lst::add);
+        assertEquals(expected.length, lst.size());
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], lst.get(i), String.format("error at position %d", i));
+        }
+    }
+
+    /**
+     * Tests that adding a collection of two hashers results in a size of 2.
+     */
+    @Test
+    public void testAdd_collection() {
+        final HasherCollection hasher = new HasherCollection();
+        hasher.add(Arrays.asList(hasher1, hasher2));
+        assertEquals(2, hasher.size());
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherFilterTest.java
new file mode 100644
index 0000000000..ce6d1aa7da
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherFilterTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link Hasher.Filter}.
+ */
+public class HasherFilterTest {
+
+    /**
+     * Tests that a filter created with 10 passes each value 0..9 the first time it is tested,
+     * fails each of them when tested again, and throws for the value 10.
+     */
+    @Test
+    public void testBasicFiltering() {
+        final Hasher.Filter filter = new Hasher.Filter(10);
+
+        // First pass: none of the values has been tested before.
+        for (int i = 0; i < 10; i++) {
+            assertTrue(filter.test(i));
+        }
+
+        // Second pass: every value has already been tested once.
+        for (int i = 0; i < 10; i++) {
+            assertFalse(filter.test(i));
+        }
+
+        // The value equal to the constructor argument must be rejected with an exception.
+        assertThrows(IndexOutOfBoundsException.class, () -> filter.test(10));
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasherTest.java
similarity index 52%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasherTest.java
index 9b0d9a83e1..d92b178883 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/MD5CyclicTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/NullHasherTest.java
@@ -14,38 +14,51 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
+package org.apache.commons.collections4.bloomfilter.hasher;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
 import org.junit.jupiter.api.Test;
 
 /**
- * Tests the MD5 cyclic hash function.
+ * Tests the {@link NullHasher}.
  */
-public class MD5CyclicTest extends AbstractHashFunctionTest {
+public class NullHasherTest {
 
-    /**
-     * Test that the apply function returns the proper values.
-     */
+    /** The hasher under test, taken from {@code NullHasher.INSTANCE}. */
+    private final Hasher hasher = NullHasher.INSTANCE;
+
+    /**
+     * Tests that the hasher reports a size of zero.
+     */
     @Test
-    public void applyTest() {
-        final MD5Cyclic md5 = new MD5Cyclic();
-        final long l1 = 0x8b1a9953c4611296L;
-        final long l2 = 0xa827abf8c47804d7L;
-        final byte[] buffer = "Hello".getBytes();
+    public void sizeTest() {
+        assertEquals(0, hasher.size());
+    }
 
-        long l = md5.apply(buffer, 0);
-        assertEquals(l1, l);
-        l = md5.apply(buffer, 1);
-        assertEquals(l1 + l2, l);
-        l = md5.apply(buffer, 2);
-        assertEquals(l1 + l2 + l2, l);
+    /**
+     * Tests that no indices are produced for a shape.
+     */
+    @Test
+    public void testIterator() {
+        final Shape shape = new Shape(5, 10);
+        final List<Integer> lst = new ArrayList<>();
+        final IndexProducer producer = hasher.indices(shape);
+        producer.forEachIndex(lst::add);
+        assertEquals(0, lst.size());
     }
 
-    @Override
-    protected HashFunction createHashFunction() {
-        return new MD5Cyclic();
+    /**
+     * Tests that the hasher reports itself as empty.
+     */
+    @Test
+    public void isEmptyTest() {
+        assertTrue(hasher.isEmpty());
     }
 }
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
deleted file mode 100644
index 90f3808d8e..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/ShapeTest.java
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.ProcessType;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity.Signedness;
-
-import java.util.ArrayList;
-
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link Shape} class.
- */
-public class ShapeTest {
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- /*
- * values from https://hur.st/bloomfilter/?n=5&p=.1&m=&k=
- *
- * n = 5
- *
- * p = 0.100375138 (1 in 10)
- *
- * m = 24 (3B)
- *
- * k = 3
- */
-
- private final Shape shape = new Shape(testFunction, 5, 0.1);
-
- /**
- * Tests that if the number of bits less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfBitsTest() {
- try {
- new Shape(testFunction, 5, 0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of hash functions is less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfHashFunctionsTest() {
- try {
- new Shape(testFunction, 16, 8);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of items less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_bits_BadNumberOfItemsTest() {
- try {
- new Shape(testFunction, 0, 24);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of bits is less than 1 an exception is thrown
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfBitsTest() {
- try {
- new Shape(testFunction, 5, 0, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of hash functions is less than 1 an exception is thrown.
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfHashFunctionsTest() {
- try {
- new Shape(testFunction, 5, 24, 0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of items is less than 1 an exception is thrown.
- */
- @Test
- public void constructor_items_bits_hash_BadNumberOfItemsTest() {
- try {
- new Shape(testFunction, 0, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the calculated probability is greater than or equal to 1 an IllegalArgumentException is thrown
- */
- @Test
- public void constructor_items_bits_hash_BadProbabilityTest() {
- try {
- new Shape(testFunction, 4000, 8, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that when the number of items, number of bits and number of hash functions is passed the values are
- * calculated correctly.
- */
- @Test
- public void constructor_items_bits_hashTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&m=24&k=4
- */
- final Shape filterConfig = new Shape(testFunction, 5, 24, 4);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(4, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.102194782, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Tests that the number of items and number of bits is passed the other values are calculated correctly.
- */
- @Test
- public void constructor_items_bitsTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&m=24
- */
- final Shape filterConfig = new Shape(testFunction, 5, 24);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Tests that if the number of items is less than 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_BadNumberOfItemsTest() {
- try {
- new Shape(testFunction, 0, 1.0 / 10);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests that if the probability is less than or equal to 0 or more than or equal to 1 an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_BadProbabilityTest() {
- try {
- new Shape(testFunction, 10, 0.0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- try {
- new Shape(testFunction, 10, 1.0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- try {
- new Shape(testFunction, 10, Double.NaN);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests that if calculated number of bits is greater than Integer.MAX_VALUE an IllegalArgumentException is thrown.
- */
- @Test
- public void constructor_items_probability_NumberOfBitsOverflowTest() {
- try {
- new Shape(testFunction, Integer.MAX_VALUE, 1.0 / 10);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing.
- }
- }
-
- /**
- * Tests the the probability is calculated correctly.
- */
- @Test
- public void constructor_items_probability_Test() {
-
- assertEquals(24, shape.getNumberOfBits());
- assertEquals(3, shape.getNumberOfHashFunctions());
- assertEquals(5, shape.getNumberOfItems());
- assertEquals(0.100375138, shape.getProbability(), 0.000001);
- }
-
- /**
- * Tests that the constructor with a null name, number of items and size of filter fails.
- */
- @Test
- public void constructor_nm_noName() {
- try {
- new Shape(null, 5, 72);
- fail("Should throw NullPointerException");
- } catch (final NullPointerException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that the constructor with a null name, number of items, size of filter, and number of functions fails.
- */
- @Test
- public void constructor_nmk_noName() {
- try {
- new Shape(null, 5, 72, 17);
- fail("Should throw NullPointerException");
- } catch (final NullPointerException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that the constructor with a null name, number of items, and probability fails.
- */
- @Test
- public void constructor_np_noName() {
- try {
- new Shape(null, 5, 0.1);
- fail("Should throw NullPointerException");
- } catch (final NullPointerException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that the constructor with a null name, probability, size of filter, and number of functions fails.
- */
- @Test
- public void constructor_pmk_noName() {
- try {
- new Shape(null, 0.1, 72, 17);
- fail("Should throw NullPointerException");
- } catch (final NullPointerException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that if the number of bits is less than 1 an exception is thrown
- */
- @Test
- public void constructor_probability_bits_hash_BadNumberOfBitsTest() {
- try {
- new Shape(testFunction, 0.5, 0, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that if the number of functions is less than 1 an exception is thrown
- */
- @Test
- public void constructor_probability_bits_hash_BadNumberOfHashFunctionsTest() {
- try {
- new Shape(testFunction, 0.5, 24, 0);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests that invalid probability values cause and IllegalArgumentException to be thrown.
- */
- @Test
- public void constructor_probability_bits_hash_BadProbabilityTest() {
- // probability should not be 0
- try {
- new Shape(testFunction, 0.0, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
-
- // probability should not be = -1
- try {
- new Shape(testFunction, -1.0, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
-
- // probability should not be < -1
- try {
- new Shape(testFunction, -1.5, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
-
- // probability should not be = 1
- try {
- new Shape(testFunction, 1.0, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
-
- // probability should not be > 1
- try {
- new Shape(testFunction, 2.0, 24, 1);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // expected
- }
- }
-
- /**
- * Tests the calculated values of calling the constructor with the probability, number of bits and number of hash
- * functions.
- */
- @Test
- public void constructor_probability_bits_hashTest() {
- /*
- * values from https://hur.st/bloomfilter/?n=5&p=.1&m=24&k=3
- */
- final Shape filterConfig = new Shape(testFunction, 0.1, 24, 3);
-
- assertEquals(24, filterConfig.getNumberOfBits());
- assertEquals(3, filterConfig.getNumberOfHashFunctions());
- assertEquals(5, filterConfig.getNumberOfItems());
- assertEquals(0.100375138, filterConfig.getProbability(), 0.000001);
- }
-
- /**
- * Test equality of shape.
- */
- @Test
- public void equalsTest() {
-
- assertEquals(shape, shape);
- assertEquals(shape, new Shape(testFunction, 5, 1.0 / 10));
- assertNotEquals(shape, null);
- assertNotEquals(shape, new Shape(testFunction, 5, 1.0 / 11));
- assertNotEquals(shape, new Shape(testFunction, 4, 1.0 / 10));
- // Number of bits does not change equality,
- // only the number of bits and the number of hash functions
- final int numberOfBits = 10000;
- final int numberOfItems = 15;
- final int numberOfHashFunctions = 4;
- assertEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems + 1, numberOfBits, numberOfHashFunctions));
- assertNotEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems, numberOfBits + 1, numberOfHashFunctions));
- assertNotEquals(new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions),
- new Shape(testFunction, numberOfItems, numberOfBits, numberOfHashFunctions + 1));
-
- final HashFunctionIdentity testFunction2 = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function2";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- assertNotEquals(shape, new Shape(testFunction2, 4, 1.0 / 10));
- }
-
- /**
- * Test that hashCode satisfies the contract between {@link Object#hashCode()} and
- * {@link Object#equals(Object)}. Equal shapes must have the same hash code.
- */
- @Test
- public void hashCodeTest() {
- // Hash function equality is based on process type, signedness and name (case insensitive)
- final ArrayList list = new ArrayList<>();
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Provider changes
- list.add(new HashFunctionIdentityImpl("PROVIDER", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider2", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Name changes
- list.add(new HashFunctionIdentityImpl("Provider", "name", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider", "NAME", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- list.add(new HashFunctionIdentityImpl("Provider", "Other", Signedness.SIGNED, ProcessType.ITERATIVE, 0L));
- // Signedness changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.UNSIGNED, ProcessType.ITERATIVE, 0L));
- // ProcessType changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.CYCLIC, 0L));
- // Signature changes
- list.add(new HashFunctionIdentityImpl("Provider", "Name", Signedness.SIGNED, ProcessType.ITERATIVE, 1L));
-
- // Create shapes that only differ in the hash function.
- final int numberOfItems = 30;
- final int numberOfBits = 3000;
- final int numberOfHashFunctions = 10;
- final Shape shape1 = new Shape(list.get(0), numberOfItems, numberOfBits, numberOfHashFunctions);
- assertEquals(shape1, shape1);
-
- // Try variations
- for (int i = 1; i < list.size(); i++) {
- final Shape shape2 = new Shape(list.get(i), numberOfItems, numberOfBits, numberOfHashFunctions);
- assertEquals(shape2, shape2);
-
- // Equal shapes must have the same hash code
- if (shape1.equals(shape2)) {
- assertEquals(shape1.hashCode(), shape2.hashCode());
- }
- }
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasherTest.java
new file mode 100644
index 0000000000..aef6190cf4
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SimpleHasherTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link SimpleHasher}.
+ */
+public class SimpleHasherTest {
+
+    /** Hasher under test, constructed with both arguments set to 1. */
+    private final SimpleHasher hasher = new SimpleHasher(1, 1);
+
+    /**
+     * Collects every index emitted by the producer, in emission order, and asserts
+     * that the resulting sequence equals the expected one.
+     *
+     * @param producer the producer under test
+     * @param expected the expected indices, in order
+     */
+    private static void assertIndices(final IndexProducer producer, final Integer... expected) {
+        final List<Integer> actual = new ArrayList<>();
+        producer.forEachIndex(actual::add);
+        assertEquals(Arrays.asList(expected), actual);
+    }
+
+    /**
+     * Tests that constructing a hasher from an empty byte buffer throws
+     * an {@link IllegalArgumentException}.
+     */
+    @Test
+    public void constructor_byteTest() {
+        assertThrows(IllegalArgumentException.class, () -> new SimpleHasher(new byte[0]),
+            "Should have thrown IllegalArgumentException");
+    }
+
+    /**
+     * Tests that the hasher reports a size of one.
+     */
+    @Test
+    public void sizeTest() {
+        assertEquals(1, hasher.size());
+    }
+
+    /**
+     * Tests that the hasher is not reported as empty.
+     */
+    @Test
+    public void isEmptyTest() {
+        assertFalse(hasher.isEmpty());
+    }
+
+    /**
+     * Tests the indices produced by the hasher for a {@code Shape(5, 10)}.
+     */
+    @Test
+    public void testIterator() {
+        assertIndices(hasher.indices(new Shape(5, 10)), 1, 2, 3, 4, 5);
+    }
+
+    /**
+     * Tests the indices produced by hashers built from byte buffers of
+     * various lengths.
+     */
+    @Test
+    public void constructorBufferTest() {
+        final Shape shape = new Shape(5, 10);
+
+        // Buffers of 2, 1, 3 and 4 bytes.
+        assertIndices(new SimpleHasher(new byte[] {1, 1}).indices(shape), 1, 2, 3, 4, 5);
+        assertIndices(new SimpleHasher(new byte[] {1}).indices(shape), 0, 1, 2, 3, 4);
+        assertIndices(new SimpleHasher(new byte[] {1, 0, 1}).indices(shape), 1, 2, 3, 4, 5);
+        assertIndices(new SimpleHasher(new byte[] {0, 1, 0, 1}).indices(shape), 1, 2, 3, 4, 5);
+
+        // Buffers of 16, 20 and 19 bytes.
+        assertIndices(
+            new SimpleHasher(new byte[] {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}).indices(shape),
+            1, 2, 3, 4, 5);
+        assertIndices(
+            new SimpleHasher(new byte[] {0, 0, 0, 0, 0, 0, 0, 1, 5, 5, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5}).indices(shape),
+            1, 2, 3, 4, 5);
+        assertIndices(
+            new SimpleHasher(new byte[] {0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5}).indices(shape),
+            1, 2, 3, 4, 5);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollectionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollectionTest.java
new file mode 100644
index 0000000000..81c19f8d60
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/SingleItemHasherCollectionTest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.collections4.bloomfilter.IndexProducer;
+import org.apache.commons.collections4.bloomfilter.Shape;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the {@link SingleItemHasherCollection}.
+ */
+public class SingleItemHasherCollectionTest {
+
+    /** First hasher fixture, constructed with both arguments set to 1. */
+    private final SimpleHasher hasher1 = new SimpleHasher(1, 1);
+
+    /** Second hasher fixture, constructed with both arguments set to 2. */
+    private final SimpleHasher hasher2 = new SimpleHasher(2, 2);
+
+    /**
+     * Collects every index emitted by the producer, in emission order, and asserts
+     * that the resulting sequence equals the expected one.
+     *
+     * @param producer the producer under test
+     * @param expected the expected indices, in order
+     */
+    private static void assertIndices(final IndexProducer producer, final Integer... expected) {
+        final List<Integer> actual = new ArrayList<>();
+        producer.forEachIndex(actual::add);
+        assertEquals(Arrays.asList(expected), actual);
+    }
+
+    /**
+     * Tests that the collection reports a size of zero until a hasher other than
+     * {@link NullHasher#INSTANCE} is added, and a size of one afterwards.
+     */
+    @Test
+    public void sizeTest() {
+        final SingleItemHasherCollection hasher = new SingleItemHasherCollection();
+        assertEquals(0, hasher.size());
+        hasher.add(NullHasher.INSTANCE);
+        assertEquals(0, hasher.size());
+        hasher.add(hasher1);
+        hasher.add(hasher2);
+        assertEquals(1, hasher.size());
+
+        final HasherCollection hasher3 = new SingleItemHasherCollection(hasher, new SimpleHasher(3, 3));
+        assertEquals(1, hasher3.size());
+    }
+
+    /**
+     * Tests that the collection is empty until a hasher other than
+     * {@link NullHasher#INSTANCE} is added.
+     */
+    @Test
+    public void isEmptyTest() {
+        final SingleItemHasherCollection hasher = new SingleItemHasherCollection();
+        assertTrue(hasher.isEmpty());
+        hasher.add(NullHasher.INSTANCE);
+        assertTrue(hasher.isEmpty());
+        hasher.add(hasher1);
+        assertFalse(hasher.isEmpty());
+    }
+
+    /**
+     * Tests the indices produced by a collection constructed from two hashers.
+     */
+    @Test
+    public void testIndices() {
+        final HasherCollection hasher = new SingleItemHasherCollection(hasher1, hasher2);
+        assertIndices(hasher.indices(new Shape(5, 10)), 1, 2, 3, 4, 5, 6, 8, 0);
+    }
+
+    /**
+     * Tests that adding a list of hashers yields a size of one and the same
+     * indices as constructing the collection from those hashers.
+     */
+    @Test
+    public void testAdd_collection() {
+        final HasherCollection hasher = new SingleItemHasherCollection();
+        hasher.add(Arrays.asList(hasher1, hasher2));
+        assertEquals(1, hasher.size());
+        assertIndices(hasher.indices(new Shape(5, 10)), 1, 2, 3, 4, 5, 6, 8, 0);
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
deleted file mode 100644
index c3d7c5c51e..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/StaticHasherTest.java
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PrimitiveIterator.OfInt;
-
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the {@link StaticHasher}.
- */
-public class StaticHasherTest {
-
- private final HashFunctionIdentity testFunction = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test Function";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- private final HashFunctionIdentity testFunctionX = new HashFunctionIdentity() {
-
- @Override
- public String getName() {
- return "Test FunctionX";
- }
-
- @Override
- public ProcessType getProcessType() {
- return ProcessType.CYCLIC;
- }
-
- @Override
- public String getProvider() {
- return "Apache Commons Collection Tests";
- }
-
- @Override
- public long getSignature() {
- return 0;
- }
-
- @Override
- public Signedness getSignedness() {
- return Signedness.SIGNED;
- }
- };
-
- private final Shape shape = new Shape(testFunction, 3, 72, 17);
-
- /**
- * Compare 2 static hashers to verify they have the same bits enabled.
- *
- * @param hasher1 the first static hasher.
- * @param hasher2 the second static hasher.
- */
- private void assertSameBits(final StaticHasher hasher1, final StaticHasher hasher2) {
- final OfInt iter1 = hasher1.iterator(shape);
- final OfInt iter2 = hasher2.iterator(shape);
-
- while (iter1.hasNext()) {
- assertTrue(iter2.hasNext(), "Not enough data in second hasher");
- assertEquals(iter1.nextInt(), iter2.nextInt());
- }
- assertFalse(iter2.hasNext(), "Too much data in second hasher");
- }
-
- /**
- * Tests that passing a hasher other than a StaticHasher to the constructor works as
- * expected.
- */
- @Test
- public void testConstructor_Hasher() {
- final int[] expected = {1, 3, 5, 7, 9};
-
- final Hasher testHasher = new Hasher() {
-
- @Override
- public OfInt iterator(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunction;
- }
- };
-
- final StaticHasher hasher = new StaticHasher(testHasher, shape);
- final OfInt iter = hasher.iterator(shape);
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that passing a hasher other than a StaticHasher and the wrong Shape to the
- * constructor throws an IllegalArgumentException.
- */
- @Test
- public void testConstructor_Hasher_WrongShape() {
- final Hasher testHasher = new Hasher() {
-
- @Override
- public OfInt iterator(final Shape shape) {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- return Arrays.stream(values).iterator();
- }
-
- @Override
- public HashFunctionIdentity getHashFunctionIdentity() {
- return testFunctionX;
- }
- };
-
- try {
- new StaticHasher(testHasher, shape);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-
- /**
- * Test that the iterator based constructor works correctly and removes duplicates.
- */
- @Test
- public void testConstructor_Iterator() {
-
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, shape);
-
- assertEquals(5, hasher.size());
- assertEquals(shape, hasher.getShape());
- // All function properties are equal
- assertEquals(testFunction.getName(), hasher.getHashFunctionIdentity().getName());
- assertEquals(testFunction.getProcessType(), hasher.getHashFunctionIdentity().getProcessType());
- assertEquals(testFunction.getProvider(), hasher.getHashFunctionIdentity().getProvider());
- assertEquals(testFunction.getSignedness(), hasher.getHashFunctionIdentity().getSignedness());
-
- iter = hasher.iterator(shape);
- int idx = 0;
- while (iter.hasNext()) {
- assertEquals(Integer.valueOf(values[idx]), iter.next(), "Error at idx " + idx);
- idx++;
- }
- assertEquals(5, idx);
- }
-
- /**
- * Tests that if the iterator passed to the constructor contains a value greater than
- * or equal to Shape.numberOfBits() an exception is thrown.
- */
- @Test
- public void testConstructor_Iterator_ValueTooBig() {
-
- final int[] values = {shape.getNumberOfBits(), 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- try {
- new StaticHasher(iter, shape);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that if the iterator passed to the constructor contains a value less than 0
- * (zero) an exception is thrown.
- */
- @Test
- public void testConstructor_Iterator_ValueTooSmall() {
-
- final int[] values = {-1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- try {
- new StaticHasher(iter, shape);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that the constructor that accepts a static hasher properly builds the hasher.
- */
- @Test
- public void testConstructor_StaticHasher() {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, shape);
-
- final StaticHasher hasher2 = new StaticHasher(hasher, shape);
- assertEquals(shape, hasher2.getShape());
- assertSameBits(hasher, hasher2);
- }
-
- /**
- * Tests that calling the constructor with a hasher and the wrong shape throws an
- * IllegalArgumentException.
- */
- @Test
- public void testConstructor_StaticHasher_WrongShape() {
- final int[] values = {1, 3, 5, 7, 9, 3, 5, 1};
- final Iterator iter = Arrays.stream(values).iterator();
- final StaticHasher hasher = new StaticHasher(iter, new Shape(testFunctionX, 3, 72, 17));
-
- try {
- new StaticHasher(hasher, shape);
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-
- /**
- * Tests that iterator returns the proper values.
- */
- @Test
- public void testGetBits() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
- assertEquals(17, hasher.size());
- final OfInt iter = hasher.iterator(shape);
- for (int i = 0; i < 17; i++) {
- assertTrue(iter.hasNext());
- assertEquals(i, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that iterator does not return duplicates and orders the indices.
- */
- @Test
- public void testGetBits_DuplicateValues() {
- final int[] input = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69, 59,
- 49, 39, 13, 3, 65, 55, 45, 35, 25};
- final int[] expected = {1, 2, 3, 6, 7, 10, 11, 13, 15, 17, 19, 23, 24, 25, 35, 36, 39, 43, 44, 45, 48, 49, 53, 55, 57,
- 59, 61, 62, 63, 65, 69, 70};
-
- final StaticHasher hasher = new StaticHasher(Arrays.stream(input).iterator(), shape);
-
- final OfInt iter = hasher.iterator(shape);
- for (final int element : expected) {
- assertTrue(iter.hasNext());
- assertEquals(element, iter.nextInt());
- }
- assertFalse(iter.hasNext());
- }
-
- /**
- * Tests that gitBits is called with the wrong shape an exception is thrown.
- */
- @Test
- public void testGetBits_WrongShape() {
- final List lst = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- final StaticHasher hasher = new StaticHasher(lst.iterator(), shape);
-
- try {
- hasher.iterator(new Shape(testFunctionX, 3, 72, 17));
- fail("Should have thrown IllegalArgumentException");
- } catch (final IllegalArgumentException expected) {
- // do nothing
- }
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java
deleted file mode 100644
index 5498d699cb..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/AbstractHashFunctionTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests the signature of a hash function.
- */
-public abstract class AbstractHashFunctionTest {
-
- /**
- * Test that the signature is properly generated.
- */
- @Test
- public void signatureTest() {
- final HashFunction hf = createHashFunction();
- final long expected = hf.apply(HashFunctionIdentity.prepareSignatureBuffer(hf), 0);
- assertEquals(expected, hf.getSignature());
- // Should be repeatable
- final long expected2 = hf.apply(HashFunctionIdentity.prepareSignatureBuffer(hf), 0);
- assertEquals(expected, expected2);
- assertEquals("Apache Commons Collections", hf.getProvider());
- }
-
- /**
- * Creates the hash function.
- *
- * @return the hash function
- */
- protected abstract HashFunction createHashFunction();
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java
deleted file mode 100644
index 9e17c2ec89..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur128x64CyclicTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Test that the Murmur3 128 x64 hash function works correctly.
- */
-public class Murmur128x64CyclicTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final Murmur128x64Cyclic murmur = new Murmur128x64Cyclic();
-
- final long l1 = 0xe7eb60dabb386407L;
- final long l2 = 0xc3ca49f691f73056L;
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = murmur.apply(buffer, 0);
- assertEquals(l1, l);
- l = murmur.apply(buffer, 1);
- assertEquals(l1 + l2, l);
- l = murmur.apply(buffer, 2);
- assertEquals(l1 + l2 + l2, l);
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new Murmur128x64Cyclic();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java
deleted file mode 100644
index bca60c1e4b..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/Murmur32x86IterativeTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Test that the Murmur3 32 x86 hash function works correctly.
- */
-public class Murmur32x86IterativeTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final Murmur32x86Iterative murmur = new Murmur32x86Iterative();
-
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = murmur.apply(buffer, 0);
- assertEquals(82674681, l);
- l = murmur.apply(buffer, 1);
- assertEquals(-1475490736, l);
- l = murmur.apply(buffer, 2);
- assertEquals(-1561435247, l);
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new Murmur32x86Iterative();
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java
deleted file mode 100644
index 5595efdc77..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/function/ObjectsHashIterativeTest.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter.hasher.function;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import org.apache.commons.collections4.bloomfilter.hasher.HashFunction;
-import org.junit.jupiter.api.Test;
-
-/**
- * Tests that the Objects hash works correctly.
- */
-public class ObjectsHashIterativeTest extends AbstractHashFunctionTest {
-
- /**
- * Test that the apply function returns the proper values.
- */
- @Test
- public void applyTest() {
- final ObjectsHashIterative obj = new ObjectsHashIterative();
-
- final byte[] buffer = "Now is the time for all good men to come to the aid of their country"
- .getBytes(StandardCharsets.UTF_8);
-
- long l = obj.apply(buffer, 0);
- long prev = 0;
- assertEquals(Arrays.deepHashCode(new Object[] {prev, buffer}), l);
- for (int i = 1; i <= 5; i++) {
- prev += l;
- l = obj.apply(buffer, i);
- assertEquals(Arrays.deepHashCode(new Object[] {prev, buffer}), l);
- }
- }
-
- @Override
- protected HashFunction createHashFunction() {
- return new ObjectsHashIterative();
- }
-}