diff --git a/src/main/java/me/lemire/integercompression/BitPacking.java b/src/main/java/me/lemire/integercompression/BitPacking.java index e83c9e0..8652be4 100644 --- a/src/main/java/me/lemire/integercompression/BitPacking.java +++ b/src/main/java/me/lemire/integercompression/BitPacking.java @@ -1690,7 +1690,7 @@ protected static void fastpack9(final int[] in, int inpos, } /** - * Unpack 32 integers + * Pack without mask 32 integers * * @param in * source array @@ -3005,7 +3005,7 @@ protected static void fastpackwithoutmask9(final int[] in, int inpos, } /** - * Pack the 32 integers + * Unpack the 32 integers * * @param in * source array diff --git a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java index ca9d0ad..2f8c709 100644 --- a/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java +++ b/src/main/java/me/lemire/integercompression/DeltaZigzagVariableByte.java @@ -134,6 +134,9 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. + * + * @param sizeInBytes capacity of the buffer to allocate, in bytes + * @return a {@link ByteBuffer} with a capacity of {@code sizeInBytes} bytes */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/FastPFOR.java b/src/main/java/me/lemire/integercompression/FastPFOR.java index 36226c0..47969f4 100644 --- a/src/main/java/me/lemire/integercompression/FastPFOR.java +++ b/src/main/java/me/lemire/integercompression/FastPFOR.java @@ -336,6 +336,9 @@ public String toString() { * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. 
+ * + * @param sizeInBytes capacity of the buffer to allocate, in bytes + * @return a {@link ByteBuffer} with a capacity of {@code sizeInBytes} bytes */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/FastPFOR128.java b/src/main/java/me/lemire/integercompression/FastPFOR128.java index b124072..83a3e1f 100644 --- a/src/main/java/me/lemire/integercompression/FastPFOR128.java +++ b/src/main/java/me/lemire/integercompression/FastPFOR128.java @@ -317,6 +317,9 @@ public String toString() { * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. + * + * @param sizeInBytes capacity of the buffer to allocate, in bytes + * @return a {@link ByteBuffer} with a capacity of {@code sizeInBytes} bytes */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/integercompression/IntCompressor.java b/src/main/java/me/lemire/integercompression/IntCompressor.java index 87e7bde..abaeea9 100644 --- a/src/main/java/me/lemire/integercompression/IntCompressor.java +++ b/src/main/java/me/lemire/integercompression/IntCompressor.java @@ -36,7 +36,8 @@ public IntCompressor() { * @throws UncompressibleInputException if the data is too poorly compressible */ public int[] compress(int[] input) { - int [] compressed = new int[input.length + input.length / 100 + 1024]; + int[] compressed = new int[input.length + input.length / 100 + 1024]; + // Store at index=0 the length of the input, hence enabling .headlessCompress compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); try { @@ -58,6 +59,7 @@ public int[] compress(int[] input) { * @return uncompressed array */ public int[] uncompress(int[] compressed) { + // Read at index=0 the length of the input, hence enabling .headlessUncompress int[] decompressed = new int[compressed[0]]; IntWrapper inpos = new IntWrapper(1); codec.headlessUncompress(compressed, inpos, diff --git 
a/src/main/java/me/lemire/integercompression/IntegerCODEC.java b/src/main/java/me/lemire/integercompression/IntegerCODEC.java index f2c9c7a..1dd9a4c 100644 --- a/src/main/java/me/lemire/integercompression/IntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/IntegerCODEC.java @@ -25,7 +25,7 @@ public interface IntegerCODEC { * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many integers to compress * @param out @@ -52,7 +52,7 @@ public void compress(int[] in, IntWrapper inpos, int inlength, * @param out * array where to write the compressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out */ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos); diff --git a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java index 8b4dd8b..66143b9 100644 --- a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java @@ -13,8 +13,8 @@ * variation on the IntegerCODEC interface meant to be used for random access * (i.e., given a large array, you can segment it and decode just the subarray you need). * - * The main difference is that we must specify the number of integers we wish to - * decode. This information should be stored elsewhere. + * The main difference is that you must specify the number of integers you wish to + * uncompress. This information should be stored elsewhere. * * This interface was designed by the Terrier team for their search engine. * @@ -30,10 +30,13 @@ public interface SkippableIntegerCODEC { * inpos will be incremented by 12 while outpos will be incremented by 3. We * use IntWrapper to pass the values by reference. 
* + * Implementation note: contrary to {@link IntegerCODEC#compress}, + * this may skip writing information about the number of encoded integers. + * * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many integers to compress * @param out @@ -57,11 +60,11 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out * @param inlength * length of the compressed data (ignored by some schemes) * @param out - * array where to write the compressed output + * array where to write the uncompressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out * @param num - * number of integers we want to decode, the actual number of integers decoded can be less + * number of integers we want to decode. May be less than the actual number of compressed integers */ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num); diff --git a/src/main/java/me/lemire/integercompression/VariableByte.java b/src/main/java/me/lemire/integercompression/VariableByte.java index 09e479b..92cfaeb 100644 --- a/src/main/java/me/lemire/integercompression/VariableByte.java +++ b/src/main/java/me/lemire/integercompression/VariableByte.java @@ -214,6 +214,9 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o * In case you need a different way to allocate buffers, you can override this method * with a custom behavior. The default implementation allocates a new Java direct * {@link ByteBuffer} on each invocation. 
+ * + * @param sizeInBytes capacity of the buffer to allocate, in bytes + * @return a {@link ByteBuffer} with a capacity of {@code sizeInBytes} bytes */ protected ByteBuffer makeBuffer(int sizeInBytes) { return ByteBuffer.allocateDirect(sizeInBytes); diff --git a/src/main/java/me/lemire/longcompression/ByteLongCODEC.java b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java index e405370..dbc6864 100644 --- a/src/main/java/me/lemire/longcompression/ByteLongCODEC.java +++ b/src/main/java/me/lemire/longcompression/ByteLongCODEC.java @@ -57,6 +57,6 @@ public void compress(long[] in, IntWrapper inpos, int inlength, * where to write the compressed output in out */ public void uncompress(byte[] in, IntWrapper inpos, int inlength, - long[] out, IntWrapper outpos); + long[] out, IntWrapper outpos); } diff --git a/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java index 3b2bc76..35c1166 100644 --- a/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java +++ b/src/main/java/me/lemire/longcompression/LongAs2IntsCodec.java @@ -16,174 +16,174 @@ * */ public class LongAs2IntsCodec implements LongCODEC { - final IntegerCODEC highPartsCodec; - final IntegerCODEC lowPartsCodec; - - public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) { - this.highPartsCodec = highPartsCodec; - this.lowPartsCodec = lowPartsCodec; - } - - /** - * By default, we expect longs to be slightly above Integer.MAX_VALUE. Hence highParts to be small and positive - * integers. 
For lowParts, we rely on {@link IntCompressor} default IntegerCODEC - */ - public LongAs2IntsCodec() { - this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte())); - } - - @Override - public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { - if (inlength == 0) { - return; - } - - int[] highParts = new int[inlength]; - int[] lowParts = new int[inlength]; - - for (int i = 0; i < inlength; i++) { - int inPosition = inpos.get() + i; - - highParts[i] = RoaringIntPacking.high(in[inPosition]); - lowParts[i] = RoaringIntPacking.low(in[inPosition]); - } - - // TODO What would be a relevant buffer size? - int[] buffer = new int[inlength * 16]; - - int outPosition = outpos.get(); - - boolean hasLeftover; - { - // The first integer is reserved to hold the number of compressed ints - IntWrapper highPartsOutPosition = new IntWrapper(1); - - highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition); - - // Record the compressedHighparts length - buffer[0] = highPartsOutPosition.get() - 1; - - for (int i = 0; i < highPartsOutPosition.get() / 2; i++) { - long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); - out[outPosition++] = pack; - } - - if (1 == highPartsOutPosition.get() % 2) { - // Shift the trailing integer as first in the buffer - hasLeftover = true; - buffer[0] = buffer[highPartsOutPosition.get() - 1]; - } else { - hasLeftover = false; - } - } - - { - // The first integer is reserved to hold the number of compressed ints - IntWrapper lowPartsOutPosition = new IntWrapper(1); - if (hasLeftover) { - // Keep the trailing int from highParts before the reserved int from lowParts compressed length - lowPartsOutPosition.set(2); - } - - lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition); - - // Record the compressedHighparts length - buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 
2 : 1); - - for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) { - long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); - out[outPosition++] = pack; - } - - if (1 == lowPartsOutPosition.get() % 2) { - // The trailing integer is packed with a 0 - long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0); - out[outPosition++] = pack; - } - } - - inpos.add(inlength); - outpos.set(outPosition); - } - - /** - * inlength is ignored by this codec. We may rely on it instead of storing the compressedLowPart length - */ - @Override - public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { - if (inlength == 0) { - return; - } - - int longIndex = inpos.get(); - - int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]); - int[] compressedHighParts = new int[nbCompressedHighParts]; - - // !highPart as we just read the highPart for nbCompressedHighParts - boolean highPart = false; - for (int i = 0; i < nbCompressedHighParts; i++) { - int nextInt; - if (highPart) { - nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]); - } else { - nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]); - } - compressedHighParts[i] = nextInt; - - highPart = !highPart; - } - - // TODO What would be a relevant buffer size? 
- int[] buffer = new int[inlength * 16]; - - IntWrapper highPartsOutPosition = new IntWrapper(); - highPartsCodec.uncompress(compressedHighParts, - new IntWrapper(), - compressedHighParts.length, - buffer, - highPartsOutPosition); - int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get()); - - // +1 as we initially read nbCompressedHighParts - int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts; - int nbCompressedLowParts; - if (highPart) { - nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]); - } else { - nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]); - } - highPart = !highPart; - - int[] compressedLowParts = new int[nbCompressedLowParts]; - for (int i = 0; i < nbCompressedLowParts; i++) { - int nextInt; - if (highPart) { - nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); - } else { - nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); - } - compressedLowParts[i] = nextInt; - - highPart = !highPart; - } - - IntWrapper lowPartsOutPosition = new IntWrapper(); - lowPartsCodec.uncompress(compressedLowParts, - new IntWrapper(), - compressedLowParts.length, - buffer, - lowPartsOutPosition); - int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get()); - assert highParts.length == lowParts.length; - - int outposition = outpos.get(); - for (int i = 0; i < highParts.length; i++) { - out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]); - } - - inpos.add(inlength); - outpos.set(outposition); - } + final IntegerCODEC highPartsCodec; + final IntegerCODEC lowPartsCodec; + + public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) { + this.highPartsCodec = highPartsCodec; + this.lowPartsCodec = lowPartsCodec; + } + + /** + * By default, we expect longs to be slightly above Integer.MAX_VALUE. Hence highParts to be small and positive + * integers. 
For lowParts, we rely on {@link IntCompressor} default IntegerCODEC + */ + public LongAs2IntsCodec() { + this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte())); + } + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { + if (inlength == 0) { + return; + } + + int[] highParts = new int[inlength]; + int[] lowParts = new int[inlength]; + + for (int i = 0; i < inlength; i++) { + int inPosition = inpos.get() + i; + + highParts[i] = RoaringIntPacking.high(in[inPosition]); + lowParts[i] = RoaringIntPacking.low(in[inPosition]); + } + + // TODO What would be a relevant buffer size? + int[] buffer = new int[inlength * 16]; + + int outPosition = outpos.get(); + + boolean hasLeftover; + { + // The first integer is reserved to hold the number of compressed ints + IntWrapper highPartsOutPosition = new IntWrapper(1); + + highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition); + + // Record the compressedHighparts length + buffer[0] = highPartsOutPosition.get() - 1; + + for (int i = 0; i < highPartsOutPosition.get() / 2; i++) { + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); + out[outPosition++] = pack; + } + + if (1 == highPartsOutPosition.get() % 2) { + // Shift the trailing integer as first in the buffer + hasLeftover = true; + buffer[0] = buffer[highPartsOutPosition.get() - 1]; + } else { + hasLeftover = false; + } + } + + { + // The first integer is reserved to hold the number of compressed ints + IntWrapper lowPartsOutPosition = new IntWrapper(1); + if (hasLeftover) { + // Keep the trailing int from highParts before the reserved int from lowParts compressed length + lowPartsOutPosition.set(2); + } + + lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition); + + // Record the compressedHighparts length + buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 
2 : 1); + + for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) { + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); + out[outPosition++] = pack; + } + + if (1 == lowPartsOutPosition.get() % 2) { + // The trailing integer is packed with a 0 + long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0); + out[outPosition++] = pack; + } + } + + inpos.add(inlength); + outpos.set(outPosition); + } + + /** + * inlength is ignored by this codec. We may rely on it instead of storing the compressedLowPart length + */ + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { + if (inlength == 0) { + return; + } + + int longIndex = inpos.get(); + + int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]); + int[] compressedHighParts = new int[nbCompressedHighParts]; + + // !highPart as we just read the highPart for nbCompressedHighParts + boolean highPart = false; + for (int i = 0; i < nbCompressedHighParts; i++) { + int nextInt; + if (highPart) { + nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]); + } else { + nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]); + } + compressedHighParts[i] = nextInt; + + highPart = !highPart; + } + + // TODO What would be a relevant buffer size? 
+ int[] buffer = new int[inlength * 16]; + + IntWrapper highPartsOutPosition = new IntWrapper(); + highPartsCodec.uncompress(compressedHighParts, + new IntWrapper(), + compressedHighParts.length, + buffer, + highPartsOutPosition); + int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get()); + + // +1 as we initially read nbCompressedHighParts + int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts; + int nbCompressedLowParts; + if (highPart) { + nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]); + } else { + nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]); + } + highPart = !highPart; + + int[] compressedLowParts = new int[nbCompressedLowParts]; + for (int i = 0; i < nbCompressedLowParts; i++) { + int nextInt; + if (highPart) { + nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); + } else { + nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); + } + compressedLowParts[i] = nextInt; + + highPart = !highPart; + } + + IntWrapper lowPartsOutPosition = new IntWrapper(); + lowPartsCodec.uncompress(compressedLowParts, + new IntWrapper(), + compressedLowParts.length, + buffer, + lowPartsOutPosition); + int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get()); + assert highParts.length == lowParts.length; + + int outposition = outpos.get(); + for (int i = 0; i < highParts.length; i++) { + out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]); + } + + inpos.add(inlength); + outpos.set(outposition); + } } diff --git a/src/main/java/me/lemire/longcompression/LongBinaryPacking.java b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java new file mode 100644 index 0000000..33bb8f1 --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java @@ -0,0 +1,143 @@ +package me.lemire.longcompression; + +import me.lemire.integercompression.BinaryPacking; +import 
me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.Util; + +/** + * Scheme based on a commonly used idea: can be extremely fast. + * It encodes longs in blocks of 64 values. For arrays containing + * an arbitrary number of longs, you should use it in conjunction + * with another CODEC: + * + *
LongCODEC ic = 
+ *  new Composition(new LongBinaryPacking(), new LongVariableByte()).
+ * + * Note that this does not use differential coding: if you are working on sorted + * lists, you must compute the deltas separately. + * + *

+ * For details, please see {@link BinaryPacking} + *

+ * + * @author Benoit Lacelle + */ +public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC { + final static int BLOCK_SIZE = 64; + + @Override + public void compress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + if (inlength == 0) + return; + out[outpos.get()] = inlength; + outpos.increment(); + headlessCompress(in, inpos, inlength, out, outpos); + } + + @Override + public void headlessCompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int tmpoutpos = outpos.get(); + int s = inpos.get(); + // Compress by block of 8 * 64 longs as much as possible + for (; s + BLOCK_SIZE * 8 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 8) { + // maxbits can be anything between 0 and 64 included: expressed within a byte (1 << 6) + final int mbits1 = LongUtil.maxbits(in, s + 0 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits2 = LongUtil.maxbits(in, s + 1 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits3 = LongUtil.maxbits(in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits4 = LongUtil.maxbits(in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits5 = LongUtil.maxbits(in, s + 4 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits6 = LongUtil.maxbits(in, s + 5 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits7 = LongUtil.maxbits(in, s + 6 * BLOCK_SIZE, BLOCK_SIZE); + final int mbits8 = LongUtil.maxbits(in, s + 7 * BLOCK_SIZE, BLOCK_SIZE); + // The first long expressed the maxbits for the 8 buckets + out[tmpoutpos++] = ((long) mbits1 << 56) | ((long) mbits2 << 48) | ((long) mbits3 << 40) | ((long) mbits4 << 32) | (mbits5 << 24) | (mbits6 << 16) | (mbits7 << 8) | (mbits8); + LongBitPacking.fastpackwithoutmask(in, s + 0 * BLOCK_SIZE, out, tmpoutpos, (int) mbits1); + tmpoutpos += mbits1; + LongBitPacking.fastpackwithoutmask(in, s + 1 * BLOCK_SIZE, out, tmpoutpos, (int) mbits2); + tmpoutpos += 
mbits2; + LongBitPacking.fastpackwithoutmask(in, s + 2 * BLOCK_SIZE, out, tmpoutpos, (int) mbits3); + tmpoutpos += mbits3; + LongBitPacking.fastpackwithoutmask(in, s + 3 * BLOCK_SIZE, out, tmpoutpos, (int) mbits4); + tmpoutpos += mbits4; + LongBitPacking.fastpackwithoutmask(in, s + 4 * BLOCK_SIZE, out, tmpoutpos, (int) mbits5); + tmpoutpos += mbits5; + LongBitPacking.fastpackwithoutmask(in, s + 5 * BLOCK_SIZE, out, tmpoutpos, (int) mbits6); + tmpoutpos += mbits6; + LongBitPacking.fastpackwithoutmask(in, s + 6 * BLOCK_SIZE, out, tmpoutpos, (int) mbits7); + tmpoutpos += mbits7; + LongBitPacking.fastpackwithoutmask(in, s + 7 * BLOCK_SIZE, out, tmpoutpos, (int) mbits8); + tmpoutpos += mbits8; + } + // Then we compress up to 7 blocks of 64 longs + for (; s < inpos.get() + inlength; s += BLOCK_SIZE ) { + final int mbits = LongUtil.maxbits(in, s, BLOCK_SIZE); + out[tmpoutpos++] = mbits; + LongBitPacking.fastpackwithoutmask(in, s, out, tmpoutpos, mbits); + tmpoutpos += mbits; + } + inpos.add(inlength); + outpos.set(tmpoutpos); + } + + @Override + public void uncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos) { + if (inlength == 0) + return; + final int outlength = (int) in[inpos.get()]; + inpos.increment(); + headlessUncompress(in,inpos, inlength,out,outpos,outlength); + } + + @Override + public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, + long[] out, IntWrapper outpos, int num) { + final int outlength = Util.greatestMultiple(num, BLOCK_SIZE); + int tmpinpos = inpos.get(); + int s = outpos.get(); + for (; s + BLOCK_SIZE * 8 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 8) { + final int mbits1 = (int) ((in[tmpinpos] >>> 56)); + final int mbits2 = (int) ((in[tmpinpos] >>> 48) & 0xFF); + final int mbits3 = (int) ((in[tmpinpos] >>> 40) & 0xFF); + final int mbits4 = (int) ((in[tmpinpos] >>> 32) & 0xFF); + final int mbits5 = (int) ((in[tmpinpos] >>> 24) & 0xFF); + final int mbits6 = (int) ((in[tmpinpos] >>> 16) & 
0xFF); + final int mbits7 = (int) ((in[tmpinpos] >>> 8) & 0xFF); + final int mbits8 = (int) ((in[tmpinpos]) & 0xFF); + ++tmpinpos; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 0 * BLOCK_SIZE, mbits1); + tmpinpos += mbits1; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 1 * BLOCK_SIZE, mbits2); + tmpinpos += mbits2; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 2 * BLOCK_SIZE, mbits3); + tmpinpos += mbits3; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 3 * BLOCK_SIZE, mbits4); + tmpinpos += mbits4; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 4 * BLOCK_SIZE, mbits5); + tmpinpos += mbits5; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 5 * BLOCK_SIZE, mbits6); + tmpinpos += mbits6; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 6 * BLOCK_SIZE, mbits7); + tmpinpos += mbits7; + LongBitPacking.fastunpack(in, tmpinpos, out, s + 7 * BLOCK_SIZE, mbits8); + tmpinpos += mbits8; + } + for (; s < outpos.get() + outlength; s += BLOCK_SIZE ) { + final int mbits = (int) in[tmpinpos]; + ++tmpinpos; + LongBitPacking.fastunpack(in, tmpinpos, out, s, mbits); + tmpinpos += mbits; + } + outpos.add(outlength); + inpos.set(tmpinpos); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/main/java/me/lemire/longcompression/LongBitPacking.java b/src/main/java/me/lemire/longcompression/LongBitPacking.java new file mode 100644 index 0000000..2d282ec --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongBitPacking.java @@ -0,0 +1,146 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import java.util.Arrays; + +/** + * Bitpacking routines + * + *

For details, please see

+ *

+ * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second + * through vectorization Software: Practice & Experience + * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract + * http://arxiv.org/abs/1209.2137 + *

+ * + * @author Benoit Lacelle + * + */ +public final class LongBitPacking { + + /** + * Pack 64 longs + * + * @param in + * source array + * @param inpos + * position in source array + * @param out + * output array + * @param outpos + * position in output array + * @param bit + * number of bits to use per long + */ + public static void fastpackwithoutmask(final long[] in, final int inpos, + final long[] out, final int outpos, final int bit) { + if (bit == 0) { + fastpackwithoutmask0(in, inpos, out, outpos); + } else if (bit == 64) { + fastpackwithoutmask64(in, inpos, out, outpos); + } else if (bit > 0 && bit < 64) { + slowpackwithoutmask(in, inpos, out, outpos, bit); + } else { + throw new IllegalArgumentException("Unsupported bit width: " + bit); + } + } + + protected static void fastpackwithoutmask0(final long[] in, int inpos, + final long[] out, int outpos) { + // nothing + } + + protected static void fastpackwithoutmask64(final long[] in, int inpos, + final long[] out, int outpos) { + System.arraycopy(in, inpos, out, outpos, 64); + } + + protected static void slowpackwithoutmask(final long[] in, int inpos, + final long[] out, int outpos, final int bit) { + int bucket = 0; + int shift = 0; + + out[outpos + bucket] = 0L; + for (int i = 0 ; i < 64 ; i++) { + if (shift >= 64) { + bucket++; + out[bucket + outpos] = 0L; + shift -= 64; + + if (shift > 0) { + // There is some leftovers from previous input in the next bucket + out[outpos + bucket] |= in[inpos + i - 1] >> (bit - shift); + } + } + out[outpos + bucket] |= in[inpos + i] << shift; + + shift += bit; + } + } + + + /** + * Unpack the 64 longs + * + * @param in + * source array + * @param inpos + * starting point in the source array + * @param out + * output array + * @param outpos + * starting point in the output array + * @param bit + * how many bits to use per integer + */ + public static void fastunpack(final long[] in, final int inpos, + final long[] out, final int outpos, final int bit) { + if (bit == 0) 
{ + fastunpack0(in, inpos, out, outpos); + } else if (bit == 64) { + fastunpack64(in, inpos, out, outpos); + } else if (bit > 0 && bit < 64) { + slowunpack(in, inpos, out, outpos, bit); + } else { + throw new IllegalArgumentException("Unsupported bit width: " + bit); + } + } + + + protected static void fastunpack0(final long[] in, int inpos, + final long[] out, int outpos) { + Arrays.fill(out, outpos, outpos + 64, 0); + } + + protected static void fastunpack64(final long[] in, int inpos, + final long[] out, int outpos) { + System.arraycopy(in, inpos, out, outpos, 64); + } + + protected static void slowunpack(final long[] in, int inpos, + final long[] out, int outpos, final int bit) { + int bucket = 0; + int shift = 0; + for (int i = 0 ; i < 64 ; i++) { + if (shift >= 64) { + bucket++; + shift -= 64; + + if (shift > 0) { + // There is some leftovers from previous input in the next bucket + out[outpos + i - 1] |= (in[inpos + bucket] << (bit - shift) & ((1L << bit) - 1)); + } + } + out[outpos + i] = ((in[inpos + bucket] >>> shift) & ((1L << bit) - 1)); + + shift += bit; + } + } +} diff --git a/src/main/java/me/lemire/longcompression/LongCODEC.java b/src/main/java/me/lemire/longcompression/LongCODEC.java index c0f67b2..1068f9f 100644 --- a/src/main/java/me/lemire/longcompression/LongCODEC.java +++ b/src/main/java/me/lemire/longcompression/LongCODEC.java @@ -27,7 +27,7 @@ public interface LongCODEC { * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many longs to compress * @param out @@ -52,9 +52,9 @@ public void compress(long[] in, IntWrapper inpos, int inlength, * length of the compressed data (ignored by some * schemes) * @param out - * array where to write the compressed output + * array where to write the uncompressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out */ public void uncompress(long[] 
in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos); diff --git a/src/main/java/me/lemire/longcompression/LongCompressor.java b/src/main/java/me/lemire/longcompression/LongCompressor.java new file mode 100644 index 0000000..a2c79fd --- /dev/null +++ b/src/main/java/me/lemire/longcompression/LongCompressor.java @@ -0,0 +1,75 @@ +package me.lemire.longcompression; + +import java.util.Arrays; + +import me.lemire.integercompression.IntWrapper; +import me.lemire.integercompression.UncompressibleInputException; + +/** + * This is a convenience class that wraps a codec to provide + * a "friendly" API. + * + * @author Benoit Lacelle + */ +public class LongCompressor { + + SkippableLongCODEC codec; + + /** + * Constructor wrapping a codec. + * + * @param c the underlying codec + */ + public LongCompressor(SkippableLongCODEC c) { + codec = c; + } + + /** + * Constructor with default codec. + */ + public LongCompressor() { + codec = new SkippableLongComposition(new LongBinaryPacking(), + new LongVariableByte()); + } + + /** + * Compress an array and returns the compressed result as a new array. + * + * @param input array to be compressed + * @return compressed array + * @throws UncompressibleInputException if the data is too poorly compressible + */ + public long[] compress(long[] input) { + long[] compressed = new long[input.length + input.length / 100 + 1024]; + // Store at index=0 the length of the input, hence enabling .headlessCompress + compressed[0] = input.length; + IntWrapper outpos = new IntWrapper(1); + try { + codec.headlessCompress(input, new IntWrapper(0), + input.length, compressed, outpos); + } catch (IndexOutOfBoundsException ioebe) { + throw new UncompressibleInputException("Your input is too poorly compressible " + + "with the current codec : "+codec); + } + compressed = Arrays.copyOf(compressed,outpos.intValue()); + return compressed; + } + + /** + * Uncompress an array and returns the uncompressed result as a new array. 
+ * + * @param compressed compressed array + * @return uncompressed array + */ + public long[] uncompress(long[] compressed) { + // Read at index=0 the length of the input, hence enabling .headlessUncompress + long[] decompressed = new long[(int) compressed[0]]; + IntWrapper inpos = new IntWrapper(1); + codec.headlessUncompress(compressed, inpos, + compressed.length - inpos.intValue(), + decompressed, new IntWrapper(0), + decompressed.length); + return decompressed; + } + +} diff --git a/src/main/java/me/lemire/longcompression/LongUtil.java b/src/main/java/me/lemire/longcompression/LongUtil.java index c06433f..7bdce83 100644 --- a/src/main/java/me/lemire/longcompression/LongUtil.java +++ b/src/main/java/me/lemire/longcompression/LongUtil.java @@ -15,8 +15,38 @@ */ @Deprecated public class LongUtil { + + /** + * Compute the maximum of the integer logarithms (ceil(log(x+1)) of a range + * of value + * + * @param i + * source array + * @param pos + * starting position + * @param length + * number of integers to consider + * @return integer logarithm + */ + public static int maxbits(long[] i, int pos, int length) { + long mask = 0; + for (int k = pos; k < pos + length; ++k) + mask |= i[k]; + return bits(mask); + } - protected static String longToBinaryWithLeading(long l) { - return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0'); - } + /** + * Compute the integer logarithms (ceil(log(x+1)) of a value + * + * @param i + * source value + * @return integer logarithm + */ + public static int bits(long i) { + return 64 - Long.numberOfLeadingZeros(i); + } + + protected static String longToBinaryWithLeading(long l) { + return String.format("%64s", Long.toBinaryString(l)).replace(' ', '0'); + } } diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java index f3d10ee..ad2b0eb 100644 --- a/src/main/java/me/lemire/longcompression/LongVariableByte.java +++ 
b/src/main/java/me/lemire/longcompression/LongVariableByte.java @@ -217,7 +217,7 @@ public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, s += 8; // Shift to next long if s==64 p += s>>6; - // cycle from 63 to 0 + // Cycle from 63 to 0 s = s & 63; v += ((c & 127) << shift); if ((c & 128) == 128) { @@ -234,7 +234,7 @@ public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, @Override public void uncompress(byte[] in, IntWrapper inpos, int inlength, - long[] out, IntWrapper outpos) { + long[] out, IntWrapper outpos) { int p = inpos.get(); int finalp = inpos.get() + inlength; int tmpoutpos = outpos.get(); @@ -311,7 +311,7 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] s += 8; // Shift to next long if s == 64 p += s>>6; - // cycle from 63 to 0 + // Cycle from 63 to 0 s = s & 63; v += ((c & 127) << shift); if ((c & 128) == 128) { diff --git a/src/main/java/me/lemire/longcompression/RoaringIntPacking.java b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java index f109ab3..d6b6baa 100644 --- a/src/main/java/me/lemire/longcompression/RoaringIntPacking.java +++ b/src/main/java/me/lemire/longcompression/RoaringIntPacking.java @@ -3,9 +3,6 @@ */ package me.lemire.longcompression; -import java.math.BigInteger; -import java.util.Comparator; - /** * Used to hold the logic packing 2 integers in a long, and separating a long in two integers. It is * useful in {@link Roaring64NavigableMap} as the implementation split the input long in two @@ -46,63 +43,4 @@ public static int low(long id) { public static long pack(int high, int low) { return (((long) high) << 32) | (low & 0xffffffffL); } - - - /** - * - * @param signedLongs true if long put in a {@link Roaring64NavigableMap} should be considered as - * signed long. 
- * @return the int representing the highest value which can be set as high value in a - * {@link Roaring64NavigableMap} - */ - public static int highestHigh(boolean signedLongs) { - if (signedLongs) { - return Integer.MAX_VALUE; - } else { - return -1; - } - } - - /** - * @return A comparator for unsigned longs: a negative long is a long greater than Long.MAX_VALUE - */ - public static Comparator unsignedComparator() { - return new Comparator() { - - @Override - public int compare(Integer o1, Integer o2) { - return compareUnsigned(o1, o2); - } - }; - } - - /** - * Compares two {@code int} values numerically treating the values as unsigned. - * - * @param x the first {@code int} to compare - * @param y the second {@code int} to compare - * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} if {@code x < y} as - * unsigned values; and a value greater than {@code 0} if {@code x > y} as unsigned values - * @since 1.8 - */ - // Duplicated from jdk8 Integer.compareUnsigned - public static int compareUnsigned(int x, int y) { - return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE); - } - - /** the constant 2^64 */ - private static final BigInteger TWO_64 = BigInteger.ONE.shiftLeft(64); - - /** - * JDK8 Long.toUnsignedString was too complex to backport. 
Go for a slow version relying on - * BigInteger - */ - // https://stackoverflow.com/questions/7031198/java-signed-long-to-unsigned-long-string - static String toUnsignedString(long l) { - BigInteger b = BigInteger.valueOf(l); - if (b.signum() < 0) { - b = b.add(TWO_64); - } - return b.toString(); - } } diff --git a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java index 984914f..7fe1fe5 100644 --- a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java +++ b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java @@ -34,7 +34,7 @@ public interface SkippableLongCODEC { * @param in * input array * @param inpos - * location in the input array + * where to start reading in the array * @param inlength * how many longs to compress * @param out @@ -58,9 +58,9 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o * @param inlength * length of the compressed data (ignored by some schemes) * @param out - * array where to write the compressed output + * array where to write the uncompressed output * @param outpos - * where to write the compressed output in out + * where to start writing the uncompressed output in out * @param num * number of longs we want to decode, the actual number of longs decoded can be less */ diff --git a/src/test/java/me/lemire/longcompression/ATestLongCODEC.java b/src/test/java/me/lemire/longcompression/ATestLongCODEC.java new file mode 100644 index 0000000..c61ea69 --- /dev/null +++ b/src/test/java/me/lemire/longcompression/ATestLongCODEC.java @@ -0,0 +1,96 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import java.util.stream.LongStream; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Edge-cases to be tested on a per-codec basis + * + * @author Benoit Lacelle + */ +public abstract class ATestLongCODEC { + protected void checkConsistency(LongCODEC codec, long[] array) { + { + long[] compressed = LongTestUtils.compress(codec, array); + long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + + if (codec instanceof ByteLongCODEC) { + byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array); + long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + + if (codec instanceof SkippableLongCODEC) { + long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array); + long[] uncompressed = + LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length); + + Assert.assertArrayEquals(array, uncompressed); + } + } + + public abstract LongCODEC getCodec(); + + @Test + public void testCodec_Minus1() { + checkConsistency(getCodec(), new long[] { -1 }); + } + + @Test + public void testCodec_ZeroTimes8Minus1() { + checkConsistency(getCodec(), new long[] { 0, 0, 0, 0, 0, 0, 0, 0, -1 }); + } + + @Test + public void testCodec_ZeroTimes127Minus1() { + long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray(); + + checkConsistency(getCodec(), array); + } + + @Test + public void testCodec_ZeroTimes128Minus1() { + long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray(); + + checkConsistency(getCodec(), array); + } + + @Test + public void testCodec_MinValue() { + checkConsistency(getCodec(), new long[] { Long.MIN_VALUE }); + } + + @Test + public void 
testCodec_ZeroMinValue() { + checkConsistency(getCodec(), new long[] { 0, Long.MIN_VALUE }); + } + + @Test + public void testCodec_allPowerOfTwo() { + checkConsistency(getCodec(), new long[] { 1L << 42 }); + for (int i = 0; i < 64; i++) { + checkConsistency(getCodec(), new long[] { 1L << i }); + } + } + + @Test + public void testCodec_ZeroThenAllPowerOfTwo() { + for (int i = 0; i < 64; i++) { + checkConsistency(getCodec(), new long[] { 0, 1L << i }); + } + } + +} diff --git a/src/test/java/me/lemire/longcompression/LongBasicTest.java b/src/test/java/me/lemire/longcompression/LongBasicTest.java index 5aa3551..1963246 100644 --- a/src/test/java/me/lemire/longcompression/LongBasicTest.java +++ b/src/test/java/me/lemire/longcompression/LongBasicTest.java @@ -14,24 +14,9 @@ import org.junit.Test; -import me.lemire.integercompression.BinaryPacking; -import me.lemire.integercompression.Composition; import me.lemire.integercompression.FastPFOR; import me.lemire.integercompression.FastPFOR128; import me.lemire.integercompression.IntWrapper; -import me.lemire.integercompression.JustCopy; -import me.lemire.integercompression.NewPFD; -import me.lemire.integercompression.NewPFDS16; -import me.lemire.integercompression.NewPFDS9; -import me.lemire.integercompression.OptPFD; -import me.lemire.integercompression.OptPFDS16; -import me.lemire.integercompression.OptPFDS9; -import me.lemire.integercompression.Simple9; -import me.lemire.integercompression.VariableByte; -import me.lemire.integercompression.differential.Delta; -import me.lemire.integercompression.differential.IntegratedBinaryPacking; -import me.lemire.integercompression.differential.IntegratedComposition; -import me.lemire.integercompression.differential.IntegratedVariableByte; import me.lemire.longcompression.differential.LongDelta; import me.lemire.longcompression.synth.LongClusteredDataGenerator; @@ -45,7 +30,9 @@ public class LongBasicTest { final LongCODEC[] codecs = { new LongJustCopy(), new LongVariableByte(), - 
new LongAs2IntsCodec()}; + new LongAs2IntsCodec(), + new LongComposition(new LongBinaryPacking(), new LongVariableByte()), + }; /** * This tests with a compressed array with various offset @@ -89,14 +76,19 @@ public void varyingLengthTest() { long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); long[] answer = LongTestUtils.uncompress(c, comp, L); for (int k = 0; k < L; ++k) - if (answer[k] != data[k]) - throw new RuntimeException("bug"); + if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer2 = LongTestUtils.uncompress(c, comp2, L); + throw new RuntimeException("bug"); + } } for (int L = 128; L <= N; L *= 2) { long[] comp = LongTestUtils.compress(c, Arrays.copyOf(data, L)); long[] answer = LongTestUtils.uncompress(c, comp, L); for (int k = 0; k < L; ++k) if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(c, Arrays.copyOf(data, L)); + long[] answer2 = LongTestUtils.uncompress(c, comp2, L); System.out.println(Arrays.toString(Arrays.copyOf( answer, L))); System.out.println(Arrays.toString(Arrays.copyOf(data, @@ -366,9 +358,12 @@ public void fastPforTest() { long[] comp = LongTestUtils.compress(codec, Arrays.copyOf(data, N)); long[] answer = LongTestUtils.uncompress(codec, comp, N); for (int k = 0; k < N; ++k) - if (answer[k] != data[k]) + if (answer[k] != data[k]) { + long[] comp2 = LongTestUtils.compress(codec, Arrays.copyOf(data, N)); + long[] answer2 = LongTestUtils.uncompress(codec, comp2, N); throw new RuntimeException("bug " + k + " " + answer[k] + " != " + data[k]); + } } } diff --git a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java index e900c9c..24cb712 100644 --- a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java +++ b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java @@ -25,7 +25,8 @@ public class SkippableLongBasicTest { final 
SkippableLongCODEC[] codecs = { new LongJustCopy(), - new LongVariableByte(), }; + new LongVariableByte(), + new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()), }; /** diff --git a/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java index 00bb52a..5b8014e 100644 --- a/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java +++ b/src/test/java/me/lemire/longcompression/TestLongAs2IntsCodec.java @@ -7,8 +7,6 @@ package me.lemire.longcompression; -import java.util.stream.LongStream; - import org.junit.Assert; import org.junit.Test; @@ -17,85 +15,12 @@ * * @author Benoit Lacelle */ -public class TestLongAs2IntsCodec { +public class TestLongAs2IntsCodec extends ATestLongCODEC { final LongAs2IntsCodec codec = new LongAs2IntsCodec(); - private void checkConsistency(LongCODEC codec, long[] array) { - { - long[] compressed = LongTestUtils.compress(codec, array); - long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - - if (codec instanceof ByteLongCODEC) { - byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array); - long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - - if (codec instanceof SkippableLongCODEC) { - long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array); - long[] uncompressed = - LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - } - - @Test - public void testCodec_Zero() { - checkConsistency(codec, new long[] { 0 }); - } - - @Test - public void testCodec_Minus1() { - checkConsistency(codec, new long[] { -1 }); - } - - @Test - public void testCodec_ZeroTimes8Minus1() { - checkConsistency(codec, new long[] { 0, 0, 0, 0, 0, 
0, 0, 0, -1 }); - } - - @Test - public void testCodec_ZeroTimes127Minus1() { - long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray(); - - checkConsistency(codec, array); - } - - @Test - public void testCodec_ZeroTimes128Minus1() { - long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray(); - - checkConsistency(codec, array); - } - - @Test - public void testCodec_MinValue() { - checkConsistency(codec, new long[] { Long.MIN_VALUE }); - } - - @Test - public void testCodec_ZeroMinValue() { - checkConsistency(codec, new long[] { 0, Long.MIN_VALUE }); - } - - @Test - public void testCodec_allPowerOfTwo() { - checkConsistency(codec, new long[] { 1L << 42 }); - for (int i = 0; i < 64; i++) { - checkConsistency(codec, new long[] { 1L << i }); - } - } - - @Test - public void testCodec_ZeroThenAllPowerOfTwo() { - for (int i = 0; i < 64; i++) { - checkConsistency(codec, new long[] { 0, 1L << i }); - } + @Override + public LongCODEC getCodec() { + return codec; } @Test diff --git a/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java b/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java new file mode 100644 index 0000000..ecc3f2e --- /dev/null +++ b/src/test/java/me/lemire/longcompression/TestLongBinaryPacking.java @@ -0,0 +1,26 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +package me.lemire.longcompression; + +import org.junit.Ignore; + +/** + * Edge-cases having caused issue specifically with LongBinaryPacking. 
+ * + * @author Benoit Lacelle + */ +@Ignore("Parent class tests are not valid as LongBinaryPacking process by chunks of 64 longs") +public class TestLongBinaryPacking extends ATestLongCODEC { + final LongBinaryPacking codec = new LongBinaryPacking(); + + @Override + public LongCODEC getCodec() { + return codec; + } + +} diff --git a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java index 15613f2..ee1755a 100644 --- a/src/test/java/me/lemire/longcompression/TestLongVariableByte.java +++ b/src/test/java/me/lemire/longcompression/TestLongVariableByte.java @@ -7,8 +7,6 @@ package me.lemire.longcompression; -import java.util.stream.LongStream; - import org.junit.Assert; import org.junit.Test; @@ -17,87 +15,20 @@ * * @author Benoit Lacelle */ -public class TestLongVariableByte { +public class TestLongVariableByte extends ATestLongCODEC { final LongVariableByte codec = new LongVariableByte(); - private void checkConsistency(LongCODEC codec, long[] array) { - { - long[] compressed = LongTestUtils.compress(codec, array); - long[] uncompressed = LongTestUtils.uncompress(codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - - if (codec instanceof ByteLongCODEC) { - byte[] compressed = LongTestUtils.compress((ByteLongCODEC) codec, array); - long[] uncompressed = LongTestUtils.uncompress((ByteLongCODEC) codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - - if (codec instanceof SkippableLongCODEC) { - long[] compressed = LongTestUtils.compressHeadless((SkippableLongCODEC) codec, array); - long[] uncompressed = - LongTestUtils.uncompressHeadless((SkippableLongCODEC) codec, compressed, array.length); - - Assert.assertArrayEquals(array, uncompressed); - } - } - - @Test - public void testCodec_ZeroMinus1() { - checkConsistency(codec, new long[] { -1 }); - } - - @Test - public void testCodec_ZeroTimes8Minus1() { - 
checkConsistency(codec, new long[] { 0, 0, 0, 0, 0, 0, 0, 0, -1 }); - } - - @Test - public void testCodec_ZeroTimes127Minus1() { - long[] array = LongStream.concat(LongStream.range(0, 127).map(l -> 0), LongStream.of(-1)).toArray(); - - checkConsistency(codec, array); - } - - @Test - public void testCodec_ZeroTimes128Minus1() { - long[] array = LongStream.concat(LongStream.range(0, 128).map(l -> 0), LongStream.of(-1)).toArray(); - - checkConsistency(codec, array); - } - - @Test - public void testCodec_MinValue() { - checkConsistency(codec, new long[] { Long.MIN_VALUE }); - } - - @Test - public void testCodec_ZeroMinValue() { - checkConsistency(codec, new long[] { 0, Long.MIN_VALUE }); - } - - @Test - public void testCodec_allPowerOfTwo() { - checkConsistency(codec, new long[] { 1L << 42 }); - for (int i = 0; i < 64; i++) { - checkConsistency(codec, new long[] { 1L << i }); - } - } - - @Test - public void testCodec_ZeroThenAllPowerOfTwo() { - for (int i = 0; i < 64; i++) { - checkConsistency(codec, new long[] { 0, 1L << i }); - } + @Override + public LongCODEC getCodec() { + return codec; } @Test public void testCodec_intermediateHighPowerOfTwo() { Assert.assertEquals(1, LongTestUtils.compress((LongCODEC) codec, new long[] { 1L << 42 }).length); Assert.assertEquals(7, LongTestUtils.compress((ByteLongCODEC) codec, new long[] { 1L << 42 }).length); - Assert.assertEquals(1, LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { 1L << 42 }).length); + Assert.assertEquals(1, + LongTestUtils.compressHeadless((SkippableLongCODEC) codec, new long[] { 1L << 42 }).length); } }