diff --git a/example.java b/example.java index 71ccd63..75dfb05 100644 --- a/example.java +++ b/example.java @@ -104,11 +104,7 @@ public static void basicExampleHeadless() { // be processed using variable byte SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); - // output vector should be large enough... - int[] compressed = new int[data.length + 1024]; - // compressed might not be large enough in some cases - // if you get java.lang.ArrayIndexOutOfBoundsException, try - // allocating more memory + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; /** * @@ -267,10 +263,12 @@ public static void headlessDemo() { int[] uncompressed1 = {1,2,1,3,1}; int[] uncompressed2 = {3,2,4,6,1}; - int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024]; - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length) + + codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length); + int[] compressed = new int[maxCompressedLength]; + // compressing IntWrapper outPos = new IntWrapper(); diff --git a/src/main/java/me/lemire/integercompression/BinaryPacking.java b/src/main/java/me/lemire/integercompression/BinaryPacking.java index 8d5ff90..ce37ff0 100644 --- a/src/main/java/me/lemire/integercompression/BinaryPacking.java +++ b/src/main/java/me/lemire/integercompression/BinaryPacking.java @@ -37,8 +37,9 @@ * @author Daniel Lemire */ public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC { - final static int BLOCK_SIZE = 32; - + public final static int BLOCK_SIZE = 32; + private static final int MAX_BIT_WIDTH = Integer.SIZE; + @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { @@ -131,7 +132,16 @@ public void 
headlessUncompress(int[] in, IntWrapper inpos, int inlength, outpos.add(outlength); inpos.set(tmpinpos); } - + + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int blockCount = inlength / BLOCK_SIZE; + int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES); + int blocksSizeInInts = blockCount * MAX_BIT_WIDTH; + compressedPositions.add(blockCount * BLOCK_SIZE); + return headersSizeInInts + blocksSizeInInts; + } + @Override public String toString() { return this.getClass().getSimpleName(); diff --git a/src/main/java/me/lemire/integercompression/FastPFOR.java b/src/main/java/me/lemire/integercompression/FastPFOR.java index 47969f4..5475496 100644 --- a/src/main/java/me/lemire/integercompression/FastPFOR.java +++ b/src/main/java/me/lemire/integercompression/FastPFOR.java @@ -40,6 +40,13 @@ */ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC { final static int OVERHEAD_OF_EACH_EXCEPT = 8; + private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header + // 1 int for the byte array size + // 1 int for the bitmap + // 1 int for byte array padding (to align to 4 bytes) + // 32 to have enough space to bit-pack the exceptions + private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer + // 1 byte for the number of exceptions /** * */ @@ -65,7 +72,7 @@ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC { * @param pagesize * the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE) */ - private FastPFOR(int pagesize) { + FastPFOR(int pagesize) { pageSize = pagesize; // Initiate arrrays. 
byteContainer = makeBuffer(3 * pageSize
@@ -230,6 +237,21 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
         }
     }
 
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
+
+        int pageCount = (inlength + pageSize - 1) / pageSize;
+        int blockCount = inlength / BLOCK_SIZE;
+
+        // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
+        int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
+        // Report the whole blocks as consumed, as the interface contract and the other block
+        // codecs do, so that a composition only sizes its second codec for the leftover tail.
+        compressedPositions.add(inlength);
+        return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
+    }
+
     private void decodePage(int[] in, IntWrapper inpos, int[] out,
             IntWrapper outpos, int thissize) {
         final int initpos = inpos.get();
diff --git a/src/main/java/me/lemire/integercompression/FastPFOR128.java b/src/main/java/me/lemire/integercompression/FastPFOR128.java
index 83a3e1f..0557c62 100644
--- a/src/main/java/me/lemire/integercompression/FastPFOR128.java
+++ b/src/main/java/me/lemire/integercompression/FastPFOR128.java
@@ -23,6 +23,13 @@
  */
 public class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC {
     final static int OVERHEAD_OF_EACH_EXCEPT = 8;
+    private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
+                                                                 // 1 int for the byte array size
+                                                                 // 1 int for the bitmap
+                                                                 // 1 int for byte array padding (to align to 4 bytes)
+                                                                 // 32 to have enough space to bit-pack the exceptions
+    private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
+                                                                 // 1 byte for the number of exceptions
     /**
      *
      */
@@ -209,6 +216,21 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
         }
     }
 
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
+
+        int pageCount = (inlength + pageSize - 1) / pageSize;
+        int blockCount = inlength / BLOCK_SIZE;
+
+        // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
+        int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
+        // Report the whole blocks as consumed, as the interface contract and the other block
+        // codecs do, so that a composition only sizes its second codec for the leftover tail.
+        compressedPositions.add(inlength);
+        return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
+    }
+
     private void decodePage(int[] in, IntWrapper inpos, int[] out,
             IntWrapper outpos, int thissize) {
         final int initpos = inpos.get();
diff --git a/src/main/java/me/lemire/integercompression/GroupSimple9.java b/src/main/java/me/lemire/integercompression/GroupSimple9.java
index a294080..bd8acfa 100644
--- a/src/main/java/me/lemire/integercompression/GroupSimple9.java
+++ b/src/main/java/me/lemire/integercompression/GroupSimple9.java
@@ -3549,4 +3549,10 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
         inpos.set(tmpinpos);
     }
 
-}
\ No newline at end of file
+
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        compressedPositions.add(inlength);
+        return inlength;
+    }
+}
diff --git a/src/main/java/me/lemire/integercompression/IntCompressor.java b/src/main/java/me/lemire/integercompression/IntCompressor.java
index abaeea9..30f755c 100644
--- a/src/main/java/me/lemire/integercompression/IntCompressor.java
+++ b/src/main/java/me/lemire/integercompression/IntCompressor.java
@@ -33,21 +33,14 @@ public IntCompressor() {
      *
      * @param input array to be compressed
      * @return compressed array
-     * @throws UncompressibleInputException if the data is too poorly compressible
      */
     public int[] compress(int[] input) {
-        int[] compressed = new int[input.length + input.length / 100 + 1024];
+        int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
+        int[] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input
         // Store at index=0 the length of the input, hence enabling .headlessCompress
compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); - try { - codec.headlessCompress(input, new IntWrapper(0), - input.length, compressed, outpos); - } catch (IndexOutOfBoundsException ioebe) { - throw new - UncompressibleInputException("Your input is too poorly compressible " - + "with the current codec : "+codec); - } + codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos); compressed = Arrays.copyOf(compressed,outpos.intValue()); return compressed; } diff --git a/src/main/java/me/lemire/integercompression/JustCopy.java b/src/main/java/me/lemire/integercompression/JustCopy.java index 709b86a..f57282c 100644 --- a/src/main/java/me/lemire/integercompression/JustCopy.java +++ b/src/main/java/me/lemire/integercompression/JustCopy.java @@ -42,6 +42,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { diff --git a/src/main/java/me/lemire/integercompression/Kamikaze.java b/src/main/java/me/lemire/integercompression/Kamikaze.java index fd1ac82..4cab30b 100644 --- a/src/main/java/me/lemire/integercompression/Kamikaze.java +++ b/src/main/java/me/lemire/integercompression/Kamikaze.java @@ -38,6 +38,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o } } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet."); + } + @Override public String toString() { return "Kamikaze's PForDelta"; @@ -64,4 +69,4 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, headlessUncompress(in, inpos, inlength, out, outpos, outlength); } -} \ No newline 
at end of file +} diff --git a/src/main/java/me/lemire/integercompression/NewPFD.java b/src/main/java/me/lemire/integercompression/NewPFD.java index 6dd01aa..3da3002 100644 --- a/src/main/java/me/lemire/integercompression/NewPFD.java +++ b/src/main/java/me/lemire/integercompression/NewPFD.java @@ -132,6 +132,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. + int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/NewPFDS16.java b/src/main/java/me/lemire/integercompression/NewPFDS16.java index 98370d2..526b8fb 100644 --- a/src/main/java/me/lemire/integercompression/NewPFDS16.java +++ b/src/main/java/me/lemire/integercompression/NewPFDS16.java @@ -131,6 +131,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/NewPFDS9.java b/src/main/java/me/lemire/integercompression/NewPFDS9.java index c8389c1..bd802b6 100644 --- a/src/main/java/me/lemire/integercompression/NewPFDS9.java +++ b/src/main/java/me/lemire/integercompression/NewPFDS9.java @@ -130,6 +130,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/OptPFD.java b/src/main/java/me/lemire/integercompression/OptPFD.java index 8c90586..cfda92e 100644 --- a/src/main/java/me/lemire/integercompression/OptPFD.java +++ b/src/main/java/me/lemire/integercompression/OptPFD.java @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); diff --git a/src/main/java/me/lemire/integercompression/OptPFDS16.java b/src/main/java/me/lemire/integercompression/OptPFDS16.java index 8574b10..95c4f62 100644 --- a/src/main/java/me/lemire/integercompression/OptPFDS16.java +++ b/src/main/java/me/lemire/integercompression/OptPFDS16.java @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); @@ -197,4 +208,4 @@ public String toString() { return this.getClass().getSimpleName(); } -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/OptPFDS9.java b/src/main/java/me/lemire/integercompression/OptPFDS9.java index 34f4206..0e2563b 100644 --- a/src/main/java/me/lemire/integercompression/OptPFDS9.java +++ b/src/main/java/me/lemire/integercompression/OptPFDS9.java @@ -146,6 +146,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, decodePage(in, inpos, out, outpos, mynvalue); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); + int blockCount = inlength / BLOCK_SIZE; + // +1 for the header + // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers. 
+ int maxBlockSizeInInts = 1 + BLOCK_SIZE; + compressedPositions.add(inlength); + return maxBlockSizeInInts * blockCount; + } + private void decodePage(int[] in, IntWrapper inpos, int[] out, IntWrapper outpos, int thissize) { int tmpoutpos = outpos.get(); @@ -197,4 +208,4 @@ public String toString() { return this.getClass().getSimpleName(); } -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/Simple16.java b/src/main/java/me/lemire/integercompression/Simple16.java index cdc7308..2b7f27f 100644 --- a/src/main/java/me/lemire/integercompression/Simple16.java +++ b/src/main/java/me/lemire/integercompression/Simple16.java @@ -103,6 +103,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o outpos.set(i_outpos); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + /** * Uncompress data from an array to another array. 
* @@ -182,4 +188,4 @@ public String toString() { { 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } }; private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); -} \ No newline at end of file +} diff --git a/src/main/java/me/lemire/integercompression/Simple9.java b/src/main/java/me/lemire/integercompression/Simple9.java index 4864756..fd5194d 100644 --- a/src/main/java/me/lemire/integercompression/Simple9.java +++ b/src/main/java/me/lemire/integercompression/Simple9.java @@ -268,6 +268,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + @Override public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) { if (inlength == 0) diff --git a/src/main/java/me/lemire/integercompression/SkippableComposition.java b/src/main/java/me/lemire/integercompression/SkippableComposition.java index 5faf7c2..7dd4736 100644 --- a/src/main/java/me/lemire/integercompression/SkippableComposition.java +++ b/src/main/java/me/lemire/integercompression/SkippableComposition.java @@ -61,6 +61,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o F2.headlessUncompress(in, inpos, inlength, out, outpos, num); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int init = compressedPositions.get(); + int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength); + maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version? 
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } + @Override public String toString() { return F1.toString() + "+" + F2.toString(); diff --git a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java index 66143b9..b9bdc04 100644 --- a/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java @@ -69,4 +69,21 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num); + /** + * Compute the maximum number of integers that might be required to store + * the compressed form of a given input array segment, without headers. + *

+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper)}. + *

+ * + * @param compressedPositions + * since not all schemes compress every input integer, this parameter + * returns how many input integers will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of integers to be compressed + * @return the maximum number of integers needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); } diff --git a/src/main/java/me/lemire/integercompression/UncompressibleInputException.java b/src/main/java/me/lemire/integercompression/UncompressibleInputException.java deleted file mode 100644 index c0ed41f..0000000 --- a/src/main/java/me/lemire/integercompression/UncompressibleInputException.java +++ /dev/null @@ -1,19 +0,0 @@ -package me.lemire.integercompression; - -/** - * This exception might be thrown if the input is poorly compressible. - * - */ -public class UncompressibleInputException extends RuntimeException { - - /** - * Create new exception - * @param string explanation for the exception - */ - public UncompressibleInputException(String string) { - super(string); - } - - private static final long serialVersionUID = -798583799846489873L; - -} diff --git a/src/main/java/me/lemire/integercompression/VariableByte.java b/src/main/java/me/lemire/integercompression/VariableByte.java index 92cfaeb..c9b04d0 100644 --- a/src/main/java/me/lemire/integercompression/VariableByte.java +++ b/src/main/java/me/lemire/integercompression/VariableByte.java @@ -21,6 +21,8 @@ */ public class VariableByte implements IntegerCODEC, ByteIntegerCODEC, SkippableIntegerCODEC { + private static final int MAX_BYTES_PER_INT = 5; + private static byte extract7bits(int i, long val) { return (byte) ((val >> (7 * i)) & ((1 << 7) - 1)); } @@ -208,6 +210,14 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o inpos.set(p + (s!=0 ? 
1 : 0));
     }
 
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        long maxLengthInBytes = (long) inlength * MAX_BYTES_PER_INT; // widened: inlength * 5 overflows int beyond ~429M values
+        int maxLengthInInts = Math.toIntExact((maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES); // round up to whole ints; throws rather than wraps
+        compressedPositions.add(inlength);
+        return maxLengthInInts;
+    }
+
     /**
      * Creates a new buffer of the requested size.
      *
diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java b/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java
index 7e1c161..f50a367 100644
--- a/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java
+++ b/src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java
@@ -49,7 +49,8 @@
 public class IntegratedBinaryPacking implements IntegratedIntegerCODEC,
         SkippableIntegratedIntegerCODEC {
 
-    static final int BLOCK_SIZE = 32;
+    public static final int BLOCK_SIZE = 32;
+    private static final int MAX_BIT_WIDTH = Integer.SIZE;
 
     @Override
     public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
@@ -170,4 +171,13 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
         initvalue.set(initoffset);
         inpos.set(tmpinpos);
     }
+
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        int blockCount = inlength / BLOCK_SIZE;
+        int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES);
+        int blocksSizeInInts = blockCount * MAX_BIT_WIDTH;
+        compressedPositions.add(blockCount * BLOCK_SIZE);
+        return headersSizeInInts + blocksSizeInInts;
+    }
 }
diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java
b/src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java @@ -3,7 +3,6 @@ import java.util.Arrays; import me.lemire.integercompression.IntWrapper; -import me.lemire.integercompression.UncompressibleInputException; /** * This is a convenience class that wraps a codec to provide @@ -36,19 +35,14 @@ public IntegratedIntCompressor() { * * @param input array to be compressed * @return compressed array - * @throws UncompressibleInputException if the data is too poorly compressible */ public int[] compress(int[] input) { - int [] compressed = new int[input.length + input.length / 100 + 1024]; + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length); + int [] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); IntWrapper initvalue = new IntWrapper(0); - try { - codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos, initvalue); - } catch (IndexOutOfBoundsException ioebe) { - throw new UncompressibleInputException( - "Your input is too poorly compressible with the current codec : " + codec); - } + codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos, initvalue); compressed = Arrays.copyOf(compressed,outpos.intValue()); return compressed; } diff --git a/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java b/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java index 918a900..a577031 100644 --- a/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java +++ b/src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java @@ -24,6 +24,8 @@ public class IntegratedVariableByte implements IntegratedIntegerCODEC, IntegratedByteIntegerCODEC, SkippableIntegratedIntegerCODEC { + private static final int MAX_BYTES_PER_INT = 5; + private static byte 
extract7bits(int i, long val) {
         return (byte)((val >> (7 * i)) & ((1 << 7) - 1));
     }
@@ -257,6 +259,14 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
         inpos.set(p + (s!=0 ? 1 : 0));
     }
 
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        long maxLengthInBytes = (long) inlength * MAX_BYTES_PER_INT; // widened: inlength * 5 overflows int beyond ~429M values
+        int maxLengthInInts = Math.toIntExact((maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES); // round up to whole ints; throws rather than wraps
+        compressedPositions.add(inlength);
+        return maxLengthInInts;
+    }
+
     /**
      * Creates a new buffer of the requested size.
      *
diff --git a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java
index abcc027..a1379ad 100644
--- a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java
+++ b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java
@@ -76,4 +76,13 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
         F2.headlessUncompress(in, inpos, inlength, out, outpos,num,initvalue);
     }
 
+    @Override
+    public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
+        int init = compressedPositions.get();
+        int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength);
+        maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } } diff --git a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java index 8b7fd4b..e2df754 100644 --- a/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java +++ b/src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java @@ -71,4 +71,21 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos, int num, IntWrapper initvalue); + /** + * Compute the maximum number of integers that might be required to store + * the compressed form of a given input array segment, without headers. + *

+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper, IntWrapper)}. + *

+ * + * @param compressedPositions + * since not all schemes compress every input integer, this parameter + * returns how many input integers will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of integers to be compressed + * @return the maximum number of integers needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); } diff --git a/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java index 0b6ca17..7374fa5 100644 --- a/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java +++ b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java @@ -229,6 +229,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, } } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet."); + } + private void loadMetaData(int[] in, int inexcept, int bytesize) { // Arrays.fill(bem, (byte)0); int len = (bytesize + 3) / 4; diff --git a/src/main/java/me/lemire/longcompression/LongBinaryPacking.java b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java index 33bb8f1..b6ea58f 100644 --- a/src/main/java/me/lemire/longcompression/LongBinaryPacking.java +++ b/src/main/java/me/lemire/longcompression/LongBinaryPacking.java @@ -23,8 +23,9 @@ * @author Benoit Lacelle */ public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC { - final static int BLOCK_SIZE = 64; - + public final static int BLOCK_SIZE = 64; + private static final int MAX_BIT_WIDTH = Long.SIZE; + @Override public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { @@ -136,6 +137,15 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, 
inpos.set(tmpinpos); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int blockCount = inlength / BLOCK_SIZE; + int headersSizeInLongs = blockCount / Long.BYTES + (blockCount % Long.BYTES); + int blocksSizeInLongs = blockCount * MAX_BIT_WIDTH; + compressedPositions.add(blockCount * BLOCK_SIZE); + return headersSizeInLongs + blocksSizeInLongs; + } + @Override public String toString() { return this.getClass().getSimpleName(); diff --git a/src/main/java/me/lemire/longcompression/LongCompressor.java b/src/main/java/me/lemire/longcompression/LongCompressor.java index a2c79fd..246647f 100644 --- a/src/main/java/me/lemire/longcompression/LongCompressor.java +++ b/src/main/java/me/lemire/longcompression/LongCompressor.java @@ -3,7 +3,6 @@ import java.util.Arrays; import me.lemire.integercompression.IntWrapper; -import me.lemire.integercompression.UncompressibleInputException; /** * This is a convenience class that wraps a codec to provide @@ -37,20 +36,14 @@ public LongCompressor() { * * @param input array to be compressed * @return compressed array - * @throws UncompressibleInputException if the data is too poorly compressible */ public long[] compress(long[] input) { - long[] compressed = new long[input.length + input.length / 100 + 1024]; + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length); + long[] compressed = new long[maxCompressedLength + 1]; // +1 to store the length of the input // Store at index=0 the length of the input, hence enabling .headlessCompress compressed[0] = input.length; IntWrapper outpos = new IntWrapper(1); - try { - codec.headlessCompress(input, new IntWrapper(0), - input.length, compressed, outpos); - } catch (IndexOutOfBoundsException ioebe) { - throw new UncompressibleInputException("Your input is too poorly compressible " - + "with the current codec : "+codec); - } + codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, 
outpos); compressed = Arrays.copyOf(compressed,outpos.intValue()); return compressed; } diff --git a/src/main/java/me/lemire/longcompression/LongJustCopy.java b/src/main/java/me/lemire/longcompression/LongJustCopy.java index 9b25f71..95abc1e 100644 --- a/src/main/java/me/lemire/longcompression/LongJustCopy.java +++ b/src/main/java/me/lemire/longcompression/LongJustCopy.java @@ -43,6 +43,12 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + compressedPositions.add(inlength); + return inlength; + } + @Override public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { diff --git a/src/main/java/me/lemire/longcompression/LongVariableByte.java b/src/main/java/me/lemire/longcompression/LongVariableByte.java index ad2b0eb..e60ebd0 100644 --- a/src/main/java/me/lemire/longcompression/LongVariableByte.java +++ b/src/main/java/me/lemire/longcompression/LongVariableByte.java @@ -22,6 +22,7 @@ * @author Benoit Lacelle */ public class LongVariableByte implements LongCODEC, ByteLongCODEC, SkippableLongCODEC { + private static final int MAX_BYTES_PER_INT = 10; private static byte extract7bits(int i, long val) { return (byte) ((val >>> (7 * i)) & ((1 << 7) - 1)); @@ -326,6 +327,14 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] inpos.set(p + (s!=0 ? 1 : 0)); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int maxLengthInBytes = inlength * MAX_BYTES_PER_INT; + int maxLengthInLongs = (maxLengthInBytes + Long.BYTES - 1) / Long.BYTES; + compressedPositions.add(inlength); + return maxLengthInLongs; + } + /** * Creates a new buffer of the requested size. 
* diff --git a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java index 7fe1fe5..33fd562 100644 --- a/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java +++ b/src/main/java/me/lemire/longcompression/SkippableLongCODEC.java @@ -67,4 +67,21 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos, int num); + /** + * Compute the maximum number of longs that might be required to store + * the compressed form of a given input array segment, without headers. + *

+ * This is useful to pre-allocate the output buffer before calling + * {@link #headlessCompress(long[], IntWrapper, int, long[], IntWrapper)}. + *

+ * + * @param compressedPositions + * since not all schemes compress every input long, this parameter + * returns how many input longs will actually be compressed. + * This is useful when composing multiple schemes. + * @param inlength + * number of longs to be compressed + * @return the maximum number of longs needed in the output array + */ + int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength); } diff --git a/src/main/java/me/lemire/longcompression/SkippableLongComposition.java b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java index f2e9a55..0f9800e 100644 --- a/src/main/java/me/lemire/longcompression/SkippableLongComposition.java +++ b/src/main/java/me/lemire/longcompression/SkippableLongComposition.java @@ -62,6 +62,16 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] F2.headlessUncompress(in, inpos, inlength, out, outpos, num); } + @Override + public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) { + int init = compressedPositions.get(); + int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength); + maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
+ inlength -= compressedPositions.get() - init; + maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength); + return maxLength; + } + @Override public String toString() { return F1.toString() + "+" + F2.toString(); diff --git a/src/test/java/me/lemire/integercompression/AdhocTest.java b/src/test/java/me/lemire/integercompression/AdhocTest.java index aa6718b..ee911b3 100644 --- a/src/test/java/me/lemire/integercompression/AdhocTest.java +++ b/src/test/java/me/lemire/integercompression/AdhocTest.java @@ -86,10 +86,10 @@ public void testIssue29() { @Test public void testIssue29b() { for(int x = 0; x < 64; x++) { + SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); int[] a = {2, 3, 4, 5}; - int[] b = new int[90]; + int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)]; int[] c = new int[a.length]; - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); IntWrapper aOffset = new IntWrapper(0); IntWrapper bOffset = new IntWrapper(x); codec.headlessCompress(a, aOffset, a.length, b, bOffset); @@ -108,11 +108,11 @@ public void testIssue29b() { @Test public void testIssue41() { for (int x = 0; x < 64; x++) { - int[] a = { 2, 3, 4, 5 }; - int[] b = new int[90]; - int[] c = new int[a.length]; SkippableIntegratedIntegerCODEC codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); + int[] a = { 2, 3, 4, 5 }; + int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)]; + int[] c = new int[a.length]; IntWrapper aOffset = new IntWrapper(0); IntWrapper bOffset = new IntWrapper(x); IntWrapper initValue = new IntWrapper(0); diff --git a/src/test/java/me/lemire/integercompression/ExampleTest.java b/src/test/java/me/lemire/integercompression/ExampleTest.java index ce10d18..c63c69b 100644 --- a/src/test/java/me/lemire/integercompression/ExampleTest.java +++ 
b/src/test/java/me/lemire/integercompression/ExampleTest.java @@ -116,11 +116,7 @@ public void basicExampleHeadless() { // be processed using variable byte SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); - // output vector should be large enough... - int[] compressed = new int[data.length + 1024]; - // compressed might not be large enough in some cases - // if you get java.lang.ArrayIndexOutOfBoundsException, try - // allocating more memory + int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; /** * @@ -280,10 +276,12 @@ public void headlessDemo() { int[] uncompressed1 = { 1, 2, 1, 3, 1 }; int[] uncompressed2 = { 3, 2, 4, 6, 1 }; - int[] compressed = new int[uncompressed1.length + uncompressed2.length + 1024]; - SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); + int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length) + + codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length); + int[] compressed = new int[maxCompressedLength]; + // compressing IntWrapper outPos = new IntWrapper(); diff --git a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java index 9018229..57a07e3 100644 --- a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java +++ b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java @@ -9,8 +9,13 @@ import java.util.Arrays; +import me.lemire.integercompression.differential.IntegratedBinaryPacking; +import me.lemire.integercompression.differential.IntegratedVariableByte; +import me.lemire.integercompression.differential.SkippableIntegratedComposition; +import me.lemire.integercompression.differential.SkippableIntegratedIntegerCODEC; import org.junit.Test; +import static org.junit.Assert.assertTrue; /** * Just 
some basic sanity tests. @@ -48,10 +53,11 @@ public void consistentTest() { for (SkippableIntegerCODEC c : codecs) { System.out.println("[SkippeableBasicTest.consistentTest] codec = " + c); - int[] outBuf = new int[N + 1024]; for (int n = 0; n <= N; ++n) { IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); + int[] outBuf = new int[c.maxHeadlessCompressedLength(new IntWrapper(0), n)]; + c.headlessCompress(data, inPos, n, outBuf, outPos); IntWrapper inPoso = new IntWrapper(); @@ -147,5 +153,87 @@ public void varyingLengthTest2() { } } + @Test + public void testMaxHeadlessCompressedLength() { + testMaxHeadlessCompressedLength(new IntegratedBinaryPacking(), 16 * IntegratedBinaryPacking.BLOCK_SIZE); + testMaxHeadlessCompressedLength(new IntegratedVariableByte(), 128); + testMaxHeadlessCompressedLength(new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()), 16 * IntegratedBinaryPacking.BLOCK_SIZE + 10); + + testMaxHeadlessCompressedLength(new BinaryPacking(), 16 * BinaryPacking.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new VariableByte(), 128, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new BinaryPacking(), new VariableByte()), 16 * BinaryPacking.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new JustCopy(), 128, 32); + testMaxHeadlessCompressedLength(new Simple9(), 128, 28); + testMaxHeadlessCompressedLength(new Simple16(), 128, 28); + testMaxHeadlessCompressedLength(new GroupSimple9(), 128, 28); + testMaxHeadlessCompressedLength(new OptPFD(), 4 * OptPFD.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFD(), new VariableByte()), 4 * OptPFD.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new OptPFDS9(), 4 * OptPFDS9.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFDS9(), new VariableByte()), 4 * OptPFDS9.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new OptPFDS16(), 4 * 
OptPFDS16.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new OptPFDS16(), new VariableByte()), 4 * OptPFDS16.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFD(), 4 * NewPFD.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFD(), new VariableByte()), 4 * NewPFD.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFDS9(), 4 * NewPFDS9.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFDS9(), new VariableByte()), 4 * NewPFDS9.BLOCK_SIZE + 10, 32); + testMaxHeadlessCompressedLength(new NewPFDS16(), 4 * NewPFDS16.BLOCK_SIZE, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new NewPFDS16(), new VariableByte()), 4 * NewPFDS16.BLOCK_SIZE + 10, 32); + + int fastPfor128PageSize = FastPFOR128.BLOCK_SIZE * 4; // smaller page size than the default to speed up the test + testMaxHeadlessCompressedLength(new FastPFOR128(fastPfor128PageSize), 2 * fastPfor128PageSize, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new FastPFOR128(fastPfor128PageSize), new VariableByte()), 2 * fastPfor128PageSize + 10, 32); + int fastPforPageSize = FastPFOR.BLOCK_SIZE * 4; // smaller page size than the default to speed up the test + testMaxHeadlessCompressedLength(new FastPFOR(fastPforPageSize), 2 * fastPforPageSize, 32); + testMaxHeadlessCompressedLength(new SkippableComposition(new FastPFOR(fastPforPageSize), new VariableByte()), 2 * fastPforPageSize + 10, 32); + } + + private static void testMaxHeadlessCompressedLength(SkippableIntegratedIntegerCODEC codec, int inlengthTo) { + // We test the worst-case scenario by making all deltas and the initial value negative.
+ int delta = -1; + int value = delta; + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + int[] input = new int[inlength]; + for (int i = 0; i < inlength; i++) { + input[i] = value; + value += delta; + } + + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + int[] output = new int[maxOutputLength]; + IntWrapper outPos = new IntWrapper(); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos, new IntWrapper()); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. + + assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableIntegratedComposition always adds one extra integer for the potential header + } + } + + private static void testMaxHeadlessCompressedLength(SkippableIntegerCODEC codec, int inlengthTo, int maxBitWidth) { + // Some schemes ignore bit widths between 21 and 31. Therefore, in addition to maxBitWidth - 1, we also test 20. + assertTrue(maxBitWidth >= 20); + int[] regularValueBitWidths = { 20, maxBitWidth - 1 }; + + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + int[] input = new int[inlength]; + + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + int[] output = new int[maxOutputLength]; + + for (int exceptionCount = 0; exceptionCount < inlength; exceptionCount++) { + int exception = maxBitWidth == 32 ? -1 : (1 << maxBitWidth) - 1; + + for (int regularValueBitWidth : regularValueBitWidths) { + int regularValue = regularValueBitWidth == 32 ? -1 : (1 << regularValueBitWidth) - 1; + + Arrays.fill(input, 0, exceptionCount, exception); + Arrays.fill(input, exceptionCount, input.length, regularValue); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, new IntWrapper()); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. 
+ } + } + } + } } diff --git a/src/test/java/me/lemire/integercompression/TestUtils.java b/src/test/java/me/lemire/integercompression/TestUtils.java index 7ce51b3..b3cbff3 100644 --- a/src/test/java/me/lemire/integercompression/TestUtils.java +++ b/src/test/java/me/lemire/integercompression/TestUtils.java @@ -165,7 +165,7 @@ protected static int[] uncompress(ByteIntegerCODEC codec, byte[] data, int len) } protected static int[] compressHeadless(SkippableIntegerCODEC codec, int[] data) { - int[] outBuf = new int[data.length * 4]; + int[] outBuf = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); codec.headlessCompress(data, inPos, data.length, outBuf, outPos); diff --git a/src/test/java/me/lemire/longcompression/LongTestUtils.java b/src/test/java/me/lemire/longcompression/LongTestUtils.java index 4a30b41..b7d9c63 100644 --- a/src/test/java/me/lemire/longcompression/LongTestUtils.java +++ b/src/test/java/me/lemire/longcompression/LongTestUtils.java @@ -111,7 +111,7 @@ protected static long[] uncompress(ByteLongCODEC codec, byte[] data, int len) { } protected static long[] compressHeadless(SkippableLongCODEC codec, long[] data) { - long[] outBuf = new long[data.length * 4]; + long[] outBuf = new long[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)]; IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); codec.headlessCompress(data, inPos, data.length, outBuf, outPos); diff --git a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java index b317d4f..4309e9d 100644 --- a/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java +++ b/src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java @@ -15,6 +15,7 @@ import me.lemire.integercompression.TestUtils; import me.lemire.integercompression.VariableByte; +import static 
org.junit.Assert.assertTrue; /** * Just some basic sanity tests. @@ -42,10 +43,11 @@ public void consistentTest() { for (SkippableLongCODEC c : codecs) { System.out.println("[SkippeableBasicTest.consistentTest] codec = " + c); - long[] outBuf = new long[N + 1024]; for (int n = 0; n <= N; ++n) { IntWrapper inPos = new IntWrapper(); IntWrapper outPos = new IntWrapper(); + long[] outBuf = new long[c.maxHeadlessCompressedLength(new IntWrapper(0), n)]; + c.headlessCompress(data, inPos, n, outBuf, outPos); IntWrapper inPoso = new IntWrapper(); @@ -142,5 +144,27 @@ public void varyingLengthTest2() { } } + @Test + public void testMaxHeadlessCompressedLength() { + testMaxHeadlessCompressedLength(new LongJustCopy(), 128); + testMaxHeadlessCompressedLength(new LongBinaryPacking(), 16 * LongBinaryPacking.BLOCK_SIZE); + testMaxHeadlessCompressedLength(new LongVariableByte(), 128); + testMaxHeadlessCompressedLength(new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()), 16 * LongBinaryPacking.BLOCK_SIZE + 10); + } + + private static void testMaxHeadlessCompressedLength(SkippableLongCODEC codec, int inlengthTo) { + for (int inlength = 0; inlength < inlengthTo; ++inlength) { + long[] input = new long[inlength]; + Arrays.fill(input, -1L); + int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength); + long[] output = new long[maxOutputLength]; + IntWrapper outPos = new IntWrapper(); + + codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos); + // If we reach this point, no exception was thrown, which means the calculated output length was sufficient. + + assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableLongComposition always adds one extra long for the potential header + } + } }