Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions example.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,7 @@ public static void basicExampleHeadless() {
// be processed using variable byte
SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
new IntegratedVariableByte());
// output vector should be large enough...
int[] compressed = new int[data.length + 1024];
// compressed might not be large enough in some cases
// if you get java.lang.ArrayIndexOutOfBoundsException, try
// allocating more memory
int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];

/**
*
Expand Down Expand Up @@ -267,10 +263,12 @@ public static void headlessDemo() {
int[] uncompressed1 = {1,2,1,3,1};
int[] uncompressed2 = {3,2,4,6,1};

int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024];

SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());

int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length)
+ codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length);
int[] compressed = new int[maxCompressedLength];

// compressing
IntWrapper outPos = new IntWrapper();

Expand Down
16 changes: 13 additions & 3 deletions src/main/java/me/lemire/integercompression/BinaryPacking.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
* @author Daniel Lemire
*/
public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC {
final static int BLOCK_SIZE = 32;

public final static int BLOCK_SIZE = 32;
private static final int MAX_BIT_WIDTH = Integer.SIZE;

@Override
public void compress(int[] in, IntWrapper inpos, int inlength,
int[] out, IntWrapper outpos) {
Expand Down Expand Up @@ -131,7 +132,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
outpos.add(outlength);
inpos.set(tmpinpos);
}


/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * Only complete blocks of {@code BLOCK_SIZE} integers are counted; any
 * trailing integers that do not fill a block are excluded from both the
 * bound and the reported position.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the number of input integers that belong to
 *            complete blocks
 * @param inlength
 *            number of input integers offered for compression
 * @return the header budget plus {@code MAX_BIT_WIDTH} ints per complete block
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int fullBlocks = inlength / BLOCK_SIZE;
    // Report how many input integers are covered by complete blocks.
    compressedPositions.add(fullBlocks * BLOCK_SIZE);
    // Header budget: one int per group of Integer.BYTES blocks, plus one int
    // for every block left over after grouping.
    final int headerInts = fullBlocks / Integer.BYTES + (fullBlocks % Integer.BYTES);
    // Payload budget: every block is assumed to need the widest bit width.
    final int payloadInts = fullBlocks * MAX_BIT_WIDTH;
    return headerInts + payloadInts;
}

@Override
public String toString() {
return this.getClass().getSimpleName();
Expand Down
21 changes: 20 additions & 1 deletion src/main/java/me/lemire/integercompression/FastPFOR.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@
*/
public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
// 1 int for the byte array size
// 1 int for the bitmap
// 1 int for byte array padding (to align to 4 bytes)
// 32 to have enough space to bit-pack the exceptions
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
// 1 byte for the number of exceptions
/**
*
*/
Expand All @@ -65,7 +72,7 @@ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
* @param pagesize
* the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE)
*/
private FastPFOR(int pagesize) {
FastPFOR(int pagesize) {
pageSize = pagesize;
// Initialize arrays.
byteContainer = makeBuffer(3 * pageSize
Expand Down Expand Up @@ -230,6 +237,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
}
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; the bound is then a
 * fixed per-page overhead for every (possibly partial) page, a per-block
 * budget for every block, and a constant slack of 24 ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the number of input integers accounted for by this
 *            bound (the reduced input length), so that composed schemes can
 *            size the remainder correctly
 * @param inlength
 *            number of input integers offered for compression
 * @return the maximum number of ints needed in the output array
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);

    // Round up: a partially filled last page still pays the full page overhead.
    int pageCount = (inlength + pageSize - 1) / pageSize;
    int blockCount = inlength / BLOCK_SIZE;

    // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
    int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;

    // Report the consumed input, as the interface contract requires; without
    // this a composing codec assumes nothing was consumed here and budgets
    // its second stage for the whole input.
    compressedPositions.add(inlength);

    // NOTE(review): the rationale for the trailing 24-int slack is not visible here.
    return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
final int initpos = inpos.get();
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/me/lemire/integercompression/FastPFOR128.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@
*/
public class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC {
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
// 1 int for the byte array size
// 1 int for the bitmap
// 1 int for byte array padding (to align to 4 bytes)
// 32 to have enough space to bit-pack the exceptions
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
// 1 byte for the number of exceptions
/**
*
*/
Expand Down Expand Up @@ -209,6 +216,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
}
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; the bound is then a
 * fixed per-page overhead for every (possibly partial) page, a per-block
 * budget for every block, and a constant slack of 24 ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the number of input integers accounted for by this
 *            bound (the reduced input length), so that composed schemes can
 *            size the remainder correctly
 * @param inlength
 *            number of input integers offered for compression
 * @return the maximum number of ints needed in the output array
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);

    // Round up: a partially filled last page still pays the full page overhead.
    int pageCount = (inlength + pageSize - 1) / pageSize;
    int blockCount = inlength / BLOCK_SIZE;

    // getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
    int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;

    // Report the consumed input, as the interface contract requires; without
    // this a composing codec assumes nothing was consumed here and budgets
    // its second stage for the whole input.
    compressedPositions.add(inlength);

    // NOTE(review): the rationale for the trailing 24-int slack is not visible here.
    return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
final int initpos = inpos.get();
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/me/lemire/integercompression/GroupSimple9.java
Original file line number Diff line number Diff line change
Expand Up @@ -3549,4 +3549,10 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
inpos.set(tmpinpos);

}
}

/**
 * Reports the worst-case headless output size for this scheme: the bound
 * equals the input length, and the whole input is reported as consumed.
 *
 * @param compressedPositions
 *            advanced by {@code inlength}
 * @param inlength
 *            number of input integers offered for compression
 * @return {@code inlength}
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int upperBound = inlength;
    compressedPositions.add(upperBound);
    return upperBound;
}
}
13 changes: 3 additions & 10 deletions src/main/java/me/lemire/integercompression/IntCompressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,14 @@ public IntCompressor() {
*
* @param input array to be compressed
* @return compressed array
* @throws UncompressibleInputException if the data is too poorly compressible
*/
// NOTE(review): this hunk appears to interleave the removed and the added lines
// of the diff (two declarations of `compressed`, the old try/catch next to the
// new direct call). Read it as before/after, not as one compilable method body.
public int[] compress(int[] input) {
int[] compressed = new int[input.length + input.length / 100 + 1024];
// Ask the codec for its worst-case output size so the buffer is allocated up front.
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
int[] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input
// Store at index=0 the length of the input, hence enabling .headlessCompress
compressed[0] = input.length;
// Output starts at index 1 because slot 0 holds the input length.
IntWrapper outpos = new IntWrapper(1);
try {
codec.headlessCompress(input, new IntWrapper(0),
input.length, compressed, outpos);
} catch (IndexOutOfBoundsException ioebe) {
throw new
UncompressibleInputException("Your input is too poorly compressible "
+ "with the current codec : "+codec);
}
codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos);
// Shrink the buffer to outpos ints (presumably the first unused output slot — confirm).
compressed = Arrays.copyOf(compressed,outpos.intValue());
return compressed;
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/me/lemire/integercompression/JustCopy.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,

}

/**
 * Reports the worst-case headless output size for this scheme: the bound
 * equals the input length, and the whole input is reported as consumed.
 *
 * @param compressedPositions
 *            advanced by {@code inlength}
 * @param inlength
 *            number of input integers offered for compression
 * @return {@code inlength}
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int upperBound = inlength;
    compressedPositions.add(upperBound);
    return upperBound;
}

@Override
public void compress(int[] in, IntWrapper inpos, int inlength,
int[] out, IntWrapper outpos) {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/me/lemire/integercompression/Kamikaze.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
}
}

/**
 * Not implemented for this codec.
 *
 * @param compressedPositions
 *            unused
 * @param inlength
 *            unused
 * @return never returns normally
 * @throws UnsupportedOperationException
 *             always
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final String reason = "Calculating the max compressed length is not supported yet.";
    throw new UnsupportedOperationException(reason);
}

@Override
public String toString() {
return "Kamikaze's PForDelta";
Expand All @@ -64,4 +69,4 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
headlessUncompress(in, inpos, inlength, out, outpos, outlength);

}
}
}
11 changes: 11 additions & 0 deletions src/main/java/me/lemire/integercompression/NewPFD.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/me/lemire/integercompression/NewPFDS16.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/me/lemire/integercompression/NewPFDS9.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/me/lemire/integercompression/OptPFD.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/me/lemire/integercompression/OptPFDS16.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down Expand Up @@ -197,4 +208,4 @@ public String toString() {
return this.getClass().getSimpleName();
}

}
}
13 changes: 12 additions & 1 deletion src/main/java/me/lemire/integercompression/OptPFDS9.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
decodePage(in, inpos, out, outpos, mynvalue);
}

/**
 * Computes an upper bound, in ints, on the output of a headless compression
 * of {@code inlength} integers.
 * <p>
 * The input length is first reduced with
 * {@code Util.greatestMultiple(inlength, BLOCK_SIZE)}; each resulting block
 * is budgeted {@code BLOCK_SIZE + 1} ints.
 * </p>
 *
 * @param compressedPositions
 *            advanced by the reduced input length
 * @param inlength
 *            number of input integers offered for compression
 * @return the number of blocks times the per-block budget
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int usableLength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    compressedPositions.add(usableLength);
    // One header int per block, plus at most BLOCK_SIZE ints of data:
    // getBestBFromData limits the memory used for exceptions so that the
    // total size of the block does not exceed BLOCK_SIZE integers.
    final int worstCaseIntsPerBlock = 1 + BLOCK_SIZE;
    return worstCaseIntsPerBlock * (usableLength / BLOCK_SIZE);
}

private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
int tmpoutpos = outpos.get();
Expand Down Expand Up @@ -197,4 +208,4 @@ public String toString() {
return this.getClass().getSimpleName();
}

}
}
8 changes: 7 additions & 1 deletion src/main/java/me/lemire/integercompression/Simple16.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
outpos.set(i_outpos);
}

/**
 * Reports the worst-case headless output size for this scheme: the bound
 * equals the input length, and the whole input is reported as consumed.
 *
 * @param compressedPositions
 *            advanced by {@code inlength}
 * @param inlength
 *            number of input integers offered for compression
 * @return {@code inlength}
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int upperBound = inlength;
    compressedPositions.add(upperBound);
    return upperBound;
}

/**
* Uncompress data from an array to another array.
*
Expand Down Expand Up @@ -182,4 +188,4 @@ public String toString() {
{ 7, 7, 7, 7 }, { 10, 9, 9, }, { 14, 14 }, { 28 } };
private static final int[][] SHIFTED_S16_BITS = shiftme(S16_BITS);

}
}
6 changes: 6 additions & 0 deletions src/main/java/me/lemire/integercompression/Simple9.java
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o

}

/**
 * Reports the worst-case headless output size for this scheme: the bound
 * equals the input length, and the whole input is reported as consumed.
 *
 * @param compressedPositions
 *            advanced by {@code inlength}
 * @param inlength
 *            number of input integers offered for compression
 * @return {@code inlength}
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int upperBound = inlength;
    compressedPositions.add(upperBound);
    return upperBound;
}

@Override
public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) {
if (inlength == 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
F2.headlessUncompress(in, inpos, inlength, out, outpos, num);
}

/**
 * Computes an upper bound for the composed scheme by summing the bound of
 * the first codec, one extra int, and the bound of the second codec applied
 * to whatever the first codec did not report as consumed.
 *
 * @param compressedPositions
 *            advanced by both codecs in turn; the difference observed around
 *            the first call determines the input length given to the second
 * @param inlength
 *            number of input integers offered for compression
 * @return the combined maximum number of ints needed in the output array
 */
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
    final int startPosition = compressedPositions.get();
    final int firstBound = F1.maxHeadlessCompressedLength(compressedPositions, inlength);
    // Whatever F1 reported as consumed is not offered to F2.
    final int consumedByFirst = compressedPositions.get() - startPosition;
    final int secondBound = F2.maxHeadlessCompressedLength(compressedPositions, inlength - consumedByFirst);
    // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
    return firstBound + 1 + secondBound;
}

@Override
public String toString() {
return F1.toString() + "+" + F2.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,21 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
IntWrapper outpos, int num);

/**
 * Compute the maximum number of integers that might be required to store
 * the compressed form of a given input array segment, without headers.
 * <p>
 * This is useful to pre-allocate the output buffer before calling
 * {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper)}.
 * </p>
 * <p>
 * Implementations that cannot compute such a bound may throw
 * {@link UnsupportedOperationException} instead of returning a value.
 * </p>
 *
 * @param compressedPositions
 *            in/out counter. Since not all schemes compress every input
 *            integer, an implementation is expected to <em>add</em> to this
 *            wrapper the number of input integers it will actually
 *            compress. A caller composing multiple schemes can read the
 *            value before and after the call and pass only the remaining
 *            integers to the next scheme.
 * @param inlength
 *            number of integers to be compressed
 * @return the maximum number of integers needed in the output array
 */
int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength);
}
Loading