Permalink
Browse files

Remove PForDeltaWithBase and remove unnecessary memory allocation in readObject of PForDeltaDocIdSet
  • Loading branch information...
1 parent 6007b82 commit 4a0471969cade43967e0efe0cd60e82895d04bf4 hao yan committed Jul 18, 2012
View
76 src/main/java/com/kamikaze/docidset/compression/PForDeltaWithBase.java
@@ -1,76 +0,0 @@
-package com.kamikaze.docidset.compression;
-
-import java.io.Serializable;
-import com.kamikaze.docidset.utils.CompResult;
-import com.kamikaze.pfordelta.PForDelta;
-
-/**
- * Wrapper of PForDelta class. This class is used to compress/decompress data blocks of integers
- *
- * @author hao yan
- */
-public class PForDeltaWithBase implements PForDeltaCompressedSortedIntegerSegment, Serializable {
-
- private static final long serialVersionUID = 1L;
-
- private static final int INVALID = -1;
-
- // Max number of bits to store an uncompressed value
- private int _compressedBitSize = 0; // The compressed size in bits of the block
-
- /**
- * Get the compressed size in bits of the block
- * @return the compressed size in bits of the block
- */
- public int getCompressedBitSize()
- {
- return _compressedBitSize;
- }
-
- /**
- * Estimate the compressed size of a block
- *
- * @param inputBlock a block of non-negative integers to be compressed
- * @param bits the value of b in the PForDelta algorithm
- * @param blockSize the block size which is 256 by default
- * @return the estimated compressed size of the block
- * @throws IllegalArgumentException
- */
- public int estimateCompSize(int[] inputBlock, int bits, int blockSize) throws IllegalArgumentException {
- return PForDelta.estimateCompressedSize(inputBlock, bits, blockSize);
- }
-
- @Override
- public CompResult compressOneBlock(int[] inputBlock, int bits, int blockSize, boolean flag) throws IllegalArgumentException {
- return compressOneBlock(inputBlock, blockSize);
- }
-
- /**
- * Compress an integer array
- *
- * @param inputBlock the integer input array
- * @param blockSize the block size which is 256 by default
- * @return CompResult which contains the compressed size in number of bits and the reference to the compressed data
- * @throws IllegalArgumentException
- */
- public CompResult compressOneBlock(int[] inputBlock, int blockSize) throws IllegalArgumentException {
-
- int[] compBlock = PForDelta.compressOneBlockOpt(inputBlock, blockSize);
- CompResult res = new CompResult();
- res.setCompressedSize(compBlock.length<<5);
- res.setCompressedBlock(compBlock);
- return res;
- }
-
- /**
- * Decompress a compressed block into an integer array
- *
- * @param compBlock the compressed input block
- * @param blockSize the block size which is 256 by default
- * @return the decompressed output block
- */
- public int decompressOneBlock(int[] decompBlock, int[] compBlock, int blockSize)
- {
- return PForDelta.decompressOneBlock(decompBlock, compBlock, blockSize);
- }
-}
View
34 src/main/java/com/kamikaze/docidset/impl/PForDeltaDocIdSet.java
@@ -1,6 +1,5 @@
package com.kamikaze.docidset.impl;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
@@ -13,11 +12,11 @@
import com.kamikaze.docidset.api.DocSet;
import com.kamikaze.docidset.api.StatefulDSIterator;
-import com.kamikaze.docidset.compression.PForDeltaWithBase;
import com.kamikaze.docidset.utils.CompResult;
import com.kamikaze.docidset.utils.Conversion;
import com.kamikaze.docidset.utils.IntArray;
import com.kamikaze.docidset.utils.PForDeltaIntSegmentArray;
+import com.kamikaze.pfordelta.PForDelta;
/**
* This class implements the DocId set which is built on top of the optimized PForDelta algorithm (PForDelta)
@@ -43,7 +42,6 @@
private int totalDocIdNum=0; // the total number of elements that have been inserted/accessed so far
private long compressedBitSize=0; // compressed size in bits
- transient private PForDeltaWithBase compBlockWithBase = new PForDeltaWithBase(); // the PForDelta algorithm to compress a block
transient private IntArray baseListForOnlyCompBlocks; // the base lists for skipping
transient private int[] currentNoCompBlock; // the memory used to store the uncompressed elements. Once the block is full, all its elements are compressed into sequenceOfCompBlocks and the block is cleared.
transient private int sizeOfCurrentNoCompBlock = 0; // the number of uncompressed elements that are held in the currentNoCompBlock
@@ -232,19 +230,15 @@ private void readObject(ObjectInputStream inStrm) throws IOException, ClassNotFo
{
inStrm.defaultReadObject();
- compBlockWithBase = new PForDeltaWithBase();
-
int[] baseArray = (int[])inStrm.readObject();
baseListForOnlyCompBlocks = new IntArray();
for(int i=0; i<baseArray.length; ++i)
{
baseListForOnlyCompBlocks.add(baseArray[i]);
}
- int[] noCompBlock = (int[])inStrm.readObject();
- sizeOfCurrentNoCompBlock = noCompBlock.length;
- currentNoCompBlock = new int[sizeOfCurrentNoCompBlock];
- System.arraycopy(noCompBlock, 0, currentNoCompBlock, 0, sizeOfCurrentNoCompBlock);
+ currentNoCompBlock = (int[])inStrm.readObject();
+ sizeOfCurrentNoCompBlock = currentNoCompBlock.length;
}
@@ -302,7 +296,9 @@ public boolean find(int target)
return false;
// compBlockWithBase.decompressOneBlock(curDecompBlock, sequenceOfCompBlocks.get(iterDecompBlock), _blockSize);
- compBlockWithBase.decompressOneBlock(myDecompBlock, sequenceOfCompBlocks.get(iterDecompBlock), _blockSize);
+ //compBlockWithBase.decompressOneBlock(myDecompBlock, sequenceOfCompBlocks.get(iterDecompBlock), _blockSize);
+ PForDelta.decompressOneBlock(myDecompBlock, sequenceOfCompBlocks.get(iterDecompBlock), _blockSize);
+
int idx ;
lastId = myDecompBlock[0];
if (lastId == target) return true;
@@ -560,8 +556,11 @@ public void flush(int docId)
*/
private CompResult PForDeltaCompressOneBlock(int[] srcData)
{
- CompResult compRes = compBlockWithBase.compressOneBlock(srcData, _blockSize);
- return compRes;
+ int[] compBlock = PForDelta.compressOneBlockOpt(srcData, _blockSize);
+ CompResult res = new CompResult();
+ res.setCompressedSize(compBlock.length<<5);
+ res.setCompressedBlock(compBlock);
+ return res;
}
/**
@@ -570,7 +569,7 @@ private CompResult PForDeltaCompressOneBlock(int[] srcData)
*/
private int PForDeltaEstimateCompSize(int[] srcData, int b)
{
- return compBlockWithBase.estimateCompSize(srcData, b, _blockSize);
+ return PForDelta.estimateCompressedSize(srcData, b, _blockSize);
}
private void initSet() {
@@ -765,7 +764,6 @@ private void printBlock(int[] block, int size)
int compBlockNum=0; // the number of compressed blocks
transient int[] iterDecompBlock = new int[_blockSize]; // temporary storage for the decompressed data
- PForDeltaWithBase iterPForDeltaSetWithBase = new PForDeltaWithBase(); // PForDelta algorithm
PForDeltaDocIdIterator() {
super();
@@ -806,7 +804,7 @@ public int nextDoc()
// must be in one of the compressed blocks
else if(offset == 0) // case 2: the comp block has been decompressed;
{
- iterPForDeltaSetWithBase.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
+ PForDelta.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
lastAccessedDocId = iterDecompBlock[offset];
}
else // case 3: in the recently decompressed block
@@ -945,7 +943,7 @@ private int advanceToTargetInTheFollowingCompBlocks(int target, int startBlockIn
System.err.println("ERROR: advanceToTargetInTheFollowingCompBlocks(): Impossible, we must be able to find the block");
}
- iterPForDeltaSetWithBase.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
+ PForDelta.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
postProcessBlock(iterDecompBlock, _blockSize);
int offset = binarySearchForFirstElementEqualOrLargerThanTarget(iterDecompBlock, 0, _blockSize-1, target);
@@ -973,7 +971,7 @@ private int advanceToTargetInTheFollowingCompBlocksNoPostProcessing(int target,
System.err.println("ERROR: advanceToTargetInTheFollowingCompBlocks(): Impossible, we must be able to find the block");
}
- iterPForDeltaSetWithBase.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
+ PForDelta.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(iterBlockIndex), _blockSize);
lastAccessedDocId = iterDecompBlock[0];
if (lastAccessedDocId >= target)
{
@@ -1011,7 +1009,7 @@ private void printSet()
{
for (int i = 0; i < _blockSize; i++)
{
- iterPForDeltaSetWithBase.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(i), _blockSize);
+ PForDelta.decompressOneBlock(iterDecompBlock, sequenceOfCompBlocks.get(i), _blockSize);
postProcessBlock(iterDecompBlock, _blockSize);
System.out.print(iterDecompBlock + ",");
}

1 comment on commit 4a04719

@ymatsuda

Looks good!

Please sign in to comment.