Skip to content

Commit

Permalink
add hardcoded SUID to all serializable classes in production
Browse files Browse the repository at this point in the history
's 3.0.1, and add new serialization/deserialization from/to byte[] array
  • Loading branch information
hao yan committed Dec 14, 2011
1 parent abe1f8b commit 51968f4
Show file tree
Hide file tree
Showing 10 changed files with 746 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -67,7 +67,7 @@
<version>2.5</version>
<configuration>
<argLine>
-Xms256m -Xmx2g
-Xms256m -Xmx4g <!-- -Xdebug -Xrunjdwp:transport=dt_socket,address=8002,server=y,suspend=y -->
</argLine>
<excludes>
<exclude>com/kamikaze/test/perf/*.java</exclude>
Expand Down
139 changes: 136 additions & 3 deletions src/main/java/com/kamikaze/docidset/impl/PForDeltaDocIdSet.java
Expand Up @@ -12,9 +12,9 @@
import com.kamikaze.docidset.api.StatefulDSIterator;
import com.kamikaze.docidset.compression.PForDeltaWithBase;
import com.kamikaze.docidset.utils.CompResult;
import com.kamikaze.docidset.utils.Conversion;
import com.kamikaze.docidset.utils.IntArray;
import com.kamikaze.docidset.utils.PForDeltaIntSegmentArray;
import com.kamikaze.pfordelta.PForDelta;

/**
* This class implements the DocId set which is built on top of the optimized PForDelta algorithm (PForDeltaWithBase)
Expand All @@ -31,6 +31,7 @@ public class PForDeltaDocIdSet extends DocSet implements Serializable {

private PForDeltaIntSegmentArray sequenceOfCompBlocks; // segments of compressed data (each segment contains the compressed array of say, 256 integers)


public static final int DEFAULT_BATCH_SIZE = 256; // default block size
private static final int[] POSSIBLE_B = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,20}; // all possible values of b in PForDelta algorithm

Expand All @@ -44,6 +45,8 @@ public class PForDeltaDocIdSet extends DocSet implements Serializable {
transient private int[] currentNoCompBlock; // the memory used to store the uncompressed elements. Once the block is full, all its elements are compressed into sequencOfCompBlock and the block is cleared.
transient private int sizeOfCurrentNoCompBlock = 0; // the number of uncompressed elements that is hold in the currentNoCompBlock

private int version = 1;

public PForDeltaDocIdSet() {
sequenceOfCompBlocks = new PForDeltaIntSegmentArray();
baseListForOnlyCompBlocks = new IntArray();
Expand All @@ -54,14 +57,144 @@ public PForDeltaDocIdSet() {

/**
 * Creates a doc id set that uses {@code batchSize} as its block size.
 *
 * Runs the no-argument constructor first, then (conditionally) replaces the
 * uncompressed working buffer with one of {@code batchSize} ints, resets the
 * count of buffered uncompressed elements to 0 and records {@code batchSize}
 * as the block size.
 *
 * @param batchSize number of ints the uncompressed working buffer is sized for
 */
public PForDeltaDocIdSet(int batchSize) {
this();
// NOTE(review): the next two conditions look like the removed and the added
// version of the same line as rendered by the commit diff view - confirm
// against the real file. As written here they nest, so the buffer is only
// reallocated when _blockSize < batchSize.
if(_blockSize < batchSize)
if(_blockSize != batchSize)
{
currentNoCompBlock = new int[batchSize];
}
// nothing has been buffered yet for the new set
sizeOfCurrentNoCompBlock = 0;
_blockSize = batchSize;
}


/**
 * Rebuilds a {@code PForDeltaDocIdSet} from the byte layout produced by
 * {@link #serialize(PForDeltaDocIdSet)}.
 *
 * Fields are read in this order, starting at {@code offset}: version, block size,
 * lastAdded, totalDocIdNum (one int each), compressedBitSize (one long), the base
 * list, the uncompressed tail block (element count followed by the elements), the
 * compressed blocks, and finally a trailing check value.
 *
 * Note that {@code serialize} writes one extra int (the total int count) in front of
 * the version field and this method does not read it: {@code offset} must therefore
 * point at the version field, i.e. {@code Conversion.BYTES_PER_INT} bytes past the
 * start of an array returned by {@code serialize}.
 *
 * @param bytesData buffer holding the serialized set
 * @param offset    index in {@code bytesData} of the version field
 * @return the reconstructed set
 * @throws IOException if the tail block count is out of range or the trailing check
 *                     value does not match
 */
public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws IOException
{
  PForDeltaDocIdSet res = new PForDeltaDocIdSet();

  // 1. version
  res.version = Conversion.byteArrayToInt(bytesData, offset);
  offset += Conversion.BYTES_PER_INT;

  // 2. blockSize
  int blkSize = Conversion.byteArrayToInt(bytesData, offset);
  offset += Conversion.BYTES_PER_INT;
  if(res._blockSize != blkSize)
  {
    // the working buffer has to match the block size the data was written with
    res._blockSize = blkSize;
    res.currentNoCompBlock = new int[res._blockSize];
  }

  // 3. lastAdded
  res.lastAdded = Conversion.byteArrayToInt(bytesData, offset);
  offset += Conversion.BYTES_PER_INT;

  // 4. totalDocIdNum
  res.totalDocIdNum = Conversion.byteArrayToInt(bytesData, offset);
  offset += Conversion.BYTES_PER_INT;

  // 5. compressedBitSize
  res.compressedBitSize = Conversion.byteArrayToLong(bytesData, offset);
  offset += Conversion.BYTES_PER_LONG;

  // 6. base (skipping info)
  res.baseListForOnlyCompBlocks = IntArray.newInstanceFromBytes(bytesData, offset);
  offset += (IntArray.getSerialIntNum(res.baseListForOnlyCompBlocks) * Conversion.BYTES_PER_INT);

  // 7. the last block (uncompressed)
  int noCompBlockSize = Conversion.byteArrayToInt(bytesData, offset);
  offset += Conversion.BYTES_PER_INT;
  if(noCompBlockSize < 0 || noCompBlockSize > res._blockSize)
  {
    // report corrupt input through the declared exception instead of an index error
    throw new IOException("invalid uncompressed block size " + noCompBlockSize
        + " for block size " + res._blockSize);
  }
  // remember how many tail elements are live; without this the restored
  // elements would sit in the buffer but never be counted
  res.sizeOfCurrentNoCompBlock = noCompBlockSize;
  for(int i=0; i<noCompBlockSize; i++)
  {
    res.currentNoCompBlock[i] = Conversion.byteArrayToInt(bytesData, offset);
    offset += Conversion.BYTES_PER_INT;
  }

  // 8. compressed blocks
  res.sequenceOfCompBlocks = PForDeltaIntSegmentArray.newInstanceFromBytes(bytesData, offset);
  offset += (PForDeltaIntSegmentArray.getSerialIntNum(res.sequenceOfCompBlocks) * Conversion.BYTES_PER_INT);

  // 9. hashCode (currently a fixed marker: serialize always writes 1)
  int expectedHashCode = 1;
  int hashCode = Conversion.byteArrayToInt(bytesData, offset);
  if(expectedHashCode != hashCode)
  {
    throw new IOException("serialization problem");
  }

  return res;
}

/**
 * Flattens a doc id set into a newly allocated byte array.
 *
 * Layout, in order: total int count (not counting itself), version, block size,
 * lastAdded, totalDocIdNum, compressedBitSize (a long, i.e. two ints), the base list,
 * the uncompressed tail block (its element count followed by the elements), the
 * compressed blocks, and a trailing check value that is currently always 1.
 *
 * @param pForDeltaDocIdSet the set to serialize
 * @return the serialized bytes
 */
public static byte[] serialize(PForDeltaDocIdSet pForDeltaDocIdSet)
{
  final int intBytes = Conversion.BYTES_PER_INT;

  // variable-size sections, measured in ints
  final int baseInts = IntArray.getSerialIntNum(pForDeltaDocIdSet.baseListForOnlyCompBlocks);
  final int tailInts = 1 + pForDeltaDocIdSet.sizeOfCurrentNoCompBlock; // count + elements
  final int segmentInts = PForDeltaIntSegmentArray.getSerialIntNum(pForDeltaDocIdSet.sequenceOfCompBlocks);

  // fixed-size sections: version, blockSize, lastAdded, totalDocIdNum (1 int each),
  // compressedBitSize (2 ints) and the trailing hashCode (1 int)
  final int fixedInts = 1 + 1 + 1 + 1 + 2 + 1;
  final int totalNumInt = fixedInts + baseInts + tailInts + segmentInts;

  // one extra int up front to hold totalNumInt itself
  final byte[] out = new byte[(totalNumInt + 1) * intBytes];
  int pos = 0;

  // 0. totalNumInt
  Conversion.intToByteArray(totalNumInt, out, pos);
  pos += intBytes;

  // 1. version
  Conversion.intToByteArray(pForDeltaDocIdSet.version, out, pos);
  pos += intBytes;

  // 2. blockSize
  Conversion.intToByteArray(pForDeltaDocIdSet._blockSize, out, pos);
  pos += intBytes;

  // 3. lastAdded
  Conversion.intToByteArray(pForDeltaDocIdSet.lastAdded, out, pos);
  pos += intBytes;

  // 4. totalDocIdNum
  Conversion.intToByteArray(pForDeltaDocIdSet.totalDocIdNum, out, pos);
  pos += intBytes;

  // 5. compressedBitSize
  Conversion.longToByteArray(pForDeltaDocIdSet.compressedBitSize, out, pos);
  pos += Conversion.BYTES_PER_LONG;

  // 6. base (skipping info); advance by the number of ints actually written
  final int writtenBaseInts = IntArray.convertToBytes(pForDeltaDocIdSet.baseListForOnlyCompBlocks, out, pos);
  pos += writtenBaseInts * intBytes;

  // 7. the last block (uncompressed): count first, then each element
  Conversion.intToByteArray(pForDeltaDocIdSet.sizeOfCurrentNoCompBlock, out, pos);
  pos += intBytes;
  int idx = 0;
  while (idx < pForDeltaDocIdSet.sizeOfCurrentNoCompBlock)
  {
    Conversion.intToByteArray(pForDeltaDocIdSet.currentNoCompBlock[idx], out, pos);
    pos += intBytes;
    idx++;
  }

  // 8. compressed blocks
  PForDeltaIntSegmentArray.convertToBytes(pForDeltaDocIdSet.sequenceOfCompBlocks, out, pos);
  pos += segmentInts * intBytes;

  // 9. hashCode (fixed marker value)
  final int marker = 1;
  Conversion.intToByteArray(marker, out, pos);

  return out;
}


/**
* Serialize the object manually
*
Expand Down
43 changes: 43 additions & 0 deletions src/main/java/com/kamikaze/docidset/utils/Conversion.java
@@ -0,0 +1,43 @@
package com.kamikaze.docidset.utils;

/**
 * Big-endian conversions between primitive integers and byte arrays.
 *
 * All methods read or write a fixed number of bytes starting at {@code offset}
 * (most significant byte first) and perform no bounds checking of their own.
 */
public class Conversion {
  /** Number of bytes used to encode an {@code int}. */
  public static final int BYTES_PER_INT = 4;
  /** Number of bytes used to encode a {@code long}. */
  public static final int BYTES_PER_LONG = 8;

  /**
   * Writes {@code value} into {@code bytes[offset .. offset+3]}, most significant byte first.
   *
   * @param value  the int to encode
   * @param bytes  destination buffer
   * @param offset index of the first byte to write
   */
  public static final void intToByteArray(int value, byte[] bytes, int offset) {
    bytes[offset] = (byte)(value >>> 24);
    bytes[offset+1] = (byte)(value >>> 16);
    bytes[offset+2] = (byte)(value >>> 8);
    bytes[offset+3] = (byte)value;
  }

  /**
   * Reads a big-endian int from {@code b[offset .. offset+3]}.
   *
   * @param b      source buffer
   * @param offset index of the first byte to read
   * @return the decoded int
   */
  public static final int byteArrayToInt(byte [] b, int offset) {
    // the top byte keeps its sign on purpose: shifting left by 24 discards the extension
    return (b[offset] << 24)
        | ((b[offset+1] & 0xFF) << 16)
        | ((b[offset+2] & 0xFF) << 8)
        | (b[offset+3] & 0xFF);
  }

  /**
   * Writes {@code value} into {@code bytes[offset .. offset+7]}, most significant byte first.
   *
   * @param value  the long to encode
   * @param bytes  destination buffer
   * @param offset index of the first byte to write
   */
  public static final void longToByteArray(long value, byte[] bytes, int offset) {
    bytes[offset] = (byte)(value >>> 56);
    bytes[offset+1] = (byte)(value >>> 48);
    bytes[offset+2] = (byte)(value >>> 40);
    bytes[offset+3] = (byte)(value >>> 32);
    bytes[offset+4] = (byte)(value >>> 24);
    bytes[offset+5] = (byte)(value >>> 16);
    bytes[offset+6] = (byte)(value >>> 8);
    bytes[offset+7] = (byte)value;
  }

  /**
   * Reads a big-endian long from {@code b[offset .. offset+7]}.
   *
   * @param b      source buffer
   * @param offset index of the first byte to read
   * @return the decoded long
   */
  public static final long byteArrayToLong(byte [] b, int offset) {
    // Every operand is widened to long BEFORE shifting. A byte is promoted to int,
    // and an int shift distance is taken modulo 32, so shifting an int by 56/48/40/32
    // would silently act as a shift by 24/16/8/0 and corrupt the result.
    return ((long) b[offset] << 56)
        | ((b[offset+1] & 0xFFL) << 48)
        | ((b[offset+2] & 0xFFL) << 40)
        | ((b[offset+3] & 0xFFL) << 32)
        | ((b[offset+4] & 0xFFL) << 24)
        | ((b[offset+5] & 0xFFL) << 16)
        | ((b[offset+6] & 0xFFL) << 8)
        | (b[offset+7] & 0xFFL);
  }

}
58 changes: 58 additions & 0 deletions src/main/java/com/kamikaze/docidset/utils/IntArray.java
Expand Up @@ -25,6 +25,7 @@

package com.kamikaze.docidset.utils;

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;

Expand Down Expand Up @@ -95,4 +96,61 @@ else if (midVal > key)
return -(low + 1); // key not found.
}

/**
 * Returns how many ints the serialized form of {@code instance} occupies:
 * three header ints (_len, _count, _growth) plus one int per counted element.
 *
 * @param instance the array to measure
 * @return the serialized size in ints
 */
public static int getSerialIntNum(IntArray instance)
{
  final int headerInts = 3; // _len, _count, _growth
  return headerInts + instance._count;
}

/**
 * Writes {@code instance} into {@code out} starting at {@code offset}: the three
 * header ints (_len, _count, _growth) followed by every element in index order.
 *
 * @param instance the array to write
 * @param out      destination buffer
 * @param offset   index in {@code out} of the first byte to write
 * @return the number of ints written (3 header ints plus {@code instance.size()})
 */
public static int convertToBytes(IntArray instance, byte[] out, int offset)
{
  int cursor = offset;

  // header, in the order the reader expects it
  final int[] header = { instance._len, instance._count, instance._growth };
  for (int field : header)
  {
    Conversion.intToByteArray(field, out, cursor);
    cursor += Conversion.BYTES_PER_INT;
  }

  // payload
  final int elementCount = instance.size();
  for (int idx = 0; idx < elementCount; idx++)
  {
    Conversion.intToByteArray(instance.get(idx), out, cursor);
    cursor += Conversion.BYTES_PER_INT;
  }

  return header.length + elementCount;
}

/**
 * Builds an {@code IntArray} from bytes laid out as three header ints
 * (_len, _count, _growth) followed by {@code _count} element ints.
 *
 * The array is constructed with the stored length, the elements are appended one
 * by one, and only afterwards is the stored growth value applied.
 *
 * @param inData source buffer
 * @param offset index in {@code inData} of the first header byte
 * @return the reconstructed array
 * @throws IOException if the resulting element count differs from the stored count
 */
public static IntArray newInstanceFromBytes(byte[] inData, int offset) throws IOException
{
  final int step = Conversion.BYTES_PER_INT;
  int cursor = offset;

  // the stored length sizes the new array
  final IntArray result = new IntArray(Conversion.byteArrayToInt(inData, cursor));
  cursor += step;

  final int expectedCount = Conversion.byteArrayToInt(inData, cursor);
  cursor += step;

  final int storedGrowth = Conversion.byteArrayToInt(inData, cursor);
  cursor += step;

  for (int remaining = expectedCount; remaining > 0; remaining--)
  {
    result.add(Conversion.byteArrayToInt(inData, cursor));
    cursor += step;
  }

  // applied after the elements are added, as in the stored object
  result._growth = storedGrowth;

  if (result._count == expectedCount)
  {
    return result;
  }
  throw new IOException("cannot build IntArray from byte[]");
}
}
Expand Up @@ -4,11 +4,7 @@

public class LongSegmentArray extends PrimitiveArray<long[]> implements Serializable{


/**
*
*/
private static final long serialVersionUID = 1L;
private static final long serialVersionUID = -5791570113959834064L;

public LongSegmentArray(int len) {
super(len);
Expand Down
Expand Up @@ -9,7 +9,7 @@
* author@abhasin
*/
public class MyOpenBitSetArray extends PrimitiveArray<MyOpenBitSet> implements Serializable{

private static final long serialVersionUID = 2493283898882704031L;

public MyOpenBitSetArray(int len) {
super(len);
Expand Down

0 comments on commit 51968f4

Please sign in to comment.