Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

fix the byteArrayToLong bug and add CRC for checksum of serialized data

  • Loading branch information...
commit cc661b242008927a911c9adf153c9b508f584fe0 1 parent 51968f4
hao yan authored
View
33 src/main/java/com/kamikaze/docidset/impl/PForDeltaDocIdSet.java
@@ -1,10 +1,13 @@
package com.kamikaze.docidset.impl;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
import org.apache.lucene.search.DocIdSetIterator;
@@ -68,8 +71,6 @@ public PForDeltaDocIdSet(int batchSize) {
public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws IOException
{
PForDeltaDocIdSet res = new PForDeltaDocIdSet();
-// int totalNumInt = Conversion.byteArrayToInt(bytesData, offset);
-// offset += Conversion.BYTES_PER_INT;
// 1. version
res.version = Conversion.byteArrayToInt(bytesData, offset);
@@ -113,12 +114,16 @@ public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws
res.sequenceOfCompBlocks = PForDeltaIntSegmentArray.newInstanceFromBytes(bytesData, offset);
offset += (PForDeltaIntSegmentArray.getSerialIntNum(res.sequenceOfCompBlocks) * Conversion.BYTES_PER_INT);
- // 9. hashCode
- int expectedHashCode = 1;
- int hashCode = Conversion.byteArrayToInt(bytesData, offset);
- if(expectedHashCode != hashCode)
+ // 9. checksum
+ Checksum digest = new CRC32();
+ digest.update(bytesData, 0, offset);
+ long checksum = digest.getValue();
+
+ long receivedChecksum = Conversion.byteArrayToLong(bytesData, offset);
+
+ if(receivedChecksum != checksum)
{
- throw new IOException("serialization problem");
+ throw new IOException("serialization error: check sum does not match: ");
}
return res;
@@ -128,7 +133,7 @@ public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws
{
int versionNumInt = 1;
int blockSizeNumInt = 1;
- int hashCodeInt = 1;
+ int checksumInt = 2; // checksum is long = 2 ints
int lastAddedNumInt = 1;
int totalDocIdNumInt = 1;
int compressedBitsNumInt = 2; // long = 2 ints
@@ -140,7 +145,7 @@ public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws
// plus the checksum (a long = 2 ints) computed over all serialized data
int totalNumInt = versionNumInt + blockSizeNumInt + lastAddedNumInt + totalDocIdNumInt + compressedBitsNumInt +
- baseListForOnlyComnpBlocksNumInt + currentNoCompBlockBlockNumInt + seqCompBlockIntNum + hashCodeInt;
+ baseListForOnlyComnpBlocksNumInt + currentNoCompBlockBlockNumInt + seqCompBlockIntNum + checksumInt;
byte[] bytesData = new byte[(totalNumInt+1)*Conversion.BYTES_PER_INT]; // +1 because of totalNumInt itself
@@ -150,6 +155,7 @@ public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws
Conversion.intToByteArray(totalNumInt, bytesData, offset);
offset += Conversion.BYTES_PER_INT;
+ int startOffset = offset;
// 1. version
Conversion.intToByteArray(pForDeltaDocIdSet.version, bytesData, offset);
offset += Conversion.BYTES_PER_INT;
@@ -187,9 +193,12 @@ public static PForDeltaDocIdSet deserialize(byte[] bytesData, int offset) throws
PForDeltaIntSegmentArray.convertToBytes(pForDeltaDocIdSet.sequenceOfCompBlocks, bytesData, offset);
offset += (seqCompBlockIntNum*Conversion.BYTES_PER_INT);
- // 9. hashCode
- int hashCode = 1;;
- Conversion.intToByteArray(hashCode, bytesData, offset);
+ // 9. checksum
+ Checksum digest = new CRC32();
+ digest.update(bytesData, startOffset, offset-startOffset);
+ long checksum = digest.getValue();
+
+ Conversion.longToByteArray(checksum, bytesData, offset);
return bytesData;
}
View
20 src/main/java/com/kamikaze/docidset/utils/Conversion.java
@@ -29,15 +29,15 @@ public static final void longToByteArray(long value, byte[] bytes, int offset) {
bytes[offset+7] = (byte)value;
}
-public static final long byteArrayToLong(byte [] b, int offset) {
- return (b[offset] << 56)
- + ((b[offset+1] & 0xFF) << 48)
- + ((b[offset+2] & 0xFF) << 40)
- + ((b[offset+3] & 0xFF) << 32)
- + ((b[offset+4] & 0xFF) << 24)
- + ((b[offset+5] & 0xFF) << 16)
- + ((b[offset+6] & 0xFF) << 8)
- + (b[offset+7] & 0xFF);
-}
+ public static final long byteArrayToLong(byte [] b, int offset) {
+ return ((long)b[offset] << 56)
+ + (((long)b[offset+1] & 0xFF) << 48)
+ + (((long)b[offset+2] & 0xFF) << 40)
+ + (((long)b[offset+3] & 0xFF) << 32)
+ + (((long)b[offset+4] & 0xFF) << 24)
+ + ((b[offset+5] & 0xFF) << 16)
+ + ((b[offset+6] & 0xFF) << 8)
+ + (b[offset+7] & 0xFF);
+ }
}
View
2  src/test/java/com/kamikaze/test/PForDeltaTestDocSetSerializationTest.java
@@ -36,7 +36,7 @@ public PForDeltaTestDocSetSerializationTest() {
@Test
- public void AAtestNewPForDeltaDocSetSerialization() throws Exception{
+ public void testNewPForDeltaDocSetSerialization() throws Exception{
// Uses the new static serialize/deserialize methods to test the accuracy of the serialization and deserialization of PForDeltaDocIdSet objects, by verifying that the deserialized object's nextDoc() results match the original object's nextDoc() results.
int batch = 128;
System.out.println("");
Please sign in to comment.
Something went wrong with that request. Please try again.