Permalink
Browse files

Improved the hard-coded PForDelta and Simple16 implementations, and updated the contrib to the latest PForDeltaFixedIntBlockWithIntBufferCodec codec.
  • Loading branch information...
1 parent d990eab commit 020f234e1d9363251ce6b5d3402ae00b5a9705ab @hyan hyan committed Feb 19, 2011
View
@@ -1,9 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry kind="src" path="contrib/luceneCodec/test"/>
+ <classpathentry excluding="com/kamikaze/lucecodec/util/" kind="src" path="contrib/luceneCodec/src"/>
<classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
+ <classpathentry kind="lib" path="/Users/hyan/workspace/ConvertLuceneFrom3To4/lib/master/lucene-core-4.0-SNAPSHOT.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
@@ -2,7 +2,8 @@ Manifest-Version: 1.0
Ant-Version: Apache Ant 1.7.1
Created-By: 17.1-b03-307 (Apple Inc.)
Built-By: hyan
-Git-Version: 3dbe9dcf8e7a5eca8371dbca6f0d6b00ee38aca6 removed unnecc f
- iles
-Built-Date: 2010-12-21 18:09:27
+Git-Version: d990eab504e1f071fd16b4caffc9d8b2e1848e86 Added profile, w
+ hich signs artifacts during deployment. Changed ids of release and sn
+ apshot repositories.
+Built-Date: 2011-02-18 19:41:06
@@ -1,23 +1,5 @@
package com.kamikaze.lucenecodec;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
import java.io.IOException;
import java.util.Set;
@@ -29,8 +11,8 @@
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
-import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
+import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.BlockTermsReader;
@@ -48,13 +30,13 @@
* used here writes each block as data encoded by PForDelta.
*/
-public class PForDeltaFixedIntBlockCodec extends Codec {
+public class PForDeltaFixedIntBlockWithIntBufferCodec extends Codec {
private final int blockSize;
- public PForDeltaFixedIntBlockCodec(int blockSize) {
+ public PForDeltaFixedIntBlockWithIntBufferCodec(int blockSize) {
this.blockSize = blockSize;
- name = "NewPForDelta";
+ name = "PatchedFrameOfRef4";
}
@Override
@@ -65,12 +47,12 @@ public String toString() {
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new PForDeltaFixedIntBlockFactory(blockSize));
+ PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new PForDeltaFixedIntBlockWithIntBufferFactory(blockSize));
boolean success = false;
TermsIndexWriterBase indexWriter;
try {
- indexWriter = new FixedGapTermsIndexWriter(state);
+ indexWriter = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(state.termIndexInterval));
success = true;
} finally {
if (!success) {
@@ -99,16 +81,16 @@ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException
PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
state.segmentInfo,
state.readBufferSize,
- new PForDeltaFixedIntBlockFactory(blockSize), state.codecId);
+ new PForDeltaFixedIntBlockWithIntBufferFactory(blockSize), state.codecId);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
- indexReader = new FixedGapTermsIndexReader(state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- state.termsIndexDivisor,
- BytesRef.getUTF8SortedAsUnicodeComparator(), state.codecId);
+ indexReader = new VariableGapTermsIndexReader(state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ state.termsIndexDivisor,
+ state.codecId);
success = true;
} finally {
if (!success) {
@@ -144,14 +126,14 @@ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
BlockTermsReader.files(dir, segmentInfo, codecId, files);
- FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+ VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
BlockTermsReader.getExtensions(extensions);
- FixedGapTermsIndexReader.getIndexExtensions(extensions);
+ VariableGapTermsIndexReader.getIndexExtensions(extensions);
}
}
@@ -1,6 +1,5 @@
package com.kamikaze.lucenecodec;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -25,21 +24,21 @@
import java.io.IOException;
-public class PForDeltaFixedIntBlockFactory extends IntStreamFactory {
+public class PForDeltaFixedIntBlockWithIntBufferFactory extends IntStreamFactory {
private final int blockSize;
/** blockSize is only used when creating the
* IntIndexOutput */
- public PForDeltaFixedIntBlockFactory(int blockSize) {
+ public PForDeltaFixedIntBlockWithIntBufferFactory(int blockSize) {
this.blockSize = blockSize;
}
public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
- return new PForDeltaFixedIntBlockIndexInput(dir, fileName, readBufferSize);
+ return new PForDeltaFixedIntBlockWithIntBufferIndexInput(dir, fileName, readBufferSize);
}
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
- return new PForDeltaFixedIntBlockIndexOutput(dir, fileName, blockSize);
+ return new PForDeltaFixedIntBlockWithIntBufferIndexOutput(dir, fileName, blockSize);
}
-}
+}
@@ -1,6 +1,5 @@
package com.kamikaze.lucenecodec;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,26 +19,44 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.pfor2.LCPForDelta;
import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
-import com.kamikaze.pfordelta.LCPForDelta;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
-public class PForDeltaFixedIntBlockIndexInput extends FixedIntBlockIndexInput {
+public class PForDeltaFixedIntBlockWithIntBufferIndexInput extends FixedIntBlockIndexInput {
- public PForDeltaFixedIntBlockIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException {
+ public PForDeltaFixedIntBlockWithIntBufferIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException {
super(dir.openInput(fileName, readBufferSize));
+
}
private static class BlockReader implements FixedIntBlockIndexInput.BlockReader {
private final LCPForDelta decompressor;
private final IndexInput input;
- private final int[] decompBuffer;
+ private final int[] decompBlock;
+
+ private final ByteBuffer byteCompBuffer;
+ private final IntBuffer intCompBuffer;
+ private final byte[] byteCompBlock;
+ private final int[] expPosIntBlock;
+ private final int[] expHighBitIntBlock;
+
+ private static final int MAX_BLOCK_SIZE = 128;
public BlockReader(IndexInput in, int[] buffer) {
decompressor = new LCPForDelta();
input = in;
- decompBuffer = buffer;
+ decompBlock = buffer;
+
+ byteCompBuffer = ByteBuffer.allocate(MAX_BLOCK_SIZE*4*4);
+ byteCompBlock = byteCompBuffer.array();
+ intCompBuffer = byteCompBuffer.asIntBuffer();
+
+ expPosIntBlock = new int[MAX_BLOCK_SIZE];
+ expHighBitIntBlock = new int[MAX_BLOCK_SIZE];
}
public void seek(long pos) throws IOException {
@@ -51,28 +68,16 @@ public void readBlock() throws IOException {
// read the compressed data
final int compressedSizeInInt = input.readInt();
- // two ways to read the data
- // first way
-// int[] compBuffer = new int[compressedSizeInInt];
-// for(int i=0;i<compressedSizeInInt;i++) {
-// compBuffer[i] = input.readInt();
-// }
-
- // second way
- byte[] byteBuffer = new byte[compressedSizeInInt*4];
- input.readBytes(byteBuffer, 0, compressedSizeInInt*4);
- // convert the byte array into int array
- int[] compBuffer = new int[compressedSizeInInt];
- int i,j;
- for(i=0,j=0; j<compressedSizeInInt; j++)
- {
- compBuffer[j] = ((byteBuffer[i++] & 0xff)<<24) | ((byteBuffer[i++] & 0xff)<<16)
- | ((byteBuffer[i++] & 0xff)<<8) | (byteBuffer[i++] & 0xff);
- }
+ int blockSize = 128;
+ input.readBytes(byteCompBlock, 0, compressedSizeInInt*4);
+ intCompBuffer.rewind();
- // decompress
- decompressor.decompressOneBlock(decompBuffer, compBuffer);
- compBuffer = null;
+ decompressor.decompressOneBlockWithSizeWithIntBuffer(decompBlock, intCompBuffer, blockSize, expPosIntBlock, expHighBitIntBlock, compressedSizeInInt);
+ }
+
+ public void skipBlock() throws IOException {
+ int numInts = input.readInt(); // nocommit: should PFOR use vint header?
+ input.seek(input.getFilePointer() + numInts*4); // seek past block
}
}
@@ -20,14 +20,15 @@
import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
import org.apache.lucene.store.Directory;
-import com.kamikaze.pfordelta.LCPForDelta;
-
+import org.apache.lucene.util.pfor2.LCPForDelta;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
-public class PForDeltaFixedIntBlockIndexOutput extends FixedIntBlockIndexOutput {
+public class PForDeltaFixedIntBlockWithIntBufferIndexOutput extends FixedIntBlockIndexOutput {
private final LCPForDelta compressor;
private final int blockSize;
- public PForDeltaFixedIntBlockIndexOutput(Directory dir, String fileName, int blockSize) throws IOException {
+ public PForDeltaFixedIntBlockWithIntBufferIndexOutput(Directory dir, String fileName, int blockSize) throws IOException {
super(dir.createOutput(fileName), blockSize);
this.blockSize = blockSize;
compressor = new LCPForDelta();
@@ -39,29 +40,15 @@ protected void flushBlock() throws IOException {
// write out the compressed size in ints
out.writeInt(compressedSizeInInts);
- // we can use either of the following two ways to write out the compressed data
- // first way
-// int[] compBuffer = compressor.getCompBuffer();
-// for(int i=0;i<compressedSizeInInts;i++) {
-// out.writeInt(compBuffer[i]);
-// }
-
- // second way
- int[] compBuffer = compressor.getCompBuffer();
- // convert int array to byte array
- byte[] byteBuffer = new byte[compressedSizeInInts*4];
- int i, j;
- for(i=0, j=0; j<compressedSizeInInts; i+=4, j++)
- {
- int val = compBuffer[j];
- byteBuffer[i] = (byte)(val >>> 24);
- byteBuffer[i+1] = (byte)(val >>> 16);
- byteBuffer[i+2] = (byte)(val >>> 8);
- byteBuffer[i+3] = (byte)(val);
- }
- out.writeBytes(byteBuffer, byteBuffer.length);
+ int[] compBlock = compressor.getCompBuffer();
+ ByteBuffer byteCompBuffer = ByteBuffer.allocate(compressedSizeInInts*4);
+ byte[] byteCompBlock = byteCompBuffer.array();
+ IntBuffer intCompBuffer = byteCompBuffer.asIntBuffer();
+ intCompBuffer.put(compBlock, 0, compressedSizeInInts);
+ out.writeBytes(byteCompBlock, byteCompBlock.length);
compressor.setCompBuffer(null);
}
}
+
@@ -4,6 +4,8 @@
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;
+import com.kamikaze.lucenecodec.test.TestLucenePForDeltaCodec;
+
@RunWith(Suite.class)
@SuiteClasses( { TestLucenePForDeltaCodec.class})
public class TestLuceneCodecSuite {
@@ -4,25 +4,22 @@
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.codecs.sep.*;
-import org.apache.lucene.store.*;
-
-import org.junit.Ignore;
import org.apache.lucene.index.codecs.sep.IntStreamFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.After;
import org.junit.Before;
import java.util.Random;
-import com.kamikaze.lucenecodec.PForDeltaFixedIntBlockFactory;
+import com.kamikaze.lucenecodec.PForDeltaFixedIntBlockWithIntBufferFactory;
public class TestLucenePForDeltaCodec extends TestCase{
public void testPForDeltaSimpleIntBlocks() throws Exception {
System.out.println("running test case : testPForDeltaSimpleIntBlocks for PForDeltaFixedIntBlockCodec");
Directory dir = new RAMDirectory();
int blockSize = 128;
- IntStreamFactory f = new PForDeltaFixedIntBlockFactory(blockSize);
+ IntStreamFactory f = new PForDeltaFixedIntBlockWithIntBufferFactory(blockSize);
int testDataSize = 80024;
int[] testData = new int[testDataSize];
Random random = new Random(0);
@@ -62,7 +59,7 @@ public void testPForDeltaEmptySimpleIntBlocks() throws Exception {
System.out.println("running test case : testPForDeltaEmptySimpleIntBlocks for PForDeltaFixedIntBlockCodec");
Directory dir = new RAMDirectory();
int blockSize = 128;
- IntStreamFactory f = new PForDeltaFixedIntBlockFactory(blockSize);
+ IntStreamFactory f = new PForDeltaFixedIntBlockWithIntBufferFactory(blockSize);
IntIndexOutput out = f.createOutput(dir, "test");
// write no ints
Oops, something went wrong.

0 comments on commit 020f234

Please sign in to comment.