Skip to content

Commit

Permalink
Remove patching for doc blocks. (#12741)
Browse files Browse the repository at this point in the history
* Change Postings back to using FOR in Lucene99PostingsFormat

We are still keeping PFOR for positions only.
This is a partial revert of #69 which brings back ForDeltaUtil.

* fix merge commit

* Add forgotten forDeltaUtil calls to reader

* Addressing comments: adding Lucene90RWPostingsFormat + more

Also:
* Change to Changes.txt
* Removal of dead code which was only used in unit tests
* Removal of test code from PForUtil

* Changes.txt edit in right place now

* Apply suggestions from code review: `90 -> 99 refactoring`

Co-authored-by: gf2121 <52390227+gf2121@users.noreply.github.com>

* Remove decodeTo32 from ForUtil and regenerate

---------

Co-authored-by: gf2121 <52390227+gf2121@users.noreply.github.com>
  • Loading branch information
slow-J and gf2121 committed Nov 6, 2023
1 parent 3acc3c8 commit 8ae598b
Show file tree
Hide file tree
Showing 71 changed files with 6,221 additions and 264 deletions.
28 changes: 25 additions & 3 deletions gradle/generation/forUtil.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ configure(project(":lucene:core")) {
description "Regenerate gen_ForUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/codecs/lucene90")
def genDir = file("src/java/org/apache/lucene/codecs/lucene99")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

Expand All @@ -44,7 +44,7 @@ configure(project(":lucene:core")) {

configure(project(":lucene:backward-codecs")) {

task generateForUtilInternal() {
task generateForUtil84Internal() {
description "Regenerate gen_ForUtil.py"
group "generation"

Expand All @@ -64,6 +64,28 @@ configure(project(":lucene:backward-codecs")) {
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtilInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil84Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])

// Regenerates the Lucene 9.0 backward-codecs ForUtil.java by running its
// Python generator script (gen_ForUtil.py) with python3.
task generateForUtil90Internal() {
  // The generated artifact is ForUtil.java; gen_ForUtil.py is the generator that gets executed.
  description "Regenerate ForUtil.java from gen_ForUtil.py (lucene90 backward codec)"
  group "generation"

  // Generator script and generated source live side by side in the same package directory.
  def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene90")
  def genScript = file("${genDir}/gen_ForUtil.py")
  def genOutput = file("${genDir}/ForUtil.java")

  // Declare the script as the task input and the generated Java file as the task output.
  inputs.file genScript
  outputs.file genOutput

  doLast {
    quietExec {
      // Run the generator from inside the package directory.
      // NOTE(review): presumably the script writes ForUtil.java relative to its working directory - confirm.
      workingDir genDir
      executable project.externalTool("python3")
      // -B keeps Python from writing .pyc bytecode files next to the sources.
      args = [ '-B', genScript ]
    }
  }
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil90Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
}

3 changes: 2 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,12 @@ Optimizations
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand)

* GITHUB#12696: Change Postings back to using FOR in Lucene99PostingsFormat. Freqs, positions and offsets keep using PFOR. (Jakub Slowinski)

* GITHUB#1052: Faster merging of terms enums. (Adrien Grand)

* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)


Changes in runtime behavior
---------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java": "861cab516c7424e6323831c16f0f521499391a90",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py": "b66e2f8012759d6d5ce0d73fabb329ae4a391aa0"
}
3 changes: 2 additions & 1 deletion lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat,
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat,
org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat,
org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
Expand All @@ -24,7 +24,6 @@
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader;
import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter;
import org.apache.lucene.index.IndexOptions;
Expand All @@ -36,7 +35,9 @@
import org.apache.lucene.util.packed.PackedInts;

/**
* Lucene 5.0 postings format, which encodes postings in packed integer blocks for fast decode.
* Lucene 9.0 postings format, which encodes postings in packed integer blocks for fast decode.
*
* <p>Note: Lucene90PostingsFormat is now READ ONLY.
*
* <p>Basic idea:
*
Expand Down Expand Up @@ -371,30 +372,11 @@ public final class Lucene90PostingsFormat extends PostingsFormat {

// Increment version to change it
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;

private final int minTermBlockSize;
private final int maxTermBlockSize;
static final int VERSION_CURRENT = 1;

/** Creates {@code Lucene90PostingsFormat} with default settings. */
/** Creates read-only {@code Lucene90PostingsFormat}. */
public Lucene90PostingsFormat() {
this(
Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}

/**
* Creates {@code Lucene90PostingsFormat} with custom values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
*
* @see
* Lucene90BlockTreeTermsWriter#Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)
*/
public Lucene90PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
super("Lucene90");
Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize);
this.minTermBlockSize = minTermBlockSize;
this.maxTermBlockSize = maxTermBlockSize;
}

@Override
Expand All @@ -403,20 +385,8 @@ public String toString() {
}

@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret =
new Lucene90BlockTreeTermsWriter(
state, postingsWriter, minTermBlockSize, maxTermBlockSize);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsWriter);
}
}
public FieldsConsumer fieldsConsumer(SegmentWriteState state) {
throw new UnsupportedOperationException("Old codecs may only be used for reading");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_START;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_START;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;

import java.io.IOException;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.AbstractList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down Expand Up @@ -92,8 +92,7 @@ public Lucene90SkipWriter(
}
}

public void setField(
boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
this.fieldHasPositions = fieldHasPositions;
this.fieldHasOffsets = fieldHasOffsets;
this.fieldHasPayloads = fieldHasPayloads;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;
import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@
* field names. These are used to store auxiliary information about the document, such as its
* title, url, or an identifier to access a database. The set of stored fields are what is
* returned for each hit when searching. This is keyed by document number.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term dictionary}. A
* dictionary containing all of the terms used in all of the indexed fields of all of the
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term dictionary}.
* A dictionary containing all of the terms used in all of the indexed fields of all of the
* documents. The dictionary also contains the number of documents which contain the term, and
* pointers to the term's frequency and proximity data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Frequency data}. For
* each term in the dictionary, the numbers of all the documents that contain that term, and
* the frequency of the term in that document, unless frequencies are omitted ({@link
* org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Proximity data}. For
* each term in the dictionary, the positions that the term occurs in each document. Note that
* this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Frequency
* data}. For each term in the dictionary, the numbers of all the documents that contain that
* term, and the frequency of the term in that document, unless frequencies are omitted
* ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Proximity
* data}. For each term in the dictionary, the positions that the term occurs in each
* document. Note that this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
* each field in each document, a value is stored that is multiplied into the score for hits
* on that field.
Expand Down Expand Up @@ -255,27 +255,27 @@
* <td>The stored fields for documents</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>.tim</td>
* <td>The term dictionary, stores term info</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>.tip</td>
* <td>The index into the Term Dictionary</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>.doc</td>
* <td>Contains the list of docs which contain each term along with frequency</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>.pos</td>
* <td>Stores position information about where a term occurs in the index</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>.pay</td>
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
* </tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down

0 comments on commit 8ae598b

Please sign in to comment.