Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove patching for doc blocks. #12741

Merged
merged 8 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 25 additions & 3 deletions gradle/generation/forUtil.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ configure(project(":lucene:core")) {
description "Regenerate gen_ForUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/codecs/lucene90")
def genDir = file("src/java/org/apache/lucene/codecs/lucene99")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

Expand All @@ -44,7 +44,7 @@ configure(project(":lucene:core")) {

configure(project(":lucene:backward-codecs")) {

task generateForUtilInternal() {
task generateForUtil84Internal() {
description "Regenerate gen_ForUtil.py"
group "generation"

Expand All @@ -64,6 +64,28 @@ configure(project(":lucene:backward-codecs")) {
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtilInternal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil84Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])

task generateForUtil90Internal() {
description "Regenerate gen_ForUtil.py"
group "generation"

def genDir = file("src/java/org/apache/lucene/backward_codecs/lucene90")
def genScript = file("${genDir}/gen_ForUtil.py")
def genOutput = file("${genDir}/ForUtil.java")

inputs.file genScript
outputs.file genOutput

doLast {
quietExec {
workingDir genDir
executable project.externalTool("python3")
args = [ '-B', genScript ]
}
}
}

regenerate.dependsOn wrapWithPersistentChecksums(generateForUtil90Internal, [ andThenTasks: ["spotlessJava", "spotlessJavaApply"] ])
}

3 changes: 2 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,12 @@ Optimizations
* GITHUB#12719: Top-level conjunctions that are not sorted by score now have a
specialized bulk scorer. (Adrien Grand)

* GITHUB#12696: Change Postings back to using FOR in Lucene99PostingsFormat. Freqs, positions and offset keep using PFOR. (Jakub Slowinski)

* GITHUB#1052: Faster merging of terms enums. (Adrien Grand)

* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)


Changes in runtime behavior
---------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/ForUtil.java": "861cab516c7424e6323831c16f0f521499391a90",
"lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/gen_ForUtil.py": "b66e2f8012759d6d5ce0d73fabb329ae4a391aa0"
}
3 changes: 2 additions & 1 deletion lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat,
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat,
org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat,
org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
Expand All @@ -24,7 +24,6 @@
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader;
import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter;
import org.apache.lucene.index.IndexOptions;
Expand All @@ -36,7 +35,9 @@
import org.apache.lucene.util.packed.PackedInts;

/**
* Lucene 5.0 postings format, which encodes postings in packed integer blocks for fast decode.
* Lucene 9.0 postings format, which encodes postings in packed integer blocks for fast decode.
*
* <p>Note: Lucene90PostingsFormat is now READ ONLY.
*
* <p>Basic idea:
*
Expand Down Expand Up @@ -371,30 +372,11 @@ public final class Lucene90PostingsFormat extends PostingsFormat {

// Increment version to change it
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;

private final int minTermBlockSize;
private final int maxTermBlockSize;
static final int VERSION_CURRENT = 1;

/** Creates {@code Lucene90PostingsFormat} with default settings. */
/** Creates read-only {@code Lucene90PostingsFormat}. */
public Lucene90PostingsFormat() {
this(
Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}

/**
* Creates {@code Lucene90PostingsFormat} with custom values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
*
* @see
* Lucene90BlockTreeTermsWriter#Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)
*/
public Lucene90PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
super("Lucene90");
Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize);
this.minTermBlockSize = minTermBlockSize;
this.maxTermBlockSize = maxTermBlockSize;
}

@Override
Expand All @@ -403,20 +385,8 @@ public String toString() {
}

@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret =
new Lucene90BlockTreeTermsWriter(
state, postingsWriter, minTermBlockSize, maxTermBlockSize);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsWriter);
}
}
public FieldsConsumer fieldsConsumer(SegmentWriteState state) {
throw new UnsupportedOperationException("Old codecs may only be used for reading");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_START;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_START;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene90.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;

import java.io.IOException;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.AbstractList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down Expand Up @@ -92,8 +92,7 @@ public Lucene90SkipWriter(
}
}

public void setField(
boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
this.fieldHasPositions = fieldHasPositions;
this.fieldHasOffsets = fieldHasOffsets;
this.fieldHasPayloads = fieldHasPayloads;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene90;
package org.apache.lucene.backward_codecs.lucene90;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@
* field names. These are used to store auxiliary information about the document, such as its
* title, url, or an identifier to access a database. The set of stored fields are what is
* returned for each hit when searching. This is keyed by document number.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term dictionary}. A
* dictionary containing all of the terms used in all of the indexed fields of all of the
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term dictionary}.
* A dictionary containing all of the terms used in all of the indexed fields of all of the
* documents. The dictionary also contains the number of documents which contain the term, and
* pointers to the term's frequency and proximity data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Frequency data}. For
* each term in the dictionary, the numbers of all the documents that contain that term, and
* the frequency of the term in that document, unless frequencies are omitted ({@link
* org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Proximity data}. For
* each term in the dictionary, the positions that the term occurs in each document. Note that
* this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Frequency
* data}. For each term in the dictionary, the numbers of all the documents that contain that
* term, and the frequency of the term in that document, unless frequencies are omitted
* ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* <li>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Proximity
* data}. For each term in the dictionary, the positions that the term occurs in each
* document. Note that this will not exist if all fields in all documents omit position data.
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
* each field in each document, a value is stored that is multiplied into the score for hits
* on that field.
Expand Down Expand Up @@ -255,27 +255,27 @@
* <td>The stored fields for documents</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Dictionary}</td>
* <td>.tim</td>
* <td>The term dictionary, stores term info</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Term Index}</td>
* <td>.tip</td>
* <td>The index into the Term Dictionary</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Frequencies}</td>
* <td>.doc</td>
* <td>Contains the list of docs which contain each term along with frequency</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Positions}</td>
* <td>.pos</td>
* <td>Stores position information about where a term occurs in the index</td>
* </tr>
* <tr>
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>{@link org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat Payloads}</td>
* <td>.pay</td>
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
* </tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.backward_codecs.lucene90.Lucene90FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90SegmentInfoFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
Expand All @@ -37,7 +38,6 @@
import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down