Upgrade to Lucene r1660560 #9746

Closed · wants to merge 6 commits
pom.xml (4 changes: 2 additions & 2 deletions)
@@ -32,7 +32,7 @@

<properties>
<lucene.version>5.1.0</lucene.version>
- <lucene.maven.version>5.1.0-snapshot-1657571</lucene.maven.version>
+ <lucene.maven.version>5.1.0-snapshot-1660560</lucene.maven.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
@@ -56,7 +56,7 @@
<repository>
<id>lucene-snapshots</id>
<name>Lucene Snapshots</name>
- <url>https://download.elasticsearch.org/lucenesnapshots/1657571</url>
+ <url>https://download.elasticsearch.org/lucenesnapshots/1660560</url>
</repository>
</repositories>

@@ -391,7 +391,7 @@ protected Map<Integer,Object> highlightField(String field, String contents[], Br
Map<Integer,Object> highlights = new HashMap<>();

// reuse in the real sense... for docs in same segment we just advance our old enum
- DocsAndPositionsEnum postings[] = null;
+ PostingsEnum postings[] = null;
TermsEnum termsEnum = null;
int lastLeaf = -1;

@@ -416,7 +416,7 @@ protected Map<Integer,Object> highlightField(String field, String contents[], Br
}
if (leaf != lastLeaf) {
termsEnum = t.iterator(null);
- postings = new DocsAndPositionsEnum[terms.length];
+ postings = new PostingsEnum[terms.length];
}
Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
if (passages.length == 0) {
@@ -437,7 +437,7 @@ protected Map<Integer,Object> highlightField(String field, String contents[], Br
// we can intersect these with the postings lists via BreakIterator.preceding(offset)
// score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc,
- TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException {
+ TermsEnum termsEnum, PostingsEnum[] postings, int n) throws IOException {

//BEGIN EDIT added call to method that returns the offset for the current value (discrete highlighting)
int valueOffset = getOffsetForCurrentValue(field, doc);
@@ -462,7 +462,7 @@ private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength
float weights[] = new float[terms.length];
// initialize postings
for (int i = 0; i < terms.length; i++) {
- DocsAndPositionsEnum de = postings[i];
+ PostingsEnum de = postings[i];
int pDoc;
if (de == EMPTY) {
continue;
@@ -471,7 +471,7 @@ private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength
if (!termsEnum.seekExact(terms[i])) {
continue; // term not found
}
- de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
+ de = postings[i] = termsEnum.postings(null, null, PostingsEnum.OFFSETS);
if (de == null) {
// no positions available
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
@@ -512,7 +512,7 @@ public int compare(Passage left, Passage right) {

OffsetsEnum off;
while ((off = pq.poll()) != null) {
- final DocsAndPositionsEnum dp = off.dp;
+ final PostingsEnum dp = off.dp;

int start = dp.startOffset();
if (start == -1) {
@@ -651,11 +651,11 @@ protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int ma
}

private static class OffsetsEnum implements Comparable<OffsetsEnum> {
- DocsAndPositionsEnum dp;
+ PostingsEnum dp;
int pos;
int id;

- OffsetsEnum(DocsAndPositionsEnum dp, int id) throws IOException {
+ OffsetsEnum(PostingsEnum dp, int id) throws IOException {
this.dp = dp;
this.id = id;
this.pos = 1;
@@ -677,7 +677,7 @@ public int compareTo(OffsetsEnum other) {
}
}

- private static final DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnum() {
+ private static final PostingsEnum EMPTY = new PostingsEnum() {

@Override
public int nextPosition() throws IOException { return 0; }
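The renames in this file all stem from Lucene collapsing DocsEnum and DocsAndPositionsEnum into a single PostingsEnum whose capabilities are selected via flags. A minimal sketch of the call-site migration, using the same 5.1-snapshot signatures the patch itself uses (termsEnum and targetDoc are illustrative; the null check mirrors the patch's own handling of fields indexed without positions):

    import java.io.IOException;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.TermsEnum;

    class PostingsMigrationSketch {
        // Lucene 4.x shape of the call below:
        //   DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
        static void readOffsets(TermsEnum termsEnum, int targetDoc) throws IOException {
            PostingsEnum pe = termsEnum.postings(null, null, PostingsEnum.OFFSETS);
            if (pe != null && pe.advance(targetDoc) == targetDoc) {
                for (int i = 0; i < pe.freq(); i++) {
                    pe.nextPosition();            // advance the position cursor first
                    int start = pe.startOffset(); // -1 when offsets were not indexed
                    int end = pe.endOffset();
                }
            }
        }
    }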
@@ -77,12 +77,7 @@ void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQuerie
if (sourceQuery instanceof SpanTermQuery) {
super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries);
} else if (sourceQuery instanceof ConstantScoreQuery) {
- ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) sourceQuery;
- if (constantScoreQuery.getFilter() != null) {
- flatten(constantScoreQuery.getFilter(), reader, flatQueries);
- } else {
- flatten(constantScoreQuery.getQuery(), reader, flatQueries);
- }
+ flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries);
} else if (sourceQuery instanceof FunctionScoreQuery) {
flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
} else if (sourceQuery instanceof FilteredQuery) {
@@ -146,7 +141,9 @@ void flatten(Filter sourceFilter, IndexReader reader, Collection<Query> flatQuer
return;
}
if (sourceFilter instanceof TermFilter) {
- flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), reader, flatQueries);
+ // TermFilter is just a deprecated wrapper over QueryWrapperFilter
+ TermQuery actualQuery = (TermQuery) ((TermFilter) sourceFilter).getQuery();
+ flatten(new TermQuery(actualQuery.getTerm()), reader, flatQueries);
} else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
if (multiTermQueryWrapperFilterQueryField != null) {
try {
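Both hunks in this file track the 5.x move toward filters being plain queries: ConstantScoreQuery lost its separate getFilter() accessor, and the deprecated TermFilter now delegates to QueryWrapperFilter, so the wrapped TermQuery is reachable through getQuery(). A hedged sketch of that unwrap (the TermFilter import path and its Term constructor are assumptions based on the Lucene queries module of this era):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.queries.TermFilter; // assumed location
    import org.apache.lucene.search.TermQuery;

    class TermFilterUnwrapSketch {
        static Term unwrap(TermFilter filter) {
            // TermFilter is a thin wrapper over QueryWrapperFilter, so the
            // original TermQuery survives inside it.
            TermQuery wrapped = (TermQuery) filter.getQuery();
            return wrapped.getTerm();
        }
    }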
@@ -324,14 +324,9 @@ public long totalTermFreq() throws IOException {
}

@Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
- return docsAndPositions(liveDocs, reuse instanceof DocsAndPositionsEnum ? (DocsAndPositionsEnum) reuse : null, 0);
- }
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- final TermVectorDocsAndPosEnum retVal = (reuse instanceof TermVectorDocsAndPosEnum ? (TermVectorDocsAndPosEnum) reuse
- : new TermVectorDocsAndPosEnum());
+ public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
+ final TermVectorPostingsEnum retVal = (reuse instanceof TermVectorPostingsEnum ? (TermVectorPostingsEnum) reuse
+ : new TermVectorPostingsEnum());
return retVal.reset(hasPositions ? positions : null, hasOffsets ? startOffsets : null, hasOffsets ? endOffsets
: null, hasPayloads ? payloads : null, freq);
}
@@ -380,7 +375,7 @@ public boolean hasPayloads() {
}
}

- private final class TermVectorDocsAndPosEnum extends DocsAndPositionsEnum {
+ private final class TermVectorPostingsEnum extends PostingsEnum {
private boolean hasPositions;
private boolean hasOffsets;
private boolean hasPayloads;
@@ -392,7 +387,7 @@ private final class TermVectorDocsAndPosEnum extends DocsAndPositionsEnum {
private BytesRefBuilder[] payloads;
private int[] endOffsets;

- private DocsAndPositionsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRefBuilder[] payloads, int freq) {
+ private PostingsEnum reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRefBuilder[] payloads, int freq) {
curPos = -1;
doc = -1;
this.hasPositions = positions != null;
@@ -488,4 +483,4 @@ long readPotentiallyNegativeVLong(BytesStreamInput stream) throws IOException {
return stream.readVLong() - 1;
}

- }
+ }
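With the merge, a consumer reads positions, offsets, and payloads through one object. A sketch of walking a stored term vector under the signatures used in this file (the reader and field name are illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    class TermVectorReadSketch {
        static void dump(IndexReader reader, int docId) throws IOException {
            Terms vector = reader.getTermVector(docId, "body"); // "body" is a made-up field
            if (vector == null) return;
            TermsEnum te = vector.iterator(null);               // 5.1-era signature
            while (te.next() != null) {
                PostingsEnum pe = te.postings(null, null, PostingsEnum.ALL);
                pe.nextDoc();                                   // a term vector exposes exactly one doc
                for (int i = 0; i < pe.freq(); i++) {
                    int pos = pe.nextPosition();                // -1 when positions are absent
                    int startOffset = pe.startOffset();         // -1 when offsets are absent
                    BytesRef payload = pe.getPayload();         // null when payloads are absent
                }
            }
        }
    }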
@@ -20,8 +20,9 @@
package org.elasticsearch.action.termvectors;

import com.google.common.collect.Iterators;
- import org.apache.lucene.index.DocsAndPositionsEnum;
+
import org.apache.lucene.index.Fields;
+ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
@@ -215,7 +216,7 @@ private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Ter
builder.startObject(spare.toString());
buildTermStatistics(builder, termIter);
// finally write the term vectors
- DocsAndPositionsEnum posEnum = termIter.docsAndPositions(null, null);
+ PostingsEnum posEnum = termIter.postings(null, null, PostingsEnum.ALL);
int termFreq = posEnum.freq();
builder.field(FieldStrings.TERM_FREQ, termFreq);
initMemory(curTerms, termFreq);
@@ -260,7 +261,7 @@ private void buildValues(XContentBuilder builder, Terms curTerms, int termFreq)
builder.endArray();
}

- private void initValues(Terms curTerms, DocsAndPositionsEnum posEnum, int termFreq) throws IOException {
+ private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
for (int j = 0; j < termFreq; j++) {
int nextPos = posEnum.nextPosition();
if (curTerms.hasPositions()) {
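The old two-argument docsAndPositions(null, null) implicitly requested everything; its replacement names the features explicitly. Under the 5.x flag layout, where OFFSETS and PAYLOADS each imply POSITIONS, PostingsEnum.ALL is the equivalent shorthand. A quick self-check, assuming those constants exist as in the released 5.x line:

    import org.apache.lucene.index.PostingsEnum;

    class PostingsFlagsSketch {
        public static void main(String[] args) {
            int everything = PostingsEnum.POSITIONS | PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS;
            // ALL is defined as OFFSETS | PAYLOADS, and both of those include POSITIONS.
            assert everything == PostingsEnum.ALL;
        }
    }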
@@ -20,6 +20,7 @@

import org.apache.lucene.index.*;
import org.apache.lucene.search.CollectionStatistics;
+ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.termvectors.TermVectorsRequest.Flag;
@@ -52,8 +53,8 @@ final class TermVectorsWriter {
void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs) throws IOException {
int numFieldsWritten = 0;
TermsEnum iterator = null;
- DocsAndPositionsEnum docsAndPosEnum = null;
- DocsEnum docsEnum = null;
+ PostingsEnum docsAndPosEnum = null;
+ PostingsEnum docsEnum = null;
TermsEnum topLevelIterator = null;
for (String field : termVectorsByField) {
if ((selectedFields != null) && (!selectedFields.contains(field))) {
@@ -100,7 +101,7 @@ void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Fl
docsAndPosEnum = writeTermWithDocsAndPos(iterator, docsAndPosEnum, positions, offsets, payloads);
} else {
// if we do not have the positions stored, we need to
- // get the frequency from a DocsEnum.
+ // get the frequency from a PostingsEnum.
docsEnum = writeTermWithDocsOnly(iterator, docsEnum);
}
}
@@ -127,23 +128,23 @@ private BytesReference writeHeader(int numFieldsWritten, boolean getTermStatisti
return header.bytes();
}

- private DocsEnum writeTermWithDocsOnly(TermsEnum iterator, DocsEnum docsEnum) throws IOException {
- docsEnum = iterator.docs(null, docsEnum);
+ private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException {
+ docsEnum = iterator.postings(null, docsEnum);
int nextDoc = docsEnum.nextDoc();
- assert nextDoc != DocsEnum.NO_MORE_DOCS;
+ assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
writeFreq(docsEnum.freq());
nextDoc = docsEnum.nextDoc();
- assert nextDoc == DocsEnum.NO_MORE_DOCS;
+ assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
return docsEnum;
}

- private DocsAndPositionsEnum writeTermWithDocsAndPos(TermsEnum iterator, DocsAndPositionsEnum docsAndPosEnum, boolean positions,
+ private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
boolean offsets, boolean payloads) throws IOException {
- docsAndPosEnum = iterator.docsAndPositions(null, docsAndPosEnum);
+ docsAndPosEnum = iterator.postings(null, docsAndPosEnum, PostingsEnum.ALL);
// for each term (iterator next) in this field (field)
// iterate over the docs (should only be one)
int nextDoc = docsAndPosEnum.nextDoc();
- assert nextDoc != DocsEnum.NO_MORE_DOCS;
+ assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
final int freq = docsAndPosEnum.freq();
writeFreq(freq);
for (int j = 0; j < freq; j++) {
@@ -159,7 +160,7 @@ private DocsAndPositionsEnum writeTermWithDocsAndPos(TermsEnum iterator, DocsAnd
}
}
nextDoc = docsAndPosEnum.nextDoc();
- assert nextDoc == DocsEnum.NO_MORE_DOCS;
+ assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
return docsAndPosEnum;
}

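Two incidental details in this file: with DocsEnum gone, the exhaustion sentinel is reached through the shared DocIdSetIterator base class, and the two-argument postings() overload is a convenience that is assumed to request frequencies only. A sketch of the docs-only path, mirroring writeTermWithDocsOnly without the serialization:

    import java.io.IOException;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;

    class DocsOnlySketch {
        static int singleDocFreq(TermsEnum iterator, PostingsEnum reuse) throws IOException {
            PostingsEnum docsEnum = iterator.postings(null, reuse); // freqs-only convenience (assumed)
            int doc = docsEnum.nextDoc();
            assert doc != DocIdSetIterator.NO_MORE_DOCS;            // the sentinel's new home
            int freq = docsEnum.freq();
            assert docsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; // term vectors hold one doc
            return freq;
        }
    }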
src/main/java/org/elasticsearch/common/lucene/Lucene.java (40 changes: 21 additions & 19 deletions)
@@ -273,10 +273,6 @@ public static boolean safeClose(IndexWriter writer) {
}

public static TopDocs readTopDocs(StreamInput in) throws IOException {
- if (!in.readBoolean()) {
- // no docs
- return null;
- }
if (in.readBoolean()) {
int totalHits = in.readVInt();
float maxScore = in.readFloat();
@@ -343,11 +339,7 @@ public static ScoreDoc readScoreDoc(StreamInput in) throws IOException {
}

public static void writeTopDocs(StreamOutput out, TopDocs topDocs, int from) throws IOException {
- if (topDocs.scoreDocs.length - from < 0) {
- out.writeBoolean(false);
- return;
- }
- out.writeBoolean(true);
+ from = Math.min(from, topDocs.scoreDocs.length);
if (topDocs instanceof TopFieldDocs) {
out.writeBoolean(true);
TopFieldDocs topFieldDocs = (TopFieldDocs) topDocs;
@@ -372,11 +364,8 @@ public static void writeTopDocs(StreamOutput out, TopDocs topDocs, int from) thr
}

out.writeVInt(topDocs.scoreDocs.length - from);
- int index = 0;
- for (ScoreDoc doc : topFieldDocs.scoreDocs) {
- if (index++ < from) {
- continue;
- }
+ for (int i = from; i < topFieldDocs.scoreDocs.length; ++i) {
+ ScoreDoc doc = topFieldDocs.scoreDocs[i];
writeFieldDoc(out, (FieldDoc) doc);
}
} else {
@@ -385,11 +374,8 @@ public static void writeTopDocs(StreamOutput out, TopDocs topDocs, int from) thr
out.writeFloat(topDocs.getMaxScore());

out.writeVInt(topDocs.scoreDocs.length - from);
- int index = 0;
- for (ScoreDoc doc : topDocs.scoreDocs) {
- if (index++ < from) {
- continue;
- }
+ for (int i = from; i < topDocs.scoreDocs.length; ++i) {
+ ScoreDoc doc = topDocs.scoreDocs[i];
writeScoreDoc(out, doc);
}
}
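The dropped guard used to write a "no docs" marker whenever from exceeded the hit count, which is why readTopDocs could return null; clamping from always serializes a well-formed, possibly empty, TopDocs instead. The arithmetic with made-up numbers:

    class TopDocsClampSketch {
        public static void main(String[] args) {
            int length = 3;                   // topDocs.scoreDocs.length
            int from = Math.min(5, length);   // caller asked to skip 5; clamped to 3
            int written = length - from;      // 0: safe for writeVInt, never negative
            for (int i = from; i < length; ++i) {
                throw new AssertionError("unreachable when from == length");
            }
            System.out.println(written);      // 0 docs serialized; reader gets an empty TopDocs
        }
    }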
@@ -634,6 +620,22 @@ public int docID() {
public int nextDoc() throws IOException {
throw new ElasticsearchIllegalStateException(message);
}
+ @Override
+ public int nextPosition() throws IOException {
+ throw new ElasticsearchIllegalStateException(message);
+ }
+ @Override
+ public int startOffset() throws IOException {
+ throw new ElasticsearchIllegalStateException(message);
+ }
+ @Override
+ public int endOffset() throws IOException {
+ throw new ElasticsearchIllegalStateException(message);
+ }
+ @Override
+ public BytesRef getPayload() throws IOException {
+ throw new ElasticsearchIllegalStateException(message);
+ }
};
}
}
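These overrides are needed because the positional API is now part of every PostingsEnum, including placeholders that will never serve a position. For comparison, a minimal doc-only implementation under the new contract might look like this (entirely illustrative, not part of the PR):

    import java.io.IOException;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BytesRef;

    // Illustrative: a docs-only enum over a single document id.
    final class SingleDocPostingsEnum extends PostingsEnum {
        private final int target;
        private int doc = -1;

        SingleDocPostingsEnum(int target) { this.target = target; }

        @Override public int docID() { return doc; }
        @Override public int nextDoc() {
            return doc = (doc == -1 ? target : DocIdSetIterator.NO_MORE_DOCS);
        }
        @Override public int advance(int t) throws IOException { return slowAdvance(t); }
        @Override public long cost() { return 1; }
        @Override public int freq() { return 1; }

        // Mandatory now, but a docs-only source simply answers "not available".
        @Override public int nextPosition() { return -1; }
        @Override public int startOffset() { return -1; }
        @Override public int endOffset() { return -1; }
        @Override public BytesRef getPayload() { return null; }
    }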
@@ -20,7 +20,7 @@
package org.elasticsearch.common.lucene.all;

import org.apache.lucene.index.LeafReaderContext;
- import org.apache.lucene.index.DocsAndPositionsEnum;
+ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
@@ -51,7 +51,9 @@ public AllTermQuery(Term term) {
}

@Override
- public Weight createWeight(IndexSearcher searcher) throws IOException {
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ // TODO: needsScores
+ // we should be able to just return a regular SpanTermWeight, at most here if needsScores == false?
return new AllTermWeight(this, searcher);
}

@@ -62,7 +64,7 @@ public AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOExcept
}

@Override
- public AllTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException {
+ public AllTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
if (this.stats == null) {
return null;
}
@@ -71,7 +73,7 @@ public AllTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs, bool
}

protected class AllTermSpanScorer extends SpanScorer {
- protected DocsAndPositionsEnum positions;
+ protected PostingsEnum positions;
protected float payloadScore;
protected int payloadsSeen;

@@ -146,7 +148,7 @@ protected float getPayloadScore() {

@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException{
- AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs(), true);
+ AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
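For orientation, the Weight plumbing in this snapshot threads needsScores through createWeight and drops it from scorer, which now takes only the segment context and live docs. A hedged usage sketch against the signatures shown above:

    import java.io.IOException;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.Weight;

    class WeightApiSketch {
        static Scorer firstSegmentScorer(IndexSearcher searcher, Query query) throws IOException {
            Weight w = query.createWeight(searcher, true);        // needsScores is now explicit
            LeafReaderContext ctx = searcher.getIndexReader().leaves().get(0);
            return w.scorer(ctx, ctx.reader().getLiveDocs());     // acceptDocs only, no needsScores
        }
    }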